From e2bbf175a2184bd76f6c54ccf8456babeb1a46fc Mon Sep 17 00:00:00 2001 From: Daniel Baumann Date: Tue, 9 Apr 2024 15:16:35 +0200 Subject: Adding upstream version 9.1. Signed-off-by: Daniel Baumann --- zebra/.gitignore | 3 + zebra/Makefile | 10 + zebra/connected.c | 628 +++ zebra/connected.h | 55 + zebra/debug.c | 849 ++++ zebra/debug.h | 151 + zebra/debug_nl.c | 1773 ++++++++ zebra/dpdk/zebra_dplane_dpdk.c | 720 +++ zebra/dpdk/zebra_dplane_dpdk.h | 23 + zebra/dpdk/zebra_dplane_dpdk_private.h | 48 + zebra/dpdk/zebra_dplane_dpdk_vty.c | 70 + zebra/dplane_fpm_nl.c | 1666 +++++++ zebra/if_ioctl.c | 300 ++ zebra/if_netlink.c | 2087 +++++++++ zebra/if_netlink.h | 51 + zebra/if_socket.c | 36 + zebra/if_sysctl.c | 131 + zebra/interface.c | 5731 ++++++++++++++++++++++++ zebra/interface.h | 356 ++ zebra/ioctl.c | 643 +++ zebra/ioctl.h | 32 + zebra/ipforward.h | 25 + zebra/ipforward_proc.c | 169 + zebra/ipforward_sysctl.c | 129 + zebra/irdp.h | 143 + zebra/irdp_interface.c | 716 +++ zebra/irdp_main.c | 335 ++ zebra/irdp_packet.c | 352 ++ zebra/kernel_netlink.c | 1946 +++++++++ zebra/kernel_netlink.h | 151 + zebra/kernel_socket.c | 1644 +++++++ zebra/kernel_socket.h | 34 + zebra/label_manager.c | 499 +++ zebra/label_manager.h | 114 + zebra/main.c | 490 +++ zebra/netconf_netlink.c | 227 + zebra/netconf_netlink.h | 36 + zebra/redistribute.c | 922 ++++ zebra/redistribute.h | 81 + zebra/rib.h | 641 +++ zebra/router-id.c | 607 +++ zebra/router-id.h | 35 + zebra/rt.h | 125 + zebra/rt_netlink.c | 5133 ++++++++++++++++++++++ zebra/rt_netlink.h | 152 + zebra/rt_socket.c | 420 ++ zebra/rtadv.c | 3071 +++++++++++++ zebra/rtadv.h | 443 ++ zebra/rtread_netlink.c | 78 + zebra/rtread_sysctl.c | 110 + zebra/rule_netlink.c | 431 ++ zebra/rule_netlink.h | 36 + zebra/rule_socket.c | 37 + zebra/sample_plugin.c | 120 + zebra/subdir.am | 259 ++ zebra/table_manager.c | 330 ++ zebra/table_manager.h | 66 + zebra/tc_netlink.c | 873 ++++ zebra/tc_netlink.h | 69 + zebra/tc_socket.c | 26 + 
zebra/testrib.conf | 76 + zebra/zapi_msg.c | 3972 +++++++++++++++++ zebra/zapi_msg.h | 114 + zebra/zebra_affinitymap.c | 144 + zebra/zebra_affinitymap.h | 38 + zebra/zebra_dplane.c | 7473 ++++++++++++++++++++++++++++++++ zebra/zebra_dplane.h | 1220 ++++++ zebra/zebra_errors.c | 800 ++++ zebra/zebra_errors.h | 135 + zebra/zebra_evpn.c | 1713 ++++++++ zebra/zebra_evpn.h | 214 + zebra/zebra_evpn_mac.c | 2560 +++++++++++ zebra/zebra_evpn_mac.h | 273 ++ zebra/zebra_evpn_mh.c | 4138 ++++++++++++++++++ zebra/zebra_evpn_mh.h | 385 ++ zebra/zebra_evpn_neigh.c | 2329 ++++++++++ zebra/zebra_evpn_neigh.h | 280 ++ zebra/zebra_evpn_vxlan.h | 85 + zebra/zebra_fpm.c | 2070 +++++++++ zebra/zebra_fpm_dt.c | 259 ++ zebra/zebra_fpm_netlink.c | 622 +++ zebra/zebra_fpm_private.h | 91 + zebra/zebra_fpm_protobuf.c | 288 ++ zebra/zebra_gr.c | 672 +++ zebra/zebra_l2.c | 567 +++ zebra/zebra_l2.h | 202 + zebra/zebra_l2_bridge_if.c | 382 ++ zebra/zebra_l2_bridge_if.h | 75 + zebra/zebra_mlag.c | 1202 +++++ zebra/zebra_mlag.h | 62 + zebra/zebra_mlag_private.c | 285 ++ zebra/zebra_mlag_vty.c | 49 + zebra/zebra_mlag_vty.h | 23 + zebra/zebra_mpls.c | 4117 ++++++++++++++++++ zebra/zebra_mpls.h | 588 +++ zebra/zebra_mpls_netlink.c | 73 + zebra/zebra_mpls_null.c | 31 + zebra/zebra_mpls_openbsd.c | 555 +++ zebra/zebra_mpls_vty.c | 451 ++ zebra/zebra_mroute.c | 87 + zebra/zebra_mroute.h | 30 + zebra/zebra_nb.c | 702 +++ zebra/zebra_nb.h | 299 ++ zebra/zebra_nb_config.c | 1582 +++++++ zebra/zebra_nb_rpcs.c | 188 + zebra/zebra_nb_state.c | 1048 +++++ zebra/zebra_neigh.c | 286 ++ zebra/zebra_neigh.h | 54 + zebra/zebra_netns_id.c | 351 ++ zebra/zebra_netns_id.h | 21 + zebra/zebra_netns_notify.c | 464 ++ zebra/zebra_netns_notify.h | 24 + zebra/zebra_nhg.c | 3772 ++++++++++++++++ zebra/zebra_nhg.h | 390 ++ zebra/zebra_nhg_private.h | 68 + zebra/zebra_ns.c | 250 ++ zebra/zebra_ns.h | 77 + zebra/zebra_opaque.c | 979 +++++ zebra/zebra_opaque.h | 50 + zebra/zebra_pbr.c | 1495 +++++++ zebra/zebra_pbr.h | 284 ++ 
zebra/zebra_ptm.c | 1552 +++++++ zebra/zebra_ptm.h | 83 + zebra/zebra_ptm_redistribute.c | 99 + zebra/zebra_ptm_redistribute.h | 22 + zebra/zebra_pw.c | 840 ++++ zebra/zebra_pw.h | 71 + zebra/zebra_rib.c | 5094 ++++++++++++++++++++++ zebra/zebra_rnh.c | 1772 ++++++++ zebra/zebra_rnh.h | 56 + zebra/zebra_routemap.c | 2056 +++++++++ zebra/zebra_routemap.h | 47 + zebra/zebra_routemap_nb.c | 63 + zebra/zebra_routemap_nb.h | 32 + zebra/zebra_routemap_nb_config.c | 368 ++ zebra/zebra_router.c | 332 ++ zebra/zebra_router.h | 326 ++ zebra/zebra_script.c | 423 ++ zebra/zebra_script.h | 28 + zebra/zebra_snmp.c | 556 +++ zebra/zebra_srte.c | 391 ++ zebra/zebra_srte.h | 60 + zebra/zebra_srv6.c | 425 ++ zebra/zebra_srv6.h | 70 + zebra/zebra_srv6_vty.c | 467 ++ zebra/zebra_srv6_vty.h | 16 + zebra/zebra_tc.c | 431 ++ zebra/zebra_tc.h | 66 + zebra/zebra_trace.c | 6 + zebra/zebra_trace.h | 160 + zebra/zebra_vrf.c | 688 +++ zebra/zebra_vrf.h | 267 ++ zebra/zebra_vty.c | 4705 ++++++++++++++++++++ zebra/zebra_vxlan.c | 6195 ++++++++++++++++++++++++++ zebra/zebra_vxlan.h | 229 + zebra/zebra_vxlan_if.c | 1159 +++++ zebra/zebra_vxlan_if.h | 96 + zebra/zebra_vxlan_private.h | 264 ++ zebra/zserv.c | 1320 ++++++ zebra/zserv.h | 393 ++ 160 files changed, 118780 insertions(+) create mode 100644 zebra/.gitignore create mode 100644 zebra/Makefile create mode 100644 zebra/connected.c create mode 100644 zebra/connected.h create mode 100644 zebra/debug.c create mode 100644 zebra/debug.h create mode 100644 zebra/debug_nl.c create mode 100644 zebra/dpdk/zebra_dplane_dpdk.c create mode 100644 zebra/dpdk/zebra_dplane_dpdk.h create mode 100644 zebra/dpdk/zebra_dplane_dpdk_private.h create mode 100644 zebra/dpdk/zebra_dplane_dpdk_vty.c create mode 100644 zebra/dplane_fpm_nl.c create mode 100644 zebra/if_ioctl.c create mode 100644 zebra/if_netlink.c create mode 100644 zebra/if_netlink.h create mode 100644 zebra/if_socket.c create mode 100644 zebra/if_sysctl.c create mode 100644 zebra/interface.c create 
mode 100644 zebra/interface.h create mode 100644 zebra/ioctl.c create mode 100644 zebra/ioctl.h create mode 100644 zebra/ipforward.h create mode 100644 zebra/ipforward_proc.c create mode 100644 zebra/ipforward_sysctl.c create mode 100644 zebra/irdp.h create mode 100644 zebra/irdp_interface.c create mode 100644 zebra/irdp_main.c create mode 100644 zebra/irdp_packet.c create mode 100644 zebra/kernel_netlink.c create mode 100644 zebra/kernel_netlink.h create mode 100644 zebra/kernel_socket.c create mode 100644 zebra/kernel_socket.h create mode 100644 zebra/label_manager.c create mode 100644 zebra/label_manager.h create mode 100644 zebra/main.c create mode 100644 zebra/netconf_netlink.c create mode 100644 zebra/netconf_netlink.h create mode 100644 zebra/redistribute.c create mode 100644 zebra/redistribute.h create mode 100644 zebra/rib.h create mode 100644 zebra/router-id.c create mode 100644 zebra/router-id.h create mode 100644 zebra/rt.h create mode 100644 zebra/rt_netlink.c create mode 100644 zebra/rt_netlink.h create mode 100644 zebra/rt_socket.c create mode 100644 zebra/rtadv.c create mode 100644 zebra/rtadv.h create mode 100644 zebra/rtread_netlink.c create mode 100644 zebra/rtread_sysctl.c create mode 100644 zebra/rule_netlink.c create mode 100644 zebra/rule_netlink.h create mode 100644 zebra/rule_socket.c create mode 100644 zebra/sample_plugin.c create mode 100644 zebra/subdir.am create mode 100644 zebra/table_manager.c create mode 100644 zebra/table_manager.h create mode 100644 zebra/tc_netlink.c create mode 100644 zebra/tc_netlink.h create mode 100644 zebra/tc_socket.c create mode 100644 zebra/testrib.conf create mode 100644 zebra/zapi_msg.c create mode 100644 zebra/zapi_msg.h create mode 100644 zebra/zebra_affinitymap.c create mode 100644 zebra/zebra_affinitymap.h create mode 100644 zebra/zebra_dplane.c create mode 100644 zebra/zebra_dplane.h create mode 100644 zebra/zebra_errors.c create mode 100644 zebra/zebra_errors.h create mode 100644 zebra/zebra_evpn.c 
create mode 100644 zebra/zebra_evpn.h create mode 100644 zebra/zebra_evpn_mac.c create mode 100644 zebra/zebra_evpn_mac.h create mode 100644 zebra/zebra_evpn_mh.c create mode 100644 zebra/zebra_evpn_mh.h create mode 100644 zebra/zebra_evpn_neigh.c create mode 100644 zebra/zebra_evpn_neigh.h create mode 100644 zebra/zebra_evpn_vxlan.h create mode 100644 zebra/zebra_fpm.c create mode 100644 zebra/zebra_fpm_dt.c create mode 100644 zebra/zebra_fpm_netlink.c create mode 100644 zebra/zebra_fpm_private.h create mode 100644 zebra/zebra_fpm_protobuf.c create mode 100644 zebra/zebra_gr.c create mode 100644 zebra/zebra_l2.c create mode 100644 zebra/zebra_l2.h create mode 100644 zebra/zebra_l2_bridge_if.c create mode 100644 zebra/zebra_l2_bridge_if.h create mode 100644 zebra/zebra_mlag.c create mode 100644 zebra/zebra_mlag.h create mode 100644 zebra/zebra_mlag_private.c create mode 100644 zebra/zebra_mlag_vty.c create mode 100644 zebra/zebra_mlag_vty.h create mode 100644 zebra/zebra_mpls.c create mode 100644 zebra/zebra_mpls.h create mode 100644 zebra/zebra_mpls_netlink.c create mode 100644 zebra/zebra_mpls_null.c create mode 100644 zebra/zebra_mpls_openbsd.c create mode 100644 zebra/zebra_mpls_vty.c create mode 100644 zebra/zebra_mroute.c create mode 100644 zebra/zebra_mroute.h create mode 100644 zebra/zebra_nb.c create mode 100644 zebra/zebra_nb.h create mode 100644 zebra/zebra_nb_config.c create mode 100644 zebra/zebra_nb_rpcs.c create mode 100644 zebra/zebra_nb_state.c create mode 100644 zebra/zebra_neigh.c create mode 100644 zebra/zebra_neigh.h create mode 100644 zebra/zebra_netns_id.c create mode 100644 zebra/zebra_netns_id.h create mode 100644 zebra/zebra_netns_notify.c create mode 100644 zebra/zebra_netns_notify.h create mode 100644 zebra/zebra_nhg.c create mode 100644 zebra/zebra_nhg.h create mode 100644 zebra/zebra_nhg_private.h create mode 100644 zebra/zebra_ns.c create mode 100644 zebra/zebra_ns.h create mode 100644 zebra/zebra_opaque.c create mode 100644 
zebra/zebra_opaque.h create mode 100644 zebra/zebra_pbr.c create mode 100644 zebra/zebra_pbr.h create mode 100644 zebra/zebra_ptm.c create mode 100644 zebra/zebra_ptm.h create mode 100644 zebra/zebra_ptm_redistribute.c create mode 100644 zebra/zebra_ptm_redistribute.h create mode 100644 zebra/zebra_pw.c create mode 100644 zebra/zebra_pw.h create mode 100644 zebra/zebra_rib.c create mode 100644 zebra/zebra_rnh.c create mode 100644 zebra/zebra_rnh.h create mode 100644 zebra/zebra_routemap.c create mode 100644 zebra/zebra_routemap.h create mode 100644 zebra/zebra_routemap_nb.c create mode 100644 zebra/zebra_routemap_nb.h create mode 100644 zebra/zebra_routemap_nb_config.c create mode 100644 zebra/zebra_router.c create mode 100644 zebra/zebra_router.h create mode 100644 zebra/zebra_script.c create mode 100644 zebra/zebra_script.h create mode 100644 zebra/zebra_snmp.c create mode 100644 zebra/zebra_srte.c create mode 100644 zebra/zebra_srte.h create mode 100644 zebra/zebra_srv6.c create mode 100644 zebra/zebra_srv6.h create mode 100644 zebra/zebra_srv6_vty.c create mode 100644 zebra/zebra_srv6_vty.h create mode 100644 zebra/zebra_tc.c create mode 100644 zebra/zebra_tc.h create mode 100644 zebra/zebra_trace.c create mode 100644 zebra/zebra_trace.h create mode 100644 zebra/zebra_vrf.c create mode 100644 zebra/zebra_vrf.h create mode 100644 zebra/zebra_vty.c create mode 100644 zebra/zebra_vxlan.c create mode 100644 zebra/zebra_vxlan.h create mode 100644 zebra/zebra_vxlan_if.c create mode 100644 zebra/zebra_vxlan_if.h create mode 100644 zebra/zebra_vxlan_private.h create mode 100644 zebra/zserv.c create mode 100644 zebra/zserv.h (limited to 'zebra') diff --git a/zebra/.gitignore b/zebra/.gitignore new file mode 100644 index 0000000..41a86e7 --- /dev/null +++ b/zebra/.gitignore @@ -0,0 +1,3 @@ +zebra +zebra.conf +client diff --git a/zebra/Makefile b/zebra/Makefile new file mode 100644 index 0000000..625a716 --- /dev/null +++ b/zebra/Makefile @@ -0,0 +1,10 @@ +all: ALWAYS + 
@$(MAKE) -s -C .. zebra/zebra +%: ALWAYS + @$(MAKE) -s -C .. zebra/$@ + +Makefile: + #nothing +ALWAYS: +.PHONY: ALWAYS makefiles +.SUFFIXES: diff --git a/zebra/connected.c b/zebra/connected.c new file mode 100644 index 0000000..ee0823f --- /dev/null +++ b/zebra/connected.c @@ -0,0 +1,628 @@ +// SPDX-License-Identifier: GPL-2.0-or-later +/* + * Address linked list routine. + * Copyright (C) 1997, 98 Kunihiro Ishiguro + */ + +#include + +#include "prefix.h" +#include "linklist.h" +#include "if.h" +#include "table.h" +#include "rib.h" +#include "table.h" +#include "log.h" +#include "memory.h" + +#include "vty.h" +#include "zebra/debug.h" +#include "zebra/zserv.h" +#include "zebra/redistribute.h" +#include "zebra/interface.h" +#include "zebra/connected.h" +#include "zebra/rtadv.h" +#include "zebra/zebra_mpls.h" +#include "zebra/zebra_errors.h" +#include "zebra/zebra_router.h" + +/* communicate the withdrawal of a connected address */ +static void connected_withdraw(struct connected *ifc) +{ + if (!ifc) + return; + + /* Update interface address information to protocol daemon. 
*/ + if (CHECK_FLAG(ifc->conf, ZEBRA_IFC_REAL)) { + zebra_interface_address_delete_update(ifc->ifp, ifc); + + if (ifc->address->family == AF_INET) + if_subnet_delete(ifc->ifp, ifc); + + connected_down(ifc->ifp, ifc); + + UNSET_FLAG(ifc->conf, ZEBRA_IFC_REAL); + } + + /* The address is not in the kernel anymore, so clear the flag */ + UNSET_FLAG(ifc->conf, ZEBRA_IFC_QUEUED); + + if (!CHECK_FLAG(ifc->conf, ZEBRA_IFC_CONFIGURED)) { + listnode_delete(ifc->ifp->connected, ifc); + connected_free(&ifc); + } +} + +static void connected_announce(struct interface *ifp, struct connected *ifc) +{ + if (!ifc) + return; + + if (!if_is_loopback(ifp) && ifc->address->family == AF_INET) { + if (ifc->address->prefixlen == IPV4_MAX_BITLEN) + SET_FLAG(ifc->flags, ZEBRA_IFA_UNNUMBERED); + else + UNSET_FLAG(ifc->flags, ZEBRA_IFA_UNNUMBERED); + } + + listnode_add(ifp->connected, ifc); + + /* Update interface address information to protocol daemon. */ + if (ifc->address->family == AF_INET) + if_subnet_add(ifp, ifc); + + zebra_interface_address_add_update(ifp, ifc); + + if (if_is_operative(ifp)) { + connected_up(ifp, ifc); + } +} + +/* If same interface address is already exist... 
*/ +struct connected *connected_check(struct interface *ifp, + union prefixconstptr pu) +{ + const struct prefix *p = pu.p; + struct connected *ifc; + struct listnode *node; + + for (ALL_LIST_ELEMENTS_RO(ifp->connected, node, ifc)) + if (prefix_same(ifc->address, p)) + return ifc; + + return NULL; +} + +/* same, but with peer address */ +struct connected *connected_check_ptp(struct interface *ifp, + union prefixconstptr pu, + union prefixconstptr du) +{ + const struct prefix *p = pu.p; + const struct prefix *d = du.p; + struct connected *ifc; + struct listnode *node; + + for (ALL_LIST_ELEMENTS_RO(ifp->connected, node, ifc)) { + if (!prefix_same(ifc->address, p)) + continue; + if (!CONNECTED_PEER(ifc) && !d) + return ifc; + if (CONNECTED_PEER(ifc) && d + && prefix_same(ifc->destination, d)) + return ifc; + } + + return NULL; +} + +/* Check if two ifc's describe the same address in the same state */ +static int connected_same(struct connected *ifc1, struct connected *ifc2) +{ + if (ifc1->ifp != ifc2->ifp) + return 0; + + if (ifc1->flags != ifc2->flags) + return 0; + + if (ifc1->conf != ifc2->conf) + return 0; + + if (ifc1->destination) + if (!ifc2->destination) + return 0; + if (ifc2->destination) + if (!ifc1->destination) + return 0; + + if (ifc1->destination && ifc2->destination) + if (!prefix_same(ifc1->destination, ifc2->destination)) + return 0; + + return 1; +} + +/* Handle changes to addresses and send the neccesary announcements + * to clients. */ +static void connected_update(struct interface *ifp, struct connected *ifc) +{ + struct connected *current; + + /* Check same connected route. */ + current = connected_check_ptp(ifp, ifc->address, ifc->destination); + if (current) { + if (CHECK_FLAG(current->conf, ZEBRA_IFC_CONFIGURED)) + SET_FLAG(ifc->conf, ZEBRA_IFC_CONFIGURED); + + /* Avoid spurious withdraws, this might be just the kernel + * 'reflecting' + * back an address we have already added. 
+ */ + if (connected_same(current, ifc)) { + /* nothing to do */ + connected_free(&ifc); + return; + } + + /* Clear the configured flag on the old ifc, so it will be freed + * by + * connected withdraw. */ + UNSET_FLAG(current->conf, ZEBRA_IFC_CONFIGURED); + connected_withdraw( + current); /* implicit withdraw - freebsd does this */ + } + + /* If the connected is new or has changed, announce it, if it is usable + */ + if (CHECK_FLAG(ifc->conf, ZEBRA_IFC_REAL)) + connected_announce(ifp, ifc); +} + +/* Called from if_up(). */ +void connected_up(struct interface *ifp, struct connected *ifc) +{ + afi_t afi; + struct prefix p; + struct nexthop nh = { + .type = NEXTHOP_TYPE_IFINDEX, + .ifindex = ifp->ifindex, + .vrf_id = ifp->vrf->vrf_id, + }; + struct zebra_vrf *zvrf; + uint32_t metric; + uint32_t flags = 0; + uint32_t count = 0; + struct listnode *cnode; + struct connected *c; + + zvrf = ifp->vrf->info; + if (!zvrf) { + flog_err( + EC_ZEBRA_VRF_NOT_FOUND, + "%s: Received Up for interface but no associated zvrf: %s(%d)", + __func__, ifp->vrf->name, ifp->vrf->vrf_id); + return; + } + if (!CHECK_FLAG(ifc->conf, ZEBRA_IFC_REAL)) + return; + + /* Ensure 'down' flag is cleared */ + UNSET_FLAG(ifc->conf, ZEBRA_IFC_DOWN); + + prefix_copy(&p, CONNECTED_PREFIX(ifc)); + + /* Apply mask to the network. */ + apply_mask(&p); + + afi = family2afi(p.family); + + switch (afi) { + case AFI_IP: + /* + * In case of connected address is 0.0.0.0/0 we treat it tunnel + * address. + */ + if (prefix_ipv4_any((struct prefix_ipv4 *)&p)) + return; + break; + case AFI_IP6: +#ifndef GNU_LINUX + /* XXX: It is already done by rib_bogus_ipv6 within rib_add */ + if (IN6_IS_ADDR_UNSPECIFIED(&p.u.prefix6)) + return; +#endif + break; + case AFI_UNSPEC: + case AFI_L2VPN: + case AFI_MAX: + flog_warn(EC_ZEBRA_CONNECTED_AFI_UNKNOWN, + "Received unknown AFI: %s", afi2str(afi)); + return; + break; + } + + metric = (ifc->metric < (uint32_t)METRIC_MAX) ? 
+ ifc->metric : ifp->metric; + + /* + * Since we are hand creating the connected routes + * in our main routing table, *if* we are working + * in an offloaded environment then we need to + * pretend like the route is offloaded so everything + * else will work + */ + if (zrouter.asic_offloaded) + flags |= ZEBRA_FLAG_OFFLOADED; + + /* + * It's possible to add the same network and mask + * to an interface over and over. This would + * result in an equivalent number of connected + * routes. Just add one connected route in + * for all the addresses on an interface that + * resolve to the same network and mask + */ + for (ALL_LIST_ELEMENTS_RO(ifp->connected, cnode, c)) { + struct prefix cp; + + prefix_copy(&cp, CONNECTED_PREFIX(c)); + apply_mask(&cp); + + if (prefix_same(&cp, &p) && + !CHECK_FLAG(c->conf, ZEBRA_IFC_DOWN)) + count++; + + if (count >= 2) + return; + } + + rib_add(afi, SAFI_UNICAST, zvrf->vrf->vrf_id, ZEBRA_ROUTE_CONNECT, 0, + flags, &p, NULL, &nh, 0, zvrf->table_id, metric, 0, 0, 0, + false); + + rib_add(afi, SAFI_MULTICAST, zvrf->vrf->vrf_id, ZEBRA_ROUTE_CONNECT, 0, + flags, &p, NULL, &nh, 0, zvrf->table_id, metric, 0, 0, 0, + false); + + /* Schedule LSP forwarding entries for processing, if appropriate. */ + if (zvrf->vrf->vrf_id == VRF_DEFAULT) { + if (IS_ZEBRA_DEBUG_MPLS) + zlog_debug( + "%u: IF %s IP %pFX address add/up, scheduling MPLS processing", + zvrf->vrf->vrf_id, ifp->name, &p); + mpls_mark_lsps_for_processing(zvrf, &p); + } +} + +/* Add connected IPv4 route to the interface. */ +void connected_add_ipv4(struct interface *ifp, int flags, + const struct in_addr *addr, uint16_t prefixlen, + const struct in_addr *dest, const char *label, + uint32_t metric) +{ + struct prefix_ipv4 *p; + struct connected *ifc; + + if (ipv4_martian(addr)) + return; + + /* Make connected structure. 
*/ + ifc = connected_new(); + ifc->ifp = ifp; + ifc->flags = flags; + ifc->metric = metric; + /* If we get a notification from the kernel, + * we can safely assume the address is known to the kernel */ + SET_FLAG(ifc->conf, ZEBRA_IFC_QUEUED); + if (!if_is_operative(ifp)) + SET_FLAG(ifc->conf, ZEBRA_IFC_DOWN); + + /* Allocate new connected address. */ + p = prefix_ipv4_new(); + p->family = AF_INET; + p->prefix = *addr; + p->prefixlen = + CHECK_FLAG(flags, ZEBRA_IFA_PEER) ? IPV4_MAX_BITLEN : prefixlen; + ifc->address = (struct prefix *)p; + + /* If there is a peer address. */ + if (CONNECTED_PEER(ifc)) { + /* validate the destination address */ + if (dest) { + p = prefix_ipv4_new(); + p->family = AF_INET; + p->prefix = *dest; + p->prefixlen = prefixlen; + ifc->destination = (struct prefix *)p; + + if (IPV4_ADDR_SAME(addr, dest)) + flog_warn( + EC_ZEBRA_IFACE_SAME_LOCAL_AS_PEER, + "interface %s has same local and peer address %pI4, routing protocols may malfunction", + ifp->name, addr); + } else { + zlog_debug( + "%s called for interface %s with peer flag set, but no peer address supplied", + __func__, ifp->name); + UNSET_FLAG(ifc->flags, ZEBRA_IFA_PEER); + } + } + + /* no destination address was supplied */ + if (!dest && (prefixlen == IPV4_MAX_BITLEN) && if_is_pointopoint(ifp)) + zlog_debug( + "PtP interface %s with addr %pI4/%d needs a peer address", + ifp->name, addr, prefixlen); + + /* Label of this address. */ + if (label) + ifc->label = XSTRDUP(MTYPE_CONNECTED_LABEL, label); + + /* For all that I know an IPv4 address is always ready when we receive + * the notification. So it should be safe to set the REAL flag here. 
*/ + SET_FLAG(ifc->conf, ZEBRA_IFC_REAL); + + connected_update(ifp, ifc); +} + +void connected_down(struct interface *ifp, struct connected *ifc) +{ + afi_t afi; + struct prefix p; + struct nexthop nh = { + .type = NEXTHOP_TYPE_IFINDEX, + .ifindex = ifp->ifindex, + .vrf_id = ifp->vrf->vrf_id, + }; + struct zebra_vrf *zvrf; + uint32_t count = 0; + struct listnode *cnode; + struct connected *c; + + zvrf = ifp->vrf->info; + if (!zvrf) { + flog_err( + EC_ZEBRA_VRF_NOT_FOUND, + "%s: Received Down for interface but no associated zvrf: %s(%d)", + __func__, ifp->vrf->name, ifp->vrf->vrf_id); + return; + } + + if (!CHECK_FLAG(ifc->conf, ZEBRA_IFC_REAL)) + return; + + /* Skip if we've already done this; this can happen if we have a + * config change that takes an interface down, then we receive kernel + * notifications about the downed interface and its addresses. + */ + if (CHECK_FLAG(ifc->conf, ZEBRA_IFC_DOWN)) { + if (IS_ZEBRA_DEBUG_RIB) + zlog_debug("%s: ifc %p, %pFX already DOWN", + __func__, ifc, ifc->address); + return; + } + + prefix_copy(&p, CONNECTED_PREFIX(ifc)); + + /* Apply mask to the network. */ + apply_mask(&p); + + afi = family2afi(p.family); + + switch (afi) { + case AFI_IP: + /* + * In case of connected address is 0.0.0.0/0 we treat it tunnel + * address. + */ + if (prefix_ipv4_any((struct prefix_ipv4 *)&p)) + return; + break; + case AFI_IP6: + if (IN6_IS_ADDR_UNSPECIFIED(&p.u.prefix6)) + return; + break; + case AFI_UNSPEC: + case AFI_L2VPN: + case AFI_MAX: + zlog_warn("Unknown AFI: %s", afi2str(afi)); + break; + } + + /* Mark the address as 'down' */ + SET_FLAG(ifc->conf, ZEBRA_IFC_DOWN); + + /* + * It's possible to have X number of addresses + * on a interface that all resolve to the same + * network and mask. Find them and just + * allow the deletion when are removing the last + * one. 
+ */ + for (ALL_LIST_ELEMENTS_RO(ifp->connected, cnode, c)) { + struct prefix cp; + + prefix_copy(&cp, CONNECTED_PREFIX(c)); + apply_mask(&cp); + + if (prefix_same(&p, &cp) && + !CHECK_FLAG(c->conf, ZEBRA_IFC_DOWN)) + count++; + + if (count >= 1) + return; + } + + /* + * Same logic as for connected_up(): push the changes into the + * head. + */ + rib_delete(afi, SAFI_UNICAST, zvrf->vrf->vrf_id, ZEBRA_ROUTE_CONNECT, 0, + 0, &p, NULL, &nh, 0, zvrf->table_id, 0, 0, false); + + rib_delete(afi, SAFI_MULTICAST, zvrf->vrf->vrf_id, ZEBRA_ROUTE_CONNECT, + 0, 0, &p, NULL, &nh, 0, zvrf->table_id, 0, 0, false); + + /* Schedule LSP forwarding entries for processing, if appropriate. */ + if (zvrf->vrf->vrf_id == VRF_DEFAULT) { + if (IS_ZEBRA_DEBUG_MPLS) + zlog_debug( + "%u: IF %s IP %pFX address down, scheduling MPLS processing", + zvrf->vrf->vrf_id, ifp->name, &p); + mpls_mark_lsps_for_processing(zvrf, &p); + } +} + +static void connected_delete_helper(struct connected *ifc, struct prefix *p) +{ + struct interface *ifp; + + if (!ifc) + return; + ifp = ifc->ifp; + + connected_withdraw(ifc); + + /* Schedule LSP forwarding entries for processing, if appropriate. */ + if (ifp->vrf->vrf_id == VRF_DEFAULT) { + if (IS_ZEBRA_DEBUG_MPLS) + zlog_debug( + "%u: IF %s IP %pFX address delete, scheduling MPLS processing", + ifp->vrf->vrf_id, ifp->name, p); + mpls_mark_lsps_for_processing(ifp->vrf->info, p); + } +} + +/* Delete connected IPv4 route to the interface. */ +void connected_delete_ipv4(struct interface *ifp, int flags, + const struct in_addr *addr, uint16_t prefixlen, + const struct in_addr *dest) +{ + struct prefix p, d; + struct connected *ifc; + + memset(&p, 0, sizeof(p)); + p.family = AF_INET; + p.u.prefix4 = *addr; + p.prefixlen = + CHECK_FLAG(flags, ZEBRA_IFA_PEER) ? 
IPV4_MAX_BITLEN : prefixlen; + + if (dest) { + memset(&d, 0, sizeof(d)); + d.family = AF_INET; + d.u.prefix4 = *dest; + d.prefixlen = prefixlen; + ifc = connected_check_ptp(ifp, &p, &d); + } else + ifc = connected_check_ptp(ifp, &p, NULL); + + connected_delete_helper(ifc, &p); +} + +/* Add connected IPv6 route to the interface. */ +void connected_add_ipv6(struct interface *ifp, int flags, + const struct in6_addr *addr, + const struct in6_addr *dest, uint16_t prefixlen, + const char *label, uint32_t metric) +{ + struct prefix_ipv6 *p; + struct connected *ifc; + + if (ipv6_martian(addr)) + return; + + /* Make connected structure. */ + ifc = connected_new(); + ifc->ifp = ifp; + ifc->flags = flags; + ifc->metric = metric; + /* If we get a notification from the kernel, + * we can safely assume the address is known to the kernel */ + SET_FLAG(ifc->conf, ZEBRA_IFC_QUEUED); + if (!if_is_operative(ifp)) + SET_FLAG(ifc->conf, ZEBRA_IFC_DOWN); + + /* Allocate new connected address. */ + p = prefix_ipv6_new(); + p->family = AF_INET6; + IPV6_ADDR_COPY(&p->prefix, addr); + p->prefixlen = prefixlen; + ifc->address = (struct prefix *)p; + + /* Add global ipv6 address to the RA prefix list */ + if (!IN6_IS_ADDR_LINKLOCAL(&p->prefix)) + rtadv_add_prefix(ifp->info, p); + + if (dest) { + p = prefix_ipv6_new(); + p->family = AF_INET6; + IPV6_ADDR_COPY(&p->prefix, dest); + p->prefixlen = prefixlen; + ifc->destination = (struct prefix *)p; + } else { + if (CHECK_FLAG(ifc->flags, ZEBRA_IFA_PEER)) { + zlog_debug( + "%s called for interface %s with peer flag set, but no peer address supplied", + __func__, ifp->name); + UNSET_FLAG(ifc->flags, ZEBRA_IFA_PEER); + } + } + + /* Label of this address. */ + if (label) + ifc->label = XSTRDUP(MTYPE_CONNECTED_LABEL, label); + + /* On Linux, we only get here when DAD is complete, therefore we can set + * ZEBRA_IFC_REAL. 
+ * + * On BSD, there currently doesn't seem to be a way to check for + * completion of + * DAD, so we replicate the old behaviour and set ZEBRA_IFC_REAL, + * although DAD + * might still be running. + */ + SET_FLAG(ifc->conf, ZEBRA_IFC_REAL); + connected_update(ifp, ifc); +} + +void connected_delete_ipv6(struct interface *ifp, + const struct in6_addr *address, + const struct in6_addr *dest, uint16_t prefixlen) +{ + struct prefix p, d; + struct connected *ifc; + + memset(&p, 0, sizeof(p)); + p.family = AF_INET6; + memcpy(&p.u.prefix6, address, sizeof(struct in6_addr)); + p.prefixlen = prefixlen; + + /* Delete global ipv6 address from RA prefix list */ + if (!IN6_IS_ADDR_LINKLOCAL(&p.u.prefix6)) + rtadv_delete_prefix(ifp->info, &p); + + if (dest) { + memset(&d, 0, sizeof(d)); + d.family = AF_INET6; + IPV6_ADDR_COPY(&d.u.prefix6, dest); + d.prefixlen = prefixlen; + ifc = connected_check_ptp(ifp, &p, &d); + } else + ifc = connected_check_ptp(ifp, &p, NULL); + + connected_delete_helper(ifc, &p); +} + +int connected_is_unnumbered(struct interface *ifp) +{ + struct connected *connected; + struct listnode *node; + + for (ALL_LIST_ELEMENTS_RO(ifp->connected, node, connected)) { + if (CHECK_FLAG(connected->conf, ZEBRA_IFC_REAL) + && connected->address->family == AF_INET) + return CHECK_FLAG(connected->flags, + ZEBRA_IFA_UNNUMBERED); + } + return 0; +} diff --git a/zebra/connected.h b/zebra/connected.h new file mode 100644 index 0000000..4d714ea --- /dev/null +++ b/zebra/connected.h @@ -0,0 +1,55 @@ +// SPDX-License-Identifier: GPL-2.0-or-later +/* + * Interface's address and mask. 
+ * Copyright (C) 1997 Kunihiro Ishiguro + */ + +#ifndef _ZEBRA_CONNECTED_H +#define _ZEBRA_CONNECTED_H + +#include +#include + +#include "lib/if.h" +#include "lib/prefix.h" + +#ifdef __cplusplus +extern "C" { +#endif + +extern struct connected *connected_check(struct interface *ifp, + union prefixconstptr p); +extern struct connected *connected_check_ptp(struct interface *ifp, + union prefixconstptr p, + union prefixconstptr d); + +extern void connected_add_ipv4(struct interface *ifp, int flags, + const struct in_addr *addr, uint16_t prefixlen, + const struct in_addr *dest, const char *label, + uint32_t metric); + +extern void connected_delete_ipv4(struct interface *ifp, int flags, + const struct in_addr *addr, + uint16_t prefixlen, + const struct in_addr *dest); + +extern void connected_delete_ipv4_unnumbered(struct connected *ifc); + +extern void connected_up(struct interface *ifp, struct connected *ifc); +extern void connected_down(struct interface *ifp, struct connected *ifc); + +extern void connected_add_ipv6(struct interface *ifp, int flags, + const struct in6_addr *address, + const struct in6_addr *dest, uint16_t prefixlen, + const char *label, uint32_t metric); +extern void connected_delete_ipv6(struct interface *ifp, + const struct in6_addr *address, + const struct in6_addr *dest, + uint16_t prefixlen); + +extern int connected_is_unnumbered(struct interface *); + +#ifdef __cplusplus +} +#endif +#endif /*_ZEBRA_CONNECTED_H */ diff --git a/zebra/debug.c b/zebra/debug.c new file mode 100644 index 0000000..68bedaf --- /dev/null +++ b/zebra/debug.c @@ -0,0 +1,849 @@ +// SPDX-License-Identifier: GPL-2.0-or-later +/* + * Zebra debug related function + * Copyright (C) 1999 Kunihiro Ishiguro + */ + +#include +#include "command.h" +#include "debug.h" + +#include "zebra/debug_clippy.c" + +/* For debug statement. 
*/
+/* One flag word per debug category; bits are the ZEBRA_DEBUG_* values. */
+unsigned long zebra_debug_event;
+unsigned long zebra_debug_packet;
+unsigned long zebra_debug_kernel;
+unsigned long zebra_debug_rib;
+unsigned long zebra_debug_fpm;
+unsigned long zebra_debug_nht;
+unsigned long zebra_debug_mpls;
+unsigned long zebra_debug_vxlan;
+unsigned long zebra_debug_pw;
+unsigned long zebra_debug_dplane;
+unsigned long zebra_debug_dplane_dpdk;
+unsigned long zebra_debug_mlag;
+unsigned long zebra_debug_nexthop;
+unsigned long zebra_debug_evpn_mh;
+unsigned long zebra_debug_pbr;
+unsigned long zebra_debug_neigh;
+unsigned long zebra_debug_tc;
+
+DEFINE_HOOK(zebra_debug_show_debugging, (struct vty *vty), (vty));
+
+/*
+ * "show debugging [zebra]": print one status line for every debug
+ * category that is currently on, then call the
+ * zebra_debug_show_debugging hook and cmd_show_lib_debugs().
+ */
+DEFUN_NOSH (show_debugging_zebra,
+	    show_debugging_zebra_cmd,
+	    "show debugging [zebra]",
+	    SHOW_STR
+	    "Debugging information\n"
+	    "Zebra configuration\n")
+{
+	vty_out(vty, "Zebra debugging status:\n");
+
+	if (IS_ZEBRA_DEBUG_EVENT)
+		vty_out(vty, " Zebra event debugging is on\n");
+
+	if (IS_ZEBRA_DEBUG_PACKET) {
+		if (IS_ZEBRA_DEBUG_SEND && IS_ZEBRA_DEBUG_RECV) {
+			vty_out(vty, " Zebra packet%s debugging is on\n",
+				IS_ZEBRA_DEBUG_DETAIL ? " detail" : "");
+		} else {
+			if (IS_ZEBRA_DEBUG_SEND)
+				vty_out(vty,
+					" Zebra packet send%s debugging is on\n",
+					IS_ZEBRA_DEBUG_DETAIL ? " detail" : "");
+			else
+				vty_out(vty,
+					" Zebra packet receive%s debugging is on\n",
+					IS_ZEBRA_DEBUG_DETAIL ? " detail" : "");
+		}
+	}
+
+	if (IS_ZEBRA_DEBUG_KERNEL)
+		vty_out(vty, " Zebra kernel debugging is on\n");
+	if (IS_ZEBRA_DEBUG_KERNEL_MSGDUMP_SEND)
+		vty_out(vty,
+			" Zebra kernel netlink message dumps (send) are on\n");
+	if (IS_ZEBRA_DEBUG_KERNEL_MSGDUMP_RECV)
+		vty_out(vty,
+			" Zebra kernel netlink message dumps (recv) are on\n");
+
+	/* Check here using flags as the 'macro' does an OR */
+	if (CHECK_FLAG(zebra_debug_rib, ZEBRA_DEBUG_RIB_DETAILED))
+		vty_out(vty, " Zebra RIB detailed debugging is on\n");
+	else if (CHECK_FLAG(zebra_debug_rib, ZEBRA_DEBUG_RIB))
+		vty_out(vty, " Zebra RIB debugging is on\n");
+
+	if (IS_ZEBRA_DEBUG_FPM)
+		vty_out(vty, " Zebra FPM debugging is on\n");
+	if (IS_ZEBRA_DEBUG_NHT_DETAILED)
+		vty_out(vty, " Zebra detailed next-hop tracking debugging is on\n");
+	else if (IS_ZEBRA_DEBUG_NHT)
+		vty_out(vty, " Zebra next-hop tracking debugging is on\n");
+	if (IS_ZEBRA_DEBUG_MPLS_DETAIL)
+		vty_out(vty, " Zebra detailed MPLS debugging is on\n");
+	else if (IS_ZEBRA_DEBUG_MPLS)
+		vty_out(vty, " Zebra MPLS debugging is on\n");
+
+	if (IS_ZEBRA_DEBUG_VXLAN)
+		vty_out(vty, " Zebra VXLAN debugging is on\n");
+	if (IS_ZEBRA_DEBUG_PW)
+		vty_out(vty, " Zebra pseudowire debugging is on\n");
+	if (IS_ZEBRA_DEBUG_DPLANE_DETAIL)
+		vty_out(vty, " Zebra detailed dataplane debugging is on\n");
+	else if (IS_ZEBRA_DEBUG_DPLANE)
+		vty_out(vty, " Zebra dataplane debugging is on\n");
+	if (IS_ZEBRA_DEBUG_DPLANE_DPDK_DETAIL)
+		vty_out(vty,
+			" Zebra detailed dpdk dataplane debugging is on\n");
+	else if (IS_ZEBRA_DEBUG_DPLANE_DPDK)
+		vty_out(vty, " Zebra dataplane dpdk debugging is on\n");
+	if (IS_ZEBRA_DEBUG_MLAG)
+		vty_out(vty, " Zebra mlag debugging is on\n");
+	if (IS_ZEBRA_DEBUG_NHG_DETAIL)
+		vty_out(vty, " Zebra detailed nexthop debugging is on\n");
+	else if (IS_ZEBRA_DEBUG_NHG)
+		vty_out(vty, " Zebra nexthop debugging is on\n");
+
+	if (IS_ZEBRA_DEBUG_EVPN_MH_ES)
+		vty_out(vty, " Zebra EVPN-MH ethernet segment debugging is on\n");
+
+	if (IS_ZEBRA_DEBUG_EVPN_MH_NH)
+		vty_out(vty, " Zebra EVPN-MH nexthop debugging is on\n");
+
+	if (IS_ZEBRA_DEBUG_EVPN_MH_MAC)
+		vty_out(vty, " Zebra EVPN-MH MAC debugging is on\n");
+
+	if (IS_ZEBRA_DEBUG_EVPN_MH_NEIGH)
+		vty_out(vty, " Zebra EVPN-MH Neigh debugging is on\n");
+
+	if (IS_ZEBRA_DEBUG_PBR)
+		vty_out(vty, " Zebra PBR debugging is on\n");
+
+	/*
+	 * neigh and tc debugging can be switched on by the commands below,
+	 * so their state is reported like every other category.
+	 */
+	if (IS_ZEBRA_DEBUG_NEIGH)
+		vty_out(vty, " Zebra neigh debugging is on\n");
+
+	if (IS_ZEBRA_DEBUG_TC)
+		vty_out(vty, " Zebra tc debugging is on\n");
+
+	hook_call(zebra_debug_show_debugging, vty);
+
+	cmd_show_lib_debugs(vty);
+
+	return CMD_SUCCESS;
+}
+
+/* "debug zebra events": the event flag word is assigned, not OR-ed. */
+DEFUN (debug_zebra_events,
+       debug_zebra_events_cmd,
+       "debug zebra events",
+       DEBUG_STR
+       "Zebra configuration\n"
+       "Debug option set for zebra events\n")
+{
+	zebra_debug_event = ZEBRA_DEBUG_EVENT;
+	return CMD_SUCCESS;
+}
+
+/*
+ * "debug zebra nht [detailed]": the word is reassigned first, so a plain
+ * invocation drops a previously set detailed bit.
+ */
+DEFUN (debug_zebra_nht,
+       debug_zebra_nht_cmd,
+       "debug zebra nht [detailed]",
+       DEBUG_STR
+       "Zebra configuration\n"
+       "Debug option set for zebra next hop tracking\n"
+       "Debug option set for detailed info\n")
+{
+	int idx = 0;
+
+	zebra_debug_nht = ZEBRA_DEBUG_NHT;
+
+	if (argv_find(argv, argc, "detailed", &idx))
+		zebra_debug_nht |= ZEBRA_DEBUG_NHT_DETAILED;
+
+	return CMD_SUCCESS;
+}
+
+/* "debug zebra mpls [detailed]": same reassign-then-OR pattern as nht. */
+DEFPY (debug_zebra_mpls,
+       debug_zebra_mpls_cmd,
+       "debug zebra mpls [detailed$detail]",
+       DEBUG_STR
+       "Zebra configuration\n"
+       "Debug option set for zebra MPLS LSPs\n"
+       "Debug option for detailed info\n")
+{
+	zebra_debug_mpls = ZEBRA_DEBUG_MPLS;
+
+	if (detail)
+		zebra_debug_mpls |= ZEBRA_DEBUG_MPLS_DETAILED;
+
+	return CMD_SUCCESS;
+}
+
+/* "debug zebra vxlan" */
+DEFPY (debug_zebra_vxlan,
+       debug_zebra_vxlan_cmd,
+       "debug zebra vxlan",
+       DEBUG_STR
+       "Zebra configuration\n"
+       "Debug option set for zebra VxLAN (EVPN)\n")
+{
+	zebra_debug_vxlan = ZEBRA_DEBUG_VXLAN;
+	return CMD_SUCCESS;
+}
+
+/* "[no] debug zebra pseudowires": argv[0] is "no" for the negated form. */
+DEFUN (debug_zebra_pw,
+       debug_zebra_pw_cmd,
+       "[no] debug zebra pseudowires",
+       NO_STR
+       DEBUG_STR
+       "Zebra configuration\n"
+       "Debug option set for zebra pseudowires\n")
+{
+	if (strmatch(argv[0]->text, "no"))
+		UNSET_FLAG(zebra_debug_pw, ZEBRA_DEBUG_PW);
+	else
+		SET_FLAG(zebra_debug_pw, ZEBRA_DEBUG_PW);
+	return CMD_SUCCESS;
+}
+
+/*
+ * "debug zebra packet [<recv|send>] [detail]": with neither direction
+ * given, both SEND and RECV are set.  The <recv|send> alternation matches
+ * the two direction help strings and the tokens searched for below.
+ */
+DEFUN (debug_zebra_packet,
+       debug_zebra_packet_cmd,
+       "debug zebra packet [<recv|send>] [detail]",
+       DEBUG_STR
+       "Zebra configuration\n"
+       "Debug option set for zebra packet\n"
+       "Debug option set for receive packet\n"
+       "Debug option set for send packet\n"
+       "Debug option set for detailed info\n")
+{
+	int idx = 0;
+	zebra_debug_packet = ZEBRA_DEBUG_PACKET;
+
+	if (argv_find(argv, argc, "send", &idx))
+		SET_FLAG(zebra_debug_packet, ZEBRA_DEBUG_SEND);
+	else if (argv_find(argv, argc, "recv", &idx))
+		SET_FLAG(zebra_debug_packet, ZEBRA_DEBUG_RECV);
+	else {
+		SET_FLAG(zebra_debug_packet, ZEBRA_DEBUG_SEND);
+		SET_FLAG(zebra_debug_packet, ZEBRA_DEBUG_RECV);
+	}
+
+	if (argv_find(argv, argc, "detail", &idx))
+		SET_FLAG(zebra_debug_packet, ZEBRA_DEBUG_DETAIL);
+
+	return CMD_SUCCESS;
+}
+
+/* "debug zebra kernel": only the KERNEL bit is touched. */
+DEFUN (debug_zebra_kernel,
+       debug_zebra_kernel_cmd,
+       "debug zebra kernel",
+       DEBUG_STR
+       "Zebra configuration\n"
+       "Debug option set for zebra between kernel interface\n")
+{
+	SET_FLAG(zebra_debug_kernel, ZEBRA_DEBUG_KERNEL);
+
+	return CMD_SUCCESS;
+}
+
+#if defined(HAVE_NETLINK)
+/*
+ * "debug zebra kernel msgdump [<recv|send>]": no direction means both.
+ * The msgdump bits share zebra_debug_kernel with the KERNEL bit.
+ */
+DEFUN (debug_zebra_kernel_msgdump,
+       debug_zebra_kernel_msgdump_cmd,
+       "debug zebra kernel msgdump [<recv|send>]",
+       DEBUG_STR
+       "Zebra configuration\n"
+       "Debug option set for zebra between kernel interface\n"
+       "Dump raw netlink messages, sent and received\n"
+       "Dump raw netlink messages received\n"
+       "Dump raw netlink messages sent\n")
+{
+	int idx = 0;
+
+	if (argv_find(argv, argc, "recv", &idx))
+		SET_FLAG(zebra_debug_kernel, ZEBRA_DEBUG_KERNEL_MSGDUMP_RECV);
+	else if (argv_find(argv, argc, "send", &idx))
+		SET_FLAG(zebra_debug_kernel, ZEBRA_DEBUG_KERNEL_MSGDUMP_SEND);
+	else {
+		SET_FLAG(zebra_debug_kernel, ZEBRA_DEBUG_KERNEL_MSGDUMP_RECV);
+		SET_FLAG(zebra_debug_kernel, ZEBRA_DEBUG_KERNEL_MSGDUMP_SEND);
+	}
+
+	return CMD_SUCCESS;
+}
+#endif
+
+/* "debug zebra rib [detailed]": bits are OR-ed in, nothing is cleared. */
+DEFUN (debug_zebra_rib,
+       debug_zebra_rib_cmd,
+       "debug zebra rib [detailed]",
+       DEBUG_STR
+       "Zebra configuration\n"
+       "Debug RIB events\n"
+       "Detailed debugs\n")
+{
+	int idx = 0;
+	SET_FLAG(zebra_debug_rib, ZEBRA_DEBUG_RIB);
+
+	if (argv_find(argv, argc, "detailed", &idx))
+		SET_FLAG(zebra_debug_rib, ZEBRA_DEBUG_RIB_DETAILED);
+
+	return CMD_SUCCESS;
+}
+
+/* "debug zebra fpm" */
+DEFUN (debug_zebra_fpm,
+       debug_zebra_fpm_cmd,
+       "debug zebra fpm",
+       DEBUG_STR
+       "Zebra configuration\n"
+       "Debug zebra FPM events\n")
+{
+	SET_FLAG(zebra_debug_fpm, ZEBRA_DEBUG_FPM);
+	return CMD_SUCCESS;
+}
+
+/* "debug zebra dplane [detailed]" */
+DEFUN (debug_zebra_dplane,
+       debug_zebra_dplane_cmd,
+       "debug zebra dplane [detailed]",
+       DEBUG_STR
+       "Zebra configuration\n"
+       "Debug zebra dataplane events\n"
+       "Detailed debug information\n")
+{
+	int idx = 0;
+
+	SET_FLAG(zebra_debug_dplane, ZEBRA_DEBUG_DPLANE);
+
+	if (argv_find(argv, argc, "detailed", &idx))
+		SET_FLAG(zebra_debug_dplane, ZEBRA_DEBUG_DPLANE_DETAILED);
+
+	return CMD_SUCCESS;
+}
+
+/* "[no] debug zebra dplane dpdk [detailed]": "no" clears both bits. */
+DEFPY(debug_zebra_dplane_dpdk, debug_zebra_dplane_dpdk_cmd,
+      "[no$no] debug zebra dplane dpdk [detailed$detail]",
+      NO_STR DEBUG_STR
+      "Zebra configuration\n"
+      "Debug zebra dataplane events\n"
+      "Debug zebra DPDK offload events\n"
+      "Detailed debug information\n")
+{
+	if (no) {
+		UNSET_FLAG(zebra_debug_dplane_dpdk, ZEBRA_DEBUG_DPLANE_DPDK);
+		UNSET_FLAG(zebra_debug_dplane_dpdk,
+			   ZEBRA_DEBUG_DPLANE_DPDK_DETAIL);
+	} else {
+		SET_FLAG(zebra_debug_dplane_dpdk, ZEBRA_DEBUG_DPLANE_DPDK);
+
+		if (detail)
+			SET_FLAG(zebra_debug_dplane_dpdk,
+				 ZEBRA_DEBUG_DPLANE_DPDK_DETAIL);
+	}
+
+	return CMD_SUCCESS;
+}
+
+/* "debug zebra pbr" */
+DEFUN (debug_zebra_pbr,
+       debug_zebra_pbr_cmd,
+       "debug zebra pbr",
+       DEBUG_STR
+       "Zebra configuration\n"
+       "Debug zebra pbr events\n")
+{
+	SET_FLAG(zebra_debug_pbr, ZEBRA_DEBUG_PBR);
+	return CMD_SUCCESS;
+}
+
+/* "[no] debug zebra neigh" */
+DEFPY (debug_zebra_neigh,
+       debug_zebra_neigh_cmd,
+       "[no$no] debug zebra neigh",
+       NO_STR
+       DEBUG_STR
+       "Zebra configuration\n"
+       "Debug zebra neigh events\n")
+{
+	if (no)
+		UNSET_FLAG(zebra_debug_neigh, ZEBRA_DEBUG_NEIGH);
+	else
+		SET_FLAG(zebra_debug_neigh, ZEBRA_DEBUG_NEIGH);
+
+	return CMD_SUCCESS;
+}
+
+/* "debug zebra tc": this file defines no negated form for it. */
+DEFUN (debug_zebra_tc,
+       debug_zebra_tc_cmd,
+       "debug zebra tc",
+       DEBUG_STR
+       "Zebra configuration\n"
+       "Debug zebra tc events\n")
+{
+	SET_FLAG(zebra_debug_tc, ZEBRA_DEBUG_TC);
+	return CMD_SUCCESS;
+}
+
+/* "[no] debug zebra mlag" */
+DEFPY (debug_zebra_mlag,
+       debug_zebra_mlag_cmd,
+       "[no$no] debug zebra mlag",
+       NO_STR
+       DEBUG_STR
+       "Zebra configuration\n"
+       "Debug option set for mlag events\n")
+{
+	if (no)
+		UNSET_FLAG(zebra_debug_mlag, ZEBRA_DEBUG_MLAG);
+	else
+		SET_FLAG(zebra_debug_mlag, ZEBRA_DEBUG_MLAG);
+	return CMD_SUCCESS;
+}
+
+/*
+ * "[no] debug zebra evpn mh <es|mac|neigh|nh>": exactly one of the four
+ * variables is set per invocation; its bit is set or, with "no", cleared.
+ * The alternation follows the order of the four trailing help strings.
+ */
+DEFPY (debug_zebra_evpn_mh,
+       debug_zebra_evpn_mh_cmd,
+       "[no$no] debug zebra evpn mh <es$es|mac$mac|neigh$neigh|nh$nh>",
+       NO_STR
+       DEBUG_STR
+       "Zebra configuration\n"
+       "EVPN\n"
+       "Multihoming\n"
+       "Ethernet Segment Debugging\n"
+       "MAC Debugging\n"
+       "Neigh Debugging\n"
+       "Nexthop Debugging\n")
+{
+	if (es) {
+		if (no)
+			UNSET_FLAG(zebra_debug_evpn_mh, ZEBRA_DEBUG_EVPN_MH_ES);
+		else
+			SET_FLAG(zebra_debug_evpn_mh, ZEBRA_DEBUG_EVPN_MH_ES);
+	}
+
+	if (mac) {
+		if (no)
+			UNSET_FLAG(zebra_debug_evpn_mh,
+				   ZEBRA_DEBUG_EVPN_MH_MAC);
+		else
+			SET_FLAG(zebra_debug_evpn_mh, ZEBRA_DEBUG_EVPN_MH_MAC);
+	}
+
+	if (neigh) {
+		if (no)
+			UNSET_FLAG(zebra_debug_evpn_mh,
+				   ZEBRA_DEBUG_EVPN_MH_NEIGH);
+		else
+			SET_FLAG(zebra_debug_evpn_mh,
+				 ZEBRA_DEBUG_EVPN_MH_NEIGH);
+	}
+
+	if (nh) {
+		if (no)
+			UNSET_FLAG(zebra_debug_evpn_mh, ZEBRA_DEBUG_EVPN_MH_NH);
+		else
+			SET_FLAG(zebra_debug_evpn_mh, ZEBRA_DEBUG_EVPN_MH_NH);
+	}
+
+	return CMD_SUCCESS;
+}
+
+/* "no debug zebra events" */
+DEFUN (no_debug_zebra_events,
+       no_debug_zebra_events_cmd,
+       "no debug zebra events",
+       NO_STR
+       DEBUG_STR
+       "Zebra configuration\n"
+       "Debug option set for zebra events\n")
+{
+	zebra_debug_event = 0;
+	return CMD_SUCCESS;
+}
+
+/* "no debug zebra nht [detailed]": clears the whole word either way. */
+DEFUN (no_debug_zebra_nht,
+       no_debug_zebra_nht_cmd,
+       "no debug zebra nht [detailed]",
+       NO_STR
+       DEBUG_STR
+       "Zebra configuration\n"
+       "Debug option set for zebra next hop tracking\n"
+       "Debug option set for detailed info\n")
+{
+	zebra_debug_nht = 0;
+	return CMD_SUCCESS;
+}
+
+/* "no debug zebra mpls [detailed]": clears the whole word either way. */
+DEFUN (no_debug_zebra_mpls,
+       no_debug_zebra_mpls_cmd,
+       "no debug zebra mpls [detailed]",
+       NO_STR
+       DEBUG_STR
+       "Zebra configuration\n"
+       "Debug option set for zebra MPLS LSPs\n"
+       "Debug option for zebra detailed info\n")
+{
+	zebra_debug_mpls = 0;
+	return CMD_SUCCESS;
+}
+
+/* "no debug zebra vxlan" */
+DEFUN (no_debug_zebra_vxlan,
+       no_debug_zebra_vxlan_cmd,
+       "no debug zebra vxlan",
+       NO_STR
+       DEBUG_STR
+       "Zebra configuration\n"
+       "Debug option set for zebra VxLAN (EVPN)\n")
+{
+	zebra_debug_vxlan = 0;
+	return CMD_SUCCESS;
+}
+
+/*
+ * "no debug zebra packet [<recv|send>] [detail]": the optional tokens are
+ * accepted but ignored; the whole packet word is cleared.
+ */
+DEFUN (no_debug_zebra_packet,
+       no_debug_zebra_packet_cmd,
+       "no debug zebra packet [<recv|send>] [detail]",
+       NO_STR
+       DEBUG_STR
+       "Zebra configuration\n"
+       "Debug option set for zebra packet\n"
+       "Debug option set for receive packet\n"
+       "Debug option set for send packet\n"
+       "Debug option set for detailed info\n")
+{
+	zebra_debug_packet = 0;
+	return CMD_SUCCESS;
+}
+
+/* "no debug zebra kernel": msgdump bits in the same word are kept. */
+DEFUN (no_debug_zebra_kernel,
+       no_debug_zebra_kernel_cmd,
+       "no debug zebra kernel",
+       NO_STR
+       DEBUG_STR
+       "Zebra configuration\n"
+       "Debug option set for zebra between kernel interface\n")
+{
+	UNSET_FLAG(zebra_debug_kernel, ZEBRA_DEBUG_KERNEL);
+
+	return CMD_SUCCESS;
+}
+
+#if defined(HAVE_NETLINK)
+/* "no debug zebra kernel msgdump [<recv|send>]": no direction means both. */
+DEFUN (no_debug_zebra_kernel_msgdump,
+       no_debug_zebra_kernel_msgdump_cmd,
+       "no debug zebra kernel msgdump [<recv|send>]",
+       NO_STR
+       DEBUG_STR
+       "Zebra configuration\n"
+       "Debug option set for zebra between kernel interface\n"
+       "Dump raw netlink messages, sent and received\n"
+       "Dump raw netlink messages received\n"
+       "Dump raw netlink messages sent\n")
+{
+	int idx = 0;
+
+	if (argv_find(argv, argc, "recv", &idx))
+		UNSET_FLAG(zebra_debug_kernel, ZEBRA_DEBUG_KERNEL_MSGDUMP_RECV);
+	else if (argv_find(argv, argc, "send", &idx))
+		UNSET_FLAG(zebra_debug_kernel, ZEBRA_DEBUG_KERNEL_MSGDUMP_SEND);
+	else {
+		UNSET_FLAG(zebra_debug_kernel, ZEBRA_DEBUG_KERNEL_MSGDUMP_RECV);
+		UNSET_FLAG(zebra_debug_kernel, ZEBRA_DEBUG_KERNEL_MSGDUMP_SEND);
+	}
+
+	return CMD_SUCCESS;
+}
+#endif
+
+/* "no debug zebra rib [detailed]": clears the whole word either way. */
+DEFUN (no_debug_zebra_rib,
+       no_debug_zebra_rib_cmd,
+       "no debug zebra rib [detailed]",
+       NO_STR
+       DEBUG_STR
+       "Zebra configuration\n"
+       "Debug zebra RIB\n"
+       "Detailed debugs\n")
+{
+	zebra_debug_rib = 0;
+	return CMD_SUCCESS;
+}
+
+/* "no debug zebra fpm" */
+DEFUN (no_debug_zebra_fpm,
+       no_debug_zebra_fpm_cmd,
+       "no debug zebra fpm",
+       NO_STR
+       DEBUG_STR
+       "Zebra configuration\n"
+       "Debug zebra FPM events\n")
+{
+	zebra_debug_fpm = 0;
+	return CMD_SUCCESS;
+}
+
+/* "no debug zebra dplane": clears both the basic and the detailed bit. */
+DEFUN (no_debug_zebra_dplane,
+       no_debug_zebra_dplane_cmd,
+       "no debug zebra dplane",
+       NO_STR
+       DEBUG_STR
+       "Zebra configuration\n"
+       "Debug zebra dataplane events\n")
+{
+	zebra_debug_dplane = 0;
+	return CMD_SUCCESS;
+}
+
+/* "no debug zebra pbr" */
+DEFUN (no_debug_zebra_pbr,
+       no_debug_zebra_pbr_cmd,
+       "no debug zebra pbr",
+       NO_STR
+       DEBUG_STR
+       "Zebra configuration\n"
+       "Debug zebra pbr events\n")
+{
+	zebra_debug_pbr = 0;
+	return CMD_SUCCESS;
+}
+
+/* "[no] debug zebra nexthop [detail]": "no" clears the whole word. */
+DEFPY (debug_zebra_nexthop,
+       debug_zebra_nexthop_cmd,
+       "[no$no] debug zebra nexthop [detail$detail]",
+       NO_STR
+       DEBUG_STR
+       "Zebra configuration\n"
+       "Debug zebra nexthop events\n"
+       "Detailed information\n")
+{
+	if (no)
+		zebra_debug_nexthop = 0;
+	else {
+		SET_FLAG(zebra_debug_nexthop, ZEBRA_DEBUG_NHG);
+
+		if (detail)
+			SET_FLAG(zebra_debug_nexthop,
+				 ZEBRA_DEBUG_NHG_DETAILED);
+	}
+
+	return CMD_SUCCESS;
+}
+
+/* Debug node. */
+static int config_write_debug(struct vty *vty);
+struct cmd_node debug_node = {
+	.name = "debug",
+	.node = DEBUG_NODE,
+	.prompt = "",
+	.config_write = config_write_debug,
+};
+
+/*
+ * config_write callback of debug_node: emit the "debug zebra ..." command
+ * for every category that is on and return how many were written.
+ * tc is not emitted: its command is only installed in ENABLE_NODE.
+ */
+static int config_write_debug(struct vty *vty)
+{
+	int write = 0;
+
+	if (IS_ZEBRA_DEBUG_EVENT) {
+		vty_out(vty, "debug zebra events\n");
+		write++;
+	}
+	if (IS_ZEBRA_DEBUG_PACKET) {
+		if (IS_ZEBRA_DEBUG_SEND && IS_ZEBRA_DEBUG_RECV) {
+			vty_out(vty, "debug zebra packet%s\n",
+				IS_ZEBRA_DEBUG_DETAIL ? " detail" : "");
+			write++;
+		} else {
+			if (IS_ZEBRA_DEBUG_SEND)
+				vty_out(vty, "debug zebra packet send%s\n",
+					IS_ZEBRA_DEBUG_DETAIL ? " detail" : "");
+			else
+				vty_out(vty, "debug zebra packet recv%s\n",
+					IS_ZEBRA_DEBUG_DETAIL ? " detail" : "");
+			write++;
+		}
+	}
+
+	if (IS_ZEBRA_DEBUG_KERNEL_MSGDUMP_SEND
+	    && IS_ZEBRA_DEBUG_KERNEL_MSGDUMP_RECV) {
+		vty_out(vty, "debug zebra kernel msgdump\n");
+		write++;
+	} else if (IS_ZEBRA_DEBUG_KERNEL_MSGDUMP_RECV) {
+		vty_out(vty, "debug zebra kernel msgdump recv\n");
+		write++;
+	} else if (IS_ZEBRA_DEBUG_KERNEL_MSGDUMP_SEND) {
+		vty_out(vty, "debug zebra kernel msgdump send\n");
+		write++;
+	}
+
+	if (IS_ZEBRA_DEBUG_KERNEL) {
+		vty_out(vty, "debug zebra kernel\n");
+		write++;
+	}
+
+	/* Raw flag checks: IS_ZEBRA_DEBUG_RIB is also true for detailed. */
+	if (CHECK_FLAG(zebra_debug_rib, ZEBRA_DEBUG_RIB_DETAILED)) {
+		vty_out(vty, "debug zebra rib detailed\n");
+		write++;
+	} else if (CHECK_FLAG(zebra_debug_rib, ZEBRA_DEBUG_RIB)) {
+		vty_out(vty, "debug zebra rib\n");
+		write++;
+	}
+
+	if (IS_ZEBRA_DEBUG_FPM) {
+		vty_out(vty, "debug zebra fpm\n");
+		write++;
+	}
+
+	if (IS_ZEBRA_DEBUG_NHT_DETAILED) {
+		vty_out(vty, "debug zebra nht detailed\n");
+		write++;
+	} else if (IS_ZEBRA_DEBUG_NHT) {
+		vty_out(vty, "debug zebra nht\n");
+		write++;
+	}
+
+	if (IS_ZEBRA_DEBUG_MPLS_DETAIL) {
+		vty_out(vty, "debug zebra mpls detailed\n");
+		write++;
+	} else if (IS_ZEBRA_DEBUG_MPLS) {
+		vty_out(vty, "debug zebra mpls\n");
+		write++;
+	}
+
+	if (IS_ZEBRA_DEBUG_VXLAN) {
+		vty_out(vty, "debug zebra vxlan\n");
+		write++;
+	}
+	if (IS_ZEBRA_DEBUG_MLAG) {
+		vty_out(vty, "debug zebra mlag\n");
+		write++;
+	}
+	if (IS_ZEBRA_DEBUG_EVPN_MH_ES) {
+		vty_out(vty, "debug zebra evpn mh es\n");
+		write++;
+	}
+	if (IS_ZEBRA_DEBUG_EVPN_MH_NH) {
+		vty_out(vty, "debug zebra evpn mh nh\n");
+		write++;
+	}
+	if (IS_ZEBRA_DEBUG_EVPN_MH_MAC) {
+		vty_out(vty, "debug zebra evpn mh mac\n");
+		write++;
+	}
+	if (IS_ZEBRA_DEBUG_EVPN_MH_NEIGH) {
+		vty_out(vty, "debug zebra evpn mh neigh\n");
+		write++;
+	}
+	if (IS_ZEBRA_DEBUG_PW) {
+		vty_out(vty, "debug zebra pseudowires\n");
+		write++;
+	}
+
+	if (CHECK_FLAG(zebra_debug_dplane, ZEBRA_DEBUG_DPLANE_DETAILED)) {
+		vty_out(vty, "debug zebra dplane detailed\n");
+		write++;
+	} else if (CHECK_FLAG(zebra_debug_dplane, ZEBRA_DEBUG_DPLANE)) {
+		vty_out(vty, "debug zebra dplane\n");
+		write++;
+	}
+
+	if (CHECK_FLAG(zebra_debug_dplane_dpdk,
+		       ZEBRA_DEBUG_DPLANE_DPDK_DETAIL)) {
+		vty_out(vty, "debug zebra dplane dpdk detailed\n");
+		write++;
+	} else if (CHECK_FLAG(zebra_debug_dplane_dpdk,
+			      ZEBRA_DEBUG_DPLANE_DPDK)) {
+		vty_out(vty, "debug zebra dplane dpdk\n");
+		write++;
+	}
+
+	if (CHECK_FLAG(zebra_debug_nexthop, ZEBRA_DEBUG_NHG_DETAILED)) {
+		vty_out(vty, "debug zebra nexthop detail\n");
+		write++;
+	} else if (CHECK_FLAG(zebra_debug_nexthop, ZEBRA_DEBUG_NHG)) {
+		vty_out(vty, "debug zebra nexthop\n");
+		write++;
+	}
+
+	if (IS_ZEBRA_DEBUG_PBR) {
+		vty_out(vty, "debug zebra pbr\n");
+		write++;
+	}
+
+	if (IS_ZEBRA_DEBUG_NEIGH) {
+		vty_out(vty, "debug zebra neigh\n");
+		write++;
+	}
+
+	return write;
+}
+
+/*
+ * Reset every debug flag word, register debug_node and install the
+ * debug commands in ENABLE_NODE and CONFIG_NODE.
+ */
+void zebra_debug_init(void)
+{
+	zebra_debug_event = 0;
+	zebra_debug_packet = 0;
+	zebra_debug_kernel = 0;
+	zebra_debug_rib = 0;
+	zebra_debug_fpm = 0;
+	zebra_debug_mpls = 0;
+	zebra_debug_vxlan = 0;
+	zebra_debug_pw = 0;
+	zebra_debug_dplane = 0;
+	zebra_debug_dplane_dpdk = 0;
+	zebra_debug_mlag = 0;
+	zebra_debug_evpn_mh = 0;
+	zebra_debug_nht = 0;
+	zebra_debug_nexthop = 0;
+	zebra_debug_pbr = 0;
+	zebra_debug_neigh = 0;
+	/* tc was the only flag word missing from this reset list. */
+	zebra_debug_tc = 0;
+
+	install_node(&debug_node);
+
+	install_element(ENABLE_NODE, &show_debugging_zebra_cmd);
+
+	install_element(ENABLE_NODE, &debug_zebra_events_cmd);
+	install_element(ENABLE_NODE, &debug_zebra_nht_cmd);
+	install_element(ENABLE_NODE, &debug_zebra_mpls_cmd);
+	install_element(ENABLE_NODE, &debug_zebra_vxlan_cmd);
+	install_element(ENABLE_NODE, &debug_zebra_pw_cmd);
+	install_element(ENABLE_NODE, &debug_zebra_packet_cmd);
+	install_element(ENABLE_NODE, &debug_zebra_kernel_cmd);
+#if defined(HAVE_NETLINK)
+	install_element(ENABLE_NODE, &debug_zebra_kernel_msgdump_cmd);
+#endif
+	install_element(ENABLE_NODE, &debug_zebra_rib_cmd);
+	install_element(ENABLE_NODE, &debug_zebra_fpm_cmd);
+	install_element(ENABLE_NODE, &debug_zebra_dplane_cmd);
+	install_element(ENABLE_NODE, &debug_zebra_mlag_cmd);
+	install_element(ENABLE_NODE, &debug_zebra_nexthop_cmd);
+	install_element(ENABLE_NODE, &debug_zebra_pbr_cmd);
+	install_element(ENABLE_NODE, &debug_zebra_neigh_cmd);
+	install_element(ENABLE_NODE, &debug_zebra_tc_cmd);
+	install_element(ENABLE_NODE, &debug_zebra_dplane_dpdk_cmd);
+	install_element(ENABLE_NODE, &no_debug_zebra_events_cmd);
+	install_element(ENABLE_NODE, &no_debug_zebra_nht_cmd);
+	install_element(ENABLE_NODE, &no_debug_zebra_mpls_cmd);
+	install_element(ENABLE_NODE, &no_debug_zebra_vxlan_cmd);
+	install_element(ENABLE_NODE, &no_debug_zebra_packet_cmd);
+	install_element(ENABLE_NODE, &no_debug_zebra_kernel_cmd);
+#if defined(HAVE_NETLINK)
+	install_element(ENABLE_NODE, &no_debug_zebra_kernel_msgdump_cmd);
+#endif
+	install_element(ENABLE_NODE, &no_debug_zebra_rib_cmd);
+	install_element(ENABLE_NODE, &no_debug_zebra_fpm_cmd);
+	install_element(ENABLE_NODE, &no_debug_zebra_dplane_cmd);
+	install_element(ENABLE_NODE, &no_debug_zebra_pbr_cmd);
+	install_element(ENABLE_NODE, &debug_zebra_evpn_mh_cmd);
+
+	install_element(CONFIG_NODE, &debug_zebra_events_cmd);
+	install_element(CONFIG_NODE, &debug_zebra_nht_cmd);
+	install_element(CONFIG_NODE, &debug_zebra_mpls_cmd);
+	install_element(CONFIG_NODE, &debug_zebra_vxlan_cmd);
+	install_element(CONFIG_NODE, &debug_zebra_pw_cmd);
+	install_element(CONFIG_NODE, &debug_zebra_packet_cmd);
+	install_element(CONFIG_NODE, &debug_zebra_kernel_cmd);
+#if defined(HAVE_NETLINK)
+	install_element(CONFIG_NODE, &debug_zebra_kernel_msgdump_cmd);
+#endif
+	install_element(CONFIG_NODE, &debug_zebra_rib_cmd);
+	install_element(CONFIG_NODE, &debug_zebra_fpm_cmd);
+	install_element(CONFIG_NODE, &debug_zebra_dplane_cmd);
+	install_element(CONFIG_NODE, &debug_zebra_dplane_dpdk_cmd);
+	install_element(CONFIG_NODE, &debug_zebra_nexthop_cmd);
+	install_element(CONFIG_NODE, &debug_zebra_pbr_cmd);
+	install_element(CONFIG_NODE, &debug_zebra_neigh_cmd);
+
+	install_element(CONFIG_NODE, &no_debug_zebra_events_cmd);
+	install_element(CONFIG_NODE, &no_debug_zebra_nht_cmd);
+	install_element(CONFIG_NODE, &no_debug_zebra_mpls_cmd);
+	install_element(CONFIG_NODE, &no_debug_zebra_vxlan_cmd);
+	install_element(CONFIG_NODE, &no_debug_zebra_packet_cmd);
+	install_element(CONFIG_NODE, &no_debug_zebra_kernel_cmd);
+#if defined(HAVE_NETLINK)
+	install_element(CONFIG_NODE, &no_debug_zebra_kernel_msgdump_cmd);
+#endif
+	install_element(CONFIG_NODE, &no_debug_zebra_rib_cmd);
+	install_element(CONFIG_NODE, &no_debug_zebra_fpm_cmd);
+	install_element(CONFIG_NODE, &no_debug_zebra_dplane_cmd);
+	install_element(CONFIG_NODE, &no_debug_zebra_pbr_cmd);
+	install_element(CONFIG_NODE, &debug_zebra_mlag_cmd);
+	install_element(CONFIG_NODE, &debug_zebra_evpn_mh_cmd);
+}
diff --git a/zebra/debug.h b/zebra/debug.h
new file mode 100644
index 0000000..075d903
--- /dev/null
+++ b/zebra/debug.h
@@ -0,0 +1,151 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
+/*
+ * Zebra debug related function
+ * Copyright (C) 1999 Kunihiro Ishiguro
+ */
+
+#ifndef _ZEBRA_DEBUG_H
+#define _ZEBRA_DEBUG_H
+
+#include "lib/vty.h"
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+/* Debug flags.
*/
+#define ZEBRA_DEBUG_EVENT 0x01 /* zebra_debug_event; 0x01 recurs because each group below is tested against its own flag word */
+
+#define ZEBRA_DEBUG_PACKET 0x01 /* zebra_debug_packet: packet debugging enabled */
+#define ZEBRA_DEBUG_SEND 0x20 /* zebra_debug_packet: send direction */
+#define ZEBRA_DEBUG_RECV 0x40 /* zebra_debug_packet: receive direction */
+#define ZEBRA_DEBUG_DETAIL 0x80 /* zebra_debug_packet: detail */
+
+#define ZEBRA_DEBUG_KERNEL 0x01 /* zebra_debug_kernel */
+#define ZEBRA_DEBUG_KERNEL_MSGDUMP_SEND 0x20 /* zebra_debug_kernel: dump sent netlink messages */
+#define ZEBRA_DEBUG_KERNEL_MSGDUMP_RECV 0x40 /* zebra_debug_kernel: dump received netlink messages */
+
+#define ZEBRA_DEBUG_RIB 0x01 /* zebra_debug_rib */
+#define ZEBRA_DEBUG_RIB_DETAILED 0x02 /* zebra_debug_rib */
+
+#define ZEBRA_DEBUG_FPM 0x01 /* zebra_debug_fpm */
+
+#define ZEBRA_DEBUG_NHT 0x01 /* zebra_debug_nht */
+#define ZEBRA_DEBUG_NHT_DETAILED 0x02 /* zebra_debug_nht */
+
+#define ZEBRA_DEBUG_MPLS 0x01 /* zebra_debug_mpls */
+#define ZEBRA_DEBUG_MPLS_DETAILED 0x02 /* zebra_debug_mpls */
+
+#define ZEBRA_DEBUG_VXLAN 0x01 /* zebra_debug_vxlan */
+
+#define ZEBRA_DEBUG_PW 0x01 /* zebra_debug_pw */
+
+#define ZEBRA_DEBUG_DPLANE 0x01 /* zebra_debug_dplane */
+#define ZEBRA_DEBUG_DPLANE_DETAILED 0x02 /* zebra_debug_dplane */
+
+#define ZEBRA_DEBUG_DPLANE_DPDK 0x01 /* zebra_debug_dplane_dpdk */
+#define ZEBRA_DEBUG_DPLANE_DPDK_DETAIL 0x02 /* zebra_debug_dplane_dpdk */
+
+#define ZEBRA_DEBUG_MLAG 0x01 /* zebra_debug_mlag */
+
+#define ZEBRA_DEBUG_NHG 0x01 /* zebra_debug_nexthop */
+#define ZEBRA_DEBUG_NHG_DETAILED 0x02 /* zebra_debug_nexthop */
+
+#define ZEBRA_DEBUG_EVPN_MH_ES 0x01 /* zebra_debug_evpn_mh: ethernet segment */
+#define ZEBRA_DEBUG_EVPN_MH_NH 0x02 /* zebra_debug_evpn_mh: nexthop */
+#define ZEBRA_DEBUG_EVPN_MH_MAC 0x04 /* zebra_debug_evpn_mh: MAC */
+#define ZEBRA_DEBUG_EVPN_MH_NEIGH 0x08 /* zebra_debug_evpn_mh: neigh */
+
+#define ZEBRA_DEBUG_PBR 0x01 /* zebra_debug_pbr */
+
+#define ZEBRA_DEBUG_NEIGH 0x01 /* zebra_debug_neigh */
+
+#define ZEBRA_DEBUG_TC 0x01 /* zebra_debug_tc */
+
+/* Debug related macro.
*/
+#define IS_ZEBRA_DEBUG_EVENT (zebra_debug_event & ZEBRA_DEBUG_EVENT)
+
+#define IS_ZEBRA_DEBUG_PACKET (zebra_debug_packet & ZEBRA_DEBUG_PACKET)
+#define IS_ZEBRA_DEBUG_SEND (zebra_debug_packet & ZEBRA_DEBUG_SEND)
+#define IS_ZEBRA_DEBUG_RECV (zebra_debug_packet & ZEBRA_DEBUG_RECV)
+#define IS_ZEBRA_DEBUG_DETAIL (zebra_debug_packet & ZEBRA_DEBUG_DETAIL)
+
+#define IS_ZEBRA_DEBUG_KERNEL (zebra_debug_kernel & ZEBRA_DEBUG_KERNEL)
+#define IS_ZEBRA_DEBUG_KERNEL_MSGDUMP_SEND \
+	(zebra_debug_kernel & ZEBRA_DEBUG_KERNEL_MSGDUMP_SEND)
+#define IS_ZEBRA_DEBUG_KERNEL_MSGDUMP_RECV \
+	(zebra_debug_kernel & ZEBRA_DEBUG_KERNEL_MSGDUMP_RECV)
+
+#define IS_ZEBRA_DEBUG_RIB \
+	(zebra_debug_rib & (ZEBRA_DEBUG_RIB | ZEBRA_DEBUG_RIB_DETAILED))
+#define IS_ZEBRA_DEBUG_RIB_DETAILED (zebra_debug_rib & ZEBRA_DEBUG_RIB_DETAILED)
+
+#define IS_ZEBRA_DEBUG_FPM (zebra_debug_fpm & ZEBRA_DEBUG_FPM)
+
+#define IS_ZEBRA_DEBUG_NHT (zebra_debug_nht & ZEBRA_DEBUG_NHT)
+#define IS_ZEBRA_DEBUG_NHT_DETAILED (zebra_debug_nht & ZEBRA_DEBUG_NHT_DETAILED)
+
+#define IS_ZEBRA_DEBUG_MPLS (zebra_debug_mpls & ZEBRA_DEBUG_MPLS)
+#define IS_ZEBRA_DEBUG_MPLS_DETAIL \
+	(zebra_debug_mpls & ZEBRA_DEBUG_MPLS_DETAILED)
+#define IS_ZEBRA_DEBUG_VXLAN (zebra_debug_vxlan & ZEBRA_DEBUG_VXLAN)
+#define IS_ZEBRA_DEBUG_PW (zebra_debug_pw & ZEBRA_DEBUG_PW)
+
+#define IS_ZEBRA_DEBUG_DPLANE (zebra_debug_dplane & ZEBRA_DEBUG_DPLANE)
+#define IS_ZEBRA_DEBUG_DPLANE_DETAIL \
+	(zebra_debug_dplane & ZEBRA_DEBUG_DPLANE_DETAILED)
+
+#define IS_ZEBRA_DEBUG_DPLANE_DPDK \
+	(zebra_debug_dplane_dpdk & ZEBRA_DEBUG_DPLANE_DPDK)
+#define IS_ZEBRA_DEBUG_DPLANE_DPDK_DETAIL \
+	(zebra_debug_dplane_dpdk & ZEBRA_DEBUG_DPLANE_DPDK_DETAIL)
+
+#define IS_ZEBRA_DEBUG_MLAG (zebra_debug_mlag & ZEBRA_DEBUG_MLAG)
+
+#define IS_ZEBRA_DEBUG_NHG (zebra_debug_nexthop & ZEBRA_DEBUG_NHG)
+
+#define IS_ZEBRA_DEBUG_NHG_DETAIL \
+	(zebra_debug_nexthop & ZEBRA_DEBUG_NHG_DETAILED)
+
+#define IS_ZEBRA_DEBUG_EVPN_MH_ES \
+	(zebra_debug_evpn_mh & ZEBRA_DEBUG_EVPN_MH_ES)
+#define IS_ZEBRA_DEBUG_EVPN_MH_NH \
+	(zebra_debug_evpn_mh & ZEBRA_DEBUG_EVPN_MH_NH)
+#define IS_ZEBRA_DEBUG_EVPN_MH_MAC \
+	(zebra_debug_evpn_mh & ZEBRA_DEBUG_EVPN_MH_MAC)
+#define IS_ZEBRA_DEBUG_EVPN_MH_NEIGH \
+	(zebra_debug_evpn_mh & ZEBRA_DEBUG_EVPN_MH_NEIGH)
+
+#define IS_ZEBRA_DEBUG_PBR (zebra_debug_pbr & ZEBRA_DEBUG_PBR)
+
+#define IS_ZEBRA_DEBUG_NEIGH (zebra_debug_neigh & ZEBRA_DEBUG_NEIGH)
+
+#define IS_ZEBRA_DEBUG_TC (zebra_debug_tc & ZEBRA_DEBUG_TC)
+
+extern unsigned long zebra_debug_event;
+extern unsigned long zebra_debug_packet;
+extern unsigned long zebra_debug_kernel;
+extern unsigned long zebra_debug_rib;
+extern unsigned long zebra_debug_fpm;
+extern unsigned long zebra_debug_nht;
+extern unsigned long zebra_debug_mpls;
+extern unsigned long zebra_debug_vxlan;
+extern unsigned long zebra_debug_pw;
+extern unsigned long zebra_debug_dplane;
+extern unsigned long zebra_debug_dplane_dpdk;
+extern unsigned long zebra_debug_mlag;
+extern unsigned long zebra_debug_nexthop;
+extern unsigned long zebra_debug_evpn_mh;
+extern unsigned long zebra_debug_pbr;
+extern unsigned long zebra_debug_neigh;
+extern unsigned long zebra_debug_tc;
+
+extern void zebra_debug_init(void);
+
+DECLARE_HOOK(zebra_debug_show_debugging, (struct vty *vty), (vty));
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif /* _ZEBRA_DEBUG_H */
diff --git a/zebra/debug_nl.c b/zebra/debug_nl.c
new file mode 100644
index 0000000..df0b5aa
--- /dev/null
+++ b/zebra/debug_nl.c
@@ -0,0 +1,1773 @@
+// SPDX-License-Identifier: ISC
+/*
+ * Copyright (c) 2018 Rafael Zalamena
+ */
+
+/*
+ * NOTE(review): the <...> targets of the angle-bracket includes below were
+ * missing from this copy (bare "#include").  They are filled in from the
+ * symbols this file uses -- confirm against the upstream file.
+ */
+#include <zebra.h>
+
+#if defined(HAVE_NETLINK) && defined(NETLINK_DEBUG)
+
+#include <sys/socket.h>
+
+#include <linux/netconf.h>
+#include <linux/netlink.h>
+#include <linux/nexthop.h>
+#include <linux/rtnetlink.h>
+#include <net/if_arp.h>
+#include <linux/fib_rules.h>
+#include <linux/lwtunnel.h>
+
+#include <stdio.h>
+#include <stdint.h>
+
+#include "zebra/rt_netlink.h"
+#include "zebra/kernel_netlink.h"
+#include "lib/vxlan.h"
+
+/*
+ * Map a netlink message type (NLMSG_* / RTM_*) to a short label.
+ * Values without a case yield "UNKNOWN".
+ */
+const char *nlmsg_type2str(uint16_t type)
+{
+	switch (type) {
+	/* Generic */
+	case NLMSG_NOOP:
+		return "NOOP";
+	case NLMSG_ERROR:
+		return "ERROR";
+	case NLMSG_DONE:
+		return "DONE";
+	case NLMSG_OVERRUN:
+		return "OVERRUN";
+
+	/* RTM */
+	case RTM_NEWLINK:
+		return "NEWLINK";
+	case RTM_DELLINK:
+		return "DELLINK";
+	case RTM_GETLINK:
+		return "GETLINK";
+	case RTM_SETLINK:
+		return "SETLINK";
+
+	case RTM_NEWADDR:
+		return "NEWADDR";
+	case RTM_DELADDR:
+		return "DELADDR";
+	case RTM_GETADDR:
+		return "GETADDR";
+
+	case RTM_NEWROUTE:
+		return "NEWROUTE";
+	case RTM_DELROUTE:
+		return "DELROUTE";
+	case RTM_GETROUTE:
+		return "GETROUTE";
+
+	case RTM_NEWNEIGH:
+		return "NEWNEIGH";
+	case RTM_DELNEIGH:
+		return "DELNEIGH";
+	case RTM_GETNEIGH:
+		return "GETNEIGH";
+
+	case RTM_NEWRULE:
+		return "NEWRULE";
+	case RTM_DELRULE:
+		return "DELRULE";
+	case RTM_GETRULE:
+		return "GETRULE";
+
+	case RTM_NEWNEXTHOP:
+		return "NEWNEXTHOP";
+	case RTM_DELNEXTHOP:
+		return "DELNEXTHOP";
+	case RTM_GETNEXTHOP:
+		return "GETNEXTHOP";
+
+	case RTM_NEWTUNNEL:
+		return "NEWTUNNEL";
+	case RTM_DELTUNNEL:
+		return "DELTUNNEL";
+	case RTM_GETTUNNEL:
+		return "GETTUNNEL";
+
+	/* These two labels keep the RTM_ prefix, unlike the ones above. */
+	case RTM_NEWNETCONF:
+		return "RTM_NEWNETCONF";
+	case RTM_DELNETCONF:
+		return "RTM_DELNETCONF";
+
+	default:
+		return "UNKNOWN";
+	}
+}
+
+/* Map an address family (AF_*) to its constant name, else "UNKNOWN". */
+const char *af_type2str(int type)
+{
+	switch (type) {
+	case AF_UNSPEC:
+		return "AF_UNSPEC";
+	case AF_UNIX:
+		return "AF_UNIX";
+	case AF_INET:
+		return "AF_INET";
+	case AF_INET6:
+		return "AF_INET6";
+	case AF_BRIDGE:
+		return "AF_BRIDGE";
+	case AF_NETLINK:
+		return "AF_NETLINK";
+#ifdef AF_MPLS
+	case AF_MPLS:
+		return "AF_MPLS";
+#endif /* AF_MPLS */
+	case AF_BLUETOOTH:
+		return "AF_BLUETOOTH";
+	case AF_VSOCK:
+		return "AF_VSOCK";
+	case AF_KEY:
+		return "AF_KEY";
+	case AF_PACKET:
+		return "AF_PACKET";
+	default:
+		return "UNKNOWN";
+	}
+}
+
+/* Map an ARPHRD_* hardware type to its name without the prefix. */
+const char *ifi_type2str(int type)
+{
+	switch (type) {
+	case ARPHRD_ETHER:
+		return "ETHER";
+	case ARPHRD_EETHER:
+		return "EETHER";
+	case ARPHRD_NETROM:
+		return "NETROM";
+	case ARPHRD_AX25:
+		return "AX25";
+	case ARPHRD_PRONET:
+		return "PRONET";
+	case ARPHRD_CHAOS:
+		return "CHAOS";
+	case ARPHRD_IEEE802:
+		return "IEEE802";
+	case ARPHRD_ARCNET:
+		return "ARCNET";
+	case ARPHRD_APPLETLK:
+		return "APPLETLK";
+	case ARPHRD_DLCI:
+		return "DLCI";
+	case ARPHRD_ATM:
+		return "ATM";
+	case ARPHRD_METRICOM:
+		return "METRICOM";
+	case ARPHRD_IEEE1394:
+		return "IEEE1394";
+	case ARPHRD_EUI64:
+		return "EUI64";
+	case ARPHRD_INFINIBAND:
+		return "INFINIBAND";
+	case ARPHRD_SLIP:
+		return "SLIP";
+	case ARPHRD_CSLIP:
+		return "CSLIP";
+	case ARPHRD_SLIP6:
+		return "SLIP6";
+	case ARPHRD_CSLIP6:
+		return "CSLIP6";
+	case ARPHRD_RSRVD:
+		return "RSRVD";
+	case ARPHRD_ADAPT:
+		return "ADAPT";
+	case ARPHRD_ROSE:
+		return "ROSE";
+	case ARPHRD_X25:
+		return "X25";
+	case ARPHRD_PPP:
+		return "PPP";
+	case ARPHRD_HDLC:
+		return "HDLC";
+	case ARPHRD_LAPB:
+		return "LAPB";
+	case ARPHRD_DDCMP:
+		return "DDCMP";
+	case ARPHRD_RAWHDLC:
+		return "RAWHDLC";
+	case ARPHRD_TUNNEL:
+		return "TUNNEL";
+	case ARPHRD_TUNNEL6:
+		return "TUNNEL6";
+	case ARPHRD_FRAD:
+		return "FRAD";
+	case ARPHRD_SKIP:
+		return "SKIP";
+	case ARPHRD_LOOPBACK:
+		return "LOOPBACK";
+	case ARPHRD_LOCALTLK:
+		return "LOCALTLK";
+	case ARPHRD_FDDI:
+		return "FDDI";
+	case ARPHRD_BIF:
+		return "BIF";
+	case ARPHRD_SIT:
+		return "SIT";
+	case ARPHRD_IPDDP:
+		return "IPDDP";
+	case ARPHRD_IPGRE:
+		return "IPGRE";
+	case ARPHRD_PIMREG:
+		return "PIMREG";
+	case ARPHRD_HIPPI:
+		return "HIPPI";
+	case ARPHRD_ASH:
+		return "ASH";
+	case ARPHRD_ECONET:
+		return "ECONET";
+	case ARPHRD_IRDA:
+		return "IRDA";
+	case ARPHRD_FCPP:
+		return "FCPP";
+	case ARPHRD_FCAL:
+		return "FCAL";
+	case ARPHRD_FCPL:
+		return "FCPL";
+	case ARPHRD_FCFABRIC:
+		return "FCFABRIC";
+	case ARPHRD_IEEE802_TR:
+		return "IEEE802_TR";
+	case ARPHRD_IEEE80211:
+		return "IEEE80211";
+	case ARPHRD_IEEE80211_PRISM:
+		return "IEEE80211_PRISM";
+	case ARPHRD_IEEE80211_RADIOTAP:
+		return "IEEE80211_RADIOTAP";
+	case ARPHRD_IEEE802154:
+		return "IEEE802154";
+#ifdef ARPHRD_VSOCKMON
+	case ARPHRD_VSOCKMON:
+		return "VSOCKMON";
+#endif /* ARPHRD_VSOCKMON */
+	case ARPHRD_VOID:
+		return "VOID";
+	case ARPHRD_NONE:
+		return "NONE";
+	default:
+		return "UNKNOWN";
+	}
+}
+
+/* Map an IFLA_PROTO_DOWN_REASON_* attribute type to a short label. */
+const char *ifla_pdr_type2str(int type)
+{
+	switch (type) {
+	case IFLA_PROTO_DOWN_REASON_UNSPEC:
+		return "UNSPEC";
+	case IFLA_PROTO_DOWN_REASON_MASK:
+		return "MASK";
+	case IFLA_PROTO_DOWN_REASON_VALUE:
+		return "VALUE";
+	default:
+		return "UNKNOWN";
+	}
+}
+
+/* Map an IFLA_INFO_* attribute type to a short label. */
+const char *ifla_info_type2str(int type)
+{
+	switch (type) {
+	case IFLA_INFO_UNSPEC:
+		return "UNSPEC";
+	case IFLA_INFO_KIND:
+		return "KIND";
+	case IFLA_INFO_DATA:
+		return "DATA";
+	case IFLA_INFO_XSTATS:
+		return "XSTATS";
+	case IFLA_INFO_SLAVE_KIND:
+		return "SLAVE_KIND";
+	case IFLA_INFO_SLAVE_DATA:
+		return "SLAVE_DATA";
+	default:
+		return "UNKNOWN";
+	}
+}
+
+/*
+ * Map an IFLA_* attribute type to a short label.  Attributes guarded by
+ * #ifdef are only handled when the build's headers define them.
+ */
+const char *rta_type2str(int type)
+{
+	switch (type) {
+	case IFLA_UNSPEC:
+		return "UNSPEC";
+	case IFLA_ADDRESS:
+		return "ADDRESS";
+	case IFLA_BROADCAST:
+		return "BROADCAST";
+	case IFLA_IFNAME:
+		return "IFNAME";
+	case IFLA_MTU:
+		return "MTU";
+	case IFLA_LINK:
+		return "LINK";
+	case IFLA_QDISC:
+		return "QDISC";
+	case IFLA_STATS:
+		return "STATS";
+	case IFLA_COST:
+		return "COST";
+	case IFLA_PRIORITY:
+		return "PRIORITY";
+	case IFLA_MASTER:
+		return "MASTER";
+	case IFLA_WIRELESS:
+		return "WIRELESS";
+	case IFLA_PROTINFO:
+		return "PROTINFO";
+	case IFLA_TXQLEN:
+		return "TXQLEN";
+	case IFLA_MAP:
+		return "MAP";
+	case IFLA_WEIGHT:
+		return "WEIGHT";
+	case IFLA_OPERSTATE:
+		return "OPERSTATE";
+	case IFLA_LINKMODE:
+		return "LINKMODE";
+	case IFLA_LINKINFO:
+		return "LINKINFO";
+	case IFLA_NET_NS_PID:
+		return "NET_NS_PID";
+	case IFLA_IFALIAS:
+		return "IFALIAS";
+	case IFLA_NUM_VF:
+		return "NUM_VF";
+	case IFLA_VFINFO_LIST:
+		return "VFINFO_LIST";
+	case IFLA_STATS64:
+		return "STATS64";
+	case IFLA_VF_PORTS:
+		return "VF_PORTS";
+	case IFLA_PORT_SELF:
+		return "PORT_SELF";
+	case IFLA_AF_SPEC:
+		return "AF_SPEC";
+	case IFLA_GROUP:
+		return "GROUP";
+	case IFLA_NET_NS_FD:
+		return "NET_NS_FD";
+	case IFLA_EXT_MASK:
+		return "EXT_MASK";
+	case IFLA_PROMISCUITY:
+		return "PROMISCUITY";
+	case IFLA_NUM_TX_QUEUES:
+		return "NUM_TX_QUEUES";
+	case IFLA_NUM_RX_QUEUES:
+		return "NUM_RX_QUEUES";
+	case IFLA_CARRIER:
+		return "CARRIER";
+	case IFLA_PHYS_PORT_ID:
+		return "PHYS_PORT_ID";
+	case IFLA_CARRIER_CHANGES:
+		return "CARRIER_CHANGES";
+	case IFLA_PHYS_SWITCH_ID:
+		return "PHYS_SWITCH_ID";
+	case IFLA_LINK_NETNSID:
+		return "LINK_NETNSID";
+	case IFLA_PHYS_PORT_NAME:
+		return "PHYS_PORT_NAME";
+	case IFLA_PROTO_DOWN:
+		return "PROTO_DOWN";
+#ifdef IFLA_GSO_MAX_SEGS
+	case IFLA_GSO_MAX_SEGS:
+		return "GSO_MAX_SEGS";
+#endif /* IFLA_GSO_MAX_SEGS */
+#ifdef IFLA_GSO_MAX_SIZE
+	case IFLA_GSO_MAX_SIZE:
+		return "GSO_MAX_SIZE";
+#endif /* IFLA_GSO_MAX_SIZE */
+#ifdef IFLA_PAD
+	case IFLA_PAD:
+		return "PAD";
+#endif /* IFLA_PAD */
+#ifdef IFLA_XDP
+	case IFLA_XDP:
+		return "XDP";
+#endif /* IFLA_XDP */
+#ifdef IFLA_EVENT
+	case IFLA_EVENT:
+		return "EVENT";
+#endif /* IFLA_EVENT */
+	case IFLA_PROTO_DOWN_REASON:
+		return "PROTO_DOWN_REASON";
+	default:
+		return "UNKNOWN";
+	}
+}
+
+/* Map a route type (RTN_*) to a short label. */
+const char *rtm_type2str(int type)
+{
+	switch (type) {
+	case RTN_UNSPEC:
+		return "UNSPEC";
+	case RTN_UNICAST:
+		return "UNICAST";
+	case RTN_LOCAL:
+		return "LOCAL";
+	case RTN_BROADCAST:
+		return "BROADCAST";
+	case RTN_ANYCAST:
+		return "ANYCAST";
+	case RTN_MULTICAST:
+		return "MULTICAST";
+	case RTN_BLACKHOLE:
+		return "BLACKHOLE";
+	case RTN_UNREACHABLE:
+		return "UNREACHABLE";
+	case RTN_PROHIBIT:
+		return "PROHIBIT";
+	case RTN_THROW:
+		return "THROW";
+	case RTN_NAT:
+		return "NAT";
+	case RTN_XRESOLVE:
+		return "XRESOLVE";
+	default:
+		return "UNKNOWN";
+	}
+}
+
+/* Map a route protocol (RTPROT_*) to a short label. */
+const char *rtm_protocol2str(int type)
+{
+	switch (type) {
+	case RTPROT_UNSPEC:
+		return "UNSPEC";
+	case RTPROT_REDIRECT:
+		return "REDIRECT";
+	case RTPROT_KERNEL:
+		return "KERNEL";
+	case RTPROT_BOOT:
+		return "BOOT";
+	case RTPROT_STATIC:
+		return "STATIC";
+	case RTPROT_GATED:
+		return "GATED";
+	case RTPROT_RA:
+		return "RA";
+	case RTPROT_MRT:
+		return "MRT";
+	case RTPROT_ZEBRA:
+		return "ZEBRA";
+	case RTPROT_BGP:
+		return "BGP";
+	case RTPROT_ISIS:
+		return "ISIS";
+	case RTPROT_OSPF:
+		return "OSPF";
+	case RTPROT_BIRD:
+		return "BIRD";
+	case RTPROT_DNROUTED:
+		return "DNROUTED";
+	case RTPROT_XORP:
+		return "XORP";
+	case RTPROT_NTK:
+		return "NTK";
+	case RTPROT_DHCP:
+		return "DHCP";
+	case RTPROT_MROUTED:
+		return "MROUTED";
+	case RTPROT_BABEL:
+		return "BABEL";
+	default:
+		return "UNKNOWN";
+	}
+}
+
+/* Map a route scope (RT_SCOPE_*) to a short label. */
+const char *rtm_scope2str(int type)
+{
+	switch (type) {
+	case RT_SCOPE_UNIVERSE:
+		return "UNIVERSE";
+	case RT_SCOPE_SITE:
+		return "SITE";
+	case RT_SCOPE_LINK:
+		return "LINK";
+	case RT_SCOPE_HOST:
+		return "HOST";
+	case RT_SCOPE_NOWHERE:
+		return "NOWHERE";
+	default:
+		return "UNKNOWN";
+	}
+}
+
+/* Map a route attribute type (RTA_*) to a short label. */
+const char *rtm_rta2str(int type)
+{
+	switch (type) {
+	case RTA_UNSPEC:
+		return "UNSPEC";
+	case RTA_DST:
+		return "DST";
+	case RTA_SRC:
+		return "SRC";
+	case RTA_IIF:
+		return "IIF";
+	case RTA_OIF:
+		return "OIF";
+	case RTA_GATEWAY:
+		return "GATEWAY";
+	case RTA_PRIORITY:
+		return "PRIORITY";
+	case RTA_PREF:
+		return "PREF";
+	case RTA_PREFSRC:
+		return "PREFSRC";
+	case RTA_MARK:
+		return "MARK";
+	case RTA_METRICS:
+		return "METRICS";
+	case RTA_MULTIPATH:
+		return "MULTIPATH";
+	case RTA_PROTOINFO:
+		return "PROTOINFO";
+	case RTA_FLOW:
+		return "FLOW";
+	case RTA_CACHEINFO:
+		return "CACHEINFO";
+	case RTA_TABLE:
+		return "TABLE";
+	case RTA_MFC_STATS:
+		return "MFC_STATS";
+	case RTA_NH_ID:
+		return "NH_ID";
+	case RTA_EXPIRES:
+		return "EXPIRES";
+	default:
+		return "UNKNOWN";
+	}
+}
+
+/* Map a neighbor attribute type (NDA_*) to a short label. */
+const char *neigh_rta2str(int type)
+{
+	switch (type) {
+	case NDA_UNSPEC:
+		return "UNSPEC";
+	case NDA_DST:
+		return "DST";
+	case NDA_LLADDR:
+		return "LLADDR";
+	case NDA_CACHEINFO:
+		return "CACHEINFO";
+	case NDA_PROBES:
+		return "PROBES";
+	case NDA_VLAN:
+		return "VLAN";
+	case NDA_PORT:
+		return "PORT";
+	case NDA_VNI:
+		return "VNI";
+	case NDA_IFINDEX:
+		return "IFINDEX";
+	case NDA_MASTER:
+		return "MASTER";
+	case NDA_LINK_NETNSID:
+		return "LINK_NETNSID";
+	default:
+		return "UNKNOWN";
+	}
+}
+
+/* Map an interface address attribute type (IFA_*) to a short label. */
+const char *ifa_rta2str(int type)
+{
+	switch (type) {
+	case IFA_UNSPEC:
+		return "UNSPEC";
+	case IFA_ADDRESS:
+		return "ADDRESS";
+	case IFA_LOCAL:
+		return "LOCAL";
+	case IFA_LABEL:
+		return "LABEL";
+	case IFA_BROADCAST:
+		return "BROADCAST";
+	case IFA_ANYCAST:
+		return "ANYCAST";
+	case IFA_CACHEINFO:
+		return "CACHEINFO";
+	case IFA_MULTICAST:
+		return "MULTICAST";
+	case IFA_FLAGS:
+		return "FLAGS";
+	default:
+		return "UNKNOWN";
+	}
+}
+
+/* Map a nexthop attribute type (NHA_*) to a short label. */
+const char *nhm_rta2str(int type)
+{
+	switch (type) {
+	case NHA_UNSPEC:
+		return "UNSPEC";
+	case NHA_ID:
+		return "ID";
+	case NHA_GROUP:
+		return "GROUP";
+	case NHA_GROUP_TYPE:
+		return "GROUP_TYPE";
+	case NHA_BLACKHOLE:
+		return "BLACKHOLE";
+	case NHA_OIF:
+		return "OIF";
+	case NHA_GATEWAY:
+		return "GATEWAY";
+	case NHA_ENCAP_TYPE:
+		return "ENCAP_TYPE";
+	case NHA_ENCAP:
+		return "ENCAP";
+	case NHA_GROUPS:
+		return "GROUPS";
+	case NHA_MASTER:
+		return "MASTER";
+	default:
+		return "UNKNOWN";
+	}
+}
+
+const char *frh_rta2str(int type)
+{
+	switch (type) {
+	case FRA_DST:
+		return "DST";
+	case FRA_SRC:
+		return "SRC";
+	case FRA_IIFNAME:
+		return "IIFNAME";
+	case FRA_GOTO:
+		return "GOTO";
+	case FRA_UNUSED2:
+		return "UNUSED2";
+	case FRA_PRIORITY:
+		return "PRIORITY";
+	case FRA_UNUSED3:
+		return "UNUSED3";
+	case FRA_UNUSED4:
+		return "UNUSED4";
+	case FRA_UNUSED5:
+		return "UNUSED5";
+	case FRA_FWMARK:
+		return "FWMARK";
+	case FRA_FLOW:
+		return "FLOW";
+	case FRA_TUN_ID:
+		return "TUN_ID";
+	case FRA_SUPPRESS_IFGROUP:
+		return "SUPPRESS_IFGROUP";
+	case FRA_SUPPRESS_PREFIXLEN:
+		return "SUPPRESS_PREFIXLEN";
+	case FRA_TABLE:
+		return "TABLE";
+	case FRA_FWMASK:
+		return "FWMASK";
+	case FRA_OIFNAME:
+		return "OIFNAME";
+	case FRA_PAD:
+		return "PAD";
+	case
FRA_L3MDEV: + return "L3MDEV"; + case FRA_UID_RANGE: + return "UID_RANGE"; + case FRA_PROTOCOL: + return "PROTOCOL"; + case FRA_IP_PROTO: + return "IP_PROTO"; + case FRA_SPORT_RANGE: + return "SPORT_RANGE"; + case FRA_DPORT_RANGE: + return "DPORT_RANGE"; + default: + return "UNKNOWN"; + } +} + +const char *frh_action2str(uint8_t action) +{ + switch (action) { + case FR_ACT_TO_TBL: + return "TO_TBL"; + case FR_ACT_GOTO: + return "GOTO"; + case FR_ACT_NOP: + return "NOP"; + case FR_ACT_RES3: + return "RES3"; + case FR_ACT_RES4: + return "RES4"; + case FR_ACT_BLACKHOLE: + return "BLACKHOLE"; + case FR_ACT_UNREACHABLE: + return "UNREACHABLE"; + case FR_ACT_PROHIBIT: + return "PROHIBIT"; + default: + return "UNKNOWN"; + } +} + +static const char *ncm_rta2str(int type) +{ + switch (type) { + case NETCONFA_UNSPEC: + return "UNSPEC"; + case NETCONFA_IFINDEX: + return "IFINDEX"; + case NETCONFA_FORWARDING: + return "FORWARDING"; + case NETCONFA_RP_FILTER: + return "RP_FILTER"; + case NETCONFA_MC_FORWARDING: + return "MCAST"; + case NETCONFA_PROXY_NEIGH: + return "PROXY_NEIGH"; + case NETCONFA_IGNORE_ROUTES_WITH_LINKDOWN: + return "IGNORE_LINKDOWN"; + case NETCONFA_INPUT: + return "MPLS"; + case NETCONFA_BC_FORWARDING: + return "BCAST"; + default: + return "UNKNOWN"; + } +} + +static void dump_on_off(uint32_t ival, const char *prefix) +{ + zlog_debug("%s%s", prefix, (ival != 0) ? "on" : "off"); +} + +static inline void flag_write(int flags, int flag, const char *flagstr, + char *buf, size_t buflen) +{ + if (CHECK_FLAG(flags, flag) == 0) + return; + + if (buf[0]) + strlcat(buf, ",", buflen); + + strlcat(buf, flagstr, buflen); +} + +const char *nlmsg_flags2str(uint16_t flags, char *buf, size_t buflen) +{ + const char *bufp = buf; + + *buf = 0; + /* Specific flags. 
*/ + flag_write(flags, NLM_F_REQUEST, "REQUEST", buf, buflen); + flag_write(flags, NLM_F_MULTI, "MULTI", buf, buflen); + flag_write(flags, NLM_F_ACK, "ACK", buf, buflen); + flag_write(flags, NLM_F_ECHO, "ECHO", buf, buflen); + flag_write(flags, NLM_F_DUMP, "DUMP", buf, buflen); + + /* Netlink family type dependent. */ + flag_write(flags, 0x0100, "(ROOT|REPLACE|CAPPED)", buf, buflen); + flag_write(flags, 0x0200, "(MATCH|EXCLUDE|ACK_TLVS)", buf, buflen); + flag_write(flags, 0x0400, "(ATOMIC|CREATE)", buf, buflen); + flag_write(flags, 0x0800, "(DUMP|APPEND)", buf, buflen); + + return (bufp); +} + +const char *if_flags2str(uint32_t flags, char *buf, size_t buflen) +{ + const char *bufp = buf; + + *buf = 0; + flag_write(flags, IFF_UP, "UP", buf, buflen); + flag_write(flags, IFF_BROADCAST, "BROADCAST", buf, buflen); + flag_write(flags, IFF_DEBUG, "DEBUG", buf, buflen); + flag_write(flags, IFF_LOOPBACK, "LOOPBACK", buf, buflen); + flag_write(flags, IFF_POINTOPOINT, "POINTOPOINT", buf, buflen); + flag_write(flags, IFF_NOTRAILERS, "NOTRAILERS", buf, buflen); + flag_write(flags, IFF_RUNNING, "RUNNING", buf, buflen); + flag_write(flags, IFF_NOARP, "NOARP", buf, buflen); + flag_write(flags, IFF_PROMISC, "PROMISC", buf, buflen); + flag_write(flags, IFF_ALLMULTI, "ALLMULTI", buf, buflen); + flag_write(flags, IFF_MASTER, "MASTER", buf, buflen); + flag_write(flags, IFF_SLAVE, "SLAVE", buf, buflen); + flag_write(flags, IFF_MULTICAST, "MULTICAST", buf, buflen); + flag_write(flags, IFF_PORTSEL, "PORTSEL", buf, buflen); + flag_write(flags, IFF_AUTOMEDIA, "AUTOMEDIA", buf, buflen); + flag_write(flags, IFF_DYNAMIC, "DYNAMIC", buf, buflen); + + return (bufp); +} + +const char *rtm_flags2str(uint32_t flags, char *buf, size_t buflen) +{ + const char *bufp = buf; + + *buf = 0; + flag_write(flags, RTM_F_NOTIFY, "NOTIFY", buf, buflen); + flag_write(flags, RTM_F_CLONED, "CLONED", buf, buflen); + flag_write(flags, RTM_F_EQUALIZE, "EQUALIZE", buf, buflen); + + return (bufp); +} + +const char 
*neigh_state2str(uint32_t flags, char *buf, size_t buflen) +{ + const char *bufp = buf; + + *buf = 0; + flag_write(flags, NUD_INCOMPLETE, "INCOMPLETE", buf, buflen); + flag_write(flags, NUD_REACHABLE, "REACHABLE", buf, buflen); + flag_write(flags, NUD_STALE, "STALE", buf, buflen); + flag_write(flags, NUD_DELAY, "DELAY", buf, buflen); + flag_write(flags, NUD_PROBE, "PROBE", buf, buflen); + flag_write(flags, NUD_FAILED, "FAILED", buf, buflen); + flag_write(flags, NUD_NOARP, "NOARP", buf, buflen); + flag_write(flags, NUD_PERMANENT, "PERMANENT", buf, buflen); + + return (bufp); +} + +const char *neigh_flags2str(uint32_t flags, char *buf, size_t buflen) +{ + const char *bufp = buf; + + *buf = 0; + flag_write(flags, NTF_USE, "USE", buf, buflen); + flag_write(flags, NTF_SELF, "SELF", buf, buflen); + flag_write(flags, NTF_MASTER, "MASTER", buf, buflen); + flag_write(flags, NTF_PROXY, "PROXY", buf, buflen); + flag_write(flags, NTF_EXT_LEARNED, "EXT_LEARNED", buf, buflen); +#ifdef NTF_OFFLOADED + flag_write(flags, NTF_OFFLOADED, "OFFLOADED", buf, buflen); +#endif /* NTF_OFFLOADED */ + flag_write(flags, NTF_ROUTER, "ROUTER", buf, buflen); + + return (bufp); +} + +const char *ifa_flags2str(uint32_t flags, char *buf, size_t buflen) +{ + const char *bufp = buf; + + *buf = 0; + flag_write(flags, IFA_F_SECONDARY, "SECONDARY", buf, buflen); + flag_write(flags, IFA_F_NODAD, "NODAD", buf, buflen); + flag_write(flags, IFA_F_OPTIMISTIC, "OPTIMISTIC", buf, buflen); + flag_write(flags, IFA_F_DADFAILED, "DADFAILED", buf, buflen); + flag_write(flags, IFA_F_HOMEADDRESS, "HOMEADDRESS", buf, buflen); + flag_write(flags, IFA_F_DEPRECATED, "DEPRECATED", buf, buflen); + flag_write(flags, IFA_F_TENTATIVE, "TENTATIVE", buf, buflen); + flag_write(flags, IFA_F_PERMANENT, "PERMANENT", buf, buflen); + flag_write(flags, IFA_F_MANAGETEMPADDR, "MANAGETEMPADDR", buf, buflen); + flag_write(flags, IFA_F_NOPREFIXROUTE, "NOPREFIXROUTE", buf, buflen); + flag_write(flags, IFA_F_MCAUTOJOIN, "MCAUTOJOIN", buf, 
buflen); + flag_write(flags, IFA_F_STABLE_PRIVACY, "STABLE_PRIVACY", buf, buflen); + + return (bufp); +} + +const char *nh_flags2str(uint32_t flags, char *buf, size_t buflen) +{ + const char *bufp = buf; + + *buf = 0; + flag_write(flags, RTNH_F_DEAD, "DEAD", buf, buflen); + flag_write(flags, RTNH_F_PERVASIVE, "PERVASIVE", buf, buflen); + flag_write(flags, RTNH_F_ONLINK, "ONLINK", buf, buflen); + flag_write(flags, RTNH_F_OFFLOAD, "OFFLOAD", buf, buflen); + flag_write(flags, RTNH_F_LINKDOWN, "LINKDOWN", buf, buflen); + flag_write(flags, RTNH_F_UNRESOLVED, "UNRESOLVED", buf, buflen); + + return (bufp); +} + +/* + * Netlink abstractions. + */ +static void nllink_pdr_dump(struct rtattr *rta, size_t msglen) +{ + size_t plen; + uint32_t u32v; + +next_rta: + /* Check the header for valid length and for outbound access. */ + if (RTA_OK(rta, msglen) == 0) + return; + + plen = RTA_PAYLOAD(rta); + zlog_debug(" linkinfo [len=%d (payload=%zu) type=(%d) %s]", + rta->rta_len, plen, rta->rta_type, + ifla_pdr_type2str(rta->rta_type)); + switch (rta->rta_type) { + case IFLA_PROTO_DOWN_REASON_MASK: + case IFLA_PROTO_DOWN_REASON_VALUE: + if (plen < sizeof(uint32_t)) { + zlog_debug(" invalid length"); + break; + } + + u32v = *(uint32_t *)RTA_DATA(rta); + zlog_debug(" %u", u32v); + break; + + default: + /* NOTHING: unhandled. */ + break; + } + + /* Get next pointer and start iteration again. */ + rta = RTA_NEXT(rta, msglen); + goto next_rta; +} + +static void nllink_linkinfo_dump(struct rtattr *rta, size_t msglen) +{ + size_t plen; + char dbuf[128]; + +next_rta: + /* Check the header for valid length and for outbound access. 
*/ + if (RTA_OK(rta, msglen) == 0) + return; + + plen = RTA_PAYLOAD(rta); + zlog_debug(" linkinfo [len=%d (payload=%zu) type=(%d) %s]", + rta->rta_len, plen, rta->rta_type, + ifla_info_type2str(rta->rta_type)); + switch (rta->rta_type) { + case IFLA_INFO_KIND: + if (plen == 0) { + zlog_debug(" invalid length"); + break; + } + + snprintf(dbuf, sizeof(dbuf), "%s", (char *)RTA_DATA(rta)); + zlog_debug(" %s", dbuf); + break; + case IFLA_INFO_SLAVE_KIND: + if (plen == 0) { + zlog_debug(" invalid length"); + break; + } + + snprintf(dbuf, sizeof(dbuf), "%s", (char *)RTA_DATA(rta)); + zlog_debug(" %s", dbuf); + break; + + default: + /* NOTHING: unhandled. */ + break; + } + + /* Get next pointer and start iteration again. */ + rta = RTA_NEXT(rta, msglen); + goto next_rta; +} + +static void nllink_dump(struct ifinfomsg *ifi, size_t msglen) +{ + uint8_t *datap; + struct rtattr *rta; + size_t plen, it; + uint32_t u32v; + uint8_t u8v; + char bytestr[16]; + char dbuf[128]; + unsigned short rta_type; + + /* Get the first attribute and go from there. */ + rta = IFLA_RTA(ifi); +next_rta: + /* Check the header for valid length and for outbound access. 
*/ + if (RTA_OK(rta, msglen) == 0) + return; + + plen = RTA_PAYLOAD(rta); + rta_type = rta->rta_type & ~NLA_F_NESTED; + zlog_debug(" rta [len=%d (payload=%zu) type=(%d) %s]", rta->rta_len, + plen, rta_type, rta_type2str(rta_type)); + switch (rta_type) { + case IFLA_IFALIAS: + if (plen == 0) { + zlog_debug(" invalid length"); + break; + } + + snprintf(dbuf, sizeof(dbuf), "%s", (char *)RTA_DATA(rta)); + zlog_debug(" %s", dbuf); + break; + + case IFLA_MTU: + case IFLA_TXQLEN: + case IFLA_NUM_TX_QUEUES: + case IFLA_NUM_RX_QUEUES: + case IFLA_GROUP: + case IFLA_PROMISCUITY: +#ifdef IFLA_GSO_MAX_SEGS + case IFLA_GSO_MAX_SEGS: +#endif /* IFLA_GSO_MAX_SEGS */ +#ifdef IFLA_GSO_MAX_SIZE + case IFLA_GSO_MAX_SIZE: +#endif /* IFLA_GSO_MAX_SIZE */ + case IFLA_CARRIER_CHANGES: + case IFLA_MASTER: + case IFLA_LINK: + if (plen < sizeof(uint32_t)) { + zlog_debug(" invalid length"); + break; + } + + u32v = *(uint32_t *)RTA_DATA(rta); + zlog_debug(" %u", u32v); + break; + + case IFLA_PROTO_DOWN: + if (plen < sizeof(uint8_t)) { + zlog_debug(" invalid length"); + break; + } + + u8v = *(uint8_t *)RTA_DATA(rta); + zlog_debug(" %u", u8v); + break; + case IFLA_ADDRESS: + datap = RTA_DATA(rta); + dbuf[0] = 0; + for (it = 0; it < plen; it++) { + snprintf(bytestr, sizeof(bytestr), "%02X:", *datap); + strlcat(dbuf, bytestr, sizeof(dbuf)); + datap++; + } + /* Remove trailing ':'. */ + if (dbuf[0]) + dbuf[strlen(dbuf) - 1] = 0; + + zlog_debug(" %s", dbuf[0] ? dbuf : ""); + break; + + case IFLA_LINKINFO: + nllink_linkinfo_dump(RTA_DATA(rta), plen); + break; + + case IFLA_PROTO_DOWN_REASON: + nllink_pdr_dump(RTA_DATA(rta), plen); + break; + + default: + /* NOTHING: unhandled. */ + break; + } + + /* Get next pointer and start iteration again. 
*/ + rta = RTA_NEXT(rta, msglen); + goto next_rta; +} +
+/*
+ * Dump the attributes that follow a `struct rtmsg` route header.
+ *
+ * rtm:    route header; the first attribute is taken from RTM_RTA(rtm).
+ * msglen: bytes of attribute data available after the header.
+ *
+ * Fixed-size attributes are length-checked before they are read, the same
+ * way nllink_dump() does it, so a short payload is logged as invalid
+ * instead of being read past its end.
+ */
+static void nlroute_dump(struct rtmsg *rtm, size_t msglen)
+{
+	struct rta_mfc_stats mfc_stats;
+	struct rtattr *rta;
+	size_t plen;
+	uint32_t u32v;
+	uint64_t u64v;
+	unsigned short rta_type;
+
+	/* Get the first attribute and go from there. */
+	rta = RTM_RTA(rtm);
+next_rta:
+	/* Check the header for valid length and for outbound access. */
+	if (RTA_OK(rta, msglen) == 0)
+		return;
+
+	plen = RTA_PAYLOAD(rta);
+	rta_type = rta->rta_type & NLA_TYPE_MASK;
+	zlog_debug(" rta [len=%d (payload=%zu) type=(%d) %s]", rta->rta_len,
+		   plen, rta_type, rtm_rta2str(rta_type));
+	switch (rta_type) {
+	case RTA_IIF:
+	case RTA_OIF:
+	case RTA_PRIORITY:
+	case RTA_TABLE:
+	case RTA_NH_ID:
+		if (plen < sizeof(uint32_t)) {
+			zlog_debug(" invalid length");
+			break;
+		}
+
+		u32v = *(uint32_t *)RTA_DATA(rta);
+		zlog_debug(" %u", u32v);
+		break;
+
+	case RTA_EXPIRES:
+		if (plen < sizeof(uint64_t)) {
+			zlog_debug(" invalid length");
+			break;
+		}
+
+		/* Copy out: do not rely on the payload being 8-byte aligned. */
+		memcpy(&u64v, RTA_DATA(rta), sizeof(u64v));
+		zlog_debug(" %" PRIu64, u64v);
+		break;
+
+	case RTA_GATEWAY:
+	case RTA_DST:
+	case RTA_SRC:
+	case RTA_PREFSRC:
+		/* The payload size tells IPv4 from IPv6; anything else is skipped. */
+		switch (plen) {
+		case sizeof(struct in_addr):
+			zlog_debug(" %pI4",
+				   (struct in_addr *)RTA_DATA(rta));
+			break;
+		case sizeof(struct in6_addr):
+			zlog_debug(" %pI6",
+				   (struct in6_addr *)RTA_DATA(rta));
+			break;
+		default:
+			break;
+		}
+		break;
+
+	case RTA_MFC_STATS:
+		if (plen < sizeof(mfc_stats)) {
+			zlog_debug(" invalid length");
+			break;
+		}
+
+		/* Copy out for the same alignment reason as RTA_EXPIRES. */
+		memcpy(&mfc_stats, RTA_DATA(rta), sizeof(mfc_stats));
+		zlog_debug(" pkts=%ju bytes=%ju wrong_if=%ju",
+			   (uintmax_t)mfc_stats.mfcs_packets,
+			   (uintmax_t)mfc_stats.mfcs_bytes,
+			   (uintmax_t)mfc_stats.mfcs_wrong_if);
+		break;
+
+	default:
+		/* NOTHING: unhandled. */
+		break;
+	}
+
+	/* Get next pointer and start iteration again. */
+	rta = RTA_NEXT(rta, msglen);
+	goto next_rta;
+}
+
+/*
+ * Dump the attributes that follow a `struct ndmsg` neighbor header.
+ *
+ * ndm:    neighbor header; the first attribute is taken from NDA_RTA(ndm).
+ * msglen: bytes of attribute data available after the header.
+ */
+static void nlneigh_dump(struct ndmsg *ndm, size_t msglen)
+{
+	struct rtattr *rta;
+	uint8_t *datap;
+	size_t plen, it;
+	uint16_t vid;
+	char bytestr[16];
+	char dbuf[128];
+	unsigned short rta_type;
+
+#ifndef NDA_RTA
+#define NDA_RTA(ndm)                                                           \
+	/* struct ndmsg *ndm; */                                               \
+	((struct rtattr *)(((uint8_t *)(ndm))                                  \
+			   + NLMSG_ALIGN(sizeof(struct ndmsg))))
+#endif /* NDA_RTA */
+
+	/* Get the first attribute and go from there. */
+	rta = NDA_RTA(ndm);
+next_rta:
+	/* Check the header for valid length and for outbound access. */
+	if (RTA_OK(rta, msglen) == 0)
+		return;
+
+	plen = RTA_PAYLOAD(rta);
+	rta_type = rta->rta_type & ~NLA_F_NESTED;
+	/* Log the masked type so the number agrees with the decoded name. */
+	zlog_debug(" rta [len=%d (payload=%zu) type=(%d) %s]", rta->rta_len,
+		   plen, rta_type, neigh_rta2str(rta_type));
+	switch (rta_type) {
+	case NDA_LLADDR:
+		/* Render the link-layer address as colon-separated hex bytes. */
+		datap = RTA_DATA(rta);
+		dbuf[0] = 0;
+		for (it = 0; it < plen; it++) {
+			snprintf(bytestr, sizeof(bytestr), "%02X:", *datap);
+			strlcat(dbuf, bytestr, sizeof(dbuf));
+			datap++;
+		}
+		/* Remove trailing ':'. */
+		if (dbuf[0])
+			dbuf[strlen(dbuf) - 1] = 0;
+
+		zlog_debug(" %s", dbuf[0] ? dbuf : "");
+		break;
+
+	case NDA_DST:
+		switch (plen) {
+		case sizeof(struct in_addr):
+			zlog_debug(" %pI4",
+				   (struct in_addr *)RTA_DATA(rta));
+			break;
+		case sizeof(struct in6_addr):
+			zlog_debug(" %pI6",
+				   (struct in6_addr *)RTA_DATA(rta));
+			break;
+		default:
+			break;
+		}
+		break;
+
+	case NDA_VLAN:
+		if (plen < sizeof(uint16_t)) {
+			zlog_debug(" invalid length");
+			break;
+		}
+
+		vid = *(uint16_t *)RTA_DATA(rta);
+		zlog_debug(" %d", vid);
+		break;
+
+	default:
+		/* NOTHING: unhandled. */
+		break;
+	}
+
+	/* Get next pointer and start iteration again. */
+	rta = RTA_NEXT(rta, msglen);
+	goto next_rta;
+}
+
+static void nlifa_dump(struct ifaddrmsg *ifa, size_t msglen) +{ + struct rtattr *rta; + size_t plen; + uint32_t u32v; + + /* Get the first attribute and go from there. */ + rta = IFA_RTA(ifa); +next_rta: + /* Check the header for valid length and for outbound access.
*/
+	if (RTA_OK(rta, msglen) == 0)
+		return;
+
+	plen = RTA_PAYLOAD(rta);
+	zlog_debug(" rta [len=%d (payload=%zu) type=(%d) %s]", rta->rta_len,
+		   plen, rta->rta_type, ifa_rta2str(rta->rta_type));
+	switch (rta->rta_type) {
+	case IFA_UNSPEC:
+		/* Do not read a 32-bit value out of a shorter payload. */
+		if (plen < sizeof(uint32_t)) {
+			zlog_debug(" invalid length");
+			break;
+		}
+
+		u32v = *(uint32_t *)RTA_DATA(rta);
+		zlog_debug(" %u", u32v);
+		break;
+
+	case IFA_LABEL:
+		if (plen == 0) {
+			zlog_debug(" invalid length");
+			break;
+		}
+
+		/*
+		 * Bound the print by the payload size: nothing here
+		 * guarantees the label is NUL terminated.
+		 */
+		zlog_debug(" %.*s", (int)plen, (const char *)RTA_DATA(rta));
+		break;
+
+	case IFA_ADDRESS:
+	case IFA_LOCAL:
+	case IFA_BROADCAST:
+		/* 4 bytes is an IPv4 address, 16 bytes an IPv6 address. */
+		switch (plen) {
+		case 4:
+			zlog_debug(" %pI4",
+				   (struct in_addr *)RTA_DATA(rta));
+			break;
+		case 16:
+			zlog_debug(" %pI6",
+				   (struct in6_addr *)RTA_DATA(rta));
+			break;
+		default:
+			break;
+		}
+		break;
+
+	default:
+		/* NOTHING: unhandled. */
+		break;
+	}
+
+	/* Get next pointer and start iteration again. */
+	rta = RTA_NEXT(rta, msglen);
+	goto next_rta;
+}
+
+static void nltnl_dump(struct tunnel_msg *tnlm, size_t msglen) +{ + struct rtattr *attr; + vni_t vni_start = 0, vni_end = 0; + struct rtattr *ttb[VXLAN_VNIFILTER_ENTRY_MAX + 1]; + uint8_t rta_type; + + attr = TUNNEL_RTA(tnlm); +next_attr: + /* Check the header for valid length and for outbound access.
*/ + if (RTA_OK(attr, msglen) == 0) + return; + + rta_type = attr->rta_type & NLA_TYPE_MASK; + + if (rta_type != VXLAN_VNIFILTER_ENTRY) { + attr = RTA_NEXT(attr, msglen); + goto next_attr; + } + + memset(ttb, 0, sizeof(ttb)); + + netlink_parse_rtattr_flags(ttb, VXLAN_VNIFILTER_ENTRY_MAX, + RTA_DATA(attr), RTA_PAYLOAD(attr), + NLA_F_NESTED); + + if (ttb[VXLAN_VNIFILTER_ENTRY_START]) + vni_start = + *(uint32_t *)RTA_DATA(ttb[VXLAN_VNIFILTER_ENTRY_START]); + + if (ttb[VXLAN_VNIFILTER_ENTRY_END]) + vni_end = *(uint32_t *)RTA_DATA(ttb[VXLAN_VNIFILTER_ENTRY_END]); + zlog_debug(" vni_start %u, vni_end %u", vni_start, vni_end); + + attr = RTA_NEXT(attr, msglen); + goto next_attr; +} + +static const char *lwt_type2str(uint16_t type) +{ + switch (type) { + case LWTUNNEL_ENCAP_NONE: + return "NONE"; + case LWTUNNEL_ENCAP_MPLS: + return "MPLS"; + case LWTUNNEL_ENCAP_IP: + return "IPv4"; + case LWTUNNEL_ENCAP_ILA: + return "ILA"; + case LWTUNNEL_ENCAP_IP6: + return "IPv6"; + case LWTUNNEL_ENCAP_SEG6: + return "SEG6"; + case LWTUNNEL_ENCAP_BPF: + return "BPF"; + case LWTUNNEL_ENCAP_SEG6_LOCAL: + return "SEG6_LOCAL"; + default: + return "UNKNOWN"; + } +} + +static const char *nhg_type2str(uint16_t type) +{ + switch (type) { + case NEXTHOP_GRP_TYPE_MPATH: + return "MULTIPATH"; + case NEXTHOP_GRP_TYPE_RES: + return "RESILIENT MULTIPATH"; + default: + return "UNKNOWN"; + } +} + +static void nlnh_dump(struct nhmsg *nhm, size_t msglen) +{ + struct rtattr *rta; + int ifindex; + size_t plen; + uint16_t u16v; + uint32_t u32v; + unsigned long count, i; + struct nexthop_grp *nhgrp; + unsigned short rta_type; + + rta = RTM_NHA(nhm); + +next_rta: + /* Check the header for valid length and for outbound access. 
*/
+	if (RTA_OK(rta, msglen) == 0)
+		return;
+
+	plen = RTA_PAYLOAD(rta);
+	rta_type = rta->rta_type & ~NLA_F_NESTED;
+	/* Log the masked type so the number agrees with the decoded name. */
+	zlog_debug(" rta [len=%d (payload=%zu) type=(%d) %s]", rta->rta_len,
+		   plen, rta_type, nhm_rta2str(rta_type));
+	switch (rta_type) {
+	case NHA_ID:
+		if (plen < sizeof(uint32_t)) {
+			zlog_debug(" invalid length");
+			break;
+		}
+
+		u32v = *(uint32_t *)RTA_DATA(rta);
+		zlog_debug(" %u", u32v);
+		break;
+	case NHA_GROUP:
+		/* The payload must be a whole, non-zero number of entries. */
+		nhgrp = (struct nexthop_grp *)RTA_DATA(rta);
+		count = (RTA_PAYLOAD(rta) / sizeof(*nhgrp));
+		if (count == 0
+		    || (count * sizeof(*nhgrp)) != RTA_PAYLOAD(rta)) {
+			zlog_debug(" invalid nexthop group received");
+			return;
+		}
+
+		for (i = 0; i < count; i++)
+			zlog_debug(" id %d weight %d", nhgrp[i].id,
+				   nhgrp[i].weight);
+		break;
+	case NHA_ENCAP_TYPE:
+		if (plen < sizeof(uint16_t)) {
+			zlog_debug(" invalid length");
+			break;
+		}
+
+		u16v = *(uint16_t *)RTA_DATA(rta);
+		zlog_debug(" %s", lwt_type2str(u16v));
+		break;
+	case NHA_GROUP_TYPE:
+		if (plen < sizeof(uint16_t)) {
+			zlog_debug(" invalid length");
+			break;
+		}
+
+		u16v = *(uint16_t *)RTA_DATA(rta);
+		zlog_debug(" %s", nhg_type2str(u16v));
+		break;
+	case NHA_BLACKHOLE:
+		/* NOTHING */
+		break;
+	case NHA_OIF:
+		if (plen < sizeof(int)) {
+			zlog_debug(" invalid length");
+			break;
+		}
+
+		ifindex = *(int *)RTA_DATA(rta);
+		zlog_debug(" %d", ifindex);
+		break;
+	case NHA_GATEWAY:
+		/*
+		 * The address family comes from the message header, so
+		 * make sure the payload really holds an address of that
+		 * size before printing it.
+		 */
+		switch (nhm->nh_family) {
+		case AF_INET:
+			if (plen < sizeof(struct in_addr)) {
+				zlog_debug(" invalid length");
+				break;
+			}
+
+			zlog_debug(" %pI4",
+				   (struct in_addr *)RTA_DATA(rta));
+			break;
+		case AF_INET6:
+			if (plen < sizeof(struct in6_addr)) {
+				zlog_debug(" invalid length");
+				break;
+			}
+
+			zlog_debug(" %pI6",
+				   (struct in6_addr *)RTA_DATA(rta));
+			break;
+
+		default:
+			zlog_debug(" invalid family %d", nhm->nh_family);
+			break;
+		}
+		break;
+	case NHA_ENCAP:
+		/* TODO: handle MPLS labels. */
+		zlog_debug(" unparsed MPLS labels");
+		break;
+	case NHA_GROUPS:
+		/* TODO: handle this message. */
+		zlog_debug(" unparsed GROUPS message");
+		break;
+
+	default:
+		/* NOTHING: unhandled. */
+		break;
+	}
+
+	/* Get next pointer and start iteration again.
*/ + rta = RTA_NEXT(rta, msglen); + goto next_rta; +} +
+/*
+ * Dump the attributes that follow a `struct fib_rule_hdr` rule header.
+ *
+ * frh:    rule header; the first attribute is taken from RTM_RTA(frh).
+ * msglen: bytes of attribute data available after the header.
+ *
+ * Every fixed-size attribute is length-checked before it is read and
+ * interface names are copied with a bound, so a short or unterminated
+ * payload is never read past its end.
+ */
+static void nlrule_dump(struct fib_rule_hdr *frh, size_t msglen)
+{
+	struct rtattr *rta;
+	size_t plen;
+	uint8_t u8v;
+	uint32_t u32v;
+	int32_t s32v;
+	uint64_t u64v;
+	char dbuf[128];
+	struct fib_rule_uid_range *u_range;
+	struct fib_rule_port_range *p_range;
+
+	/* Get the first attribute and go from there. */
+	rta = RTM_RTA(frh);
+next_rta:
+	/* Check the header for valid length and for outbound access. */
+	if (RTA_OK(rta, msglen) == 0)
+		return;
+
+	plen = RTA_PAYLOAD(rta);
+	zlog_debug(" rta [len=%d (payload=%zu) type=(%d) %s]", rta->rta_len,
+		   plen, rta->rta_type, frh_rta2str(rta->rta_type));
+	switch (rta->rta_type) {
+	case FRA_DST:
+	case FRA_SRC:
+		/* The payload size tells IPv4 from IPv6; anything else is skipped. */
+		switch (plen) {
+		case sizeof(struct in_addr):
+			zlog_debug(" %pI4",
+				   (struct in_addr *)RTA_DATA(rta));
+			break;
+		case sizeof(struct in6_addr):
+			zlog_debug(" %pI6",
+				   (struct in6_addr *)RTA_DATA(rta));
+			break;
+		default:
+			break;
+		}
+		break;
+
+	case FRA_IIFNAME:
+	case FRA_OIFNAME:
+		if (plen == 0) {
+			zlog_debug(" invalid length");
+			break;
+		}
+
+		/* Bound the copy by the payload size, not by a NUL byte. */
+		snprintf(dbuf, sizeof(dbuf), "%.*s", (int)plen,
+			 (char *)RTA_DATA(rta));
+		zlog_debug(" %s", dbuf);
+		break;
+
+	case FRA_GOTO:
+	case FRA_UNUSED2:
+	case FRA_PRIORITY:
+	case FRA_UNUSED3:
+	case FRA_UNUSED4:
+	case FRA_UNUSED5:
+	case FRA_FWMARK:
+	case FRA_FLOW:
+	case FRA_TABLE:
+	case FRA_FWMASK:
+		if (plen < sizeof(uint32_t)) {
+			zlog_debug(" invalid length");
+			break;
+		}
+
+		u32v = *(uint32_t *)RTA_DATA(rta);
+		zlog_debug(" %u", u32v);
+		break;
+
+	case FRA_SUPPRESS_IFGROUP:
+	case FRA_SUPPRESS_PREFIXLEN:
+		if (plen < sizeof(int32_t)) {
+			zlog_debug(" invalid length");
+			break;
+		}
+
+		s32v = *(int32_t *)RTA_DATA(rta);
+		zlog_debug(" %d", s32v);
+		break;
+
+	case FRA_TUN_ID:
+		if (plen < sizeof(uint64_t)) {
+			zlog_debug(" invalid length");
+			break;
+		}
+
+		/* Copy out: do not rely on the payload being 8-byte aligned. */
+		memcpy(&u64v, RTA_DATA(rta), sizeof(u64v));
+		zlog_debug(" %" PRIu64, u64v);
+		break;
+
+	case FRA_L3MDEV:
+	case FRA_PROTOCOL:
+	case FRA_IP_PROTO:
+		if (plen < sizeof(uint8_t)) {
+			zlog_debug(" invalid length");
+			break;
+		}
+
+		u8v = *(uint8_t *)RTA_DATA(rta);
+		zlog_debug(" %u", u8v);
+		break;
+
+	case FRA_UID_RANGE:
+		if (plen < sizeof(*u_range)) {
+			zlog_debug(" invalid length");
+			break;
+		}
+
+		/* A range whose ends are equal is printed as a single value. */
+		u_range = (struct fib_rule_uid_range *)RTA_DATA(rta);
+		if (u_range->start == u_range->end)
+			zlog_debug(" %u", u_range->start);
+		else
+			zlog_debug(" %u-%u", u_range->start, u_range->end);
+		break;
+
+	case FRA_SPORT_RANGE:
+	case FRA_DPORT_RANGE:
+		if (plen < sizeof(*p_range)) {
+			zlog_debug(" invalid length");
+			break;
+		}
+
+		p_range = (struct fib_rule_port_range *)RTA_DATA(rta);
+		if (p_range->start == p_range->end)
+			zlog_debug(" %u", p_range->start);
+		else
+			zlog_debug(" %u-%u", p_range->start, p_range->end);
+		break;
+
+	case FRA_PAD: /* fallthrough */
+	default:
+		/* NOTHING: unhandled. */
+		break;
+	}
+
+	/* Get next pointer and start iteration again. */
+	rta = RTA_NEXT(rta, msglen);
+	goto next_rta;
+}
+
+/* Map a traffic-control netlink message type to the object kind it carries. */
+static const char *tcm_nltype2str(int nltype)
+{
+	switch (nltype) {
+	case RTM_NEWQDISC:
+	case RTM_DELQDISC:
+		return "qdisc";
+	case RTM_NEWTCLASS:
+	case RTM_DELTCLASS:
+		return "tclass";
+	case RTM_NEWTFILTER:
+	case RTM_DELTFILTER:
+		return "tfilter";
+	default:
+		/* should never hit */
+		return "unknown";
+	}
+}
+
+/*
+ * Dump the attributes that follow a `struct netconfmsg` header.
+ *
+ * ncm:    netconf header; attributes start NLMSG_ALIGN(sizeof(*ncm)) bytes
+ *         after it.
+ * msglen: bytes of attribute data available after the header.
+ *
+ * All decoded attributes are 32-bit values and are length-checked before
+ * being read.
+ */
+static void nlncm_dump(const struct netconfmsg *ncm, size_t msglen)
+{
+	const struct rtattr *rta;
+	size_t plen;
+	uint32_t ival;
+
+	rta = (void *)((const char *)ncm +
+		       NLMSG_ALIGN(sizeof(struct netconfmsg)));
+
+next_rta:
+	/* Check the attr header for valid length. */
+	if (RTA_OK(rta, msglen) == 0)
+		return;
+
+	plen = RTA_PAYLOAD(rta);
+
+	zlog_debug(" rta [len=%d (payload=%zu) type=(%d) %s]", rta->rta_len,
+		   plen, rta->rta_type, ncm_rta2str(rta->rta_type));
+
+	switch (rta->rta_type) {
+	case NETCONFA_IFINDEX:
+		if (plen < sizeof(uint32_t)) {
+			zlog_debug(" invalid length");
+			break;
+		}
+
+		ival = *(uint32_t *)RTA_DATA(rta);
+		zlog_debug(" %d", (int32_t)ival);
+		break;
+
+	/* Most attrs are just on/off. */
+	case NETCONFA_FORWARDING:
+	case NETCONFA_RP_FILTER:
+	case NETCONFA_MC_FORWARDING:
+	case NETCONFA_PROXY_NEIGH:
+	case NETCONFA_IGNORE_ROUTES_WITH_LINKDOWN:
+	case NETCONFA_INPUT:
+	case NETCONFA_BC_FORWARDING:
+		if (plen < sizeof(uint32_t)) {
+			zlog_debug(" invalid length");
+			break;
+		}
+
+		ival = *(uint32_t *)RTA_DATA(rta);
+		dump_on_off(ival, " ");
+		break;
+	default:
+		/* NOTHING: unhandled. */
+		break;
+	}
+
+	/* Get next pointer and start iteration again.
*/ + rta = RTA_NEXT(rta, msglen); + goto next_rta; +} +
+/*
+ * Write a human readable dump of a buffer of netlink messages to the
+ * debug log.
+ *
+ * msg:    start of the buffer; interpreted as a `struct nlmsghdr`.
+ * msglen: number of bytes available in the buffer.
+ *
+ * Each header is logged, then the type-specific fixed header and its
+ * attributes are handed to the matching nl*_dump() helper. Iteration
+ * stops at NLMSG_DONE or at the first header that NLMSG_OK() rejects;
+ * that check is applied to the first header as well, so an empty or
+ * truncated buffer is never dereferenced.
+ */
+void nl_dump(void *msg, size_t msglen)
+{
+	struct nlmsghdr *nlmsg = msg;
+	struct nlmsgerr *nlmsgerr;
+	struct rtgenmsg *rtgen;
+	struct ifaddrmsg *ifa;
+	struct ndmsg *ndm;
+	struct rtmsg *rtm;
+	struct nhmsg *nhm;
+	struct netconfmsg *ncm;
+	struct ifinfomsg *ifi;
+	struct tunnel_msg *tnlm;
+	struct fib_rule_hdr *frh;
+	struct tcmsg *tcm;
+
+	/* Scratch space for the flag formatters. */
+	char fbuf[128];
+	char ibuf[128];
+
+next_header:
+	/* Never touch a header that does not fit in what is left. */
+	if (NLMSG_OK(nlmsg, msglen) == 0)
+		return;
+
+	zlog_debug(
+		"nlmsghdr [len=%u type=(%d) %s flags=(0x%04x) {%s} seq=%u pid=%u]",
+		nlmsg->nlmsg_len, nlmsg->nlmsg_type,
+		nlmsg_type2str(nlmsg->nlmsg_type), nlmsg->nlmsg_flags,
+		nlmsg_flags2str(nlmsg->nlmsg_flags, fbuf, sizeof(fbuf)),
+		nlmsg->nlmsg_seq, nlmsg->nlmsg_pid);
+
+	switch (nlmsg->nlmsg_type) {
+	/* Generic. */
+	case NLMSG_NOOP:
+		break;
+	case NLMSG_ERROR:
+		nlmsgerr = NLMSG_DATA(nlmsg);
+		zlog_debug(" nlmsgerr [error=(%d) %s]", nlmsgerr->error,
+			   strerror(-nlmsgerr->error));
+		break;
+	case NLMSG_DONE:
+		return;
+	case NLMSG_OVERRUN:
+		break;
+
+	/* RTM. */
+	case RTM_NEWLINK:
+	case RTM_DELLINK:
+	case RTM_SETLINK:
+		ifi = NLMSG_DATA(nlmsg);
+		zlog_debug(
+			" ifinfomsg [family=%d type=(%d) %s index=%d flags=0x%04x {%s}]",
+			ifi->ifi_family, ifi->ifi_type,
+			ifi_type2str(ifi->ifi_type), ifi->ifi_index,
+			ifi->ifi_flags,
+			if_flags2str(ifi->ifi_flags, ibuf, sizeof(ibuf)));
+		nllink_dump(ifi, nlmsg->nlmsg_len - NLMSG_LENGTH(sizeof(*ifi)));
+		break;
+	case RTM_GETLINK:
+		rtgen = NLMSG_DATA(nlmsg);
+		zlog_debug(" rtgen [family=(%d) %s]", rtgen->rtgen_family,
+			   af_type2str(rtgen->rtgen_family));
+		break;
+
+	case RTM_NEWROUTE:
+	case RTM_DELROUTE:
+	case RTM_GETROUTE:
+		rtm = NLMSG_DATA(nlmsg);
+		zlog_debug(
+			" rtmsg [family=(%d) %s dstlen=%d srclen=%d tos=%d table=%d protocol=(%d) %s scope=(%d) %s type=(%d) %s flags=0x%04x {%s}]",
+			rtm->rtm_family, af_type2str(rtm->rtm_family),
+			rtm->rtm_dst_len, rtm->rtm_src_len, rtm->rtm_tos,
+			rtm->rtm_table, rtm->rtm_protocol,
+			rtm_protocol2str(rtm->rtm_protocol), rtm->rtm_scope,
+			rtm_scope2str(rtm->rtm_scope), rtm->rtm_type,
+			rtm_type2str(rtm->rtm_type), rtm->rtm_flags,
+			rtm_flags2str(rtm->rtm_flags, fbuf, sizeof(fbuf)));
+		nlroute_dump(rtm,
+			     nlmsg->nlmsg_len - NLMSG_LENGTH(sizeof(*rtm)));
+		break;
+
+	case RTM_NEWNEIGH:
+	case RTM_DELNEIGH:
+		ndm = NLMSG_DATA(nlmsg);
+		zlog_debug(
+			" ndm [family=%d (%s) ifindex=%d state=0x%04x {%s} flags=0x%04x {%s} type=%d (%s)]",
+			ndm->ndm_family, af_type2str(ndm->ndm_family),
+			ndm->ndm_ifindex, ndm->ndm_state,
+			neigh_state2str(ndm->ndm_state, ibuf, sizeof(ibuf)),
+			ndm->ndm_flags,
+			neigh_flags2str(ndm->ndm_flags, fbuf, sizeof(fbuf)),
+			ndm->ndm_type, rtm_type2str(ndm->ndm_type));
+		nlneigh_dump(ndm,
+			     nlmsg->nlmsg_len - NLMSG_LENGTH(sizeof(*ndm)));
+		break;
+
+	case RTM_NEWRULE:
+	case RTM_DELRULE:
+		frh = NLMSG_DATA(nlmsg);
+		zlog_debug(
+			" frh [family=%d (%s) dst_len=%d src_len=%d tos=%d table=%d res1=%d res2=%d action=%d (%s) flags=0x%x]",
+			frh->family, af_type2str(frh->family), frh->dst_len,
+			frh->src_len, frh->tos, frh->table, frh->res1,
+			frh->res2, frh->action, frh_action2str(frh->action),
+			frh->flags);
+		nlrule_dump(frh, nlmsg->nlmsg_len - NLMSG_LENGTH(sizeof(*frh)));
+		break;
+
+
+	case RTM_NEWADDR:
+	case RTM_DELADDR:
+		ifa = NLMSG_DATA(nlmsg);
+		/*
+		 * ifa_flags carries IFA_F_* address flags, so decode it
+		 * with ifa_flags2str(); if_flags2str() decodes IFF_*
+		 * interface flags and would print unrelated names.
+		 */
+		zlog_debug(
+			" ifa [family=(%d) %s prefixlen=%d flags=0x%04x {%s} scope=%d index=%u]",
+			ifa->ifa_family, af_type2str(ifa->ifa_family),
+			ifa->ifa_prefixlen, ifa->ifa_flags,
+			ifa_flags2str(ifa->ifa_flags, fbuf, sizeof(fbuf)),
+			ifa->ifa_scope, ifa->ifa_index);
+		nlifa_dump(ifa, nlmsg->nlmsg_len - NLMSG_LENGTH(sizeof(*ifa)));
+		break;
+
+	case RTM_NEWNEXTHOP:
+	case RTM_DELNEXTHOP:
+	case RTM_GETNEXTHOP:
+		nhm = NLMSG_DATA(nlmsg);
+		zlog_debug(
+			" nhm [family=(%d) %s scope=(%d) %s protocol=(%d) %s flags=0x%08x {%s}]",
+			nhm->nh_family, af_type2str(nhm->nh_family),
+			nhm->nh_scope, rtm_scope2str(nhm->nh_scope),
+			nhm->nh_protocol, rtm_protocol2str(nhm->nh_protocol),
+			nhm->nh_flags,
+			nh_flags2str(nhm->nh_flags, fbuf, sizeof(fbuf)));
+		nlnh_dump(nhm, nlmsg->nlmsg_len - NLMSG_LENGTH(sizeof(*nhm)));
+		break;
+
+	case RTM_NEWTUNNEL:
+	case RTM_DELTUNNEL:
+	case RTM_GETTUNNEL:
+		tnlm = NLMSG_DATA(nlmsg);
+		/* Close the bracket like every other header line does. */
+		zlog_debug(" tnlm [family=(%d) %s ifindex=%d]", tnlm->family,
+			   af_type2str(tnlm->family), tnlm->ifindex);
+		nltnl_dump(tnlm,
+			   nlmsg->nlmsg_len -
+				   NLMSG_LENGTH(sizeof(struct tunnel_msg)));
+		break;
+
+
+	case RTM_NEWNETCONF:
+	case RTM_DELNETCONF:
+		ncm = NLMSG_DATA(nlmsg);
+		zlog_debug(" ncm [family=%s (%d)]",
+			   af_type2str(ncm->ncm_family), ncm->ncm_family);
+		nlncm_dump(ncm, nlmsg->nlmsg_len - NLMSG_LENGTH(sizeof(*ncm)));
+		break;
+
+	case RTM_NEWQDISC:
+	case RTM_DELQDISC:
+	case RTM_NEWTCLASS:
+	case RTM_DELTCLASS:
+	case RTM_NEWTFILTER:
+	case RTM_DELTFILTER:
+		/* Only the fixed header is shown; attributes are not decoded. */
+		tcm = NLMSG_DATA(nlmsg);
+		zlog_debug(
+			" tcm [type=%s family=%s (%d) ifindex=%d handle=%04x:%04x]",
+			tcm_nltype2str(nlmsg->nlmsg_type),
+			af_type2str(tcm->tcm_family), tcm->tcm_family,
+			tcm->tcm_ifindex, tcm->tcm_handle >> 16,
+			tcm->tcm_handle & 0xffff);
+		break;
+
+	default:
+		break;
+	}
+
+	/*
+	 * Try to get the next header. There should only be more
+	 * messages if this header was flagged as MULTI, otherwise just
+	 * end it here.
+	 */
+	nlmsg = NLMSG_NEXT(nlmsg, msglen);
+	goto next_header;
+}
+
+#endif /* NETLINK_DEBUG */ diff --git a/zebra/dpdk/zebra_dplane_dpdk.c b/zebra/dpdk/zebra_dplane_dpdk.c new file mode 100644 index 0000000..4c32044 --- /dev/null +++ b/zebra/dpdk/zebra_dplane_dpdk.c @@ -0,0 +1,720 @@ +// SPDX-License-Identifier: GPL-2.0-or-later +/* + * Zebra dataplane plugin for DPDK based hw offload + * + * Copyright (C) 2021 Nvidia + * Anuradha Karuppiah + */ + +#ifdef HAVE_CONFIG_H +#include "config.h" /* Include this explicitly */ +#endif + +#include "lib/libfrr.h" + +#include "zebra/debug.h" +#include "zebra/interface.h" +#include "zebra/zebra_dplane.h" +#include "zebra/debug.h" +#include "zebra/zebra_pbr.h" + +#include "zebra/dpdk/zebra_dplane_dpdk_private.h" + +static const char *plugin_name = "zebra_dplane_dpdk"; + +static struct zd_dpdk_ctx dpdk_ctx_buf, *dpdk_ctx = &dpdk_ctx_buf; +#define dpdk_stat (&dpdk_ctx->stats) + +static struct zd_dpdk_port *zd_dpdk_port_find_by_index(int ifindex); + +DEFINE_MTYPE_STATIC(ZEBRA, DPDK_PORTS, "ZD DPDK port database"); + +void zd_dpdk_stat_show(struct vty *vty) +{ + uint32_t tmp_cnt; + + vty_out(vty, "%30s\n%30s\n", "Dataplane DPDK counters", + "======================="); + +#define ZD_DPDK_SHOW_COUNTER(label, counter) \ + do { \ + tmp_cnt = \ + atomic_load_explicit(&counter, memory_order_relaxed); \ + vty_out(vty, "%28s: %u\n", (label), (tmp_cnt)); \ + } while (0) + + ZD_DPDK_SHOW_COUNTER("PBR rule adds", dpdk_stat->rule_adds); + ZD_DPDK_SHOW_COUNTER("PBR rule dels", dpdk_stat->rule_dels); + ZD_DPDK_SHOW_COUNTER("Ignored updates", dpdk_stat->ignored_updates); +} + + +static void zd_dpdk_flow_stat_show(struct vty *vty, int in_ifindex, + intptr_t dp_flow_ptr) +{ + struct rte_flow_action_count count = {.shared =
0, .id = 0}; + const struct rte_flow_action actions[] = { + { + .type = RTE_FLOW_ACTION_TYPE_COUNT, + .conf = &count, + }, + { + .type = RTE_FLOW_ACTION_TYPE_END, + }, + }; + int rc; + struct zd_dpdk_port *in_dport; + struct rte_flow_query_count query; + struct rte_flow_error error; + uint64_t hits, bytes; + + in_dport = zd_dpdk_port_find_by_index(in_ifindex); + if (!in_dport) { + vty_out(vty, "PBR dpdk flow query failed; in_port %d missing\n", + in_ifindex); + return; + } + memset(&query, 0, sizeof(query)); + rc = rte_flow_query(in_dport->port_id, (struct rte_flow *)dp_flow_ptr, + actions, &query, &error); + if (rc) { + vty_out(vty, + "PBR dpdk flow query failed; in_ifindex %d rc %d\n", + in_ifindex, error.type); + return; + } + hits = (query.hits_set) ? query.hits : 0; + bytes = (query.bytes_set) ? query.bytes : 0; + vty_out(vty, " DPDK stats: packets %" PRIu64 " bytes %" PRIu64 "\n", + hits, bytes); +} + + +static int zd_dpdk_pbr_show_rules_walkcb(struct hash_bucket *bucket, void *arg) +{ + struct zebra_pbr_rule *rule = (struct zebra_pbr_rule *)bucket->data; + struct vty *vty = (struct vty *)arg; + struct vrf *vrf; + struct interface *ifp = NULL; + struct zebra_pbr_action *zaction = &rule->action; + + zebra_pbr_show_rule_unit(rule, vty); + if (zaction->dp_flow_ptr) { + vrf = vrf_lookup_by_id(rule->vrf_id); + if (vrf) + ifp = if_lookup_by_name_vrf(rule->ifname, vrf); + + if (ifp) + zd_dpdk_flow_stat_show(vty, ifp->ifindex, + zaction->dp_flow_ptr); + } + return HASHWALK_CONTINUE; +} + + +void zd_dpdk_pbr_flows_show(struct vty *vty) +{ + hash_walk(zrouter.rules_hash, zd_dpdk_pbr_show_rules_walkcb, vty); +} + + +static void zd_dpdk_rule_add(struct zebra_dplane_ctx *ctx) +{ + static struct rte_flow_attr attrs = {.ingress = 1, .transfer = 1}; + uint32_t filter_bm = dplane_ctx_rule_get_filter_bm(ctx); + int in_ifindex = dplane_ctx_get_ifindex(ctx); + int out_ifindex = dplane_ctx_rule_get_out_ifindex(ctx); + struct rte_flow_item_eth eth, eth_mask; + struct 
rte_flow_item_ipv4 ip, ip_mask; + struct rte_flow_item_udp udp, udp_mask; + struct rte_flow_action_count conf_count; + struct rte_flow_action_set_mac conf_smac, conf_dmac; + struct rte_flow_action_port_id conf_port; + struct rte_flow_item items[ZD_PBR_PATTERN_MAX]; + struct rte_flow_action actions[ZD_PBR_ACTION_MAX]; + int item_cnt = 0; + int act_cnt = 0; + struct in_addr tmp_mask; + const struct ethaddr *mac; + struct rte_flow *flow; + struct rte_flow_error error; + struct zd_dpdk_port *in_dport; + struct zd_dpdk_port *out_dport; + uint32_t pri = dplane_ctx_rule_get_priority(ctx); + int seq = dplane_ctx_rule_get_seq(ctx); + int unique = dplane_ctx_rule_get_unique(ctx); + + if (IS_ZEBRA_DEBUG_DPLANE_DPDK_DETAIL) + zlog_debug( + "PBR dpdk flow create ifname %s seq %d pri %u unique %d\n", + dplane_ctx_rule_get_ifname(ctx), seq, pri, unique); + in_dport = zd_dpdk_port_find_by_index(in_ifindex); + if (!in_dport) { + if (IS_ZEBRA_DEBUG_DPLANE_DPDK_DETAIL) + zlog_debug( + "PBR dpdk flow create ifname %s seq %d pri %u unique %d failed; in_port %d missing\n", + dplane_ctx_rule_get_ifname(ctx), seq, pri, + unique, in_ifindex); + return; + } + + out_dport = zd_dpdk_port_find_by_index(out_ifindex); + if (!out_dport) { + if (IS_ZEBRA_DEBUG_DPLANE_DPDK_DETAIL) + zlog_debug( + "PBR dpdk flow create ifname %s seq %d pri %u unique %d failed; out_port %d missing\n", + dplane_ctx_rule_get_ifname(ctx), seq, pri, + unique, out_ifindex); + return; + } + + /*********************** match items **************************/ + memset(&eth, 0, sizeof(eth)); + memset(&eth_mask, 0, sizeof(eth_mask)); + eth.type = eth_mask.type = htons(RTE_ETHER_TYPE_IPV4); + items[item_cnt].type = RTE_FLOW_ITEM_TYPE_ETH; + items[item_cnt].spec = &eth; + items[item_cnt].mask = &eth_mask; + items[item_cnt].last = NULL; + ++item_cnt; + + memset(&ip, 0, sizeof(ip)); + memset(&ip_mask, 0, sizeof(ip_mask)); + if (filter_bm & PBR_FILTER_SRC_IP) { + const struct prefix *src_ip; + + src_ip = dplane_ctx_rule_get_src_ip(ctx); + 
ip.hdr.src_addr = src_ip->u.prefix4.s_addr; + masklen2ip(src_ip->prefixlen, &tmp_mask); + ip_mask.hdr.src_addr = tmp_mask.s_addr; + } + if (filter_bm & PBR_FILTER_DST_IP) { + const struct prefix *dst_ip; + + dst_ip = dplane_ctx_rule_get_dst_ip(ctx); + ip.hdr.dst_addr = dst_ip->u.prefix4.s_addr; + masklen2ip(dst_ip->prefixlen, &tmp_mask); + ip_mask.hdr.dst_addr = tmp_mask.s_addr; + } + if (filter_bm & PBR_FILTER_IP_PROTOCOL) { + ip.hdr.next_proto_id = dplane_ctx_rule_get_ipproto(ctx); + ip_mask.hdr.next_proto_id = UINT8_MAX; + } + items[item_cnt].type = RTE_FLOW_ITEM_TYPE_IPV4; + items[item_cnt].spec = &ip; + items[item_cnt].mask = &ip_mask; + items[item_cnt].last = NULL; + ++item_cnt; + + if ((filter_bm & (PBR_FILTER_SRC_PORT | PBR_FILTER_DST_PORT))) { + memset(&udp, 0, sizeof(udp)); + memset(&udp_mask, 0, sizeof(udp_mask)); + if (filter_bm & PBR_FILTER_SRC_PORT) { + udp.hdr.src_port = + RTE_BE16(dplane_ctx_rule_get_src_port(ctx)); + udp_mask.hdr.src_port = UINT16_MAX; + } + if (filter_bm & PBR_FILTER_DST_PORT) { + udp.hdr.dst_port = + RTE_BE16(dplane_ctx_rule_get_dst_port(ctx)); + udp_mask.hdr.dst_port = UINT16_MAX; + } + items[item_cnt].type = RTE_FLOW_ITEM_TYPE_UDP; + items[item_cnt].spec = &udp; + items[item_cnt].mask = &udp_mask; + items[item_cnt].last = NULL; + ++item_cnt; + } + + items[item_cnt].type = RTE_FLOW_ITEM_TYPE_END; + + /*************************** actions *****************************/ + actions[act_cnt].type = RTE_FLOW_ACTION_TYPE_COUNT; + memset(&conf_count, 0, sizeof(conf_count)); + actions[act_cnt].conf = &conf_count; + ++act_cnt; + + actions[act_cnt].type = RTE_FLOW_ACTION_TYPE_DEC_TTL; + ++act_cnt; + + mac = dplane_ctx_rule_get_smac(ctx); + memcpy(conf_smac.mac_addr, mac, RTE_ETHER_ADDR_LEN); + actions[act_cnt].type = RTE_FLOW_ACTION_TYPE_SET_MAC_SRC; + actions[act_cnt].conf = &conf_smac; + ++act_cnt; + + mac = dplane_ctx_rule_get_dmac(ctx); + memcpy(conf_dmac.mac_addr, mac, RTE_ETHER_ADDR_LEN); + actions[act_cnt].type = 
RTE_FLOW_ACTION_TYPE_SET_MAC_DST; + actions[act_cnt].conf = &conf_dmac; + ++act_cnt; + + memset(&conf_port, 0, sizeof(conf_port)); + conf_port.id = out_dport->port_id; + actions[act_cnt].type = RTE_FLOW_ACTION_TYPE_PORT_ID; + actions[act_cnt].conf = &conf_port; + ++act_cnt; + + actions[act_cnt].type = RTE_FLOW_ACTION_TYPE_END; + + frr_with_privs (&zserv_privs) { + flow = rte_flow_create(in_dport->port_id, &attrs, items, + actions, &error); + } + + if (flow) { + dplane_ctx_rule_set_dp_flow_ptr(ctx, (intptr_t)flow); + if (IS_ZEBRA_DEBUG_DPLANE_DPDK_DETAIL) + zlog_debug( + "PBR dpdk flow 0x%" PRIxPTR + " created ifname %s seq %d pri %u unique %d\n", + (intptr_t)flow, dplane_ctx_rule_get_ifname(ctx), + seq, pri, unique); + } else { + zlog_warn( + "PBR dpdk flow create failed ifname %s seq %d pri %u unique %d; rc %d\n", + dplane_ctx_rule_get_ifname(ctx), seq, pri, unique, + error.type); + } +} + + +static void zd_dpdk_rule_del(struct zebra_dplane_ctx *ctx, const char *ifname, + int in_ifindex, intptr_t dp_flow_ptr) +{ + struct zd_dpdk_port *in_dport; + struct rte_flow_error error; + int rc; + + if (IS_ZEBRA_DEBUG_DPLANE_DPDK_DETAIL) + zlog_debug( + "PBR dpdk flow delete ifname %s ifindex %d dp_flow 0x%" PRIxPTR + "\n", + ifname, in_ifindex, dp_flow_ptr); + + if (!dp_flow_ptr) { + if (IS_ZEBRA_DEBUG_DPLANE_DPDK_DETAIL) + zlog_debug( + "PBR dpdk flow delete failed; ifname %s ifindex %d dp_flow 0x%" PRIxPTR + "; empty dp\n", + ifname, in_ifindex, dp_flow_ptr); + return; + } + + dplane_ctx_rule_set_dp_flow_ptr(ctx, (intptr_t)NULL); + in_dport = zd_dpdk_port_find_by_index(in_ifindex); + if (!in_dport) { + if (IS_ZEBRA_DEBUG_DPLANE_DPDK_DETAIL) + zlog_debug( + "PBR dpdk flow delete failed; ifname %s ifindex %d dp_flow 0x%" PRIxPTR + " in port missing\n", + ifname, in_ifindex, dp_flow_ptr); + return; + } + + frr_with_privs (&zserv_privs) { + rc = rte_flow_destroy(in_dport->port_id, + (struct rte_flow *)dp_flow_ptr, &error); + } + + if (rc) + zlog_warn( + "PBR dpdk flow delete 
failed; ifname %s ifindex %d dp_flow 0x%" PRIxPTR + "\n", + ifname, in_ifindex, dp_flow_ptr); +} + + +static void zd_dpdk_rule_update(struct zebra_dplane_ctx *ctx) +{ + enum dplane_op_e op; + int in_ifindex; + intptr_t dp_flow_ptr; + + if (IS_ZEBRA_DEBUG_DPLANE_DPDK_DETAIL) + zlog_debug("Dplane %s", dplane_op2str(dplane_ctx_get_op(ctx))); + + + op = dplane_ctx_get_op(ctx); + switch (op) { + case DPLANE_OP_RULE_ADD: + atomic_fetch_add_explicit(&dpdk_stat->rule_adds, 1, + memory_order_relaxed); + zd_dpdk_rule_add(ctx); + break; + + case DPLANE_OP_RULE_UPDATE: + /* delete old rule and install new one */ + atomic_fetch_add_explicit(&dpdk_stat->rule_adds, 1, + memory_order_relaxed); + in_ifindex = dplane_ctx_get_ifindex(ctx); + dp_flow_ptr = dplane_ctx_rule_get_old_dp_flow_ptr(ctx); + zd_dpdk_rule_del(ctx, dplane_ctx_rule_get_ifname(ctx), + in_ifindex, dp_flow_ptr); + zd_dpdk_rule_add(ctx); + break; + + case DPLANE_OP_RULE_DELETE: + atomic_fetch_add_explicit(&dpdk_stat->rule_dels, 1, + memory_order_relaxed); + in_ifindex = dplane_ctx_get_ifindex(ctx); + dp_flow_ptr = dplane_ctx_rule_get_dp_flow_ptr(ctx); + zd_dpdk_rule_del(ctx, dplane_ctx_rule_get_ifname(ctx), + in_ifindex, dp_flow_ptr); + break; + + case DPLANE_OP_NONE: + case DPLANE_OP_ROUTE_INSTALL: + case DPLANE_OP_ROUTE_UPDATE: + case DPLANE_OP_ROUTE_DELETE: + case DPLANE_OP_ROUTE_NOTIFY: + case DPLANE_OP_NH_INSTALL: + case DPLANE_OP_NH_UPDATE: + case DPLANE_OP_NH_DELETE: + case DPLANE_OP_LSP_INSTALL: + case DPLANE_OP_LSP_UPDATE: + case DPLANE_OP_LSP_DELETE: + case DPLANE_OP_LSP_NOTIFY: + case DPLANE_OP_PW_INSTALL: + case DPLANE_OP_PW_UNINSTALL: + case DPLANE_OP_SYS_ROUTE_ADD: + case DPLANE_OP_SYS_ROUTE_DELETE: + case DPLANE_OP_ADDR_INSTALL: + case DPLANE_OP_ADDR_UNINSTALL: + case DPLANE_OP_MAC_INSTALL: + case DPLANE_OP_MAC_DELETE: + case DPLANE_OP_NEIGH_INSTALL: + case DPLANE_OP_NEIGH_UPDATE: + case DPLANE_OP_NEIGH_DELETE: + case DPLANE_OP_VTEP_ADD: + case DPLANE_OP_VTEP_DELETE: + case DPLANE_OP_NEIGH_DISCOVER: + 
case DPLANE_OP_BR_PORT_UPDATE: + case DPLANE_OP_IPTABLE_ADD: + case DPLANE_OP_IPTABLE_DELETE: + case DPLANE_OP_IPSET_ADD: + case DPLANE_OP_IPSET_DELETE: + case DPLANE_OP_IPSET_ENTRY_ADD: + case DPLANE_OP_IPSET_ENTRY_DELETE: + case DPLANE_OP_NEIGH_IP_INSTALL: + case DPLANE_OP_NEIGH_IP_DELETE: + case DPLANE_OP_NEIGH_TABLE_UPDATE: + case DPLANE_OP_GRE_SET: + case DPLANE_OP_INTF_ADDR_ADD: + case DPLANE_OP_INTF_ADDR_DEL: + case DPLANE_OP_INTF_NETCONFIG: + case DPLANE_OP_INTF_INSTALL: + case DPLANE_OP_INTF_UPDATE: + case DPLANE_OP_INTF_DELETE: + break; + } +} + + +/* DPDK provider callback. + */ +static void zd_dpdk_process_update(struct zebra_dplane_ctx *ctx) +{ + switch (dplane_ctx_get_op(ctx)) { + + case DPLANE_OP_RULE_ADD: + case DPLANE_OP_RULE_UPDATE: + case DPLANE_OP_RULE_DELETE: + zd_dpdk_rule_update(ctx); + break; + case DPLANE_OP_NONE: + case DPLANE_OP_ROUTE_INSTALL: + case DPLANE_OP_ROUTE_UPDATE: + case DPLANE_OP_ROUTE_DELETE: + case DPLANE_OP_ROUTE_NOTIFY: + case DPLANE_OP_NH_INSTALL: + case DPLANE_OP_NH_UPDATE: + case DPLANE_OP_NH_DELETE: + case DPLANE_OP_LSP_INSTALL: + case DPLANE_OP_LSP_UPDATE: + case DPLANE_OP_LSP_DELETE: + case DPLANE_OP_LSP_NOTIFY: + case DPLANE_OP_PW_INSTALL: + case DPLANE_OP_PW_UNINSTALL: + case DPLANE_OP_SYS_ROUTE_ADD: + case DPLANE_OP_SYS_ROUTE_DELETE: + case DPLANE_OP_ADDR_INSTALL: + case DPLANE_OP_ADDR_UNINSTALL: + case DPLANE_OP_MAC_INSTALL: + case DPLANE_OP_MAC_DELETE: + case DPLANE_OP_NEIGH_INSTALL: + case DPLANE_OP_NEIGH_UPDATE: + case DPLANE_OP_NEIGH_DELETE: + case DPLANE_OP_VTEP_ADD: + case DPLANE_OP_VTEP_DELETE: + case DPLANE_OP_NEIGH_DISCOVER: + case DPLANE_OP_BR_PORT_UPDATE: + case DPLANE_OP_IPTABLE_ADD: + case DPLANE_OP_IPTABLE_DELETE: + case DPLANE_OP_IPSET_ADD: + case DPLANE_OP_IPSET_DELETE: + case DPLANE_OP_IPSET_ENTRY_ADD: + case DPLANE_OP_IPSET_ENTRY_DELETE: + case DPLANE_OP_NEIGH_IP_INSTALL: + case DPLANE_OP_NEIGH_IP_DELETE: + case DPLANE_OP_NEIGH_TABLE_UPDATE: + case DPLANE_OP_GRE_SET: + case 
DPLANE_OP_INTF_ADDR_ADD: + case DPLANE_OP_INTF_ADDR_DEL: + case DPLANE_OP_INTF_NETCONFIG: + case DPLANE_OP_INTF_INSTALL: + case DPLANE_OP_INTF_UPDATE: + case DPLANE_OP_INTF_DELETE: + atomic_fetch_add_explicit(&dpdk_stat->ignored_updates, 1, + memory_order_relaxed); + + break; + } +} + + +static int zd_dpdk_process(struct zebra_dplane_provider *prov) +{ + struct zebra_dplane_ctx *ctx; + int counter, limit; + + if (IS_ZEBRA_DEBUG_DPLANE_DPDK_DETAIL) + zlog_debug("processing %s", dplane_provider_get_name(prov)); + + limit = dplane_provider_get_work_limit(prov); + for (counter = 0; counter < limit; counter++) { + ctx = dplane_provider_dequeue_in_ctx(prov); + if (!ctx) + break; + + zd_dpdk_process_update(ctx); + dplane_ctx_set_status(ctx, ZEBRA_DPLANE_REQUEST_SUCCESS); + dplane_provider_enqueue_out_ctx(prov, ctx); + } + + return 0; +} + +static void zd_dpdk_port_show_entry(struct zd_dpdk_port *dport, struct vty *vty, + int detail) +{ + struct rte_eth_dev_info *dev_info; + + dev_info = &dport->dev_info; + if (detail) { + vty_out(vty, "DPDK port: %u\n", dport->port_id); + vty_out(vty, " Device: %s\n", + dev_info->device ? dev_info->device->name : "-"); + vty_out(vty, " Driver: %s\n", + dev_info->driver_name ? dev_info->driver_name : "-"); + vty_out(vty, " Interface: %s (%d)\n", + ifindex2ifname(dev_info->if_index, VRF_DEFAULT), + dev_info->if_index); + vty_out(vty, " Switch: %s Domain: %u Port: %u\n", + dev_info->switch_info.name, + dev_info->switch_info.domain_id, + dev_info->switch_info.port_id); + vty_out(vty, "\n"); + } else { + vty_out(vty, "%-4u %-16s %-16s %-16d %s,%u,%u\n", + dport->port_id, + dev_info->device ? 
dev_info->device->name : "-", + ifindex2ifname(dev_info->if_index, VRF_DEFAULT), + dev_info->if_index, dev_info->switch_info.name, + dev_info->switch_info.domain_id, + dev_info->switch_info.port_id); + } +} + + +static struct zd_dpdk_port *zd_dpdk_port_find_by_index(int ifindex) +{ + int count; + struct zd_dpdk_port *dport; + struct rte_eth_dev_info *dev_info; + + for (count = 0; count < RTE_MAX_ETHPORTS; ++count) { + dport = &dpdk_ctx->dpdk_ports[count]; + if (!(dport->flags & ZD_DPDK_PORT_FLAG_INITED)) + continue; + dev_info = &dport->dev_info; + if (dev_info->if_index == (uint32_t)ifindex) + return dport; + } + + return NULL; +} + + +void zd_dpdk_port_show(struct vty *vty, uint16_t port_id, bool uj, int detail) +{ + int count; + struct zd_dpdk_port *dport; + + /* XXX - support for json is yet to be added */ + if (uj) + return; + + if (!detail) { + vty_out(vty, "%-4s %-16s %-16s %-16s %s\n", "Port", "Device", + "IfName", "IfIndex", "sw,domain,port"); + } + + for (count = 0; count < RTE_MAX_ETHPORTS; ++count) { + dport = &dpdk_ctx->dpdk_ports[count]; + if (dport->flags & ZD_DPDK_PORT_FLAG_INITED) + zd_dpdk_port_show_entry(dport, vty, detail); + } +} + + +static void zd_dpdk_port_init(void) +{ + struct zd_dpdk_port *dport; + uint16_t port_id; + struct rte_eth_dev_info *dev_info; + int count; + int rc; + struct rte_flow_error error; + + /* allocate a list of ports */ + dpdk_ctx->dpdk_ports = + XCALLOC(MTYPE_DPDK_PORTS, + sizeof(struct zd_dpdk_port) * RTE_MAX_ETHPORTS); + + if (IS_ZEBRA_DEBUG_DPLANE_DPDK) + zlog_debug("dpdk port init"); + count = 0; + RTE_ETH_FOREACH_DEV(port_id) + { + if (IS_ZEBRA_DEBUG_DPLANE_DPDK) + zlog_debug("dpdk port init %d", port_id); + dport = &dpdk_ctx->dpdk_ports[count]; + count++; + dport->port_id = port_id; + dport->flags |= ZD_DPDK_PORT_FLAG_PROBED; + dev_info = &dport->dev_info; + if (rte_eth_dev_info_get(port_id, dev_info) < 0) { + zlog_warn("failed to get dev info for %u, %s", port_id, + rte_strerror(rte_errno)); + continue; + } + 
dport->flags |= ZD_DPDK_PORT_FLAG_INITED; + if (IS_ZEBRA_DEBUG_DPLANE_DPDK) + zlog_debug( + "port %u, dev %s, ifI %d, sw_name %s, sw_domain %u, sw_port %u", + port_id, + dev_info->device ? dev_info->device->name : "-", + dev_info->if_index, dev_info->switch_info.name, + dev_info->switch_info.domain_id, + dev_info->switch_info.port_id); + if (rte_flow_isolate(port_id, 1, &error)) { + if (IS_ZEBRA_DEBUG_DPLANE_DPDK) + zlog_debug( + "Flow isolate on port %u failed %d", + port_id, error.type); + } else { + if (IS_ZEBRA_DEBUG_DPLANE_DPDK) + zlog_debug("Flow isolate on port %u", + port_id); + } + rc = rte_eth_dev_start(port_id); + if (rc) { + zlog_warn("DPDK port %d start error: %s", port_id, + rte_strerror(-rc)); + continue; + } + if (IS_ZEBRA_DEBUG_DPLANE_DPDK) + zlog_debug("DPDK port %d started in promiscuous mode ", + port_id); + } + + if (!count) { + if (IS_ZEBRA_DEBUG_DPLANE_DPDK) + zlog_debug("no probed ethernet devices"); + } +} + + +static int zd_dpdk_init(void) +{ + int rc; + static const char *argv[] = {(char *)"/usr/lib/frr/zebra", + (char *)"--"}; + + zd_dpdk_vty_init(); + + frr_with_privs (&zserv_privs) { + rc = rte_eal_init(array_size(argv), argv); + } + if (rc < 0) { + zlog_warn("EAL init failed %s", rte_strerror(rte_errno)); + return -1; + } + + frr_with_privs (&zserv_privs) { + zd_dpdk_port_init(); + } + return 0; +} + + +static int zd_dpdk_start(struct zebra_dplane_provider *prov) +{ + if (IS_ZEBRA_DEBUG_DPLANE_DPDK) + zlog_debug("%s start", dplane_provider_get_name(prov)); + + return zd_dpdk_init(); +} + + +static int zd_dpdk_finish(struct zebra_dplane_provider *prov, bool early) +{ + int rc; + + if (early) { + if (IS_ZEBRA_DEBUG_DPLANE_DPDK) + zlog_debug("%s early finish", + dplane_provider_get_name(prov)); + + return 0; + } + + if (IS_ZEBRA_DEBUG_DPLANE_DPDK) + zlog_debug("%s finish", dplane_provider_get_name(prov)); + + + frr_with_privs (&zserv_privs) { + rc = rte_eal_cleanup(); + } + if (rc < 0) + zlog_warn("EAL cleanup failed %s", 
rte_strerror(rte_errno)); + + return 0; +} + + +static int zd_dpdk_plugin_init(struct event_loop *tm) +{ + int ret; + + ret = dplane_provider_register( + plugin_name, DPLANE_PRIO_KERNEL, DPLANE_PROV_FLAGS_DEFAULT, + zd_dpdk_start, zd_dpdk_process, zd_dpdk_finish, dpdk_ctx, NULL); + + if (IS_ZEBRA_DEBUG_DPLANE_DPDK) + zlog_debug("%s register status %d", plugin_name, ret); + + return 0; +} + + +static int zd_dpdk_module_init(void) +{ + hook_register(frr_late_init, zd_dpdk_plugin_init); + return 0; +} + +FRR_MODULE_SETUP(.name = "dplane_dpdk", .version = "0.0.1", + .description = "Data plane plugin using dpdk for hw offload", + .init = zd_dpdk_module_init); diff --git a/zebra/dpdk/zebra_dplane_dpdk.h b/zebra/dpdk/zebra_dplane_dpdk.h new file mode 100644 index 0000000..e5a3dbe --- /dev/null +++ b/zebra/dpdk/zebra_dplane_dpdk.h @@ -0,0 +1,23 @@ +// SPDX-License-Identifier: GPL-2.0-or-later +/* + * Zebra dataplane plugin for DPDK based hw offload + * + * Copyright (C) 2021 Nvidia + * Anuradha Karuppiah + */ + +#ifndef _ZEBRA_DPLANE_DPDK_H +#define _ZEBRA_DPLANE_DPDK_H + +#include + + +#define ZD_DPDK_INVALID_PORT 0xffff + +extern void zd_dpdk_pbr_flows_show(struct vty *vty); +extern void zd_dpdk_port_show(struct vty *vty, uint16_t port_id, bool uj, + int detail); +extern void zd_dpdk_stat_show(struct vty *vty); +extern void zd_dpdk_vty_init(void); + +#endif diff --git a/zebra/dpdk/zebra_dplane_dpdk_private.h b/zebra/dpdk/zebra_dplane_dpdk_private.h new file mode 100644 index 0000000..e10f525 --- /dev/null +++ b/zebra/dpdk/zebra_dplane_dpdk_private.h @@ -0,0 +1,48 @@ +// SPDX-License-Identifier: GPL-2.0-or-later +/* + * Zebra dataplane plugin for DPDK based hw offload + * + * Copyright (C) 2021 Nvidia + * Anuradha Karuppiah + */ + +#ifndef _ZEBRA_DPLANE_DPDK_PRIVATE_H +#define _ZEBRA_DPLANE_DPDK_PRIVATE_H + +#include + +#include + +#include "zebra_dplane_dpdk.h" + +/* match on eth, sip, dip, udp */ +#define ZD_PBR_PATTERN_MAX 6 +/* dec_ttl, set_smac, set_dmac, * phy_port, 
count + */ +#define ZD_PBR_ACTION_MAX 6 + +#define ZD_ETH_TYPE_IP 0x800 + +struct zd_dpdk_port { + uint16_t port_id; /* dpdk port_id */ + struct rte_eth_dev_info dev_info; /* PCI info + driver name */ + uint32_t flags; +#define ZD_DPDK_PORT_FLAG_PROBED (1 << 0) +#define ZD_DPDK_PORT_FLAG_INITED (1 << 1) +}; + +struct zd_dpdk_stat { + _Atomic uint32_t ignored_updates; + + _Atomic uint32_t rule_adds; + _Atomic uint32_t rule_dels; +}; + +struct zd_dpdk_ctx { + /* Stats */ + struct zd_dpdk_stat stats; + struct zd_dpdk_port *dpdk_ports; + int dpdk_logtype; +}; + +#endif diff --git a/zebra/dpdk/zebra_dplane_dpdk_vty.c b/zebra/dpdk/zebra_dplane_dpdk_vty.c new file mode 100644 index 0000000..45334a7 --- /dev/null +++ b/zebra/dpdk/zebra_dplane_dpdk_vty.c @@ -0,0 +1,70 @@ +// SPDX-License-Identifier: GPL-2.0-or-later +/* + * Zebra dataplane plugin for DPDK based hw offload + * + * Copyright (C) 2021 Nvidia + * Donald Sharp + */ +#include + +#include "lib/json.h" +#include "zebra/dpdk/zebra_dplane_dpdk.h" + +#include "zebra/dpdk/zebra_dplane_dpdk_vty_clippy.c" + +#define ZD_STR "Zebra dataplane information\n" +#define ZD_DPDK_STR "DPDK offload information\n" + +DEFPY(zd_dpdk_show_counters, zd_dpdk_show_counters_cmd, + "show dplane dpdk counters", + SHOW_STR ZD_STR ZD_DPDK_STR "show counters\n") +{ + zd_dpdk_stat_show(vty); + + return CMD_SUCCESS; +} + + +DEFPY (zd_dpdk_show_ports, + zd_dpdk_show_ports_cmd, + "show dplane dpdk port [(1-32)$port_id] [detail$detail] [json$json]", + SHOW_STR + ZD_STR + ZD_DPDK_STR + "show port info\n" + "DPDK port identifier\n" + "Detailed information\n" + JSON_STR) +{ + bool uj = !!json; + bool ud = !!detail; + + if (!port_id) + port_id = ZD_DPDK_INVALID_PORT; + zd_dpdk_port_show(vty, port_id, uj, ud); + + return CMD_SUCCESS; +} + + +DEFPY (zd_dpdk_show_pbr_flows, + zd_dpdk_show_pbr_flows_cmd, + "show dplane dpdk pbr flows", + SHOW_STR + ZD_STR + ZD_DPDK_STR + "show pbr info\n" + "DPDK flows\n") +{ + zd_dpdk_pbr_flows_show(vty); + + return 
CMD_SUCCESS; +} + + +void zd_dpdk_vty_init(void) +{ + install_element(VIEW_NODE, &zd_dpdk_show_counters_cmd); + install_element(VIEW_NODE, &zd_dpdk_show_ports_cmd); + install_element(VIEW_NODE, &zd_dpdk_show_pbr_flows_cmd); +} diff --git a/zebra/dplane_fpm_nl.c b/zebra/dplane_fpm_nl.c new file mode 100644 index 0000000..2a87925 --- /dev/null +++ b/zebra/dplane_fpm_nl.c @@ -0,0 +1,1666 @@ +// SPDX-License-Identifier: GPL-2.0-or-later +/* + * Zebra dataplane plugin for Forwarding Plane Manager (FPM) using netlink. + * + * Copyright (C) 2019 Network Device Education Foundation, Inc. ("NetDEF") + * Rafael Zalamena + */ + +#ifdef HAVE_CONFIG_H +#include "config.h" /* Include this explicitly */ +#endif + +#include + +#include +#include + +#include +#include + +#include "lib/zebra.h" +#include "lib/json.h" +#include "lib/libfrr.h" +#include "lib/frratomic.h" +#include "lib/command.h" +#include "lib/memory.h" +#include "lib/network.h" +#include "lib/ns.h" +#include "lib/frr_pthread.h" +#include "zebra/debug.h" +#include "zebra/interface.h" +#include "zebra/zebra_dplane.h" +#include "zebra/zebra_mpls.h" +#include "zebra/zebra_router.h" +#include "zebra/interface.h" +#include "zebra/zebra_vxlan_private.h" +#include "zebra/zebra_evpn.h" +#include "zebra/zebra_evpn_mac.h" +#include "zebra/kernel_netlink.h" +#include "zebra/rt_netlink.h" +#include "zebra/debug.h" +#include "fpm/fpm.h" + +#define SOUTHBOUND_DEFAULT_ADDR INADDR_LOOPBACK +#define SOUTHBOUND_DEFAULT_PORT 2620 + +/** + * FPM header: + * { + * version: 1 byte (always 1), + * type: 1 byte (1 for netlink, 2 protobuf), + * len: 2 bytes (network order), + * } + * + * This header is used with any format to tell the users how many bytes to + * expect. + */ +#define FPM_HEADER_SIZE 4 + +static const char *prov_name = "dplane_fpm_nl"; + +struct fpm_nl_ctx { + /* data plane connection. 
*/ + int socket; + bool disabled; + bool connecting; + bool use_nhg; + bool use_route_replace; + struct sockaddr_storage addr; + + /* data plane buffers. */ + struct stream *ibuf; + struct stream *obuf; + pthread_mutex_t obuf_mutex; + + /* + * data plane context queue: + * When a FPM server connection becomes a bottleneck, we must keep the + * data plane contexts until we get a chance to process them. + */ + struct dplane_ctx_list_head ctxqueue; + pthread_mutex_t ctxqueue_mutex; + + /* data plane events. */ + struct zebra_dplane_provider *prov; + struct frr_pthread *fthread; + struct event *t_connect; + struct event *t_read; + struct event *t_write; + struct event *t_event; + struct event *t_nhg; + struct event *t_dequeue; + + /* zebra events. */ + struct event *t_lspreset; + struct event *t_lspwalk; + struct event *t_nhgreset; + struct event *t_nhgwalk; + struct event *t_ribreset; + struct event *t_ribwalk; + struct event *t_rmacreset; + struct event *t_rmacwalk; + + /* Statistic counters. */ + struct { + /* Amount of bytes read into ibuf. */ + _Atomic uint32_t bytes_read; + /* Amount of bytes written from obuf. */ + _Atomic uint32_t bytes_sent; + /* Output buffer current usage. */ + _Atomic uint32_t obuf_bytes; + /* Output buffer peak usage. */ + _Atomic uint32_t obuf_peak; + + /* Amount of connection closes. */ + _Atomic uint32_t connection_closes; + /* Amount of connection errors. */ + _Atomic uint32_t connection_errors; + + /* Amount of user configurations: FNE_RECONNECT. */ + _Atomic uint32_t user_configures; + /* Amount of user disable requests: FNE_DISABLE. */ + _Atomic uint32_t user_disables; + + /* Amount of data plane context processed. */ + _Atomic uint32_t dplane_contexts; + /* Amount of data plane contexts enqueued. */ + _Atomic uint32_t ctxqueue_len; + /* Peak amount of data plane contexts enqueued. */ + _Atomic uint32_t ctxqueue_len_peak; + + /* Amount of buffer full events. 
*/ + _Atomic uint32_t buffer_full; + } counters; +} *gfnc; + +enum fpm_nl_events { + /* Ask for FPM to reconnect the external server. */ + FNE_RECONNECT, + /* Disable FPM. */ + FNE_DISABLE, + /* Reset counters. */ + FNE_RESET_COUNTERS, + /* Toggle next hop group feature. */ + FNE_TOGGLE_NHG, + /* Reconnect request by our own code to avoid races. */ + FNE_INTERNAL_RECONNECT, + + /* LSP walk finished. */ + FNE_LSP_FINISHED, + /* Next hop groups walk finished. */ + FNE_NHG_FINISHED, + /* RIB walk finished. */ + FNE_RIB_FINISHED, + /* RMAC walk finished. */ + FNE_RMAC_FINISHED, +}; + +#define FPM_RECONNECT(fnc) \ + event_add_event((fnc)->fthread->master, fpm_process_event, (fnc), \ + FNE_INTERNAL_RECONNECT, &(fnc)->t_event) + +#define WALK_FINISH(fnc, ev) \ + event_add_event((fnc)->fthread->master, fpm_process_event, (fnc), \ + (ev), NULL) + +/* + * Prototypes. + */ +static void fpm_process_event(struct event *t); +static int fpm_nl_enqueue(struct fpm_nl_ctx *fnc, struct zebra_dplane_ctx *ctx); +static void fpm_lsp_send(struct event *t); +static void fpm_lsp_reset(struct event *t); +static void fpm_nhg_send(struct event *t); +static void fpm_nhg_reset(struct event *t); +static void fpm_rib_send(struct event *t); +static void fpm_rib_reset(struct event *t); +static void fpm_rmac_send(struct event *t); +static void fpm_rmac_reset(struct event *t); + +/* + * CLI. + */ +#define FPM_STR "Forwarding Plane Manager configuration\n" + +DEFUN(fpm_set_address, fpm_set_address_cmd, + "fpm address <A.B.C.D|X:X::X:X> [port (1-65535)]", + FPM_STR + "FPM remote listening server address\n" + "Remote IPv4 FPM server\n" + "Remote IPv6 FPM server\n" + "FPM remote listening server port\n" + "Remote FPM server port\n") +{ + struct sockaddr_in *sin; + struct sockaddr_in6 *sin6; + uint16_t port = 0; + uint8_t naddr[INET6_BUFSIZ]; + + if (argc == 5) + port = strtol(argv[4]->arg, NULL, 10); + + /* Handle IPv4 addresses. 
*/ + if (inet_pton(AF_INET, argv[2]->arg, naddr) == 1) { + sin = (struct sockaddr_in *)&gfnc->addr; + + memset(sin, 0, sizeof(*sin)); + sin->sin_family = AF_INET; + sin->sin_port = + port ? htons(port) : htons(SOUTHBOUND_DEFAULT_PORT); +#ifdef HAVE_STRUCT_SOCKADDR_SA_LEN + sin->sin_len = sizeof(*sin); +#endif /* HAVE_STRUCT_SOCKADDR_SA_LEN */ + memcpy(&sin->sin_addr, naddr, sizeof(sin->sin_addr)); + + goto ask_reconnect; + } + + /* Handle IPv6 addresses. */ + if (inet_pton(AF_INET6, argv[2]->arg, naddr) != 1) { + vty_out(vty, "%% Invalid address: %s\n", argv[2]->arg); + return CMD_WARNING; + } + + sin6 = (struct sockaddr_in6 *)&gfnc->addr; + memset(sin6, 0, sizeof(*sin6)); + sin6->sin6_family = AF_INET6; + sin6->sin6_port = port ? htons(port) : htons(SOUTHBOUND_DEFAULT_PORT); +#ifdef HAVE_STRUCT_SOCKADDR_SA_LEN + sin6->sin6_len = sizeof(*sin6); +#endif /* HAVE_STRUCT_SOCKADDR_SA_LEN */ + memcpy(&sin6->sin6_addr, naddr, sizeof(sin6->sin6_addr)); + +ask_reconnect: + event_add_event(gfnc->fthread->master, fpm_process_event, gfnc, + FNE_RECONNECT, &gfnc->t_event); + return CMD_SUCCESS; +} + +DEFUN(no_fpm_set_address, no_fpm_set_address_cmd, + "no fpm address [<A.B.C.D|X:X::X:X> [port <1-65535>]]", + NO_STR + FPM_STR + "FPM remote listening server address\n" + "Remote IPv4 FPM server\n" + "Remote IPv6 FPM server\n" + "FPM remote listening server port\n" + "Remote FPM server port\n") +{ + event_add_event(gfnc->fthread->master, fpm_process_event, gfnc, + FNE_DISABLE, &gfnc->t_event); + return CMD_SUCCESS; +} + +DEFUN(fpm_use_nhg, fpm_use_nhg_cmd, + "fpm use-next-hop-groups", + FPM_STR + "Use netlink next hop groups feature.\n") +{ + /* Already enabled. */ + if (gfnc->use_nhg) + return CMD_SUCCESS; + + event_add_event(gfnc->fthread->master, fpm_process_event, gfnc, + FNE_TOGGLE_NHG, &gfnc->t_nhg); + + return CMD_SUCCESS; +} + +DEFUN(no_fpm_use_nhg, no_fpm_use_nhg_cmd, + "no fpm use-next-hop-groups", + NO_STR + FPM_STR + "Use netlink next hop groups feature.\n") +{ + /* Already disabled. 
*/ + if (!gfnc->use_nhg) + return CMD_SUCCESS; + + event_add_event(gfnc->fthread->master, fpm_process_event, gfnc, + FNE_TOGGLE_NHG, &gfnc->t_nhg); + + return CMD_SUCCESS; +} + +DEFUN(fpm_use_route_replace, fpm_use_route_replace_cmd, + "fpm use-route-replace", + FPM_STR + "Use netlink route replace semantics\n") +{ + gfnc->use_route_replace = true; + return CMD_SUCCESS; +} + +DEFUN(no_fpm_use_route_replace, no_fpm_use_route_replace_cmd, + "no fpm use-route-replace", + NO_STR + FPM_STR + "Use netlink route replace semantics\n") +{ + gfnc->use_route_replace = false; + return CMD_SUCCESS; +} + +DEFUN(fpm_reset_counters, fpm_reset_counters_cmd, + "clear fpm counters", + CLEAR_STR + FPM_STR + "FPM statistic counters\n") +{ + event_add_event(gfnc->fthread->master, fpm_process_event, gfnc, + FNE_RESET_COUNTERS, &gfnc->t_event); + return CMD_SUCCESS; +} + +DEFUN(fpm_show_counters, fpm_show_counters_cmd, + "show fpm counters", + SHOW_STR + FPM_STR + "FPM statistic counters\n") +{ + vty_out(vty, "%30s\n%30s\n", "FPM counters", "============"); + +#define SHOW_COUNTER(label, counter) \ + vty_out(vty, "%28s: %u\n", (label), (counter)) + + SHOW_COUNTER("Input bytes", gfnc->counters.bytes_read); + SHOW_COUNTER("Output bytes", gfnc->counters.bytes_sent); + SHOW_COUNTER("Output buffer current size", gfnc->counters.obuf_bytes); + SHOW_COUNTER("Output buffer peak size", gfnc->counters.obuf_peak); + SHOW_COUNTER("Connection closes", gfnc->counters.connection_closes); + SHOW_COUNTER("Connection errors", gfnc->counters.connection_errors); + SHOW_COUNTER("Data plane items processed", + gfnc->counters.dplane_contexts); + SHOW_COUNTER("Data plane items enqueued", + gfnc->counters.ctxqueue_len); + SHOW_COUNTER("Data plane items queue peak", + gfnc->counters.ctxqueue_len_peak); + SHOW_COUNTER("Buffer full hits", gfnc->counters.buffer_full); + SHOW_COUNTER("User FPM configurations", gfnc->counters.user_configures); + SHOW_COUNTER("User FPM disable requests", gfnc->counters.user_disables); + 
+#undef SHOW_COUNTER + + return CMD_SUCCESS; +} + +DEFUN(fpm_show_counters_json, fpm_show_counters_json_cmd, + "show fpm counters json", + SHOW_STR + FPM_STR + "FPM statistic counters\n" + JSON_STR) +{ + struct json_object *jo; + + jo = json_object_new_object(); + json_object_int_add(jo, "bytes-read", gfnc->counters.bytes_read); + json_object_int_add(jo, "bytes-sent", gfnc->counters.bytes_sent); + json_object_int_add(jo, "obuf-bytes", gfnc->counters.obuf_bytes); + json_object_int_add(jo, "obuf-bytes-peak", gfnc->counters.obuf_peak); + json_object_int_add(jo, "connection-closes", + gfnc->counters.connection_closes); + json_object_int_add(jo, "connection-errors", + gfnc->counters.connection_errors); + json_object_int_add(jo, "data-plane-contexts", + gfnc->counters.dplane_contexts); + json_object_int_add(jo, "data-plane-contexts-queue", + gfnc->counters.ctxqueue_len); + json_object_int_add(jo, "data-plane-contexts-queue-peak", + gfnc->counters.ctxqueue_len_peak); + json_object_int_add(jo, "buffer-full-hits", gfnc->counters.buffer_full); + json_object_int_add(jo, "user-configures", + gfnc->counters.user_configures); + json_object_int_add(jo, "user-disables", gfnc->counters.user_disables); + vty_json(vty, jo); + + return CMD_SUCCESS; +} + +static int fpm_write_config(struct vty *vty) +{ + struct sockaddr_in *sin; + struct sockaddr_in6 *sin6; + int written = 0; + + if (gfnc->disabled) + return written; + + switch (gfnc->addr.ss_family) { + case AF_INET: + written = 1; + sin = (struct sockaddr_in *)&gfnc->addr; + vty_out(vty, "fpm address %pI4", &sin->sin_addr); + if (sin->sin_port != htons(SOUTHBOUND_DEFAULT_PORT)) + vty_out(vty, " port %d", ntohs(sin->sin_port)); + + vty_out(vty, "\n"); + break; + case AF_INET6: + written = 1; + sin6 = (struct sockaddr_in6 *)&gfnc->addr; + vty_out(vty, "fpm address %pI6", &sin6->sin6_addr); + if (sin6->sin6_port != htons(SOUTHBOUND_DEFAULT_PORT)) + vty_out(vty, " port %d", ntohs(sin6->sin6_port)); + + vty_out(vty, "\n"); + break; + + 
default: + break; + } + + if (!gfnc->use_nhg) { + vty_out(vty, "no fpm use-next-hop-groups\n"); + written = 1; + } + + if (!gfnc->use_route_replace) { + vty_out(vty, "no fpm use-route-replace\n"); + written = 1; + } + + return written; +} + +static struct cmd_node fpm_node = { + .name = "fpm", + .node = FPM_NODE, + .prompt = "", + .config_write = fpm_write_config, +}; + +/* + * FPM functions. + */ +static void fpm_connect(struct event *t); + +static void fpm_reconnect(struct fpm_nl_ctx *fnc) +{ + /* Cancel all zebra threads first. */ + event_cancel_async(zrouter.master, &fnc->t_lspreset, NULL); + event_cancel_async(zrouter.master, &fnc->t_lspwalk, NULL); + event_cancel_async(zrouter.master, &fnc->t_nhgreset, NULL); + event_cancel_async(zrouter.master, &fnc->t_nhgwalk, NULL); + event_cancel_async(zrouter.master, &fnc->t_ribreset, NULL); + event_cancel_async(zrouter.master, &fnc->t_ribwalk, NULL); + event_cancel_async(zrouter.master, &fnc->t_rmacreset, NULL); + event_cancel_async(zrouter.master, &fnc->t_rmacwalk, NULL); + + /* + * Grab the lock to empty the streams (data plane might try to + * enqueue updates while we are closing). + */ + frr_mutex_lock_autounlock(&fnc->obuf_mutex); + + /* Avoid calling close on `-1`. */ + if (fnc->socket != -1) { + close(fnc->socket); + fnc->socket = -1; + } + + stream_reset(fnc->ibuf); + stream_reset(fnc->obuf); + EVENT_OFF(fnc->t_read); + EVENT_OFF(fnc->t_write); + + /* FPM is disabled, don't attempt to connect. */ + if (fnc->disabled) + return; + + event_add_timer(fnc->fthread->master, fpm_connect, fnc, 3, + &fnc->t_connect); +} + +static void fpm_read(struct event *t) +{ + struct fpm_nl_ctx *fnc = EVENT_ARG(t); + fpm_msg_hdr_t fpm; + ssize_t rv; + char buf[65535]; + struct nlmsghdr *hdr; + struct zebra_dplane_ctx *ctx; + size_t available_bytes; + size_t hdr_available_bytes; + + /* Let's ignore the input at the moment. 
*/ + rv = stream_read_try(fnc->ibuf, fnc->socket, + STREAM_WRITEABLE(fnc->ibuf)); + if (rv == 0) { + atomic_fetch_add_explicit(&fnc->counters.connection_closes, 1, + memory_order_relaxed); + + if (IS_ZEBRA_DEBUG_FPM) + zlog_debug("%s: connection closed", __func__); + + FPM_RECONNECT(fnc); + return; + } + if (rv == -1) { + atomic_fetch_add_explicit(&fnc->counters.connection_errors, 1, + memory_order_relaxed); + zlog_warn("%s: connection failure: %s", __func__, + strerror(errno)); + FPM_RECONNECT(fnc); + return; + } + + /* Schedule the next read */ + event_add_read(fnc->fthread->master, fpm_read, fnc, fnc->socket, + &fnc->t_read); + + /* We've got an interruption. */ + if (rv == -2) + return; + + + /* Account all bytes read. */ + atomic_fetch_add_explicit(&fnc->counters.bytes_read, rv, + memory_order_relaxed); + + available_bytes = STREAM_READABLE(fnc->ibuf); + while (available_bytes) { + if (available_bytes < (ssize_t)FPM_MSG_HDR_LEN) { + stream_pulldown(fnc->ibuf); + return; + } + + fpm.version = stream_getc(fnc->ibuf); + fpm.msg_type = stream_getc(fnc->ibuf); + fpm.msg_len = stream_getw(fnc->ibuf); + + if (fpm.version != FPM_PROTO_VERSION && + fpm.msg_type != FPM_MSG_TYPE_NETLINK) { + stream_reset(fnc->ibuf); + zlog_warn( + "%s: Received version/msg_type %u/%u, expected 1/1", + __func__, fpm.version, fpm.msg_type); + + FPM_RECONNECT(fnc); + return; + } + + /* + * If the passed in length doesn't even fill in the header + * something is wrong and reset. + */ + if (fpm.msg_len < FPM_MSG_HDR_LEN) { + zlog_warn( + "%s: Received message length: %u that does not even fill the FPM header", + __func__, fpm.msg_len); + FPM_RECONNECT(fnc); + return; + } + + /* + * If we have not received the whole payload, reset the stream + * back to the beginning of the header and move it to the + * top. 
+ */ + if (fpm.msg_len > available_bytes) { + stream_rewind_getp(fnc->ibuf, FPM_MSG_HDR_LEN); + stream_pulldown(fnc->ibuf); + return; + } + + available_bytes -= FPM_MSG_HDR_LEN; + + /* + * Place the data from the stream into a buffer + */ + hdr = (struct nlmsghdr *)buf; + stream_get(buf, fnc->ibuf, fpm.msg_len - FPM_MSG_HDR_LEN); + hdr_available_bytes = fpm.msg_len - FPM_MSG_HDR_LEN; + available_bytes -= hdr_available_bytes; + + /* Sanity check: must be at least header size. */ + if (hdr->nlmsg_len < sizeof(*hdr)) { + zlog_warn( + "%s: [seq=%u] invalid message length %u (< %zu)", + __func__, hdr->nlmsg_seq, hdr->nlmsg_len, + sizeof(*hdr)); + continue; + } + if (hdr->nlmsg_len > fpm.msg_len) { + zlog_warn( + "%s: Received a inner header length of %u that is greater than the fpm total length of %u", + __func__, hdr->nlmsg_len, fpm.msg_len); + FPM_RECONNECT(fnc); + } + /* Not enough bytes available. */ + if (hdr->nlmsg_len > hdr_available_bytes) { + zlog_warn( + "%s: [seq=%u] invalid message length %u (> %zu)", + __func__, hdr->nlmsg_seq, hdr->nlmsg_len, + available_bytes); + continue; + } + + if (!(hdr->nlmsg_flags & NLM_F_REQUEST)) { + if (IS_ZEBRA_DEBUG_FPM) + zlog_debug( + "%s: [seq=%u] not a request, skipping", + __func__, hdr->nlmsg_seq); + + /* + * This request is a bust, go to the next one + */ + continue; + } + + switch (hdr->nlmsg_type) { + case RTM_NEWROUTE: + ctx = dplane_ctx_alloc(); + dplane_ctx_route_init(ctx, DPLANE_OP_ROUTE_NOTIFY, NULL, + NULL); + if (netlink_route_change_read_unicast_internal( + hdr, 0, false, ctx) != 1) { + dplane_ctx_fini(&ctx); + stream_pulldown(fnc->ibuf); + /* + * Let's continue to read other messages + * Even if we ignore this one. 
+ */ + } + break; + default: + if (IS_ZEBRA_DEBUG_FPM) + zlog_debug( + "%s: Received message type %u which is not currently handled", + __func__, hdr->nlmsg_type); + break; + } + } + + stream_reset(fnc->ibuf); +} + +static void fpm_write(struct event *t) +{ + struct fpm_nl_ctx *fnc = EVENT_ARG(t); + socklen_t statuslen; + ssize_t bwritten; + int rv, status; + size_t btotal; + + if (fnc->connecting == true) { + status = 0; + statuslen = sizeof(status); + + rv = getsockopt(fnc->socket, SOL_SOCKET, SO_ERROR, &status, + &statuslen); + if (rv == -1 || status != 0) { + if (rv != -1) + zlog_warn("%s: connection failed: %s", __func__, + strerror(status)); + else + zlog_warn("%s: SO_ERROR failed: %s", __func__, + strerror(status)); + + atomic_fetch_add_explicit( + &fnc->counters.connection_errors, 1, + memory_order_relaxed); + + FPM_RECONNECT(fnc); + return; + } + + fnc->connecting = false; + + /* + * Starting with LSPs walk all FPM objects, marking them + * as unsent and then replaying them. + */ + event_add_timer(zrouter.master, fpm_lsp_reset, fnc, 0, + &fnc->t_lspreset); + + /* Permit receiving messages now. */ + event_add_read(fnc->fthread->master, fpm_read, fnc, fnc->socket, + &fnc->t_read); + } + + frr_mutex_lock_autounlock(&fnc->obuf_mutex); + + while (true) { + /* Stream is empty: reset pointers and return. */ + if (STREAM_READABLE(fnc->obuf) == 0) { + stream_reset(fnc->obuf); + break; + } + + /* Try to write all at once. */ + btotal = stream_get_endp(fnc->obuf) - + stream_get_getp(fnc->obuf); + bwritten = write(fnc->socket, stream_pnt(fnc->obuf), btotal); + if (bwritten == 0) { + atomic_fetch_add_explicit( + &fnc->counters.connection_closes, 1, + memory_order_relaxed); + + if (IS_ZEBRA_DEBUG_FPM) + zlog_debug("%s: connection closed", __func__); + break; + } + if (bwritten == -1) { + /* Attempt to continue if blocked by a signal. */ + if (errno == EINTR) + continue; + /* Receiver is probably slow, lets give it some time. 
*/ + if (errno == EAGAIN || errno == EWOULDBLOCK) + break; + + atomic_fetch_add_explicit( + &fnc->counters.connection_errors, 1, + memory_order_relaxed); + zlog_warn("%s: connection failure: %s", __func__, + strerror(errno)); + + FPM_RECONNECT(fnc); + return; + } + + /* Account all bytes sent. */ + atomic_fetch_add_explicit(&fnc->counters.bytes_sent, bwritten, + memory_order_relaxed); + + /* Account number of bytes free. */ + atomic_fetch_sub_explicit(&fnc->counters.obuf_bytes, bwritten, + memory_order_relaxed); + + stream_forward_getp(fnc->obuf, (size_t)bwritten); + } + + /* Stream is not empty yet, we must schedule more writes. */ + if (STREAM_READABLE(fnc->obuf)) { + stream_pulldown(fnc->obuf); + event_add_write(fnc->fthread->master, fpm_write, fnc, + fnc->socket, &fnc->t_write); + return; + } +} + +static void fpm_connect(struct event *t) +{ + struct fpm_nl_ctx *fnc = EVENT_ARG(t); + struct sockaddr_in *sin = (struct sockaddr_in *)&fnc->addr; + struct sockaddr_in6 *sin6 = (struct sockaddr_in6 *)&fnc->addr; + socklen_t slen; + int rv, sock; + char addrstr[INET6_ADDRSTRLEN]; + + sock = socket(fnc->addr.ss_family, SOCK_STREAM, 0); + if (sock == -1) { + zlog_err("%s: fpm socket failed: %s", __func__, + strerror(errno)); + event_add_timer(fnc->fthread->master, fpm_connect, fnc, 3, + &fnc->t_connect); + return; + } + + set_nonblocking(sock); + + if (fnc->addr.ss_family == AF_INET) { + inet_ntop(AF_INET, &sin->sin_addr, addrstr, sizeof(addrstr)); + slen = sizeof(*sin); + } else { + inet_ntop(AF_INET6, &sin6->sin6_addr, addrstr, sizeof(addrstr)); + slen = sizeof(*sin6); + } + + if (IS_ZEBRA_DEBUG_FPM) + zlog_debug("%s: attempting to connect to %s:%d", __func__, + addrstr, ntohs(sin->sin_port)); + + rv = connect(sock, (struct sockaddr *)&fnc->addr, slen); + if (rv == -1 && errno != EINPROGRESS) { + atomic_fetch_add_explicit(&fnc->counters.connection_errors, 1, + memory_order_relaxed); + close(sock); + zlog_warn("%s: fpm connection failed: %s", __func__, + 
strerror(errno)); + event_add_timer(fnc->fthread->master, fpm_connect, fnc, 3, + &fnc->t_connect); + return; + } + + fnc->connecting = (errno == EINPROGRESS); + fnc->socket = sock; + if (!fnc->connecting) + event_add_read(fnc->fthread->master, fpm_read, fnc, sock, + &fnc->t_read); + event_add_write(fnc->fthread->master, fpm_write, fnc, sock, + &fnc->t_write); + + /* + * Starting with LSPs walk all FPM objects, marking them + * as unsent and then replaying them. + * + * If we are not connected, then delay the objects reset/send. + */ + if (!fnc->connecting) + event_add_timer(zrouter.master, fpm_lsp_reset, fnc, 0, + &fnc->t_lspreset); +} + +/** + * Encode data plane operation context into netlink and enqueue it in the FPM + * output buffer. + * + * @param fnc the netlink FPM context. + * @param ctx the data plane operation context data. + * @return 0 on success or -1 on not enough space. + */ +static int fpm_nl_enqueue(struct fpm_nl_ctx *fnc, struct zebra_dplane_ctx *ctx) +{ + uint8_t nl_buf[NL_PKT_BUF_SIZE]; + size_t nl_buf_len; + ssize_t rv; + uint64_t obytes, obytes_peak; + enum dplane_op_e op = dplane_ctx_get_op(ctx); + + /* + * If we were configured to not use next hop groups, then quit as soon + * as possible. + */ + if ((!fnc->use_nhg) + && (op == DPLANE_OP_NH_DELETE || op == DPLANE_OP_NH_INSTALL + || op == DPLANE_OP_NH_UPDATE)) + return 0; + + nl_buf_len = 0; + + frr_mutex_lock_autounlock(&fnc->obuf_mutex); + + /* + * If route replace is enabled then directly encode the install which + * is going to use `NLM_F_REPLACE` (instead of delete/add operations). 
+ */ + if (fnc->use_route_replace && op == DPLANE_OP_ROUTE_UPDATE) + op = DPLANE_OP_ROUTE_INSTALL; + + switch (op) { + case DPLANE_OP_ROUTE_UPDATE: + case DPLANE_OP_ROUTE_DELETE: + rv = netlink_route_multipath_msg_encode(RTM_DELROUTE, ctx, + nl_buf, sizeof(nl_buf), + true, fnc->use_nhg, + false); + if (rv <= 0) { + zlog_err( + "%s: netlink_route_multipath_msg_encode failed", + __func__); + return 0; + } + + nl_buf_len = (size_t)rv; + + /* UPDATE operations need a INSTALL, otherwise just quit. */ + if (op == DPLANE_OP_ROUTE_DELETE) + break; + + /* FALL THROUGH */ + case DPLANE_OP_ROUTE_INSTALL: + rv = netlink_route_multipath_msg_encode(RTM_NEWROUTE, ctx, + &nl_buf[nl_buf_len], + sizeof(nl_buf) - + nl_buf_len, + true, fnc->use_nhg, + fnc->use_route_replace); + if (rv <= 0) { + zlog_err( + "%s: netlink_route_multipath_msg_encode failed", + __func__); + return 0; + } + + nl_buf_len += (size_t)rv; + break; + + case DPLANE_OP_MAC_INSTALL: + case DPLANE_OP_MAC_DELETE: + rv = netlink_macfdb_update_ctx(ctx, nl_buf, sizeof(nl_buf)); + if (rv <= 0) { + zlog_err("%s: netlink_macfdb_update_ctx failed", + __func__); + return 0; + } + + nl_buf_len = (size_t)rv; + break; + + case DPLANE_OP_NH_DELETE: + rv = netlink_nexthop_msg_encode(RTM_DELNEXTHOP, ctx, nl_buf, + sizeof(nl_buf), true); + if (rv <= 0) { + zlog_err("%s: netlink_nexthop_msg_encode failed", + __func__); + return 0; + } + + nl_buf_len = (size_t)rv; + break; + case DPLANE_OP_NH_INSTALL: + case DPLANE_OP_NH_UPDATE: + rv = netlink_nexthop_msg_encode(RTM_NEWNEXTHOP, ctx, nl_buf, + sizeof(nl_buf), true); + if (rv <= 0) { + zlog_err("%s: netlink_nexthop_msg_encode failed", + __func__); + return 0; + } + + nl_buf_len = (size_t)rv; + break; + + case DPLANE_OP_LSP_INSTALL: + case DPLANE_OP_LSP_UPDATE: + case DPLANE_OP_LSP_DELETE: + rv = netlink_lsp_msg_encoder(ctx, nl_buf, sizeof(nl_buf)); + if (rv <= 0) { + zlog_err("%s: netlink_lsp_msg_encoder failed", + __func__); + return 0; + } + + nl_buf_len += (size_t)rv; + break; + + 
/* Un-handled by FPM at this time. */ + case DPLANE_OP_PW_INSTALL: + case DPLANE_OP_PW_UNINSTALL: + case DPLANE_OP_ADDR_INSTALL: + case DPLANE_OP_ADDR_UNINSTALL: + case DPLANE_OP_NEIGH_INSTALL: + case DPLANE_OP_NEIGH_UPDATE: + case DPLANE_OP_NEIGH_DELETE: + case DPLANE_OP_VTEP_ADD: + case DPLANE_OP_VTEP_DELETE: + case DPLANE_OP_SYS_ROUTE_ADD: + case DPLANE_OP_SYS_ROUTE_DELETE: + case DPLANE_OP_ROUTE_NOTIFY: + case DPLANE_OP_LSP_NOTIFY: + case DPLANE_OP_RULE_ADD: + case DPLANE_OP_RULE_DELETE: + case DPLANE_OP_RULE_UPDATE: + case DPLANE_OP_NEIGH_DISCOVER: + case DPLANE_OP_BR_PORT_UPDATE: + case DPLANE_OP_IPTABLE_ADD: + case DPLANE_OP_IPTABLE_DELETE: + case DPLANE_OP_IPSET_ADD: + case DPLANE_OP_IPSET_DELETE: + case DPLANE_OP_IPSET_ENTRY_ADD: + case DPLANE_OP_IPSET_ENTRY_DELETE: + case DPLANE_OP_NEIGH_IP_INSTALL: + case DPLANE_OP_NEIGH_IP_DELETE: + case DPLANE_OP_NEIGH_TABLE_UPDATE: + case DPLANE_OP_GRE_SET: + case DPLANE_OP_INTF_ADDR_ADD: + case DPLANE_OP_INTF_ADDR_DEL: + case DPLANE_OP_INTF_NETCONFIG: + case DPLANE_OP_INTF_INSTALL: + case DPLANE_OP_INTF_UPDATE: + case DPLANE_OP_INTF_DELETE: + case DPLANE_OP_TC_QDISC_INSTALL: + case DPLANE_OP_TC_QDISC_UNINSTALL: + case DPLANE_OP_TC_CLASS_ADD: + case DPLANE_OP_TC_CLASS_DELETE: + case DPLANE_OP_TC_CLASS_UPDATE: + case DPLANE_OP_TC_FILTER_ADD: + case DPLANE_OP_TC_FILTER_DELETE: + case DPLANE_OP_TC_FILTER_UPDATE: + case DPLANE_OP_NONE: + case DPLANE_OP_STARTUP_STAGE: + break; + + } + + /* Skip empty enqueues. */ + if (nl_buf_len == 0) + return 0; + + /* We must know if someday a message goes beyond 65KiB. */ + assert((nl_buf_len + FPM_HEADER_SIZE) <= UINT16_MAX); + + /* Check if we have enough buffer space. 
*/ + if (STREAM_WRITEABLE(fnc->obuf) < (nl_buf_len + FPM_HEADER_SIZE)) { + atomic_fetch_add_explicit(&fnc->counters.buffer_full, 1, + memory_order_relaxed); + + if (IS_ZEBRA_DEBUG_FPM) + zlog_debug( + "%s: buffer full: wants to write %zu but has %zu", + __func__, nl_buf_len + FPM_HEADER_SIZE, + STREAM_WRITEABLE(fnc->obuf)); + + return -1; + } + + /* + * Fill in the FPM header information. + * + * See FPM_HEADER_SIZE definition for more information. + */ + stream_putc(fnc->obuf, 1); + stream_putc(fnc->obuf, 1); + stream_putw(fnc->obuf, nl_buf_len + FPM_HEADER_SIZE); + + /* Write current data. */ + stream_write(fnc->obuf, nl_buf, (size_t)nl_buf_len); + + /* Account number of bytes waiting to be written. */ + atomic_fetch_add_explicit(&fnc->counters.obuf_bytes, + nl_buf_len + FPM_HEADER_SIZE, + memory_order_relaxed); + obytes = atomic_load_explicit(&fnc->counters.obuf_bytes, + memory_order_relaxed); + obytes_peak = atomic_load_explicit(&fnc->counters.obuf_peak, + memory_order_relaxed); + if (obytes_peak < obytes) + atomic_store_explicit(&fnc->counters.obuf_peak, obytes, + memory_order_relaxed); + + /* Tell the thread to start writing. 
*/ + event_add_write(fnc->fthread->master, fpm_write, fnc, fnc->socket, + &fnc->t_write); + + return 0; +} + +/* + * LSP walk/send functions + */ +struct fpm_lsp_arg { + struct zebra_dplane_ctx *ctx; + struct fpm_nl_ctx *fnc; + bool complete; +}; + +static int fpm_lsp_send_cb(struct hash_bucket *bucket, void *arg) +{ + struct zebra_lsp *lsp = bucket->data; + struct fpm_lsp_arg *fla = arg; + + /* Skip entries which have already been sent */ + if (CHECK_FLAG(lsp->flags, LSP_FLAG_FPM)) + return HASHWALK_CONTINUE; + + dplane_ctx_reset(fla->ctx); + dplane_ctx_lsp_init(fla->ctx, DPLANE_OP_LSP_INSTALL, lsp); + + if (fpm_nl_enqueue(fla->fnc, fla->ctx) == -1) { + fla->complete = false; + return HASHWALK_ABORT; + } + + /* Mark entry as sent */ + SET_FLAG(lsp->flags, LSP_FLAG_FPM); + return HASHWALK_CONTINUE; +} + +static void fpm_lsp_send(struct event *t) +{ + struct fpm_nl_ctx *fnc = EVENT_ARG(t); + struct zebra_vrf *zvrf = zebra_vrf_lookup_by_id(VRF_DEFAULT); + struct fpm_lsp_arg fla; + + fla.fnc = fnc; + fla.ctx = dplane_ctx_alloc(); + fla.complete = true; + + hash_walk(zvrf->lsp_table, fpm_lsp_send_cb, &fla); + + dplane_ctx_fini(&fla.ctx); + + if (fla.complete) { + WALK_FINISH(fnc, FNE_LSP_FINISHED); + + /* Now move onto routes */ + event_add_timer(zrouter.master, fpm_nhg_reset, fnc, 0, + &fnc->t_nhgreset); + } else { + /* Didn't finish - reschedule LSP walk */ + event_add_timer(zrouter.master, fpm_lsp_send, fnc, 0, + &fnc->t_lspwalk); + } +} + +/* + * Next hop walk/send functions. + */ +struct fpm_nhg_arg { + struct zebra_dplane_ctx *ctx; + struct fpm_nl_ctx *fnc; + bool complete; +}; + +static int fpm_nhg_send_cb(struct hash_bucket *bucket, void *arg) +{ + struct nhg_hash_entry *nhe = bucket->data; + struct fpm_nhg_arg *fna = arg; + + /* This entry was already sent, skip it. */ + if (CHECK_FLAG(nhe->flags, NEXTHOP_GROUP_FPM)) + return HASHWALK_CONTINUE; + + /* Reset ctx to reuse allocated memory, take a snapshot and send it. 
*/ + dplane_ctx_reset(fna->ctx); + dplane_ctx_nexthop_init(fna->ctx, DPLANE_OP_NH_INSTALL, nhe); + if (fpm_nl_enqueue(fna->fnc, fna->ctx) == -1) { + /* Our buffers are full, lets give it some cycles. */ + fna->complete = false; + return HASHWALK_ABORT; + } + + /* Mark group as sent, so it doesn't get sent again. */ + SET_FLAG(nhe->flags, NEXTHOP_GROUP_FPM); + + return HASHWALK_CONTINUE; +} + +static void fpm_nhg_send(struct event *t) +{ + struct fpm_nl_ctx *fnc = EVENT_ARG(t); + struct fpm_nhg_arg fna; + + fna.fnc = fnc; + fna.ctx = dplane_ctx_alloc(); + fna.complete = true; + + /* Send next hops. */ + if (fnc->use_nhg) + hash_walk(zrouter.nhgs_id, fpm_nhg_send_cb, &fna); + + /* `free()` allocated memory. */ + dplane_ctx_fini(&fna.ctx); + + /* We are done sending next hops, lets install the routes now. */ + if (fna.complete) { + WALK_FINISH(fnc, FNE_NHG_FINISHED); + event_add_timer(zrouter.master, fpm_rib_reset, fnc, 0, + &fnc->t_ribreset); + } else /* Otherwise reschedule next hop group again. */ + event_add_timer(zrouter.master, fpm_nhg_send, fnc, 0, + &fnc->t_nhgwalk); +} + +/** + * Send all RIB installed routes to the connected data plane. + */ +static void fpm_rib_send(struct event *t) +{ + struct fpm_nl_ctx *fnc = EVENT_ARG(t); + rib_dest_t *dest; + struct route_node *rn; + struct route_table *rt; + struct zebra_dplane_ctx *ctx; + rib_tables_iter_t rt_iter; + + /* Allocate temporary context for all transactions. */ + ctx = dplane_ctx_alloc(); + + rt_iter.state = RIB_TABLES_ITER_S_INIT; + while ((rt = rib_tables_iter_next(&rt_iter))) { + for (rn = route_top(rt); rn; rn = srcdest_route_next(rn)) { + dest = rib_dest_from_rnode(rn); + /* Skip bad route entries. */ + if (dest == NULL || dest->selected_fib == NULL) + continue; + + /* Check for already sent routes. */ + if (CHECK_FLAG(dest->flags, RIB_DEST_UPDATE_FPM)) + continue; + + /* Enqueue route install. 
/*
 * The next three functions will handle RMAC enqueue.
 *
 * State shared between fpm_rmac_send() and its hash iteration callbacks.
 */
struct fpm_rmac_arg {
	/* Scratch context, reset and refilled for every RMAC entry. */
	struct zebra_dplane_ctx *ctx;
	/* FPM context the encoded messages are enqueued to. */
	struct fpm_nl_ctx *fnc;
	/* L3VNI whose RMAC table is being walked (set per L3VNI bucket). */
	struct zebra_l3vni *zl3vni;
	/* Cleared when an enqueue fails; later entries are then skipped. */
	bool complete;
};
vni->access_vlan : 0; + + dplane_ctx_reset(fra->ctx); + dplane_ctx_set_op(fra->ctx, DPLANE_OP_MAC_INSTALL); + dplane_mac_init(fra->ctx, fra->zl3vni->vxlan_if, + zif->brslave_info.br_if, vid, &zrmac->macaddr, vni->vni, + zrmac->fwd_info.r_vtep_ip, sticky, 0 /*nhg*/, + 0 /*update_flags*/); + if (fpm_nl_enqueue(fra->fnc, fra->ctx) == -1) { + event_add_timer(zrouter.master, fpm_rmac_send, fra->fnc, 1, + &fra->fnc->t_rmacwalk); + fra->complete = false; + } +} + +static void fpm_enqueue_l3vni_table(struct hash_bucket *bucket, void *arg) +{ + struct fpm_rmac_arg *fra = arg; + struct zebra_l3vni *zl3vni = bucket->data; + + fra->zl3vni = zl3vni; + hash_iterate(zl3vni->rmac_table, fpm_enqueue_rmac_table, zl3vni); +} + +static void fpm_rmac_send(struct event *t) +{ + struct fpm_rmac_arg fra; + + fra.fnc = EVENT_ARG(t); + fra.ctx = dplane_ctx_alloc(); + fra.complete = true; + hash_iterate(zrouter.l3vni_table, fpm_enqueue_l3vni_table, &fra); + dplane_ctx_fini(&fra.ctx); + + /* RMAC walk completed. */ + if (fra.complete) + WALK_FINISH(fra.fnc, FNE_RMAC_FINISHED); +} + +/* + * Resets the next hop FPM flags so we send all next hops again. + */ +static void fpm_nhg_reset_cb(struct hash_bucket *bucket, void *arg) +{ + struct nhg_hash_entry *nhe = bucket->data; + + /* Unset FPM installation flag so it gets installed again. */ + UNSET_FLAG(nhe->flags, NEXTHOP_GROUP_FPM); +} + +static void fpm_nhg_reset(struct event *t) +{ + struct fpm_nl_ctx *fnc = EVENT_ARG(t); + + hash_iterate(zrouter.nhgs_id, fpm_nhg_reset_cb, NULL); + + /* Schedule next step: send next hop groups. */ + event_add_event(zrouter.master, fpm_nhg_send, fnc, 0, &fnc->t_nhgwalk); +} + +/* + * Resets the LSP FPM flag so we send all LSPs again. 
+ */ +static void fpm_lsp_reset_cb(struct hash_bucket *bucket, void *arg) +{ + struct zebra_lsp *lsp = bucket->data; + + UNSET_FLAG(lsp->flags, LSP_FLAG_FPM); +} + +static void fpm_lsp_reset(struct event *t) +{ + struct fpm_nl_ctx *fnc = EVENT_ARG(t); + struct zebra_vrf *zvrf = zebra_vrf_lookup_by_id(VRF_DEFAULT); + + hash_iterate(zvrf->lsp_table, fpm_lsp_reset_cb, NULL); + + /* Schedule next step: send LSPs */ + event_add_event(zrouter.master, fpm_lsp_send, fnc, 0, &fnc->t_lspwalk); +} + +/** + * Resets the RIB FPM flags so we send all routes again. + */ +static void fpm_rib_reset(struct event *t) +{ + struct fpm_nl_ctx *fnc = EVENT_ARG(t); + rib_dest_t *dest; + struct route_node *rn; + struct route_table *rt; + rib_tables_iter_t rt_iter; + + rt_iter.state = RIB_TABLES_ITER_S_INIT; + while ((rt = rib_tables_iter_next(&rt_iter))) { + for (rn = route_top(rt); rn; rn = srcdest_route_next(rn)) { + dest = rib_dest_from_rnode(rn); + /* Skip bad route entries. */ + if (dest == NULL) + continue; + + UNSET_FLAG(dest->flags, RIB_DEST_UPDATE_FPM); + } + } + + /* Schedule next step: send RIB routes. */ + event_add_event(zrouter.master, fpm_rib_send, fnc, 0, &fnc->t_ribwalk); +} + +/* + * The next three function will handle RMAC table reset. + */ +static void fpm_unset_rmac_table(struct hash_bucket *bucket, void *arg) +{ + struct zebra_mac *zrmac = bucket->data; + + UNSET_FLAG(zrmac->flags, ZEBRA_MAC_FPM_SENT); +} + +static void fpm_unset_l3vni_table(struct hash_bucket *bucket, void *arg) +{ + struct zebra_l3vni *zl3vni = bucket->data; + + hash_iterate(zl3vni->rmac_table, fpm_unset_rmac_table, zl3vni); +} + +static void fpm_rmac_reset(struct event *t) +{ + struct fpm_nl_ctx *fnc = EVENT_ARG(t); + + hash_iterate(zrouter.l3vni_table, fpm_unset_l3vni_table, NULL); + + /* Schedule next event: send RMAC entries. 
/*
 * Drain the queue of data plane contexts handed over by fpm_nl_process():
 * each context is encoded into the FPM output buffer (when a socket exists)
 * and then returned to the data plane through the provider's output queue,
 * always marked as successful.
 *
 * Processing stops early when the output buffer cannot take another
 * worst-case message; in that case the function re-arms itself.
 */
static void fpm_process_queue(struct event *t)
{
	struct fpm_nl_ctx *fnc = EVENT_ARG(t);
	struct zebra_dplane_ctx *ctx;
	bool no_bufs = false;
	uint64_t processed_contexts = 0;

	while (true) {
		/* No space available yet. */
		if (STREAM_WRITEABLE(fnc->obuf) < NL_PKT_BUF_SIZE) {
			no_bufs = true;
			break;
		}

		/* Dequeue next item or quit processing. */
		frr_with_mutex (&fnc->ctxqueue_mutex) {
			ctx = dplane_ctx_dequeue(&fnc->ctxqueue);
		}
		if (ctx == NULL)
			break;

		/*
		 * Intentionally ignoring the return value
		 * as that we are ensuring that we can write to
		 * the output data in the STREAM_WRITEABLE
		 * check above, so we can ignore the return
		 */
		if (fnc->socket != -1)
			(void)fpm_nl_enqueue(fnc, ctx);

		/* Account the processed entries. */
		processed_contexts++;
		atomic_fetch_sub_explicit(&fnc->counters.ctxqueue_len, 1,
					  memory_order_relaxed);

		/* Hand the context back regardless of the enqueue outcome. */
		dplane_ctx_set_status(ctx, ZEBRA_DPLANE_REQUEST_SUCCESS);
		dplane_provider_enqueue_out_ctx(fnc->prov, ctx);
	}

	/* Update count of processed contexts */
	atomic_fetch_add_explicit(&fnc->counters.dplane_contexts,
				  processed_contexts, memory_order_relaxed);

	/* Re-schedule if we ran out of buffer space */
	if (no_bufs)
		event_add_timer(fnc->fthread->master, fpm_process_queue, fnc, 0,
				&fnc->t_dequeue);

	/*
	 * Let the dataplane thread know if there are items in the
	 * output queue to be processed. Otherwise they may sit
	 * until the dataplane thread gets scheduled for new,
	 * unrelated work.
	 */
	if (dplane_provider_out_ctx_queue_len(fnc->prov) > 0)
		dplane_provider_work_ready();
}
+ */ +static void fpm_process_event(struct event *t) +{ + struct fpm_nl_ctx *fnc = EVENT_ARG(t); + enum fpm_nl_events event = EVENT_VAL(t); + + switch (event) { + case FNE_DISABLE: + zlog_info("%s: manual FPM disable event", __func__); + fnc->disabled = true; + atomic_fetch_add_explicit(&fnc->counters.user_disables, 1, + memory_order_relaxed); + + /* Call reconnect to disable timers and clean up context. */ + fpm_reconnect(fnc); + break; + + case FNE_RECONNECT: + zlog_info("%s: manual FPM reconnect event", __func__); + fnc->disabled = false; + atomic_fetch_add_explicit(&fnc->counters.user_configures, 1, + memory_order_relaxed); + fpm_reconnect(fnc); + break; + + case FNE_RESET_COUNTERS: + zlog_info("%s: manual FPM counters reset event", __func__); + memset(&fnc->counters, 0, sizeof(fnc->counters)); + break; + + case FNE_TOGGLE_NHG: + zlog_info("%s: toggle next hop groups support", __func__); + fnc->use_nhg = !fnc->use_nhg; + fpm_reconnect(fnc); + break; + + case FNE_INTERNAL_RECONNECT: + fpm_reconnect(fnc); + break; + + case FNE_NHG_FINISHED: + if (IS_ZEBRA_DEBUG_FPM) + zlog_debug("%s: next hop groups walk finished", + __func__); + break; + case FNE_RIB_FINISHED: + if (IS_ZEBRA_DEBUG_FPM) + zlog_debug("%s: RIB walk finished", __func__); + break; + case FNE_RMAC_FINISHED: + if (IS_ZEBRA_DEBUG_FPM) + zlog_debug("%s: RMAC walk finished", __func__); + break; + case FNE_LSP_FINISHED: + if (IS_ZEBRA_DEBUG_FPM) + zlog_debug("%s: LSP walk finished", __func__); + break; + } +} + +/* + * Data plane functions. 
+ */ +static int fpm_nl_start(struct zebra_dplane_provider *prov) +{ + struct fpm_nl_ctx *fnc; + + fnc = dplane_provider_get_data(prov); + fnc->fthread = frr_pthread_new(NULL, prov_name, prov_name); + assert(frr_pthread_run(fnc->fthread, NULL) == 0); + fnc->ibuf = stream_new(NL_PKT_BUF_SIZE); + fnc->obuf = stream_new(NL_PKT_BUF_SIZE * 128); + pthread_mutex_init(&fnc->obuf_mutex, NULL); + fnc->socket = -1; + fnc->disabled = true; + fnc->prov = prov; + dplane_ctx_q_init(&fnc->ctxqueue); + pthread_mutex_init(&fnc->ctxqueue_mutex, NULL); + + /* Set default values. */ + fnc->use_nhg = true; + fnc->use_route_replace = true; + + return 0; +} + +static int fpm_nl_finish_early(struct fpm_nl_ctx *fnc) +{ + /* Disable all events and close socket. */ + EVENT_OFF(fnc->t_lspreset); + EVENT_OFF(fnc->t_lspwalk); + EVENT_OFF(fnc->t_nhgreset); + EVENT_OFF(fnc->t_nhgwalk); + EVENT_OFF(fnc->t_ribreset); + EVENT_OFF(fnc->t_ribwalk); + EVENT_OFF(fnc->t_rmacreset); + EVENT_OFF(fnc->t_rmacwalk); + EVENT_OFF(fnc->t_event); + EVENT_OFF(fnc->t_nhg); + event_cancel_async(fnc->fthread->master, &fnc->t_read, NULL); + event_cancel_async(fnc->fthread->master, &fnc->t_write, NULL); + event_cancel_async(fnc->fthread->master, &fnc->t_connect, NULL); + + if (fnc->socket != -1) { + close(fnc->socket); + fnc->socket = -1; + } + + return 0; +} + +static int fpm_nl_finish_late(struct fpm_nl_ctx *fnc) +{ + /* Stop the running thread. */ + frr_pthread_stop(fnc->fthread, NULL); + + /* Free all allocated resources. 
/*
 * Data plane provider shutdown callback: run the early stage (cancel events,
 * close the socket) or the late stage (stop the thread, free resources)
 * depending on `early`, and return that stage's result.
 */
static int fpm_nl_finish(struct zebra_dplane_provider *prov, bool early)
{
	struct fpm_nl_ctx *fnc = dplane_provider_get_data(prov);

	return early ? fpm_nl_finish_early(fnc) : fpm_nl_finish_late(fnc);
}
/*
 * Late-init hook: allocate the global FPM context, register the threaded
 * data plane provider with its start/process/finish callbacks and install
 * the FPM CLI node and commands.
 *
 * NOTE(review): the calloc() result is not checked before it is handed to
 * dplane_provider_register(); the start callback dereferences the provider
 * data -- confirm whether an allocation failure needs handling here.
 * NOTE(review): the registration status is only logged, never acted upon.
 */
static int fpm_nl_new(struct event_loop *tm)
{
	struct zebra_dplane_provider *prov = NULL;
	int rv;

	gfnc = calloc(1, sizeof(*gfnc));
	rv = dplane_provider_register(prov_name, DPLANE_PRIO_POSTPROCESS,
				      DPLANE_PROV_FLAG_THREADED, fpm_nl_start,
				      fpm_nl_process, fpm_nl_finish, gfnc,
				      &prov);

	if (IS_ZEBRA_DEBUG_DPLANE)
		zlog_debug("%s register status: %d", prov_name, rv);

	/* Show/clear commands live in enable mode, settings in config mode. */
	install_node(&fpm_node);
	install_element(ENABLE_NODE, &fpm_show_counters_cmd);
	install_element(ENABLE_NODE, &fpm_show_counters_json_cmd);
	install_element(ENABLE_NODE, &fpm_reset_counters_cmd);
	install_element(CONFIG_NODE, &fpm_set_address_cmd);
	install_element(CONFIG_NODE, &no_fpm_set_address_cmd);
	install_element(CONFIG_NODE, &fpm_use_nhg_cmd);
	install_element(CONFIG_NODE, &no_fpm_use_nhg_cmd);
	install_element(CONFIG_NODE, &fpm_use_route_replace_cmd);
	install_element(CONFIG_NODE, &no_fpm_use_route_replace_cmd);

	return 0;
}
*/ + for (;;) { + ifconf.ifc_len = sizeof(struct ifreq) * ifnum; + ifconf.ifc_buf = + XREALLOC(MTYPE_TMP, ifconf.ifc_buf, ifconf.ifc_len); + + ret = ioctl(sock, SIOCGIFCONF, &ifconf); + + if (ret < 0) { + flog_err_sys(EC_LIB_SYSTEM_CALL, "SIOCGIFCONF: %s", + safe_strerror(errno)); + goto end; + } + /* Repeatedly get info til buffer fails to grow. */ + if (ifconf.ifc_len > lastlen) { + lastlen = ifconf.ifc_len; + ifnum += 10; + continue; + } + /* Success. */ + break; + } + + /* Allocate interface. */ + ifreq = ifconf.ifc_req; + +#ifdef OPEN_BSD + for (n = 0; n < ifconf.ifc_len;) { + unsigned int size; + + ifreq = (struct ifreq *)((caddr_t)ifconf.ifc_req + n); + ifp = if_get_by_name(ifreq->ifr_name, VRF_DEFAULT, + VRF_DEFAULT_NAME); + if_add_update(ifp); + size = ifreq->ifr_addr.sa_len; + if (size < sizeof(ifreq->ifr_addr)) + size = sizeof(ifreq->ifr_addr); + size += sizeof(ifreq->ifr_name); + n += size; + } +#else + for (n = 0; n < ifconf.ifc_len; n += sizeof(struct ifreq)) { + ifp = if_get_by_name(ifreq->ifr_name, VRF_DEFAULT, + VRF_DEFAULT_NAME); + if_add_update(ifp); + ifreq++; + } +#endif /* OPEN_BSD */ + +end: + close(sock); + XFREE(MTYPE_TMP, ifconf.ifc_buf); + + return ret; +} + +/* Get interface's index by ioctl. */ +static int if_get_index(struct interface *ifp) +{ + if_set_index(ifp, if_nametoindex(ifp->name)); + return ifp->ifindex; +} + +#ifdef SIOCGIFHWADDR +static int if_get_hwaddr(struct interface *ifp) +{ + int ret; + struct ifreq ifreq; + int i; + + strlcpy(ifreq.ifr_name, ifp->name, sizeof(ifreq.ifr_name)); + ifreq.ifr_addr.sa_family = AF_INET; + + /* Fetch Hardware address if available. 
*/ + ret = vrf_if_ioctl(SIOCGIFHWADDR, (caddr_t)&ifreq, ifp->vrf->vrf_id); + if (ret < 0) + ifp->hw_addr_len = 0; + else { + memcpy(ifp->hw_addr, ifreq.ifr_hwaddr.sa_data, 6); + + for (i = 0; i < 6; i++) + if (ifp->hw_addr[i] != 0) + break; + + if (i == 6) + ifp->hw_addr_len = 0; + else + ifp->hw_addr_len = 6; + } + return 0; +} +#endif /* SIOCGIFHWADDR */ + +static int if_getaddrs(void) +{ + int ret; + struct ifaddrs *ifap; + struct ifaddrs *ifapfree; + struct interface *ifp; + int prefixlen; + + ret = getifaddrs(&ifap); + if (ret != 0) { + flog_err_sys(EC_LIB_SYSTEM_CALL, "getifaddrs(): %s", + safe_strerror(errno)); + return -1; + } + + for (ifapfree = ifap; ifap; ifap = ifap->ifa_next) { + if (ifap->ifa_addr == NULL) { + flog_err( + EC_LIB_INTERFACE, + "%s: nonsensical ifaddr with NULL ifa_addr, ifname %s", + __func__, + (ifap->ifa_name ? ifap->ifa_name : "(null)")); + continue; + } + + ifp = if_lookup_by_name(ifap->ifa_name, VRF_DEFAULT); + if (ifp == NULL) { + flog_err(EC_LIB_INTERFACE, + "%s: Can't lookup interface %s", __func__, + ifap->ifa_name); + continue; + } + + if (ifap->ifa_addr->sa_family == AF_INET) { + struct sockaddr_in *addr; + struct sockaddr_in *mask; + struct sockaddr_in *dest; + struct in_addr *dest_pnt; + int flags = 0; + + addr = (struct sockaddr_in *)ifap->ifa_addr; + mask = (struct sockaddr_in *)ifap->ifa_netmask; + prefixlen = ip_masklen(mask->sin_addr); + + dest_pnt = NULL; + + if (if_is_pointopoint(ifp) && ifap->ifa_dstaddr + && !IPV4_ADDR_SAME(&addr->sin_addr, + &((struct sockaddr_in *) + ifap->ifa_dstaddr) + ->sin_addr)) { + dest = (struct sockaddr_in *)ifap->ifa_dstaddr; + dest_pnt = &dest->sin_addr; + flags = ZEBRA_IFA_PEER; + } else if (ifap->ifa_broadaddr + && !IPV4_ADDR_SAME( + &addr->sin_addr, + &((struct sockaddr_in *) + ifap->ifa_broadaddr) + ->sin_addr)) { + dest = (struct sockaddr_in *) + ifap->ifa_broadaddr; + dest_pnt = &dest->sin_addr; + } + + connected_add_ipv4(ifp, flags, &addr->sin_addr, + prefixlen, dest_pnt, NULL, + 
METRIC_MAX); + } + if (ifap->ifa_addr->sa_family == AF_INET6) { + struct sockaddr_in6 *addr; + struct sockaddr_in6 *mask; + int flags = 0; + + addr = (struct sockaddr_in6 *)ifap->ifa_addr; + mask = (struct sockaddr_in6 *)ifap->ifa_netmask; + prefixlen = ip6_masklen(mask->sin6_addr); + +#if defined(KAME) + if (IN6_IS_ADDR_LINKLOCAL(&addr->sin6_addr)) { + addr->sin6_scope_id = + ntohs(*(uint16_t *)&addr->sin6_addr + .s6_addr[2]); + addr->sin6_addr.s6_addr[2] = + addr->sin6_addr.s6_addr[3] = 0; + } +#endif + + connected_add_ipv6(ifp, flags, &addr->sin6_addr, NULL, + prefixlen, NULL, METRIC_MAX); + } + } + + freeifaddrs(ifapfree); + + return 0; +} + +/* Fetch interface information via ioctl(). */ +static void interface_info_ioctl() +{ + struct vrf *vrf = vrf_lookup_by_id(VRF_DEFAULT); + struct interface *ifp; + + FOR_ALL_INTERFACES (vrf, ifp) { + if_get_index(ifp); +#ifdef SIOCGIFHWADDR + if_get_hwaddr(ifp); +#endif /* SIOCGIFHWADDR */ + if_get_flags(ifp); + if_get_mtu(ifp); + if_get_metric(ifp); + } +} + +/* Lookup all interface information. */ +void interface_list(struct zebra_ns *zns) +{ + + zlog_info("%s: NS %u", __func__, zns->ns_id); + +/* Linux can do both proc & ioctl, ioctl is the only way to get + interface aliases in 2.2 series kernels. */ +#ifdef HAVE_PROC_NET_DEV + interface_list_proc(); +#endif /* HAVE_PROC_NET_DEV */ + interface_list_ioctl(); + + /* After listing is done, get index, address, flags and other + interface's information. */ + interface_info_ioctl(); + + if_getaddrs(); + +#if defined(HAVE_PROC_NET_IF_INET6) + /* Linux provides interface's IPv6 address via + /proc/net/if_inet6. */ + ifaddr_proc_ipv6(); +#endif /* HAVE_PROC_NET_IF_INET6 */ +} + +#endif /* OPEN_BSD */ diff --git a/zebra/if_netlink.c b/zebra/if_netlink.c new file mode 100644 index 0000000..ed2e0a2 --- /dev/null +++ b/zebra/if_netlink.c @@ -0,0 +1,2087 @@ +// SPDX-License-Identifier: GPL-2.0-or-later +/* + * Interface looking up by netlink. 
+ * Copyright (C) 1998 Kunihiro Ishiguro + */ + +#include + +#ifdef GNU_LINUX + +/* The following definition is to workaround an issue in the Linux kernel + * header files with redefinition of 'struct in6_addr' in both + * netinet/in.h and linux/in6.h. + * Reference - https://sourceware.org/ml/libc-alpha/2013-01/msg00599.html + */ +#define _LINUX_IN6_H +#define _LINUX_IF_H +#define _LINUX_IP_H + +#include +#include +#include +#include +#include +#include +#include + +#include "linklist.h" +#include "if.h" +#include "log.h" +#include "prefix.h" +#include "connected.h" +#include "table.h" +#include "memory.h" +#include "rib.h" +#include "frrevent.h" +#include "privs.h" +#include "nexthop.h" +#include "vrf.h" +#include "vrf_int.h" +#include "mpls.h" +#include "lib_errors.h" + +#include "vty.h" +#include "zebra/zserv.h" +#include "zebra/zebra_ns.h" +#include "zebra/zebra_vrf.h" +#include "zebra/rt.h" +#include "zebra/redistribute.h" +#include "zebra/interface.h" +#include "zebra/debug.h" +#include "zebra/rtadv.h" +#include "zebra/zebra_ptm.h" +#include "zebra/zebra_mpls.h" +#include "zebra/kernel_netlink.h" +#include "zebra/rt_netlink.h" +#include "zebra/if_netlink.h" +#include "zebra/zebra_errors.h" +#include "zebra/zebra_vxlan.h" +#include "zebra/zebra_evpn_mh.h" +#include "zebra/zebra_l2.h" +#include "zebra/netconf_netlink.h" +#include "zebra/zebra_trace.h" + +extern struct zebra_privs_t zserv_privs; + +/* Utility function to parse hardware link-layer address and update ifp */ +static void netlink_interface_update_hw_addr(struct rtattr **tb, + struct zebra_dplane_ctx *ctx) +{ + if (tb[IFLA_ADDRESS]) { + int hw_addr_len; + + hw_addr_len = RTA_PAYLOAD(tb[IFLA_ADDRESS]); + + if (hw_addr_len > INTERFACE_HWADDR_MAX) + zlog_warn("Hardware address is too large: %d", + hw_addr_len); + else + dplane_ctx_set_ifp_hw_addr(ctx, hw_addr_len, + RTA_DATA(tb[IFLA_ADDRESS])); + } +} + +static enum zebra_link_type netlink_to_zebra_link_type(unsigned int hwt) +{ + switch (hwt) { + case 
ARPHRD_ETHER: + return ZEBRA_LLT_ETHER; + case ARPHRD_EETHER: + return ZEBRA_LLT_EETHER; + case ARPHRD_AX25: + return ZEBRA_LLT_AX25; + case ARPHRD_PRONET: + return ZEBRA_LLT_PRONET; + case ARPHRD_IEEE802: + return ZEBRA_LLT_IEEE802; + case ARPHRD_ARCNET: + return ZEBRA_LLT_ARCNET; + case ARPHRD_APPLETLK: + return ZEBRA_LLT_APPLETLK; + case ARPHRD_DLCI: + return ZEBRA_LLT_DLCI; + case ARPHRD_ATM: + return ZEBRA_LLT_ATM; + case ARPHRD_METRICOM: + return ZEBRA_LLT_METRICOM; + case ARPHRD_IEEE1394: + return ZEBRA_LLT_IEEE1394; + case ARPHRD_EUI64: + return ZEBRA_LLT_EUI64; + case ARPHRD_INFINIBAND: + return ZEBRA_LLT_INFINIBAND; + case ARPHRD_SLIP: + return ZEBRA_LLT_SLIP; + case ARPHRD_CSLIP: + return ZEBRA_LLT_CSLIP; + case ARPHRD_SLIP6: + return ZEBRA_LLT_SLIP6; + case ARPHRD_CSLIP6: + return ZEBRA_LLT_CSLIP6; + case ARPHRD_RSRVD: + return ZEBRA_LLT_RSRVD; + case ARPHRD_ADAPT: + return ZEBRA_LLT_ADAPT; + case ARPHRD_ROSE: + return ZEBRA_LLT_ROSE; + case ARPHRD_X25: + return ZEBRA_LLT_X25; + case ARPHRD_PPP: + return ZEBRA_LLT_PPP; + case ARPHRD_CISCO: + return ZEBRA_LLT_CHDLC; + case ARPHRD_LAPB: + return ZEBRA_LLT_LAPB; + case ARPHRD_RAWHDLC: + return ZEBRA_LLT_RAWHDLC; + case ARPHRD_TUNNEL: + return ZEBRA_LLT_IPIP; + case ARPHRD_TUNNEL6: + return ZEBRA_LLT_IPIP6; + case ARPHRD_FRAD: + return ZEBRA_LLT_FRAD; + case ARPHRD_SKIP: + return ZEBRA_LLT_SKIP; + case ARPHRD_LOOPBACK: + return ZEBRA_LLT_LOOPBACK; + case ARPHRD_LOCALTLK: + return ZEBRA_LLT_LOCALTLK; + case ARPHRD_FDDI: + return ZEBRA_LLT_FDDI; + case ARPHRD_SIT: + return ZEBRA_LLT_SIT; + case ARPHRD_IPDDP: + return ZEBRA_LLT_IPDDP; + case ARPHRD_IPGRE: + return ZEBRA_LLT_IPGRE; + case ARPHRD_PIMREG: + return ZEBRA_LLT_PIMREG; + case ARPHRD_HIPPI: + return ZEBRA_LLT_HIPPI; + case ARPHRD_ECONET: + return ZEBRA_LLT_ECONET; + case ARPHRD_IRDA: + return ZEBRA_LLT_IRDA; + case ARPHRD_FCPP: + return ZEBRA_LLT_FCPP; + case ARPHRD_FCAL: + return ZEBRA_LLT_FCAL; + case ARPHRD_FCPL: + return ZEBRA_LLT_FCPL; + case 
ARPHRD_FCFABRIC: + return ZEBRA_LLT_FCFABRIC; + case ARPHRD_IEEE802_TR: + return ZEBRA_LLT_IEEE802_TR; + case ARPHRD_IEEE80211: + return ZEBRA_LLT_IEEE80211; +#ifdef ARPHRD_IEEE802154 + case ARPHRD_IEEE802154: + return ZEBRA_LLT_IEEE802154; +#endif +#ifdef ARPHRD_IP6GRE + case ARPHRD_IP6GRE: + return ZEBRA_LLT_IP6GRE; +#endif +#ifdef ARPHRD_IEEE802154_PHY + case ARPHRD_IEEE802154_PHY: + return ZEBRA_LLT_IEEE802154_PHY; +#endif + + default: + return ZEBRA_LLT_UNKNOWN; + } +} + +static void netlink_determine_zebra_iftype(const char *kind, + enum zebra_iftype *zif_type) +{ + *zif_type = ZEBRA_IF_OTHER; + + if (!kind) + return; + + if (strcmp(kind, "vrf") == 0) + *zif_type = ZEBRA_IF_VRF; + else if (strcmp(kind, "bridge") == 0) + *zif_type = ZEBRA_IF_BRIDGE; + else if (strcmp(kind, "vlan") == 0) + *zif_type = ZEBRA_IF_VLAN; + else if (strcmp(kind, "vxlan") == 0) + *zif_type = ZEBRA_IF_VXLAN; + else if (strcmp(kind, "macvlan") == 0) + *zif_type = ZEBRA_IF_MACVLAN; + else if (strcmp(kind, "veth") == 0) + *zif_type = ZEBRA_IF_VETH; + else if (strcmp(kind, "bond") == 0) + *zif_type = ZEBRA_IF_BOND; + else if (strcmp(kind, "gre") == 0) + *zif_type = ZEBRA_IF_GRE; +} + +static void netlink_vrf_change(struct nlmsghdr *h, struct rtattr *tb, + uint32_t ns_id, const char *name, + struct zebra_dplane_ctx *ctx) +{ + struct rtattr *linkinfo[IFLA_INFO_MAX + 1]; + struct rtattr *attr[IFLA_VRF_MAX + 1]; + + netlink_parse_rtattr_nested(linkinfo, IFLA_INFO_MAX, tb); + + if (!linkinfo[IFLA_INFO_DATA]) { + if (IS_ZEBRA_DEBUG_KERNEL) + zlog_debug( + "%s: IFLA_INFO_DATA missing from VRF message: %s", + __func__, name); + return; + } + + netlink_parse_rtattr_nested(attr, IFLA_VRF_MAX, + linkinfo[IFLA_INFO_DATA]); + if (!attr[IFLA_VRF_TABLE]) { + if (IS_ZEBRA_DEBUG_KERNEL) + zlog_debug( + "%s: IFLA_VRF_TABLE missing from VRF message: %s", + __func__, name); + return; + } + + dplane_ctx_set_ifp_table_id( + ctx, *(uint32_t *)RTA_DATA(attr[IFLA_VRF_TABLE])); +} + +static uint32_t 
get_iflink_speed(struct interface *interface, int *error) +{ + struct ifreq ifdata; + struct ethtool_cmd ecmd; + int sd; + int rc; + const char *ifname = interface->name; + + if (error) + *error = 0; + /* initialize struct */ + memset(&ifdata, 0, sizeof(ifdata)); + + /* set interface name */ + strlcpy(ifdata.ifr_name, ifname, sizeof(ifdata.ifr_name)); + + /* initialize ethtool interface */ + memset(&ecmd, 0, sizeof(ecmd)); + ecmd.cmd = ETHTOOL_GSET; /* ETHTOOL_GLINK */ + ifdata.ifr_data = (caddr_t)&ecmd; + + /* use ioctl to get speed of an interface */ + frr_with_privs(&zserv_privs) { + sd = vrf_socket(PF_INET, SOCK_DGRAM, IPPROTO_IP, + interface->vrf->vrf_id, NULL); + if (sd < 0) { + if (IS_ZEBRA_DEBUG_KERNEL) + zlog_debug("Failure to read interface %s speed: %d %s", + ifname, errno, safe_strerror(errno)); + /* no vrf socket creation may probably mean vrf issue */ + if (error) + *error = -1; + return 0; + } + /* Get the current link state for the interface */ + rc = vrf_ioctl(interface->vrf->vrf_id, sd, SIOCETHTOOL, + (char *)&ifdata); + } + if (rc < 0) { + if (errno != EOPNOTSUPP && IS_ZEBRA_DEBUG_KERNEL) + zlog_debug( + "IOCTL failure to read interface %s speed: %d %s", + ifname, errno, safe_strerror(errno)); + /* no device means interface unreachable */ + if (errno == ENODEV && error) + *error = -1; + ecmd.speed_hi = 0; + ecmd.speed = 0; + } + + close(sd); + + return ((uint32_t)ecmd.speed_hi << 16) | ecmd.speed; +} + +uint32_t kernel_get_speed(struct interface *ifp, int *error) +{ + return get_iflink_speed(ifp, error); +} + +static ssize_t +netlink_gre_set_msg_encoder(struct zebra_dplane_ctx *ctx, void *buf, + size_t buflen) +{ + struct { + struct nlmsghdr n; + struct ifinfomsg ifi; + char buf[]; + } *req = buf; + uint32_t link_idx; + unsigned int mtu; + struct rtattr *rta_info, *rta_data; + const struct zebra_l2info_gre *gre_info; + + if (buflen < sizeof(*req)) + return 0; + memset(req, 0, sizeof(*req)); + + req->n.nlmsg_type = RTM_NEWLINK; + req->n.nlmsg_len 
= NLMSG_LENGTH(sizeof(struct ifinfomsg)); + req->n.nlmsg_flags = NLM_F_REQUEST; + + req->ifi.ifi_index = dplane_ctx_get_ifindex(ctx); + + gre_info = dplane_ctx_gre_get_info(ctx); + if (!gre_info) + return 0; + + req->ifi.ifi_change = 0xFFFFFFFF; + link_idx = dplane_ctx_gre_get_link_ifindex(ctx); + mtu = dplane_ctx_gre_get_mtu(ctx); + + if (mtu && !nl_attr_put32(&req->n, buflen, IFLA_MTU, mtu)) + return 0; + + rta_info = nl_attr_nest(&req->n, buflen, IFLA_LINKINFO); + if (!rta_info) + return 0; + + if (!nl_attr_put(&req->n, buflen, IFLA_INFO_KIND, "gre", 3)) + return 0; + + rta_data = nl_attr_nest(&req->n, buflen, IFLA_INFO_DATA); + if (!rta_data) + return 0; + + if (!nl_attr_put32(&req->n, buflen, IFLA_GRE_LINK, link_idx)) + return 0; + + if (gre_info->vtep_ip.s_addr && + !nl_attr_put32(&req->n, buflen, IFLA_GRE_LOCAL, + gre_info->vtep_ip.s_addr)) + return 0; + + if (gre_info->vtep_ip_remote.s_addr && + !nl_attr_put32(&req->n, buflen, IFLA_GRE_REMOTE, + gre_info->vtep_ip_remote.s_addr)) + return 0; + + if (gre_info->ikey && + !nl_attr_put32(&req->n, buflen, IFLA_GRE_IKEY, + gre_info->ikey)) + return 0; + if (gre_info->okey && + !nl_attr_put32(&req->n, buflen, IFLA_GRE_IKEY, + gre_info->okey)) + return 0; + + nl_attr_nest_end(&req->n, rta_data); + nl_attr_nest_end(&req->n, rta_info); + + return NLMSG_ALIGN(req->n.nlmsg_len); +} + +static int netlink_extract_bridge_info(struct rtattr *link_data, + struct zebra_l2info_bridge *bridge_info) +{ + struct rtattr *attr[IFLA_BR_MAX + 1]; + + memset(bridge_info, 0, sizeof(*bridge_info)); + netlink_parse_rtattr_nested(attr, IFLA_BR_MAX, link_data); + if (attr[IFLA_BR_VLAN_FILTERING]) + bridge_info->bridge.vlan_aware = + *(uint8_t *)RTA_DATA(attr[IFLA_BR_VLAN_FILTERING]); + return 0; +} + +static int netlink_extract_vlan_info(struct rtattr *link_data, + struct zebra_l2info_vlan *vlan_info) +{ + struct rtattr *attr[IFLA_VLAN_MAX + 1]; + vlanid_t vid_in_msg; + + memset(vlan_info, 0, sizeof(*vlan_info)); + 
netlink_parse_rtattr_nested(attr, IFLA_VLAN_MAX, link_data); + if (!attr[IFLA_VLAN_ID]) { + if (IS_ZEBRA_DEBUG_KERNEL) + zlog_debug("IFLA_VLAN_ID missing from VLAN IF message"); + return -1; + } + + vid_in_msg = *(vlanid_t *)RTA_DATA(attr[IFLA_VLAN_ID]); + vlan_info->vid = vid_in_msg; + return 0; +} + +static int netlink_extract_gre_info(struct rtattr *link_data, + struct zebra_l2info_gre *gre_info) +{ + struct rtattr *attr[IFLA_GRE_MAX + 1]; + + memset(gre_info, 0, sizeof(*gre_info)); + memset(attr, 0, sizeof(attr)); + netlink_parse_rtattr_nested(attr, IFLA_GRE_MAX, link_data); + + if (!attr[IFLA_GRE_LOCAL]) { + if (IS_ZEBRA_DEBUG_KERNEL) + zlog_debug( + "IFLA_GRE_LOCAL missing from GRE IF message"); + } else + gre_info->vtep_ip = + *(struct in_addr *)RTA_DATA(attr[IFLA_GRE_LOCAL]); + if (!attr[IFLA_GRE_REMOTE]) { + if (IS_ZEBRA_DEBUG_KERNEL) + zlog_debug( + "IFLA_GRE_REMOTE missing from GRE IF message"); + } else + gre_info->vtep_ip_remote = + *(struct in_addr *)RTA_DATA(attr[IFLA_GRE_REMOTE]); + + if (!attr[IFLA_GRE_LINK]) { + if (IS_ZEBRA_DEBUG_KERNEL) + zlog_debug("IFLA_GRE_LINK missing from GRE IF message"); + } else { + gre_info->ifindex_link = + *(ifindex_t *)RTA_DATA(attr[IFLA_GRE_LINK]); + if (IS_ZEBRA_DEBUG_KERNEL) + zlog_debug("IFLA_GRE_LINK obtained is %u", + gre_info->ifindex_link); + } + if (attr[IFLA_GRE_IKEY]) + gre_info->ikey = *(uint32_t *)RTA_DATA(attr[IFLA_GRE_IKEY]); + if (attr[IFLA_GRE_OKEY]) + gre_info->okey = *(uint32_t *)RTA_DATA(attr[IFLA_GRE_OKEY]); + return 0; +} + +static int netlink_extract_vxlan_info(struct rtattr *link_data, + struct zebra_l2info_vxlan *vxl_info) +{ + uint8_t svd = 0; + struct rtattr *attr[IFLA_VXLAN_MAX + 1]; + vni_t vni_in_msg; + struct in_addr vtep_ip_in_msg; + ifindex_t ifindex_link; + + memset(vxl_info, 0, sizeof(*vxl_info)); + netlink_parse_rtattr_nested(attr, IFLA_VXLAN_MAX, link_data); + if (attr[IFLA_VXLAN_COLLECT_METADATA]) { + svd = *(uint8_t *)RTA_DATA(attr[IFLA_VXLAN_COLLECT_METADATA]); + if 
(IS_ZEBRA_DEBUG_KERNEL) + zlog_debug( + "IFLA_VXLAN_COLLECT_METADATA=%u in VXLAN IF message", + svd); + } + + if (!svd) { + /* + * In case of svd we will not get vni info directly from the + * device + */ + if (!attr[IFLA_VXLAN_ID]) { + if (IS_ZEBRA_DEBUG_KERNEL) + zlog_debug( + "IFLA_VXLAN_ID missing from VXLAN IF message"); + return -1; + } + + vxl_info->vni_info.iftype = ZEBRA_VXLAN_IF_VNI; + vni_in_msg = *(vni_t *)RTA_DATA(attr[IFLA_VXLAN_ID]); + vxl_info->vni_info.vni.vni = vni_in_msg; + } else { + vxl_info->vni_info.iftype = ZEBRA_VXLAN_IF_SVD; + } + + if (!attr[IFLA_VXLAN_LOCAL]) { + if (IS_ZEBRA_DEBUG_KERNEL) + zlog_debug( + "IFLA_VXLAN_LOCAL missing from VXLAN IF message"); + } else { + vtep_ip_in_msg = + *(struct in_addr *)RTA_DATA(attr[IFLA_VXLAN_LOCAL]); + vxl_info->vtep_ip = vtep_ip_in_msg; + } + + if (attr[IFLA_VXLAN_GROUP]) { + if (!svd) + vxl_info->vni_info.vni.mcast_grp = + *(struct in_addr *)RTA_DATA( + attr[IFLA_VXLAN_GROUP]); + } + + if (!attr[IFLA_VXLAN_LINK]) { + if (IS_ZEBRA_DEBUG_KERNEL) + zlog_debug("IFLA_VXLAN_LINK missing from VXLAN IF message"); + } else { + ifindex_link = + *(ifindex_t *)RTA_DATA(attr[IFLA_VXLAN_LINK]); + vxl_info->ifindex_link = ifindex_link; + } + return 0; +} + +/* + * Extract and save L2 params (of interest) for an interface. When a + * bridge interface is added or updated, take further actions to map + * its members. Likewise, for VxLAN interface. 
+ */ +static void netlink_interface_update_l2info(struct zebra_dplane_ctx *ctx, + enum zebra_iftype zif_type, + struct rtattr *link_data, int add, + ns_id_t link_nsid) +{ + struct zebra_l2info_bridge bridge_info; + struct zebra_l2info_vlan vlan_info; + struct zebra_l2info_vxlan vxlan_info; + struct zebra_l2info_gre gre_info; + + if (!link_data) + return; + + switch (zif_type) { + case ZEBRA_IF_BRIDGE: + netlink_extract_bridge_info(link_data, &bridge_info); + dplane_ctx_set_ifp_bridge_info(ctx, &bridge_info); + break; + case ZEBRA_IF_VLAN: + netlink_extract_vlan_info(link_data, &vlan_info); + dplane_ctx_set_ifp_vlan_info(ctx, &vlan_info); + break; + case ZEBRA_IF_VXLAN: + netlink_extract_vxlan_info(link_data, &vxlan_info); + vxlan_info.link_nsid = link_nsid; + dplane_ctx_set_ifp_vxlan_info(ctx, &vxlan_info); + break; + case ZEBRA_IF_GRE: + netlink_extract_gre_info(link_data, &gre_info); + gre_info.link_nsid = link_nsid; + dplane_ctx_set_ifp_gre_info(ctx, &gre_info); + break; + case ZEBRA_IF_OTHER: + case ZEBRA_IF_VRF: + case ZEBRA_IF_MACVLAN: + case ZEBRA_IF_VETH: + case ZEBRA_IF_BOND: + break; + } +} + +static int +netlink_bridge_vxlan_vlan_vni_map_update(struct zebra_dplane_ctx *ctx, + struct rtattr *af_spec) +{ + int rem; + uint16_t flags; + struct rtattr *i; + struct zebra_vxlan_vni_array *vniarray = NULL; + struct zebra_vxlan_vni vni_end; + struct zebra_vxlan_vni vni_start; + struct rtattr *aftb[IFLA_BRIDGE_VLAN_TUNNEL_MAX + 1]; + int32_t count = 0; + + memset(&vni_start, 0, sizeof(vni_start)); + memset(&vni_end, 0, sizeof(vni_end)); + + for (i = RTA_DATA(af_spec), rem = RTA_PAYLOAD(af_spec); RTA_OK(i, rem); + i = RTA_NEXT(i, rem)) { + + if (i->rta_type != IFLA_BRIDGE_VLAN_TUNNEL_INFO) + continue; + + memset(aftb, 0, sizeof(aftb)); + netlink_parse_rtattr_nested(aftb, IFLA_BRIDGE_VLAN_TUNNEL_MAX, + i); + if (!aftb[IFLA_BRIDGE_VLAN_TUNNEL_ID] || + !aftb[IFLA_BRIDGE_VLAN_TUNNEL_VID]) + /* vlan-vni info missing */ + return 0; + + count++; + flags = 0; + vniarray = 
XREALLOC( + MTYPE_TMP, vniarray, + sizeof(struct zebra_vxlan_vni_array) + + count * sizeof(struct zebra_vxlan_vni)); + + memset(&vniarray->vnis[count - 1], 0, + sizeof(struct zebra_vxlan_vni)); + + vniarray->vnis[count - 1].vni = + *(vni_t *)RTA_DATA(aftb[IFLA_BRIDGE_VLAN_TUNNEL_ID]); + vniarray->vnis[count - 1].access_vlan = *(vlanid_t *)RTA_DATA( + aftb[IFLA_BRIDGE_VLAN_TUNNEL_VID]); + + if (aftb[IFLA_BRIDGE_VLAN_TUNNEL_FLAGS]) + flags = *(uint16_t *)RTA_DATA( + aftb[IFLA_BRIDGE_VLAN_TUNNEL_FLAGS]); + + vniarray->vnis[count - 1].flags = flags; + } + + if (count) { + vniarray->count = count; + dplane_ctx_set_ifp_vxlan_vni_array(ctx, vniarray); + } + return 0; +} + +static int netlink_bridge_vxlan_update(struct zebra_dplane_ctx *ctx, + struct rtattr *af_spec) +{ + struct rtattr *aftb[IFLA_BRIDGE_MAX + 1]; + struct bridge_vlan_info *vinfo; + struct zebra_dplane_bridge_vlan_info bvinfo; + + if (!af_spec) { + dplane_ctx_set_ifp_no_afspec(ctx); + return 0; + } + + netlink_bridge_vxlan_vlan_vni_map_update(ctx, af_spec); + + /* There is a 1-to-1 mapping of VLAN to VxLAN - hence + * only 1 access VLAN is accepted. 
+ */ + netlink_parse_rtattr_nested(aftb, IFLA_BRIDGE_MAX, af_spec); + if (!aftb[IFLA_BRIDGE_VLAN_INFO]) { + dplane_ctx_set_ifp_no_bridge_vlan_info(ctx); + return 0; + } + + vinfo = RTA_DATA(aftb[IFLA_BRIDGE_VLAN_INFO]); + bvinfo.flags = vinfo->flags; + bvinfo.vid = vinfo->vid; + + dplane_ctx_set_ifp_bridge_vlan_info(ctx, &bvinfo); + return 0; +} + +static void netlink_bridge_vlan_update(struct zebra_dplane_ctx *ctx, + struct rtattr *af_spec) +{ + struct rtattr *i; + int rem; + struct bridge_vlan_info *vinfo; + struct zebra_dplane_bridge_vlan_info_array *bvarray = NULL; + int32_t count = 0; + + if (af_spec) { + for (i = RTA_DATA(af_spec), rem = RTA_PAYLOAD(af_spec); + RTA_OK(i, rem); i = RTA_NEXT(i, rem)) { + if (i->rta_type != IFLA_BRIDGE_VLAN_INFO) + continue; + + count++; + bvarray = XREALLOC( + MTYPE_TMP, bvarray, + sizeof(struct + zebra_dplane_bridge_vlan_info_array) + + count * sizeof(struct + zebra_dplane_bridge_vlan_info)); + + vinfo = RTA_DATA(i); + bvarray->array[count - 1].flags = vinfo->flags; + bvarray->array[count - 1].vid = vinfo->vid; + } + } + + if (count) { + bvarray->count = count; + dplane_ctx_set_ifp_bridge_vlan_info_array(ctx, bvarray); + } +} + +static int netlink_bridge_interface(struct zebra_dplane_ctx *ctx, + struct rtattr *af_spec, int startup) +{ + + netlink_bridge_vxlan_update(ctx, af_spec); + + /* build vlan bitmap associated with this interface if that + * device type is interested in the vlans + */ + netlink_bridge_vlan_update(ctx, af_spec); + + dplane_provider_enqueue_to_zebra(ctx); + return 0; +} + +/* + * Process interface protodown dplane update. + * + * If the interface is an es bond member then it must follow EVPN's + * protodown setting. 
+ */ +static void netlink_proc_dplane_if_protodown(struct zebra_dplane_ctx *ctx, + struct rtattr **tb) +{ + bool protodown; + uint32_t rc_bitfield = 0; + struct rtattr *pd_reason_info[IFLA_MAX + 1]; + + protodown = !!*(uint8_t *)RTA_DATA(tb[IFLA_PROTO_DOWN]); + + if (tb[IFLA_PROTO_DOWN_REASON]) { + netlink_parse_rtattr_nested(pd_reason_info, IFLA_INFO_MAX, + tb[IFLA_PROTO_DOWN_REASON]); + + if (pd_reason_info[IFLA_PROTO_DOWN_REASON_VALUE]) + rc_bitfield = *(uint32_t *)RTA_DATA( + pd_reason_info[IFLA_PROTO_DOWN_REASON_VALUE]); + } + + dplane_ctx_set_ifp_rc_bitfield(ctx, rc_bitfield); + dplane_ctx_set_ifp_protodown(ctx, protodown); + dplane_ctx_set_ifp_protodown_set(ctx, true); +} + +static uint8_t netlink_parse_lacp_bypass(struct rtattr **linkinfo) +{ + uint8_t bypass = 0; + struct rtattr *mbrinfo[IFLA_BOND_SLAVE_MAX + 1]; + + netlink_parse_rtattr_nested(mbrinfo, IFLA_BOND_SLAVE_MAX, + linkinfo[IFLA_INFO_SLAVE_DATA]); + if (mbrinfo[IFLA_BOND_SLAVE_AD_RX_BYPASS]) + bypass = *(uint8_t *)RTA_DATA( + mbrinfo[IFLA_BOND_SLAVE_AD_RX_BYPASS]); + + return bypass; +} + +/* Request for specific interface or address information from the kernel */ +static int netlink_request_intf_addr(struct nlsock *netlink_cmd, int family, + int type, uint32_t filter_mask) +{ + struct { + struct nlmsghdr n; + struct ifinfomsg ifm; + char buf[256]; + } req; + + frrtrace(4, frr_zebra, netlink_request_intf_addr, netlink_cmd, family, + type, filter_mask); + + /* Form the request, specifying filter (rtattr) if needed. */ + memset(&req, 0, sizeof(req)); + req.n.nlmsg_type = type; + req.n.nlmsg_flags = NLM_F_ROOT | NLM_F_MATCH | NLM_F_REQUEST; + req.n.nlmsg_len = NLMSG_LENGTH(sizeof(struct ifinfomsg)); + req.ifm.ifi_family = family; + + /* Include filter, if specified. 
*/ + if (filter_mask) + nl_attr_put32(&req.n, sizeof(req), IFLA_EXT_MASK, filter_mask); + + return netlink_request(netlink_cmd, &req); +} + +enum netlink_msg_status +netlink_put_gre_set_msg(struct nl_batch *bth, struct zebra_dplane_ctx *ctx) +{ + enum dplane_op_e op; + enum netlink_msg_status ret; + + op = dplane_ctx_get_op(ctx); + assert(op == DPLANE_OP_GRE_SET); + + ret = netlink_batch_add_msg(bth, ctx, netlink_gre_set_msg_encoder, false); + + return ret; +} + +/* Interface lookup by netlink socket. */ +int interface_lookup_netlink(struct zebra_ns *zns) +{ + int ret; + struct zebra_dplane_info dp_info; + struct nlsock *netlink_cmd = &zns->netlink_dplane_out; + + /* Capture key info from ns struct */ + zebra_dplane_info_from_zns(&dp_info, zns, true /*is_cmd*/); + + /* Get interface information. */ + ret = netlink_request_intf_addr(netlink_cmd, AF_PACKET, RTM_GETLINK, 0); + if (ret < 0) + return ret; + ret = netlink_parse_info(netlink_link_change, netlink_cmd, &dp_info, 0, + true); + if (ret < 0) + return ret; + + /* Get interface information - for bridge interfaces. */ + ret = netlink_request_intf_addr(netlink_cmd, AF_BRIDGE, RTM_GETLINK, + RTEXT_FILTER_BRVLAN); + if (ret < 0) + return ret; + ret = netlink_parse_info(netlink_link_change, netlink_cmd, &dp_info, 0, + true); + if (ret < 0) + return ret; + + return ret; +} + +void interface_list_tunneldump(struct zebra_ns *zns) +{ + int ret; + + /* + * So netlink_tunneldump_read will initiate a request + * per tunnel to get data. If we are on a kernel that + * does not support this then we will get X error messages + * (one per tunnel request )back which netlink_parse_info will + * stop after the first one. So we need to read equivalent + * error messages per tunnel then we can continue. + * if we do not gather all the read failures then + * later requests will not work right. 
+ */ + ret = netlink_tunneldump_read(zns); + if (ret < 0) + return; + + zebra_dplane_startup_stage(zns, ZEBRA_DPLANE_TUNNELS_READ); +} + + +/** + * interface_addr_lookup_netlink() - Look up interface addresses + * + * @zns: Zebra netlink socket + * Return: Result status + */ +static int interface_addr_lookup_netlink(struct zebra_ns *zns) +{ + int ret; + struct zebra_dplane_info dp_info; + struct nlsock *netlink_cmd = &zns->netlink_cmd; + + /* Capture key info from ns struct */ + zebra_dplane_info_from_zns(&dp_info, zns, true /*is_cmd*/); + + /* Get IPv4 address of the interfaces. */ + ret = netlink_request_intf_addr(netlink_cmd, AF_INET, RTM_GETADDR, 0); + if (ret < 0) + return ret; + ret = netlink_parse_info(netlink_interface_addr_dplane, netlink_cmd, + &dp_info, 0, true); + if (ret < 0) + return ret; + + /* Get IPv6 address of the interfaces. */ + ret = netlink_request_intf_addr(netlink_cmd, AF_INET6, RTM_GETADDR, 0); + if (ret < 0) + return ret; + ret = netlink_parse_info(netlink_interface_addr_dplane, netlink_cmd, + &dp_info, 0, true); + if (ret < 0) + return ret; + + return 0; +} + +int kernel_interface_set_master(struct interface *master, + struct interface *slave) +{ + struct zebra_ns *zns = zebra_ns_lookup(NS_DEFAULT); + + struct { + struct nlmsghdr n; + struct ifinfomsg ifa; + char buf[NL_PKT_BUF_SIZE]; + } req; + + memset(&req, 0, sizeof(req)); + + req.n.nlmsg_len = NLMSG_LENGTH(sizeof(struct ifinfomsg)); + req.n.nlmsg_flags = NLM_F_REQUEST; + req.n.nlmsg_type = RTM_SETLINK; + req.n.nlmsg_pid = zns->netlink_cmd.snl.nl_pid; + + req.ifa.ifi_index = slave->ifindex; + + nl_attr_put32(&req.n, sizeof(req), IFLA_MASTER, master->ifindex); + nl_attr_put32(&req.n, sizeof(req), IFLA_LINK, slave->ifindex); + + return netlink_talk(netlink_talk_filter, &req.n, &zns->netlink_cmd, zns, + false); +} + +/* Interface address modification. 
*/
+/*
+ * Encode an RTM_NEWADDR (DPLANE_OP_ADDR_INSTALL) or RTM_DELADDR (any other
+ * op) request for the interface address held in 'ctx' into 'buf'.
+ *
+ * Returns the aligned message length, or 0 when 'buflen' cannot hold the
+ * header or one of the attributes.
+ */
+static ssize_t netlink_address_msg_encoder(struct zebra_dplane_ctx *ctx,
+					   void *buf, size_t buflen)
+{
+	int bytelen;
+	const struct prefix *p;
+	int cmd;
+	const char *label;
+
+	struct {
+		struct nlmsghdr n;
+		struct ifaddrmsg ifa;
+		char buf[]; /* attributes follow the fixed header */
+	} *req = buf;
+
+	if (buflen < sizeof(*req))
+		return 0;
+
+	p = dplane_ctx_get_intf_addr(ctx);
+	memset(req, 0, sizeof(*req));
+
+	/* Address payload size: 4 bytes for AF_INET, 16 otherwise. */
+	bytelen = (p->family == AF_INET ? 4 : 16);
+
+	req->n.nlmsg_len = NLMSG_LENGTH(sizeof(struct ifaddrmsg));
+	req->n.nlmsg_flags = NLM_F_REQUEST;
+
+	if (dplane_ctx_get_op(ctx) == DPLANE_OP_ADDR_INSTALL)
+		cmd = RTM_NEWADDR;
+	else
+		cmd = RTM_DELADDR;
+
+	req->n.nlmsg_type = cmd;
+	req->ifa.ifa_family = p->family;
+
+	req->ifa.ifa_index = dplane_ctx_get_ifindex(ctx);
+
+	if (!nl_attr_put(&req->n, buflen, IFA_LOCAL, &p->u.prefix, bytelen))
+		return 0;
+
+	if (p->family == AF_INET) {
+		if (dplane_ctx_intf_is_connected(ctx)) {
+			p = dplane_ctx_get_intf_dest(ctx);
+			if (!nl_attr_put(&req->n, buflen, IFA_ADDRESS,
+					 &p->u.prefix, bytelen))
+				return 0;
+		} else if (cmd == RTM_NEWADDR) {
+			struct in_addr broad = {
+				.s_addr = ipv4_broadcast_addr(p->u.prefix4.s_addr,
+							      p->prefixlen)
+			};
+			if (!nl_attr_put(&req->n, buflen, IFA_BROADCAST, &broad,
+					 bytelen))
+				return 0;
+		}
+	}
+
+	/*
+	 * p is now either the local address or, in the connected case
+	 * above, the destination address.
+	 */
+	req->ifa.ifa_prefixlen = p->prefixlen;
+
+	if (dplane_ctx_intf_is_secondary(ctx))
+		SET_FLAG(req->ifa.ifa_flags, IFA_F_SECONDARY);
+
+	if (dplane_ctx_intf_has_label(ctx)) {
+		label = dplane_ctx_get_intf_label(ctx);
+		/* The label is sent including its terminating NUL. */
+		if (!nl_attr_put(&req->n, buflen, IFA_LABEL, label,
+				 strlen(label) + 1))
+			return 0;
+	}
+
+	return NLMSG_ALIGN(req->n.nlmsg_len);
+}
+
+/*
+ * Add an interface address update for 'ctx' to the netlink batch 'bth',
+ * encoded by netlink_address_msg_encoder().
+ */
+enum netlink_msg_status
+netlink_put_address_update_msg(struct nl_batch *bth,
+			       struct zebra_dplane_ctx *ctx)
+{
+	return netlink_batch_add_msg(bth, ctx, netlink_address_msg_encoder,
+				     false);
+}
+
+/*
+ * Map the interface op carried by 'ctx' to its RTM_* link command and hand
+ * the encoding to netlink_intf_msg_encode().  Any op that is not an
+ * interface update/install/delete is logged and rejected with -1.
+ */
+static ssize_t netlink_intf_msg_encoder(struct zebra_dplane_ctx *ctx, void *buf,
+					size_t buflen)
+{
+	enum dplane_op_e op;
+	int cmd = 0;
+
+	op = dplane_ctx_get_op(ctx);
+
+	switch (op) {
+	case DPLANE_OP_INTF_UPDATE:
+		cmd = RTM_SETLINK;
+		break;
+	case DPLANE_OP_INTF_INSTALL:
+		cmd = RTM_NEWLINK;
+		break;
+	case DPLANE_OP_INTF_DELETE:
+		cmd = RTM_DELLINK;
+		break;
+	case DPLANE_OP_NONE:
+	case DPLANE_OP_ROUTE_INSTALL:
+	case DPLANE_OP_ROUTE_UPDATE:
+	case DPLANE_OP_ROUTE_DELETE:
+	case DPLANE_OP_ROUTE_NOTIFY:
+	case DPLANE_OP_NH_INSTALL:
+	case DPLANE_OP_NH_UPDATE:
+	case DPLANE_OP_NH_DELETE:
+	case DPLANE_OP_LSP_INSTALL:
+	case DPLANE_OP_LSP_DELETE:
+	case DPLANE_OP_LSP_NOTIFY:
+	case DPLANE_OP_LSP_UPDATE:
+	case DPLANE_OP_PW_INSTALL:
+	case DPLANE_OP_PW_UNINSTALL:
+	case DPLANE_OP_SYS_ROUTE_ADD:
+	case DPLANE_OP_SYS_ROUTE_DELETE:
+	case DPLANE_OP_ADDR_INSTALL:
+	case DPLANE_OP_ADDR_UNINSTALL:
+	case DPLANE_OP_MAC_INSTALL:
+	case DPLANE_OP_MAC_DELETE:
+	case DPLANE_OP_NEIGH_INSTALL:
+	case DPLANE_OP_NEIGH_UPDATE:
+	case DPLANE_OP_NEIGH_DELETE:
+	case DPLANE_OP_NEIGH_DISCOVER:
+	case DPLANE_OP_VTEP_ADD:
+	case DPLANE_OP_VTEP_DELETE:
+	case DPLANE_OP_RULE_ADD:
+	case DPLANE_OP_RULE_DELETE:
+	case DPLANE_OP_RULE_UPDATE:
+	case DPLANE_OP_BR_PORT_UPDATE:
+	case DPLANE_OP_IPTABLE_ADD:
+	case DPLANE_OP_IPTABLE_DELETE:
+	case DPLANE_OP_IPSET_ADD:
+	case DPLANE_OP_IPSET_ENTRY_ADD:
+	case DPLANE_OP_IPSET_ENTRY_DELETE:
+	case DPLANE_OP_IPSET_DELETE:
+	case DPLANE_OP_NEIGH_IP_INSTALL:
+	case DPLANE_OP_NEIGH_IP_DELETE:
+	case DPLANE_OP_NEIGH_TABLE_UPDATE:
+	case DPLANE_OP_GRE_SET:
+	case DPLANE_OP_INTF_ADDR_ADD:
+	case DPLANE_OP_INTF_ADDR_DEL:
+	case DPLANE_OP_INTF_NETCONFIG:
+	case DPLANE_OP_TC_QDISC_INSTALL:
+	case DPLANE_OP_TC_QDISC_UNINSTALL:
+	case DPLANE_OP_TC_CLASS_ADD:
+	case DPLANE_OP_TC_CLASS_DELETE:
+	case DPLANE_OP_TC_CLASS_UPDATE:
+	case DPLANE_OP_TC_FILTER_ADD:
+	case DPLANE_OP_TC_FILTER_DELETE:
+	case DPLANE_OP_TC_FILTER_UPDATE:
+	case DPLANE_OP_STARTUP_STAGE:
+		/* Not an interface op: report it as an interface update failure. */
+		flog_err(
+			EC_ZEBRA_INTF_UPDATE_FAILURE,
+			"Context received for kernel interface update with incorrect OP code (%u)",
+			op);
+		return -1;
+	}
+
+	return netlink_intf_msg_encode(cmd, ctx, buf, buflen);
+}
+
+/*
+ * Add an interface update for 'ctx' to the netlink batch 'bth', encoded by
+ * netlink_intf_msg_encoder().
+ */
+enum netlink_msg_status
+netlink_put_intf_update_msg(struct nl_batch *bth, struct zebra_dplane_ctx *ctx)
+{
+	return netlink_batch_add_msg(bth, ctx, netlink_intf_msg_encoder, false);
+}
+
+int netlink_interface_addr(struct nlmsghdr *h, ns_id_t ns_id, int startup)
+{
+	int len;
+	struct ifaddrmsg *ifa;
+	struct rtattr *tb[IFA_MAX + 1];
+	struct interface *ifp;
+	void *addr;
+	void *broad;
+	uint8_t flags = 0;
+	char *label = NULL;
+	struct zebra_ns *zns;
+	uint32_t metric = METRIC_MAX;
+	uint32_t kernel_flags = 0;
+
+	frrtrace(3, frr_zebra, netlink_interface_addr, h, ns_id, startup);
+
+	zns = zebra_ns_lookup(ns_id);
+	ifa = NLMSG_DATA(h);
+
+	if (ifa->ifa_family != AF_INET && ifa->ifa_family != AF_INET6) {
+		flog_warn(
+			EC_ZEBRA_UNKNOWN_FAMILY,
+			"Invalid address family: %u received from kernel interface addr change: %s",
+			ifa->ifa_family, nl_msg_type_to_str(h->nlmsg_type));
+		return 0;
+	}
+
+	if (h->nlmsg_type != RTM_NEWADDR && h->nlmsg_type != RTM_DELADDR)
+		return 0;
+
+	len = h->nlmsg_len - NLMSG_LENGTH(sizeof(struct ifaddrmsg));
+	if (len < 0) {
+		zlog_err(
+			"%s: Message received from netlink is of a broken size: %d %zu",
+			__func__, h->nlmsg_len,
+			(size_t)NLMSG_LENGTH(sizeof(struct ifaddrmsg)));
+		return -1;
+	}
+
+	netlink_parse_rtattr(tb, IFA_MAX, IFA_RTA(ifa), len);
+
+	ifp = if_lookup_by_index_per_ns(zns, ifa->ifa_index);
+	if (ifp == NULL) {
+		if (startup) {
+			/* During startup, failure to lookup the referenced
+			 * interface should not be an error, so we have
+			 * downgraded this condition to warning, and we permit
+			 * the startup interface state retrieval to continue.
+ */ + flog_warn(EC_LIB_INTERFACE, + "%s: can't find interface by index %d", + __func__, ifa->ifa_index); + return 0; + } else { + flog_err(EC_LIB_INTERFACE, + "%s: can't find interface by index %d", + __func__, ifa->ifa_index); + return -1; + } + } + + /* Flags passed through */ + if (tb[IFA_FLAGS]) + kernel_flags = *(int *)RTA_DATA(tb[IFA_FLAGS]); + else + kernel_flags = ifa->ifa_flags; + + if (IS_ZEBRA_DEBUG_KERNEL) /* remove this line to see initial ifcfg */ + { + char buf[BUFSIZ]; + zlog_debug("%s %s %s flags 0x%x:", __func__, + nl_msg_type_to_str(h->nlmsg_type), ifp->name, + kernel_flags); + if (tb[IFA_LOCAL]) + zlog_debug(" IFA_LOCAL %s/%d", + inet_ntop(ifa->ifa_family, + RTA_DATA(tb[IFA_LOCAL]), buf, + BUFSIZ), + ifa->ifa_prefixlen); + if (tb[IFA_ADDRESS]) + zlog_debug(" IFA_ADDRESS %s/%d", + inet_ntop(ifa->ifa_family, + RTA_DATA(tb[IFA_ADDRESS]), buf, + BUFSIZ), + ifa->ifa_prefixlen); + if (tb[IFA_BROADCAST]) + zlog_debug(" IFA_BROADCAST %s/%d", + inet_ntop(ifa->ifa_family, + RTA_DATA(tb[IFA_BROADCAST]), buf, + BUFSIZ), + ifa->ifa_prefixlen); + if (tb[IFA_LABEL] && strcmp(ifp->name, RTA_DATA(tb[IFA_LABEL]))) + zlog_debug(" IFA_LABEL %s", + (char *)RTA_DATA(tb[IFA_LABEL])); + + if (tb[IFA_CACHEINFO]) { + struct ifa_cacheinfo *ci = RTA_DATA(tb[IFA_CACHEINFO]); + zlog_debug(" IFA_CACHEINFO pref %d, valid %d", + ci->ifa_prefered, ci->ifa_valid); + } + } + + /* logic copied from iproute2/ip/ipaddress.c:print_addrinfo() */ + if (tb[IFA_LOCAL] == NULL) + tb[IFA_LOCAL] = tb[IFA_ADDRESS]; + if (tb[IFA_ADDRESS] == NULL) + tb[IFA_ADDRESS] = tb[IFA_LOCAL]; + + /* local interface address */ + addr = (tb[IFA_LOCAL] ? RTA_DATA(tb[IFA_LOCAL]) : NULL); + + /* is there a peer address? */ + if (tb[IFA_ADDRESS] + && memcmp(RTA_DATA(tb[IFA_ADDRESS]), RTA_DATA(tb[IFA_LOCAL]), + RTA_PAYLOAD(tb[IFA_ADDRESS]))) { + broad = RTA_DATA(tb[IFA_ADDRESS]); + SET_FLAG(flags, ZEBRA_IFA_PEER); + } else + /* seeking a broadcast address */ + broad = (tb[IFA_BROADCAST] ? 
RTA_DATA(tb[IFA_BROADCAST]) + : NULL); + + /* addr is primary key, SOL if we don't have one */ + if (addr == NULL) { + zlog_debug("%s: Local Interface Address is NULL for %s", + __func__, ifp->name); + return -1; + } + + /* Flags. */ + if (kernel_flags & IFA_F_SECONDARY) + SET_FLAG(flags, ZEBRA_IFA_SECONDARY); + + /* Label */ + if (tb[IFA_LABEL]) + label = (char *)RTA_DATA(tb[IFA_LABEL]); + + if (label && strcmp(ifp->name, label) == 0) + label = NULL; + + if (tb[IFA_RT_PRIORITY]) + metric = *(uint32_t *)RTA_DATA(tb[IFA_RT_PRIORITY]); + + /* Register interface address to the interface. */ + if (ifa->ifa_family == AF_INET) { + if (ifa->ifa_prefixlen > IPV4_MAX_BITLEN) { + zlog_err( + "Invalid prefix length: %u received from kernel interface addr change: %s", + ifa->ifa_prefixlen, + nl_msg_type_to_str(h->nlmsg_type)); + return -1; + } + + if (h->nlmsg_type == RTM_NEWADDR) + connected_add_ipv4(ifp, flags, (struct in_addr *)addr, + ifa->ifa_prefixlen, + (struct in_addr *)broad, label, + metric); + else if (CHECK_FLAG(flags, ZEBRA_IFA_PEER)) { + /* Delete with a peer address */ + connected_delete_ipv4( + ifp, flags, (struct in_addr *)addr, + ifa->ifa_prefixlen, broad); + } else + connected_delete_ipv4( + ifp, flags, (struct in_addr *)addr, + ifa->ifa_prefixlen, NULL); + } + + if (ifa->ifa_family == AF_INET6) { + if (ifa->ifa_prefixlen > IPV6_MAX_BITLEN) { + zlog_err( + "Invalid prefix length: %u received from kernel interface addr change: %s", + ifa->ifa_prefixlen, + nl_msg_type_to_str(h->nlmsg_type)); + return -1; + } + if (h->nlmsg_type == RTM_NEWADDR) { + /* Only consider valid addresses; we'll not get a + * notification from + * the kernel till IPv6 DAD has completed, but at init + * time, Quagga + * does query for and will receive all addresses. 
+ */ + if (!(kernel_flags + & (IFA_F_DADFAILED | IFA_F_TENTATIVE))) + connected_add_ipv6(ifp, flags, + (struct in6_addr *)addr, + (struct in6_addr *)broad, + ifa->ifa_prefixlen, label, + metric); + } else + connected_delete_ipv6(ifp, (struct in6_addr *)addr, + NULL, ifa->ifa_prefixlen); + } + + /* + * Linux kernel does not send route delete on interface down/addr del + * so we have to re-process routes it owns (i.e. kernel routes) + */ + if (h->nlmsg_type != RTM_NEWADDR) + rib_update(RIB_UPDATE_KERNEL); + + return 0; +} + +/* + * Parse and validate an incoming interface address change message, + * generating a dplane context object. + * This runs in the dplane pthread; the context is enqueued to the + * main pthread for processing. + */ +int netlink_interface_addr_dplane(struct nlmsghdr *h, ns_id_t ns_id, + int startup /*ignored*/) +{ + int len; + struct ifaddrmsg *ifa; + struct rtattr *tb[IFA_MAX + 1]; + void *addr; + void *broad; + char *label = NULL; + uint32_t metric = METRIC_MAX; + uint32_t kernel_flags = 0; + struct zebra_dplane_ctx *ctx; + struct prefix p; + + ifa = NLMSG_DATA(h); + + /* Validate message types */ + if (h->nlmsg_type != RTM_NEWADDR && h->nlmsg_type != RTM_DELADDR) + return 0; + + if (ifa->ifa_family != AF_INET && ifa->ifa_family != AF_INET6) { + if (IS_ZEBRA_DEBUG_KERNEL) + zlog_debug("%s: %s: Invalid address family: %u", + __func__, nl_msg_type_to_str(h->nlmsg_type), + ifa->ifa_family); + return 0; + } + + len = h->nlmsg_len - NLMSG_LENGTH(sizeof(struct ifaddrmsg)); + if (len < 0) { + if (IS_ZEBRA_DEBUG_KERNEL) + zlog_debug("%s: %s: netlink msg bad size: %d %zu", + __func__, nl_msg_type_to_str(h->nlmsg_type), + h->nlmsg_len, + (size_t)NLMSG_LENGTH( + sizeof(struct ifaddrmsg))); + return -1; + } + + netlink_parse_rtattr(tb, IFA_MAX, IFA_RTA(ifa), len); + + /* Flags passed through */ + if (tb[IFA_FLAGS]) + kernel_flags = *(int *)RTA_DATA(tb[IFA_FLAGS]); + else + kernel_flags = ifa->ifa_flags; + + if (IS_ZEBRA_DEBUG_KERNEL) { /* remove this 
line to see initial ifcfg */ + char buf[PREFIX_STRLEN]; + + zlog_debug("%s: %s nsid %u ifindex %u flags 0x%x:", __func__, + nl_msg_type_to_str(h->nlmsg_type), ns_id, + ifa->ifa_index, kernel_flags); + if (tb[IFA_LOCAL]) + zlog_debug(" IFA_LOCAL %s/%d", + inet_ntop(ifa->ifa_family, + RTA_DATA(tb[IFA_LOCAL]), buf, + sizeof(buf)), + ifa->ifa_prefixlen); + if (tb[IFA_ADDRESS]) + zlog_debug(" IFA_ADDRESS %s/%d", + inet_ntop(ifa->ifa_family, + RTA_DATA(tb[IFA_ADDRESS]), buf, + sizeof(buf)), + ifa->ifa_prefixlen); + if (tb[IFA_BROADCAST]) + zlog_debug(" IFA_BROADCAST %s/%d", + inet_ntop(ifa->ifa_family, + RTA_DATA(tb[IFA_BROADCAST]), buf, + sizeof(buf)), + ifa->ifa_prefixlen); + if (tb[IFA_LABEL]) + zlog_debug(" IFA_LABEL %s", + (const char *)RTA_DATA(tb[IFA_LABEL])); + + if (tb[IFA_CACHEINFO]) { + struct ifa_cacheinfo *ci = RTA_DATA(tb[IFA_CACHEINFO]); + + zlog_debug(" IFA_CACHEINFO pref %d, valid %d", + ci->ifa_prefered, ci->ifa_valid); + } + } + + /* Validate prefix length */ + + if (ifa->ifa_family == AF_INET + && ifa->ifa_prefixlen > IPV4_MAX_BITLEN) { + if (IS_ZEBRA_DEBUG_KERNEL) + zlog_debug("%s: %s: Invalid prefix length: %u", + __func__, nl_msg_type_to_str(h->nlmsg_type), + ifa->ifa_prefixlen); + return -1; + } + + if (ifa->ifa_family == AF_INET6) { + if (ifa->ifa_prefixlen > IPV6_MAX_BITLEN) { + if (IS_ZEBRA_DEBUG_KERNEL) + zlog_debug("%s: %s: Invalid prefix length: %u", + __func__, + nl_msg_type_to_str(h->nlmsg_type), + ifa->ifa_prefixlen); + return -1; + } + + /* Only consider valid addresses; we'll not get a kernel + * notification till IPv6 DAD has completed, but at init + * time, FRR does query for and will receive all addresses. 
+ */ + if (h->nlmsg_type == RTM_NEWADDR + && (kernel_flags & (IFA_F_DADFAILED | IFA_F_TENTATIVE))) { + if (IS_ZEBRA_DEBUG_KERNEL) + zlog_debug("%s: %s: Invalid/tentative addr", + __func__, + nl_msg_type_to_str(h->nlmsg_type)); + return 0; + } + } + + /* logic copied from iproute2/ip/ipaddress.c:print_addrinfo() */ + if (tb[IFA_LOCAL] == NULL) + tb[IFA_LOCAL] = tb[IFA_ADDRESS]; + if (tb[IFA_ADDRESS] == NULL) + tb[IFA_ADDRESS] = tb[IFA_LOCAL]; + + /* local interface address */ + addr = (tb[IFA_LOCAL] ? RTA_DATA(tb[IFA_LOCAL]) : NULL); + + /* addr is primary key, SOL if we don't have one */ + if (addr == NULL) { + if (IS_ZEBRA_DEBUG_KERNEL) + zlog_debug("%s: %s: No local interface address", + __func__, nl_msg_type_to_str(h->nlmsg_type)); + return -1; + } + + /* Allocate a context object, now that validation is done. */ + ctx = dplane_ctx_alloc(); + if (h->nlmsg_type == RTM_NEWADDR) + dplane_ctx_set_op(ctx, DPLANE_OP_INTF_ADDR_ADD); + else + dplane_ctx_set_op(ctx, DPLANE_OP_INTF_ADDR_DEL); + + dplane_ctx_set_ifindex(ctx, ifa->ifa_index); + dplane_ctx_set_ns_id(ctx, ns_id); + + /* Convert addr to prefix */ + memset(&p, 0, sizeof(p)); + p.family = ifa->ifa_family; + p.prefixlen = ifa->ifa_prefixlen; + if (p.family == AF_INET) + p.u.prefix4 = *(struct in_addr *)addr; + else + p.u.prefix6 = *(struct in6_addr *)addr; + + dplane_ctx_set_intf_addr(ctx, &p); + + /* is there a peer address? 
*/ + if (tb[IFA_ADDRESS] + && memcmp(RTA_DATA(tb[IFA_ADDRESS]), RTA_DATA(tb[IFA_LOCAL]), + RTA_PAYLOAD(tb[IFA_ADDRESS]))) { + broad = RTA_DATA(tb[IFA_ADDRESS]); + dplane_ctx_intf_set_connected(ctx); + } else if (tb[IFA_BROADCAST]) { + /* seeking a broadcast address */ + broad = RTA_DATA(tb[IFA_BROADCAST]); + dplane_ctx_intf_set_broadcast(ctx); + } else + broad = NULL; + + if (broad) { + /* Convert addr to prefix */ + memset(&p, 0, sizeof(p)); + p.family = ifa->ifa_family; + p.prefixlen = ifa->ifa_prefixlen; + if (p.family == AF_INET) + p.u.prefix4 = *(struct in_addr *)broad; + else + p.u.prefix6 = *(struct in6_addr *)broad; + + dplane_ctx_set_intf_dest(ctx, &p); + } + + /* Flags. */ + if (kernel_flags & IFA_F_SECONDARY) + dplane_ctx_intf_set_secondary(ctx); + + /* Label */ + if (tb[IFA_LABEL]) { + label = (char *)RTA_DATA(tb[IFA_LABEL]); + dplane_ctx_set_intf_label(ctx, label); + } + + if (tb[IFA_RT_PRIORITY]) + metric = *(uint32_t *)RTA_DATA(tb[IFA_RT_PRIORITY]); + + dplane_ctx_set_intf_metric(ctx, metric); + + /* Enqueue ctx for main pthread to process */ + dplane_provider_enqueue_to_zebra(ctx); + return 0; +} + +int netlink_link_change(struct nlmsghdr *h, ns_id_t ns_id, int startup) +{ + int len; + struct ifinfomsg *ifi; + struct rtattr *tb[IFLA_MAX + 1]; + struct rtattr *linkinfo[IFLA_MAX + 1]; + char *name = NULL; + char *kind = NULL; + char *slave_kind = NULL; + vrf_id_t vrf_id = VRF_DEFAULT; + enum zebra_iftype zif_type = ZEBRA_IF_OTHER; + enum zebra_slave_iftype zif_slave_type = ZEBRA_IF_SLAVE_NONE; + ifindex_t bridge_ifindex = IFINDEX_INTERNAL; + ifindex_t bond_ifindex = IFINDEX_INTERNAL; + ifindex_t link_ifindex = IFINDEX_INTERNAL; + ns_id_t link_nsid = ns_id; + ifindex_t master_infindex = IFINDEX_INTERNAL; + uint8_t bypass = 0; + uint32_t txqlen = 0; + + frrtrace(3, frr_zebra, netlink_interface, h, ns_id, startup); + + ifi = NLMSG_DATA(h); + + /* assume if not default zns, then new VRF */ + if (!(h->nlmsg_type == RTM_NEWLINK || h->nlmsg_type == 
RTM_DELLINK)) { + /* If this is not link add/delete message so print warning. */ + zlog_debug("%s: wrong kernel message %s", __func__, + nl_msg_type_to_str(h->nlmsg_type)); + return 0; + } + + if (!(ifi->ifi_family == AF_UNSPEC || ifi->ifi_family == AF_BRIDGE + || ifi->ifi_family == AF_INET6)) { + flog_warn( + EC_ZEBRA_UNKNOWN_FAMILY, + "Invalid address family: %u received from kernel link change: %s", + ifi->ifi_family, nl_msg_type_to_str(h->nlmsg_type)); + return 0; + } + + len = h->nlmsg_len - NLMSG_LENGTH(sizeof(struct ifinfomsg)); + if (len < 0) { + zlog_err( + "%s: Message received from netlink is of a broken size %d %zu", + __func__, h->nlmsg_len, + (size_t)NLMSG_LENGTH(sizeof(struct ifinfomsg))); + return -1; + } + + /* Looking up interface name. */ + memset(linkinfo, 0, sizeof(linkinfo)); + netlink_parse_rtattr_flags(tb, IFLA_MAX, IFLA_RTA(ifi), len, + NLA_F_NESTED); + + /* check for wireless messages to ignore */ + if ((tb[IFLA_WIRELESS] != NULL) && (ifi->ifi_change == 0)) { + if (IS_ZEBRA_DEBUG_KERNEL) + zlog_debug("%s: ignoring IFLA_WIRELESS message", + __func__); + return 0; + } + + if (tb[IFLA_IFNAME] == NULL) + return -1; + name = (char *)RTA_DATA(tb[IFLA_IFNAME]); + + /* Must be valid string. */ + len = RTA_PAYLOAD(tb[IFLA_IFNAME]); + if (len < 2 || name[len - 1] != '\0') { + if (IS_ZEBRA_DEBUG_KERNEL) + zlog_debug("%s: invalid intf name", __func__); + return -1; + } + + if (tb[IFLA_LINKINFO]) { + netlink_parse_rtattr_nested(linkinfo, IFLA_INFO_MAX, + tb[IFLA_LINKINFO]); + + if (linkinfo[IFLA_INFO_KIND]) + kind = RTA_DATA(linkinfo[IFLA_INFO_KIND]); + + if (linkinfo[IFLA_INFO_SLAVE_KIND]) + slave_kind = RTA_DATA(linkinfo[IFLA_INFO_SLAVE_KIND]); + + netlink_determine_zebra_iftype(kind, &zif_type); + } + + /* If linking to another interface, note it. 
*/ + if (tb[IFLA_LINK]) + link_ifindex = *(ifindex_t *)RTA_DATA(tb[IFLA_LINK]); + + if (tb[IFLA_LINK_NETNSID]) { + link_nsid = *(ns_id_t *)RTA_DATA(tb[IFLA_LINK_NETNSID]); + link_nsid = ns_id_get_absolute(ns_id, link_nsid); + } + + if (tb[IFLA_TXQLEN]) + txqlen = *(uint32_t *)RTA_DATA(tb[IFLA_TXQLEN]); + + struct zebra_dplane_ctx *ctx = dplane_ctx_alloc(); + dplane_ctx_set_ns_id(ctx, ns_id); + dplane_ctx_set_ifp_link_nsid(ctx, link_nsid); + dplane_ctx_set_ifp_zif_type(ctx, zif_type); + dplane_ctx_set_ifindex(ctx, ifi->ifi_index); + dplane_ctx_set_ifname(ctx, name); + dplane_ctx_set_ifp_startup(ctx, startup); + dplane_ctx_set_ifp_family(ctx, ifi->ifi_family); + dplane_ctx_set_intf_txqlen(ctx, txqlen); + + /* We are interested in some AF_BRIDGE notifications. */ +#ifndef AF_BRIDGE +#define AF_BRIDGE 7 +#endif + if (ifi->ifi_family == AF_BRIDGE) { + dplane_ctx_set_op(ctx, DPLANE_OP_INTF_INSTALL); + return netlink_bridge_interface(ctx, tb[IFLA_AF_SPEC], startup); + } + + if (h->nlmsg_type == RTM_NEWLINK) { + dplane_ctx_set_ifp_link_ifindex(ctx, link_ifindex); + dplane_ctx_set_op(ctx, DPLANE_OP_INTF_INSTALL); + dplane_ctx_set_status(ctx, ZEBRA_DPLANE_REQUEST_QUEUED); + if (tb[IFLA_IFALIAS]) { + dplane_ctx_set_ifp_desc(ctx, + RTA_DATA(tb[IFLA_IFALIAS])); + } + if (!tb[IFLA_MTU]) { + if (IS_ZEBRA_DEBUG_KERNEL) + zlog_debug( + "RTM_NEWLINK for interface %s(%u) without MTU set", + name, ifi->ifi_index); + dplane_ctx_fini(&ctx); + return 0; + } + dplane_ctx_set_ifp_mtu(ctx, *(int *)RTA_DATA(tb[IFLA_MTU])); + + /* If VRF, create or update the VRF structure itself. 
*/ + if (zif_type == ZEBRA_IF_VRF && !vrf_is_backend_netns()) { + netlink_vrf_change(h, tb[IFLA_LINKINFO], ns_id, name, + ctx); + vrf_id = ifi->ifi_index; + } + + if (tb[IFLA_MASTER]) { + if (slave_kind && (strcmp(slave_kind, "vrf") == 0) + && !vrf_is_backend_netns()) { + zif_slave_type = ZEBRA_IF_SLAVE_VRF; + master_infindex = vrf_id = + *(uint32_t *)RTA_DATA(tb[IFLA_MASTER]); + } else if (slave_kind + && (strcmp(slave_kind, "bridge") == 0)) { + zif_slave_type = ZEBRA_IF_SLAVE_BRIDGE; + master_infindex = bridge_ifindex = + *(ifindex_t *)RTA_DATA(tb[IFLA_MASTER]); + } else if (slave_kind + && (strcmp(slave_kind, "bond") == 0)) { + zif_slave_type = ZEBRA_IF_SLAVE_BOND; + master_infindex = bond_ifindex = + *(ifindex_t *)RTA_DATA(tb[IFLA_MASTER]); + bypass = netlink_parse_lacp_bypass(linkinfo); + } else + zif_slave_type = ZEBRA_IF_SLAVE_OTHER; + } + dplane_ctx_set_ifp_zif_slave_type(ctx, zif_slave_type); + dplane_ctx_set_ifp_vrf_id(ctx, vrf_id); + dplane_ctx_set_ifp_master_ifindex(ctx, master_infindex); + dplane_ctx_set_ifp_bridge_ifindex(ctx, bridge_ifindex); + dplane_ctx_set_ifp_bond_ifindex(ctx, bond_ifindex); + dplane_ctx_set_ifp_bypass(ctx, bypass); + dplane_ctx_set_ifp_zltype( + ctx, netlink_to_zebra_link_type(ifi->ifi_type)); + + if (vrf_is_backend_netns()) + dplane_ctx_set_ifp_vrf_id(ctx, ns_id); + + dplane_ctx_set_ifp_flags(ctx, ifi->ifi_flags & 0x0000fffff); + + if (tb[IFLA_PROTO_DOWN]) { + dplane_ctx_set_ifp_protodown_set(ctx, true); + netlink_proc_dplane_if_protodown(ctx, tb); + } else + dplane_ctx_set_ifp_protodown_set(ctx, false); + + netlink_interface_update_hw_addr(tb, ctx); + + /* Extract and save L2 interface information, take + * additional actions. 
*/
+		netlink_interface_update_l2info(
+			ctx, zif_type, linkinfo[IFLA_INFO_DATA], 1, link_nsid);
+	} else {
+		if (IS_ZEBRA_DEBUG_KERNEL)
+			zlog_debug("RTM_DELLINK for %s(%u), enqueuing to zebra",
+				   name, ifi->ifi_index);
+
+		dplane_ctx_set_op(ctx, DPLANE_OP_INTF_DELETE);
+		dplane_ctx_set_status(ctx, ZEBRA_DPLANE_REQUEST_QUEUED);
+
+		dplane_ctx_set_ifp_bond_ifindex(ctx, bond_ifindex);
+	}
+
+	dplane_provider_enqueue_to_zebra(ctx);
+
+	return 0;
+}
+
+/**
+ * Interface encoding helper function.
+ *
+ * Builds a link message carrying IFLA_PROTO_DOWN, IFLA_LINK and a nested
+ * IFLA_PROTO_DOWN_REASON (mask + value for FRR's protodown reason bit).
+ *
+ * \param[in] cmd netlink command.
+ * \param[in] ctx dataplane context (information snapshot).
+ * \param[out] buf buffer to hold the packet.
+ * \param[in] buflen amount of buffer bytes.
+ *
+ * \return aligned message length on success; 0 when the buffer cannot hold
+ *         the header or one of the attributes; -1 when the protodown
+ *         reason nest cannot be opened.
+ */
+
+ssize_t netlink_intf_msg_encode(uint16_t cmd,
+				const struct zebra_dplane_ctx *ctx, void *buf,
+				size_t buflen)
+{
+	struct {
+		struct nlmsghdr n;
+		struct ifinfomsg ifa;
+		char buf[];
+	} *req = buf;
+
+	struct rtattr *nest_protodown_reason;
+	ifindex_t ifindex = dplane_ctx_get_ifindex(ctx);
+	bool down = dplane_ctx_intf_is_protodown(ctx);
+	bool pd_reason_val = dplane_ctx_get_intf_pd_reason_val(ctx);
+	struct nlsock *nl =
+		kernel_netlink_nlsock_lookup(dplane_ctx_get_ns_sock(ctx));
+
+	if (buflen < sizeof(*req))
+		return 0;
+
+	memset(req, 0, sizeof(*req));
+
+	/* Logged only: encoding still proceeds with the given command. */
+	if (cmd != RTM_SETLINK)
+		flog_err(
+			EC_ZEBRA_INTF_UPDATE_FAILURE,
+			"Only RTM_SETLINK message type currently supported in dplane pthread");
+
+	req->n.nlmsg_len = NLMSG_LENGTH(sizeof(struct ifinfomsg));
+	req->n.nlmsg_flags = NLM_F_REQUEST;
+	req->n.nlmsg_type = cmd;
+	req->n.nlmsg_pid = nl->snl.nl_pid;
+
+	req->ifa.ifi_index = ifindex;
+
+	/*
+	 * Every attribute append is checked: an attribute that does not fit
+	 * must not yield a silently truncated message reported as success.
+	 */
+	if (!nl_attr_put8(&req->n, buflen, IFLA_PROTO_DOWN, down))
+		return 0;
+	if (!nl_attr_put32(&req->n, buflen, IFLA_LINK, ifindex))
+		return 0;
+
+	/* Reason info nest */
+	nest_protodown_reason =
+		nl_attr_nest(&req->n, buflen, IFLA_PROTO_DOWN_REASON);
+
+	if (!nest_protodown_reason)
+		return -1;
+
+	if (!nl_attr_put32(&req->n, buflen, IFLA_PROTO_DOWN_REASON_MASK,
+			   (1 << if_netlink_get_frr_protodown_r_bit())))
+		return 0;
+	if (!nl_attr_put32(&req->n, buflen, IFLA_PROTO_DOWN_REASON_VALUE,
+			   ((int)pd_reason_val)
+				   << if_netlink_get_frr_protodown_r_bit()))
+		return 0;
+
+	nl_attr_nest_end(&req->n, nest_protodown_reason);
+
+	if (IS_ZEBRA_DEBUG_KERNEL)
+		zlog_debug("%s: %s, protodown=%d reason_val=%d ifindex=%u",
+			   __func__, nl_msg_type_to_str(cmd), down,
+			   pd_reason_val, ifindex);
+
+	return NLMSG_ALIGN(req->n.nlmsg_len);
+}
+
+/*
+ * Interface information read by netlink: run the interface lookup, then
+ * report the ZEBRA_DPLANE_INTERFACES_READ startup stage.
+ */
+void interface_list(struct zebra_ns *zns)
+{
+	interface_lookup_netlink(zns);
+
+	zebra_dplane_startup_stage(zns, ZEBRA_DPLANE_INTERFACES_READ);
+}
+
+/*
+ * Second pass of the startup read: update interface links, read nexthops,
+ * read interface addresses, then report ZEBRA_DPLANE_ADDRESSES_READ.
+ */
+void interface_list_second(struct zebra_ns *zns)
+{
+	zebra_if_update_all_links(zns);
+	/* We add routes for interface address,
+	 * so we need to get the nexthop info
+	 * from the kernel before we can do that
+	 */
+	netlink_nexthop_read(zns);
+
+	interface_addr_lookup_netlink(zns);
+
+	zebra_dplane_startup_stage(zns, ZEBRA_DPLANE_ADDRESSES_READ);
+}
+
+/**
+ * netlink_request_tunneldump() - Request all tunnels from the linux kernel
+ *
+ * @zns:	Zebra namespace
+ * @family:	AF_* netlink family
+ * @ifindex:	Interface index placed in the request's tunnel_msg
+ *
+ * Sends an RTM_GETTUNNEL dump request on the namespace's command socket.
+ *
+ * Return:	Result status
+ */
+static int netlink_request_tunneldump(struct zebra_ns *zns, int family,
+				      int ifindex)
+{
+	struct {
+		struct nlmsghdr n;
+		struct tunnel_msg tmsg;
+		char buf[256];
+	} req;
+
+	/* Form the request */
+	memset(&req, 0, sizeof(req));
+	req.n.nlmsg_len = NLMSG_LENGTH(sizeof(struct tunnel_msg));
+	req.n.nlmsg_type = RTM_GETTUNNEL;
+	req.n.nlmsg_flags = NLM_F_ROOT | NLM_F_MATCH | NLM_F_REQUEST;
+	req.tmsg.family = family;
+	req.tmsg.ifindex = ifindex;
+
+	return netlink_request(&zns->netlink_cmd, &req);
+}
+
+/*
+ * Currently we only ask for vxlan l3svd vni information.
+ * In the future this can be expanded.
+ */
+int netlink_tunneldump_read(struct zebra_ns *zns)
+{
+	int ret = 0;
+	struct zebra_dplane_info dp_info;
+	struct route_node *rn;
+	struct interface *tmp_if = NULL;
+	struct zebra_if *zif;
+	struct nlsock *netlink_cmd = &zns->netlink_cmd;
+
+	zebra_dplane_info_from_zns(&dp_info, zns, true /*is_cmd*/);
+
+	/* Walk every interface in the namespace; only VxLAN ones are queried. */
+	for (rn = route_top(zns->if_table); rn; rn = route_next(rn)) {
+		tmp_if = (struct interface *)rn->info;
+		if (!tmp_if)
+			continue;
+		zif = tmp_if->info;
+		if (!zif || zif->zif_type != ZEBRA_IF_VXLAN)
+			continue;
+
+		ret = netlink_request_tunneldump(zns, PF_BRIDGE,
+						 tmp_if->ifindex);
+		if (ret >= 0)
+			ret = netlink_parse_info(netlink_link_change,
+						 netlink_cmd, &dp_info, 0,
+						 true);
+
+		if (ret < 0) {
+			/*
+			 * route_top()/route_next() return the node locked;
+			 * release it before abandoning the walk so the
+			 * table node's reference is not leaked.
+			 */
+			route_unlock_node(rn);
+			return ret;
+		}
+	}
+
+	return 0;
+}
+
+/* Printable name of a bridge port state; "UNKNOWN" for any other value. */
+static const char *port_state2str(uint8_t state)
+{
+	switch (state) {
+	case BR_STATE_DISABLED:
+		return "DISABLED";
+	case BR_STATE_LISTENING:
+		return "LISTENING";
+	case BR_STATE_LEARNING:
+		return "LEARNING";
+	case BR_STATE_FORWARDING:
+		return "FORWARDING";
+	case BR_STATE_BLOCKING:
+		return "BLOCKING";
+	}
+
+	return "UNKNOWN";
+}
+
+/*
+ * Apply a bridge vlan state to the VNI mapped to vlan 'id' on 'zif':
+ * FORWARDING brings the VNI up, BLOCKING takes it down, other states are
+ * ignored.  Nothing is done when no VNI is mapped to the vlan.
+ */
+static void vxlan_vni_state_change(struct zebra_if *zif, uint16_t id,
+				   uint8_t state)
+{
+	struct zebra_vxlan_vni *vnip;
+
+	vnip = zebra_vxlan_if_vlanid_vni_find(zif, id);
+
+	if (!vnip) {
+		if (IS_ZEBRA_DEBUG_VXLAN)
+			zlog_debug(
+				"Cannot find VNI for VID (%u) IF %s for vlan state update",
+				id, zif->ifp->name);
+
+		return;
+	}
+
+	switch (state) {
+	case BR_STATE_FORWARDING:
+		zebra_vxlan_if_vni_up(zif->ifp, vnip);
+		break;
+	case BR_STATE_BLOCKING:
+		zebra_vxlan_if_vni_down(zif->ifp, vnip);
+		break;
+	case BR_STATE_DISABLED:
+	case BR_STATE_LISTENING:
+	case BR_STATE_LEARNING:
+	default:
+		/* Not used for anything at the moment */
+		break;
+	}
+}
+
+/*
+ * Apply 'state' to every vlan id in the inclusive range
+ * [id_start, id_end] on interface 'ifp'.
+ */
+static void vlan_id_range_state_change(struct interface *ifp, uint16_t id_start,
+				       uint16_t id_end, uint8_t state)
+{
+	struct zebra_if *zif;
+
+	zif = (struct zebra_if *)ifp->info;
+
+	if (!zif)
+		return;
+
+	/*
+	 * The counter is wider than the vlan id so the loop still
+	 * terminates when id_end is UINT16_MAX (a uint16_t counter would
+	 * wrap to 0 and never exceed id_end).
+	 */
+	for (uint32_t i = id_start; i <= id_end; i++)
+		vxlan_vni_state_change(zif, (uint16_t)i, state);
+}
+
+/**
+ * netlink_vlan_change() - Read in change about vlans from the kernel
+ *
+ * @h:		Netlink message header
+ * @ns_id:	Namespace id
+ * @startup:	Are we reading under startup conditions?
+ *
+ * Return:	Result status
+ */
+int netlink_vlan_change(struct nlmsghdr *h, ns_id_t ns_id, int startup)
+{
+	int len, rem;
+	struct interface *ifp;
+	struct br_vlan_msg *bvm;
+	struct bridge_vlan_info *vinfo;
+	struct rtattr *vtb[BRIDGE_VLANDB_ENTRY_MAX + 1] = {};
+	struct rtattr *attr;
+	uint8_t state;
+	uint16_t vrange;
+	int type;
+
+	/* We only care about state changes for now */
+	if (!(h->nlmsg_type == RTM_NEWVLAN))
+		return 0;
+
+	len = h->nlmsg_len - NLMSG_LENGTH(sizeof(struct br_vlan_msg));
+	if (len < 0) {
+		zlog_warn(
+			"%s: Message received from netlink is of a broken size %d %zu",
+			__func__, h->nlmsg_len,
+			(size_t)NLMSG_LENGTH(sizeof(struct br_vlan_msg)));
+		return -1;
+	}
+
+	bvm = NLMSG_DATA(h);
+
+	if (bvm->family != AF_BRIDGE)
+		return 0;
+
+	ifp = if_lookup_by_index_per_ns(zebra_ns_lookup(ns_id), bvm->ifindex);
+	if (!ifp) {
+		zlog_debug("Cannot find bridge-vlan IF (%u) for vlan update",
+			   bvm->ifindex);
+		return 0;
+	}
+
+	if (!IS_ZEBRA_IF_VXLAN(ifp)) {
+		if (IS_ZEBRA_DEBUG_KERNEL)
+			zlog_debug("Ignoring non-vxlan IF (%s) for vlan update",
+				   ifp->name);
+
+		return 0;
+	}
+
+	if (IS_ZEBRA_DEBUG_KERNEL || IS_ZEBRA_DEBUG_VXLAN)
+		zlog_debug("%s %s IF %s NS %u",
+			   nl_msg_type_to_str(h->nlmsg_type),
+			   nl_family_to_str(bvm->family), ifp->name, ns_id);
+
+	/* Loop over "ALL" BRIDGE_VLANDB_ENTRY */
+	rem = len;
+	for (attr = BRVLAN_RTA(bvm); RTA_OK(attr, rem);
+	     attr = RTA_NEXT(attr, rem)) {
+		vinfo = NULL;
+		vrange = 0;
+
+		type = attr->rta_type & NLA_TYPE_MASK;
+
+		if (type != BRIDGE_VLANDB_ENTRY)
+			continue;
+
+		/* Parse nested entry data */
+		netlink_parse_rtattr_nested(vtb, BRIDGE_VLANDB_ENTRY_MAX, attr);
+
+		/* It must have info for the ID */
+		if (!vtb[BRIDGE_VLANDB_ENTRY_INFO])
+			continue;
+
+		vinfo = (struct bridge_vlan_info *)RTA_DATA(
+			vtb[BRIDGE_VLANDB_ENTRY_INFO]);
+
+		/*
+		 * We only care about state info, if there is none, just ignore
+		 * it.
+		 */
+		if (!vtb[BRIDGE_VLANDB_ENTRY_STATE])
+			continue;
+
+		state = *(uint8_t *)RTA_DATA(vtb[BRIDGE_VLANDB_ENTRY_STATE]);
+
+		/*
+		 * The kernel sends the range end as a 16-bit attribute;
+		 * reading it through a 32-bit pointer picks up the padding
+		 * bytes and yields the wrong value on big-endian hosts.
+		 */
+		if (vtb[BRIDGE_VLANDB_ENTRY_RANGE])
+			vrange = *(uint16_t *)RTA_DATA(
+				vtb[BRIDGE_VLANDB_ENTRY_RANGE]);
+
+		if (IS_ZEBRA_DEBUG_KERNEL || IS_ZEBRA_DEBUG_VXLAN) {
+			if (vrange)
+				zlog_debug("VLANDB_ENTRY: VID (%u-%u) state=%s",
+					   vinfo->vid, vrange,
+					   port_state2str(state));
+			else
+				zlog_debug("VLANDB_ENTRY: VID (%u) state=%s",
+					   vinfo->vid, port_state2str(state));
+		}
+
+		/* Without a range attribute the entry covers a single vid. */
+		vlan_id_range_state_change(
+			ifp, vinfo->vid, (vrange ? vrange : vinfo->vid), state);
+	}
+
+	return 0;
+}
+
+/**
+ * netlink_request_vlan() - Request vlan information from the kernel
+ * @zns:	Zebra namespace
+ * @family:	AF_* netlink family
+ * @type:	RTM_* type
+ *
+ * Return:	Result status
+ */
+static int netlink_request_vlan(struct zebra_ns *zns, int family, int type)
+{
+	struct {
+		struct nlmsghdr n;
+		struct br_vlan_msg bvm;
+		char buf[256];
+	} req;
+
+	/* Form the request, specifying filter (rtattr) if needed. */
+	memset(&req, 0, sizeof(req));
+	req.n.nlmsg_type = type;
+	req.n.nlmsg_flags = NLM_F_ROOT | NLM_F_MATCH | NLM_F_REQUEST;
+	req.n.nlmsg_len = NLMSG_LENGTH(sizeof(struct br_vlan_msg));
+	req.bvm.family = family;
+
+	nl_attr_put32(&req.n, sizeof(req), BRIDGE_VLANDB_DUMP_FLAGS,
+		      BRIDGE_VLANDB_DUMPF_STATS);
+
+	return netlink_request(&zns->netlink_cmd, &req);
+}
+
+/**
+ * netlink_vlan_read() - Vlan read function using netlink interface
+ *
+ * @zns:	Zebra name space
+ *
+ * Return:	Result status
+ * Only called at bootstrap time.
+ */
+int netlink_vlan_read(struct zebra_ns *zns)
+{
+	int ret;
+	struct zebra_dplane_info dp_info;
+
+	zebra_dplane_info_from_zns(&dp_info, zns, true /*is_cmd*/);
+
+	/* Get bridge vlan info */
+	ret = netlink_request_vlan(zns, PF_BRIDGE, RTM_GETVLAN);
+	if (ret < 0)
+		return ret;
+
+	ret = netlink_parse_info(netlink_vlan_change, &zns->netlink_cmd,
+				 &dp_info, 0, true);
+
+	return ret;
+}
+
+#endif /* GNU_LINUX */
diff --git a/zebra/if_netlink.h b/zebra/if_netlink.h
new file mode 100644
index 0000000..9b31906
--- /dev/null
+++ b/zebra/if_netlink.h
@@ -0,0 +1,51 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
+/* Header file exported by if_netlink.c to zebra.
+ * Copyright (C) 1997, 98, 99 Kunihiro Ishiguro
+ */
+
+#ifndef _ZEBRA_IF_NETLINK_H
+#define _ZEBRA_IF_NETLINK_H
+
+#ifdef HAVE_NETLINK
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+extern int netlink_interface_addr(struct nlmsghdr *h, ns_id_t ns_id,
+				  int startup);
+
+/*
+ * Parse an incoming interface address change message, generate a dplane
+ * context object for processing.
+ */ +int netlink_interface_addr_dplane(struct nlmsghdr *h, ns_id_t ns_id, + int startup); + +extern int netlink_link_change(struct nlmsghdr *h, ns_id_t ns_id, int startup); +extern int interface_lookup_netlink(struct zebra_ns *zns); + +extern int netlink_vlan_change(struct nlmsghdr *h, ns_id_t ns_id, int startup); +extern int netlink_vlan_read(struct zebra_ns *zns); + +extern ssize_t netlink_intf_msg_encode(uint16_t cmd, + const struct zebra_dplane_ctx *ctx, + void *buf, size_t buflen); +extern enum netlink_msg_status +netlink_put_gre_set_msg(struct nl_batch *bth, struct zebra_dplane_ctx *ctx); + +extern enum netlink_msg_status +netlink_put_address_update_msg(struct nl_batch *bth, + struct zebra_dplane_ctx *ctx); + +extern int netlink_tunneldump_read(struct zebra_ns *zns); +extern enum netlink_msg_status +netlink_put_intf_update_msg(struct nl_batch *bth, struct zebra_dplane_ctx *ctx); + +#ifdef __cplusplus +} +#endif + +#endif /* HAVE_NETLINK */ + +#endif /* _ZEBRA_IF_NETLINK_H */ diff --git a/zebra/if_socket.c b/zebra/if_socket.c new file mode 100644 index 0000000..1172f8b --- /dev/null +++ b/zebra/if_socket.c @@ -0,0 +1,36 @@ +// SPDX-License-Identifier: GPL-2.0-or-later +/* + * Zebra Interface interaction with the kernel using socket. 
+ * Copyright (C) 2022 NVIDIA CORPORATION & AFFILIATES + * Stephen Worley + */ + +#include + +#ifndef HAVE_NETLINK + +#include "lib_errors.h" + +#include "zebra/rt.h" +#include "zebra/zebra_dplane.h" +#include "zebra/zebra_errors.h" + +enum zebra_dplane_result kernel_intf_update(struct zebra_dplane_ctx *ctx) +{ + flog_err(EC_LIB_UNAVAILABLE, "%s not Implemented for this platform", + __func__); + return ZEBRA_DPLANE_REQUEST_FAILURE; +} + +enum zebra_dplane_result +kernel_intf_netconf_update(struct zebra_dplane_ctx *ctx) +{ + const char *ifname = dplane_ctx_get_ifname(ctx); + enum dplane_netconf_status_e mpls_on = dplane_ctx_get_netconf_mpls(ctx); + + zlog_warn("%s: Unable to set kernel mpls state for interface %s(%d)", + __func__, ifname, mpls_on); + + return ZEBRA_DPLANE_REQUEST_SUCCESS; +} +#endif diff --git a/zebra/if_sysctl.c b/zebra/if_sysctl.c new file mode 100644 index 0000000..ae29268 --- /dev/null +++ b/zebra/if_sysctl.c @@ -0,0 +1,131 @@ +// SPDX-License-Identifier: GPL-2.0-or-later +/* + * Get interface's address and mask information by sysctl() function. + * Copyright (C) 1997, 98 Kunihiro Ishiguro + */ + +#include + +#if !defined(GNU_LINUX) && !defined(OPEN_BSD) + +#include "if.h" +#include "sockunion.h" +#include "prefix.h" +#include "connected.h" +#include "memory.h" +#include "ioctl.h" +#include "log.h" +#include "interface.h" +#include "vrf.h" + +#include "zebra/rt.h" +#include "zebra/kernel_socket.h" +#include "zebra/rib.h" +#include "zebra/zebra_errors.h" + +void ifstat_update_sysctl(void) +{ + caddr_t ref, buf, end; + size_t bufsiz; + struct if_msghdr *ifm; + struct interface *ifp; + +#define MIBSIZ 6 + int mib[MIBSIZ] = { + CTL_NET, PF_ROUTE, 0, 0, /* AF_INET & AF_INET6 */ + NET_RT_IFLIST, 0}; + + /* Query buffer size. */ + if (sysctl(mib, MIBSIZ, NULL, &bufsiz, NULL, 0) < 0) { + flog_warn(EC_ZEBRA_SYSCTL_FAILED, "sysctl() error by %s", + safe_strerror(errno)); + return; + } + + /* We free this memory at the end of this function. 
*/ + ref = buf = XMALLOC(MTYPE_TMP, bufsiz); + + /* Fetch interface information into allocated buffer. */ + if (sysctl(mib, MIBSIZ, buf, &bufsiz, NULL, 0) < 0) { + flog_warn(EC_ZEBRA_SYSCTL_FAILED, "sysctl error by %s", + safe_strerror(errno)); + XFREE(MTYPE_TMP, ref); + return; + } + + /* Parse both interfaces and addresses. */ + for (end = buf + bufsiz; buf < end; buf += ifm->ifm_msglen) { + ifm = (struct if_msghdr *)buf; + if (ifm->ifm_type == RTM_IFINFO) { + ifp = if_lookup_by_index(ifm->ifm_index, VRF_DEFAULT); + if (ifp) + ifp->stats = ifm->ifm_data; + } + } + + /* Free sysctl buffer. */ + XFREE(MTYPE_TMP, ref); + + return; +} + +/* Interface listing up function using sysctl(). */ +void interface_list(struct zebra_ns *zns) +{ + caddr_t ref, buf, end; + size_t bufsiz; + struct if_msghdr *ifm; + +#define MIBSIZ 6 + int mib[MIBSIZ] = { + CTL_NET, PF_ROUTE, 0, 0, /* AF_INET & AF_INET6 */ + NET_RT_IFLIST, 0}; + + if (zns->ns_id != NS_DEFAULT) { + zlog_debug("%s: ignore NS %u", __func__, zns->ns_id); + return; + } + + /* Query buffer size. */ + if (sysctl(mib, MIBSIZ, NULL, &bufsiz, NULL, 0) < 0) { + flog_err_sys(EC_ZEBRA_IFLIST_FAILED, + "Could not enumerate interfaces: %s", + safe_strerror(errno)); + return; + } + + /* We free this memory at the end of this function. */ + ref = buf = XMALLOC(MTYPE_TMP, bufsiz); + + /* Fetch interface information into allocated buffer. */ + if (sysctl(mib, MIBSIZ, buf, &bufsiz, NULL, 0) < 0) { + flog_err_sys(EC_ZEBRA_IFLIST_FAILED, + "Could not enumerate interfaces: %s", + safe_strerror(errno)); + return; + } + + /* Parse both interfaces and addresses. */ + for (end = buf + bufsiz; buf < end; buf += ifm->ifm_msglen) { + ifm = (struct if_msghdr *)buf; + + switch (ifm->ifm_type) { + case RTM_IFINFO: + ifm_read(ifm); + break; + case RTM_NEWADDR: + ifam_read((struct ifa_msghdr *)ifm); + break; + default: + zlog_info("%s: unexpected message type", __func__); + XFREE(MTYPE_TMP, ref); + return; + break; + } + } + + /* Free sysctl buffer. 
*/ + XFREE(MTYPE_TMP, ref); +} + +#endif /* !defined(GNU_LINUX) && !defined(OPEN_BSD) */ diff --git a/zebra/interface.c b/zebra/interface.c new file mode 100644 index 0000000..1afd9d5 --- /dev/null +++ b/zebra/interface.c @@ -0,0 +1,5731 @@ +// SPDX-License-Identifier: GPL-2.0-or-later +/* + * Interface function. + * Copyright (C) 1997, 1999 Kunihiro Ishiguro + */ + +#include + +#include "if.h" +#include "lib_errors.h" +#include "vty.h" +#include "sockunion.h" +#include "prefix.h" +#include "command.h" +#include "memory.h" +#include "ioctl.h" +#include "connected.h" +#include "log.h" +#include "zclient.h" +#include "vrf.h" +#include "lib/northbound_cli.h" + +#include "zebra/rtadv.h" +#include "zebra_ns.h" +#include "zebra_vrf.h" +#include "zebra/interface.h" +#include "zebra/rib.h" +#include "zebra/rt.h" +#include "zebra/zebra_router.h" +#include "zebra/redistribute.h" +#include "zebra/debug.h" +#include "zebra/irdp.h" +#include "zebra/zebra_ptm.h" +#include "zebra/rt_netlink.h" +#include "zebra/if_netlink.h" +#include "zebra/interface.h" +#include "zebra/zebra_vxlan.h" +#include "zebra/zebra_errors.h" +#include "zebra/zebra_evpn_mh.h" + +DEFINE_MTYPE_STATIC(ZEBRA, ZINFO, "Zebra Interface Information"); + +#define ZEBRA_PTM_SUPPORT + +DEFINE_HOOK(zebra_if_extra_info, (struct vty * vty, struct interface *ifp), + (vty, ifp)); +DEFINE_HOOK(zebra_if_config_wr, (struct vty * vty, struct interface *ifp), + (vty, ifp)); + +DEFINE_MTYPE(ZEBRA, ZIF_DESC, "Intf desc"); + +static void if_down_del_nbr_connected(struct interface *ifp); + +static void if_zebra_speed_update(struct event *thread) +{ + struct interface *ifp = EVENT_ARG(thread); + struct zebra_if *zif = ifp->info; + uint32_t new_speed; + bool changed = false; + int error = 0; + + new_speed = kernel_get_speed(ifp, &error); + + /* error may indicate vrf not available or + * interfaces not available. 
+ * note that loopback & virtual interfaces can return 0 as speed + */ + if (error < 0) + return; + + if (new_speed != ifp->speed) { + zlog_info("%s: %s old speed: %u new speed: %u", __func__, + ifp->name, ifp->speed, new_speed); + ifp->speed = new_speed; + if_add_update(ifp); + changed = true; + } + + if (changed || new_speed == UINT32_MAX) { +#define SPEED_UPDATE_SLEEP_TIME 5 +#define SPEED_UPDATE_COUNT_MAX (4 * 60 / SPEED_UPDATE_SLEEP_TIME) + /* + * Some interfaces never actually have an associated speed + * with them ( I am looking at you bridges ). + * So instead of iterating forever, let's give the + * system 4 minutes to try to figure out the speed; + * if it hasn't by then, it's probably never going to become + * useful. + * Since I don't know all the wonderful types of interfaces + * that may come into existence in the future I am going + * to not update the system to keep track of that. It + * is far simpler to just stop trying after 4 minutes + */ + if (new_speed == UINT32_MAX && + zif->speed_update_count == SPEED_UPDATE_COUNT_MAX) + return; + + zif->speed_update_count++; + event_add_timer(zrouter.master, if_zebra_speed_update, ifp, + SPEED_UPDATE_SLEEP_TIME, &zif->speed_update); + event_ignore_late_timer(zif->speed_update); + } +} + +static void zebra_if_node_destroy(route_table_delegate_t *delegate, + struct route_table *table, + struct route_node *node) +{ + if (node->info) + list_delete((struct list **)&node->info); + route_node_destroy(delegate, table, node); +} + +static void zebra_if_nhg_dependents_free(struct zebra_if *zebra_if) +{ + nhg_connected_tree_free(&zebra_if->nhg_dependents); +} + +static void zebra_if_nhg_dependents_init(struct zebra_if *zebra_if) +{ + nhg_connected_tree_init(&zebra_if->nhg_dependents); +} + + +route_table_delegate_t zebra_if_table_delegate = { + .create_node = route_node_create, + .destroy_node = zebra_if_node_destroy}; + +/* Called when new interface is added.
*/ +static int if_zebra_new_hook(struct interface *ifp) +{ + struct zebra_if *zebra_if; + + zebra_if = XCALLOC(MTYPE_ZINFO, sizeof(struct zebra_if)); + zebra_if->ifp = ifp; + + zebra_if->multicast = IF_ZEBRA_DATA_UNSPEC; + zebra_if->mpls_config = IF_ZEBRA_DATA_UNSPEC; + zebra_if->shutdown = IF_ZEBRA_DATA_OFF; + + zebra_if->link_nsid = NS_UNKNOWN; + + zebra_if_nhg_dependents_init(zebra_if); + + zebra_ptm_if_init(zebra_if); + + ifp->ptm_enable = zebra_ptm_get_enable_state(); + + rtadv_if_init(zebra_if); + + memset(&zebra_if->neigh_mac[0], 0, 6); + + /* Initialize installed address chains tree. */ + zebra_if->ipv4_subnets = + route_table_init_with_delegate(&zebra_if_table_delegate); + + ifp->info = zebra_if; + + /* + * Some platforms are telling us that the interface is + * up and ready to go. When we check the speed we + * sometimes get the wrong value. Wait a couple + * of seconds and ask again. Hopefully it's all settled + * down upon startup. + */ + zebra_if->speed_update_count = 0; + event_add_timer(zrouter.master, if_zebra_speed_update, ifp, 15, + &zebra_if->speed_update); + event_ignore_late_timer(zebra_if->speed_update); + + return 0; +} + +static void if_nhg_dependents_check_valid(struct nhg_hash_entry *nhe) +{ + zebra_nhg_check_valid(nhe); +} + +static void if_down_nhg_dependents(const struct interface *ifp) +{ + struct nhg_connected *rb_node_dep = NULL; + struct zebra_if *zif = (struct zebra_if *)ifp->info; + + frr_each(nhg_connected_tree, &zif->nhg_dependents, rb_node_dep) + if_nhg_dependents_check_valid(rb_node_dep->nhe); +} + +static void if_nhg_dependents_release(const struct interface *ifp) +{ + struct nhg_connected *rb_node_dep = NULL; + struct zebra_if *zif = (struct zebra_if *)ifp->info; + + frr_each(nhg_connected_tree, &zif->nhg_dependents, rb_node_dep) { + rb_node_dep->nhe->ifp = NULL; /* Null it out */ + if_nhg_dependents_check_valid(rb_node_dep->nhe); + } +} + +/* Called when interface is deleted. 
*/ +static int if_zebra_delete_hook(struct interface *ifp) +{ + struct zebra_if *zebra_if; + struct zebra_l2info_bond *bond; + + if (ifp->info) { + zebra_if = ifp->info; + + /* If we set protodown, clear our reason now from the kernel */ + if (ZEBRA_IF_IS_PROTODOWN(zebra_if) && zebra_if->protodown_rc && + !ZEBRA_IF_IS_PROTODOWN_ONLY_EXTERNAL(zebra_if)) + zebra_if_update_protodown_rc(ifp, true, + (zebra_if->protodown_rc & + ~ZEBRA_PROTODOWN_ALL)); + + /* Free installed address chains tree. */ + if (zebra_if->ipv4_subnets) + route_table_finish(zebra_if->ipv4_subnets); + + rtadv_if_fini(zebra_if); + + bond = &zebra_if->bond_info; + if (bond && bond->mbr_zifs) + list_delete(&bond->mbr_zifs); + + zebra_l2_bridge_if_cleanup(ifp); + zebra_evpn_if_cleanup(zebra_if); + zebra_evpn_mac_ifp_del(ifp); + + if_nhg_dependents_release(ifp); + zebra_if_nhg_dependents_free(zebra_if); + + XFREE(MTYPE_ZIF_DESC, zebra_if->desc); + + EVENT_OFF(zebra_if->speed_update); + + XFREE(MTYPE_ZINFO, zebra_if); + } + + return 0; +} + +/* Build the table key */ +static void if_build_key(uint32_t ifindex, struct prefix *p) +{ + p->family = AF_INET; + p->prefixlen = IPV4_MAX_BITLEN; + p->u.prefix4.s_addr = ifindex; +} + +/* Link an interface in a per NS interface tree */ +struct interface *if_link_per_ns(struct zebra_ns *ns, struct interface *ifp) +{ + struct prefix p; + struct route_node *rn; + + if (ifp->ifindex == IFINDEX_INTERNAL) + return NULL; + + if_build_key(ifp->ifindex, &p); + rn = route_node_get(ns->if_table, &p); + if (rn->info) { + ifp = (struct interface *)rn->info; + route_unlock_node(rn); /* get */ + return ifp; + } + + rn->info = ifp; + ifp->node = rn; + + return ifp; +} + +/* Unlink an interface from its per NS interface tree.
*/ +void if_unlink_per_ns(struct interface *ifp) +{ + if (!ifp->node) + return; + + ifp->node->info = NULL; + route_unlock_node(ifp->node); + ifp->node = NULL; +} + +/* Look up an interface by identifier within a NS */ +struct interface *if_lookup_by_index_per_ns(struct zebra_ns *ns, + uint32_t ifindex) +{ + struct prefix p; + struct route_node *rn; + struct interface *ifp = NULL; + + if_build_key(ifindex, &p); + rn = route_node_lookup(ns->if_table, &p); + if (rn) { + ifp = (struct interface *)rn->info; + route_unlock_node(rn); /* lookup */ + } + return ifp; +} + +/* Look up an interface by name within a NS */ +struct interface *if_lookup_by_name_per_ns(struct zebra_ns *ns, + const char *ifname) +{ + struct route_node *rn; + struct interface *ifp; + + for (rn = route_top(ns->if_table); rn; rn = route_next(rn)) { + ifp = (struct interface *)rn->info; + if (ifp && strcmp(ifp->name, ifname) == 0) { + route_unlock_node(rn); + return (ifp); + } + } + + return NULL; +} + +struct interface *if_lookup_by_index_per_nsid(ns_id_t ns_id, uint32_t ifindex) +{ + struct zebra_ns *zns; + + zns = zebra_ns_lookup(ns_id); + return zns ? if_lookup_by_index_per_ns(zns, ifindex) : NULL; +} + +const char *ifindex2ifname_per_ns(struct zebra_ns *zns, unsigned int ifindex) +{ + struct interface *ifp; + + return ((ifp = if_lookup_by_index_per_ns(zns, ifindex)) != NULL) + ? ifp->name + : "unknown"; +} + +/* Tie an interface address to its derived subnet list of addresses. */ +int if_subnet_add(struct interface *ifp, struct connected *ifc) +{ + struct route_node *rn; + struct zebra_if *zebra_if; + struct prefix cp; + struct list *addr_list; + + assert(ifp && ifp->info && ifc); + zebra_if = ifp->info; + + /* Get address derived subnet node and associated address list, while + marking + address secondary attribute appropriately. 
*/ + cp = *CONNECTED_PREFIX(ifc); + apply_mask(&cp); + rn = route_node_get(zebra_if->ipv4_subnets, &cp); + + if ((addr_list = rn->info)) + SET_FLAG(ifc->flags, ZEBRA_IFA_SECONDARY); + else { + UNSET_FLAG(ifc->flags, ZEBRA_IFA_SECONDARY); + rn->info = addr_list = list_new(); + route_lock_node(rn); + } + + /* Tie address at the tail of address list. */ + listnode_add(addr_list, ifc); + + /* Return list element count. */ + return (addr_list->count); +} + +/* Untie an interface address from its derived subnet list of addresses. */ +int if_subnet_delete(struct interface *ifp, struct connected *ifc) +{ + struct route_node *rn; + struct zebra_if *zebra_if; + struct list *addr_list; + struct prefix cp; + + assert(ifp && ifp->info && ifc); + zebra_if = ifp->info; + + cp = *CONNECTED_PREFIX(ifc); + apply_mask(&cp); + + /* Get address derived subnet node. */ + rn = route_node_lookup(zebra_if->ipv4_subnets, &cp); + if (!(rn && rn->info)) { + flog_warn(EC_ZEBRA_REMOVE_ADDR_UNKNOWN_SUBNET, + "Trying to remove an address from an unknown subnet. (please report this bug)"); + return -1; + } + route_unlock_node(rn); + + /* Untie address from subnet's address list. */ + addr_list = rn->info; + + /* Deleting an address that is not registered is a bug. + * In any case, we shouldn't decrement the lock counter if the address + * is unknown. */ + if (!listnode_lookup(addr_list, ifc)) { + flog_warn( + EC_ZEBRA_REMOVE_UNREGISTERED_ADDR, + "Trying to remove an address from a subnet where it is not currently registered. (please report this bug)"); + return -1; + } + + listnode_delete(addr_list, ifc); + route_unlock_node(rn); + + /* Return list element count, if not empty. */ + if (addr_list->count) { + /* If deleted address is primary, mark subsequent one as such + * and distribute. 
*/ + if (!CHECK_FLAG(ifc->flags, ZEBRA_IFA_SECONDARY)) { + ifc = listgetdata( + (struct listnode *)listhead(addr_list)); + zebra_interface_address_delete_update(ifp, ifc); + UNSET_FLAG(ifc->flags, ZEBRA_IFA_SECONDARY); + /* XXX: Linux kernel removes all the secondary addresses + * when the primary + * address is removed. We could try to work around that, + * though this is + * non-trivial. */ + zebra_interface_address_add_update(ifp, ifc); + } + + return addr_list->count; + } + + /* Otherwise, free list and route node. */ + list_delete(&addr_list); + rn->info = NULL; + route_unlock_node(rn); + + return 0; +} + +/* if_flags_mangle: A place for hacks that require mangling + * or tweaking the interface flags. + * + * ******************** Solaris flags hacks ************************** + * + * Solaris IFF_UP flag reflects only the primary interface as the + * routing socket only sends IFINFO for the primary interface. Hence + * ~IFF_UP does not per se imply all the logical interfaces are also + * down - which we only know of as addresses. Instead we must determine + * whether the interface really is up or not according to how many + * addresses are still attached. (Solaris always sends RTM_DELADDR if + * an interface, logical or not, goes ~IFF_UP). + * + * Ie, we mangle IFF_UP to *additionally* reflect whether or not there + * are addresses left in struct connected, not just the actual underlying + * IFF_UP flag. + * + * We must hence remember the real state of IFF_UP, which we do in + * struct zebra_if.primary_state. + * + * Setting IFF_UP within zebra to administratively shutdown the + * interface will affect only the primary interface/address on Solaris. + ************************End Solaris flags hacks *********************** + */ +static void if_flags_mangle(struct interface *ifp, uint64_t *newflags) +{ + return; +} + +/* Update the flags field of the ifp with the new flag set provided. 
+ * Take whatever actions are required for any changes in flags we care + * about. + * + * newflags should be the raw value, as obtained from the OS. + */ +void if_flags_update(struct interface *ifp, uint64_t newflags) +{ + if_flags_mangle(ifp, &newflags); + + if (if_is_no_ptm_operative(ifp)) { + /* operative -> inoperative? */ + ifp->flags = newflags; + if (!if_is_operative(ifp)) + if_down(ifp); + } else { + /* inoperative -> operative? */ + ifp->flags = newflags; + if (if_is_operative(ifp)) + if_up(ifp, true); + } +} + +/* Wake up configured address if it is not in current kernel + address. */ +void if_addr_wakeup(struct interface *ifp) +{ + struct listnode *node, *nnode; + struct connected *ifc; + struct prefix *p; + enum zebra_dplane_result dplane_res; + + for (ALL_LIST_ELEMENTS(ifp->connected, node, nnode, ifc)) { + p = ifc->address; + + if (CHECK_FLAG(ifc->conf, ZEBRA_IFC_CONFIGURED) + && !CHECK_FLAG(ifc->conf, ZEBRA_IFC_QUEUED)) { + /* Address check. */ + if (p->family == AF_INET) { + if (!if_is_up(ifp)) { + /* Assume zebra is configured like + * following: + * + * interface gre0 + * ip addr 192.0.2.1/24 + * ! + * + * As soon as zebra becomes first aware + * that gre0 exists in the + * kernel, it will set gre0 up and + * configure its addresses. + * + * (This may happen at startup when the + * interface already exists + * or during runtime when the interface + * is added to the kernel) + * + * XXX: IRDP code is calling here via + * if_add_update - this seems + * somewhat weird. + * XXX: RUNNING is not a settable flag + * on any system + * I (paulj) am aware of. 
+ */ + if_set_flags(ifp, IFF_UP | IFF_RUNNING); + if_refresh(ifp); + } + + dplane_res = dplane_intf_addr_set(ifp, ifc); + if (dplane_res == + ZEBRA_DPLANE_REQUEST_FAILURE) { + flog_err_sys( + EC_ZEBRA_IFACE_ADDR_ADD_FAILED, + "Can't set interface's address: %s", + dplane_res2str(dplane_res)); + continue; + } + + SET_FLAG(ifc->conf, ZEBRA_IFC_QUEUED); + /* The address will be advertised to zebra + * clients when the notification + * from the kernel has been received. + * It will also be added to the interface's + * subnet list then. */ + } + if (p->family == AF_INET6) { + if (!if_is_up(ifp)) { + /* See long comment above */ + if_set_flags(ifp, IFF_UP | IFF_RUNNING); + if_refresh(ifp); + } + + + dplane_res = dplane_intf_addr_set(ifp, ifc); + if (dplane_res == + ZEBRA_DPLANE_REQUEST_FAILURE) { + flog_err_sys( + EC_ZEBRA_IFACE_ADDR_ADD_FAILED, + "Can't set interface's address: %s", + dplane_res2str(dplane_res)); + continue; + } + + SET_FLAG(ifc->conf, ZEBRA_IFC_QUEUED); + /* The address will be advertised to zebra + * clients when the notification + * from the kernel has been received. 
*/ + } + } + } +} + +/* Handle interface addition */ +void if_add_update(struct interface *ifp) +{ + struct zebra_if *if_data; + struct zebra_ns *zns; + struct zebra_vrf *zvrf = ifp->vrf->info; + + /* case interface populate before vrf enabled */ + if (zvrf->zns) + zns = zvrf->zns; + else + zns = zebra_ns_lookup(NS_DEFAULT); + if_link_per_ns(zns, ifp); + if_data = ifp->info; + assert(if_data); + + if (if_data->multicast == IF_ZEBRA_DATA_ON) + if_set_flags(ifp, IFF_MULTICAST); + else if (if_data->multicast == IF_ZEBRA_DATA_OFF) + if_unset_flags(ifp, IFF_MULTICAST); + + zebra_ptm_if_set_ptm_state(ifp, if_data); + + zebra_interface_add_update(ifp); + + if (!CHECK_FLAG(ifp->status, ZEBRA_INTERFACE_ACTIVE)) { + SET_FLAG(ifp->status, ZEBRA_INTERFACE_ACTIVE); + + if (if_data->shutdown == IF_ZEBRA_DATA_ON) { + if (IS_ZEBRA_DEBUG_KERNEL) { + zlog_debug( + "interface %s vrf %s(%u) index %d is shutdown. Won't wake it up.", + ifp->name, ifp->vrf->name, + ifp->vrf->vrf_id, ifp->ifindex); + } + + return; + } + + if_addr_wakeup(ifp); + + if (if_data->mpls_config == IF_ZEBRA_DATA_ON) + dplane_intf_mpls_modify_state(ifp, true); + else if (if_data->mpls_config == IF_ZEBRA_DATA_OFF) + dplane_intf_mpls_modify_state(ifp, false); + + if (IS_ZEBRA_DEBUG_KERNEL) + zlog_debug( + "interface %s vrf %s(%u) index %d becomes active.", + ifp->name, ifp->vrf->name, ifp->vrf->vrf_id, + ifp->ifindex); + + } else { + if (IS_ZEBRA_DEBUG_KERNEL) + zlog_debug("interface %s vrf %s(%u) index %d is added.", + ifp->name, ifp->vrf->name, ifp->vrf->vrf_id, + ifp->ifindex); + } +} + +/* Install connected routes corresponding to an interface. 
*/ +static void if_install_connected(struct interface *ifp) +{ + struct listnode *node; + struct listnode *next; + struct connected *ifc; + + if (ifp->connected) { + for (ALL_LIST_ELEMENTS(ifp->connected, node, next, ifc)) { + if (CHECK_FLAG(ifc->conf, ZEBRA_IFC_REAL)) + zebra_interface_address_add_update(ifp, ifc); + + connected_up(ifp, ifc); + } + } +} + +/* Uninstall connected routes corresponding to an interface. */ +static void if_uninstall_connected(struct interface *ifp) +{ + struct listnode *node; + struct listnode *next; + struct connected *ifc; + + if (ifp->connected) { + for (ALL_LIST_ELEMENTS(ifp->connected, node, next, ifc)) { + zebra_interface_address_delete_update(ifp, ifc); + connected_down(ifp, ifc); + } + } +} + +/* Uninstall and delete connected routes corresponding to an interface. */ +/* TODO - Check why IPv4 handling here is different from install or if_down */ +static void if_delete_connected(struct interface *ifp) +{ + struct connected *ifc; + struct prefix cp; + struct route_node *rn; + struct zebra_if *zebra_if; + struct listnode *node; + struct listnode *last = NULL; + + zebra_if = ifp->info; + + if (!ifp->connected) + return; + + while ((node = (last ? last->next : listhead(ifp->connected)))) { + ifc = listgetdata(node); + + cp = *CONNECTED_PREFIX(ifc); + apply_mask(&cp); + + if (cp.family == AF_INET + && (rn = route_node_lookup(zebra_if->ipv4_subnets, &cp))) { + struct listnode *anode; + struct listnode *next; + struct listnode *first; + struct list *addr_list; + + route_unlock_node(rn); + addr_list = (struct list *)rn->info; + + /* Remove addresses, secondaries first. 
*/ + first = listhead(addr_list); + if (first) + for (anode = first->next; anode || first; + anode = next) { + if (!anode) { + anode = first; + first = NULL; + } + next = anode->next; + + ifc = listgetdata(anode); + connected_down(ifp, ifc); + + /* XXX: We have to send notifications + * here explicitly, because we destroy + * the ifc before receiving the + * notification about the address being + * deleted. + */ + zebra_interface_address_delete_update( + ifp, ifc); + + UNSET_FLAG(ifc->conf, ZEBRA_IFC_REAL); + UNSET_FLAG(ifc->conf, ZEBRA_IFC_QUEUED); + + /* Remove from subnet chain. */ + list_delete_node(addr_list, anode); + route_unlock_node(rn); + + /* Remove from interface address list + * (unconditionally). */ + if (!CHECK_FLAG(ifc->conf, + ZEBRA_IFC_CONFIGURED)) { + listnode_delete(ifp->connected, + ifc); + connected_free(&ifc); + } else + last = node; + } + + /* Free chain list and respective route node. */ + list_delete(&addr_list); + rn->info = NULL; + route_unlock_node(rn); + } else if (cp.family == AF_INET6) { + connected_down(ifp, ifc); + + zebra_interface_address_delete_update(ifp, ifc); + + UNSET_FLAG(ifc->conf, ZEBRA_IFC_REAL); + UNSET_FLAG(ifc->conf, ZEBRA_IFC_QUEUED); + + if (CHECK_FLAG(ifc->conf, ZEBRA_IFC_CONFIGURED)) + last = node; + else { + listnode_delete(ifp->connected, ifc); + connected_free(&ifc); + } + } else { + last = node; + } + } +} + +/* Handle an interface delete event */ +void if_delete_update(struct interface **pifp) +{ + struct zebra_if *zif; + struct interface *ifp = *pifp; + + if (if_is_up(ifp)) { + flog_err( + EC_LIB_INTERFACE, + "interface %s vrf %s(%u) index %d is still up while being deleted.", + ifp->name, ifp->vrf->name, ifp->vrf->vrf_id, + ifp->ifindex); + return; + } + + if (!CHECK_FLAG(ifp->status, ZEBRA_INTERFACE_ACTIVE)) + return; + + /* Mark interface as inactive */ + UNSET_FLAG(ifp->status, ZEBRA_INTERFACE_ACTIVE); + + if (IS_ZEBRA_DEBUG_KERNEL) + zlog_debug("interface %s vrf %s(%u) index %d is now inactive.", + 
ifp->name, ifp->vrf->name, ifp->vrf->vrf_id, + ifp->ifindex); + + /* Delete connected routes from the kernel. */ + if_delete_connected(ifp); + + /* if the ifp is in a vrf, move it to default so vrf can be deleted if + * desired. This operation is not done for netns implementation to avoid + * collision with interface with the same name in the default vrf (can + * occur with this implementation whereas it is not possible with + * vrf-lite). + */ + if (ifp->vrf->vrf_id && !vrf_is_backend_netns()) + if_handle_vrf_change(ifp, VRF_DEFAULT); + + /* Send out notification on interface delete. */ + zebra_interface_delete_update(ifp); + + if_unlink_per_ns(ifp); + + /* Update ifindex after distributing the delete message. This is in + case any client needs to have the old value of ifindex available + while processing the deletion. Each client daemon is responsible + for setting ifindex to IFINDEX_INTERNAL after processing the + interface deletion message. */ + if_set_index(ifp, IFINDEX_INTERNAL); + ifp->node = NULL; + + UNSET_FLAG(ifp->status, ZEBRA_INTERFACE_VRF_LOOPBACK); + + /* Reset some zebra interface params to default values. */ + zif = ifp->info; + if (zif) { + zebra_evpn_if_cleanup(zif); + zif->zif_type = ZEBRA_IF_OTHER; + zif->zif_slave_type = ZEBRA_IF_SLAVE_NONE; + memset(&zif->l2info, 0, sizeof(union zebra_l2if_info)); + memset(&zif->brslave_info, 0, + sizeof(struct zebra_l2info_brslave)); + zebra_evpn_mac_ifp_del(ifp); + } + + if (!ifp->configured) { + if (IS_ZEBRA_DEBUG_KERNEL) + zlog_debug("interface %s is being deleted from the system", + ifp->name); + if_delete(pifp); + } +} + +/* VRF change for an interface */ +void if_handle_vrf_change(struct interface *ifp, vrf_id_t vrf_id) +{ + vrf_id_t old_vrf_id; + + old_vrf_id = ifp->vrf->vrf_id; + + /* Uninstall connected routes. 
*/ + if_uninstall_connected(ifp); + + /* Delete any IPv4 neighbors created to implement RFC 5549 */ + if_nbr_ipv6ll_to_ipv4ll_neigh_del_all(ifp); + + /* Delete all neighbor addresses learnt through IPv6 RA */ + if_down_del_nbr_connected(ifp); + + /* Send out notification on interface VRF change. */ + /* This is to issue a DELETE, as appropriate. */ + zebra_interface_vrf_update_del(ifp, vrf_id); + + if (if_is_vrf(ifp)) + return; + + /* update VRF */ + if_update_to_new_vrf(ifp, vrf_id); + + /* Send out notification on interface VRF change. */ + /* This is to issue an ADD, if needed. */ + zebra_interface_vrf_update_add(ifp, old_vrf_id); +} + +static void ipv6_ll_address_to_mac(struct in6_addr *address, uint8_t *mac) +{ + mac[0] = address->s6_addr[8] ^ 0x02; + mac[1] = address->s6_addr[9]; + mac[2] = address->s6_addr[10]; + mac[3] = address->s6_addr[13]; + mac[4] = address->s6_addr[14]; + mac[5] = address->s6_addr[15]; +} + +void if_nbr_mac_to_ipv4ll_neigh_update(struct interface *ifp, + char mac[6], + struct in6_addr *address, + int add) +{ + struct zebra_vrf *zvrf = ifp->vrf->info; + struct zebra_if *zif = ifp->info; + char buf[16] = "169.254.0.1"; + struct in_addr ipv4_ll; + ns_id_t ns_id; + + inet_pton(AF_INET, buf, &ipv4_ll); + + ns_id = zvrf->zns->ns_id; + + /* + * Remove and re-add any existing neighbor entry for this address, + * since Netlink doesn't currently offer update message types. + */ + kernel_neigh_update(0, ifp->ifindex, (void *)&ipv4_ll.s_addr, mac, 6, + ns_id, AF_INET, true); + + /* Add new neighbor entry. + * + * We force installation even if current neighbor entry is the same. + * Since this function is used to refresh our MAC entries after an + * interface flap, if we don't force in our custom entries with their + * state set to PERMANENT or REACHABLE then the kernel will attempt to + * resolve our leftover entries, fail, mark them unreachable and then + * they'll be useless to us. 
+ */ + if (add) + kernel_neigh_update(add, ifp->ifindex, (void *)&ipv4_ll.s_addr, + mac, 6, ns_id, AF_INET, true); + + memcpy(&zif->neigh_mac[0], &mac[0], 6); + + /* + * We need to note whether or not we originated a v6 + * neighbor entry for this interface. So that when + * someone unwisely accidentally deletes this entry + * we can shove it back in. + */ + zif->v6_2_v4_ll_neigh_entry = !!add; + memcpy(&zif->v6_2_v4_ll_addr6, address, sizeof(*address)); + + zvrf->neigh_updates++; +} + +void if_nbr_ipv6ll_to_ipv4ll_neigh_update(struct interface *ifp, + struct in6_addr *address, int add) +{ + + char mac[6]; + + ipv6_ll_address_to_mac(address, (uint8_t *)mac); + if_nbr_mac_to_ipv4ll_neigh_update(ifp, mac, address, add); +} + +static void if_nbr_ipv6ll_to_ipv4ll_neigh_add_all(struct interface *ifp) +{ + if (listhead(ifp->nbr_connected)) { + struct nbr_connected *nbr_connected; + struct listnode *node; + + for (ALL_LIST_ELEMENTS_RO(ifp->nbr_connected, node, + nbr_connected)) + if_nbr_ipv6ll_to_ipv4ll_neigh_update( + ifp, &nbr_connected->address->u.prefix6, 1); + } +} + +void if_nbr_ipv6ll_to_ipv4ll_neigh_del_all(struct interface *ifp) +{ + if (listhead(ifp->nbr_connected)) { + struct nbr_connected *nbr_connected; + struct listnode *node; + + for (ALL_LIST_ELEMENTS_RO(ifp->nbr_connected, node, + nbr_connected)) + if_nbr_ipv6ll_to_ipv4ll_neigh_update( + ifp, &nbr_connected->address->u.prefix6, 0); + } +} + +static void if_down_del_nbr_connected(struct interface *ifp) +{ + struct nbr_connected *nbr_connected; + struct listnode *node, *nnode; + + for (ALL_LIST_ELEMENTS(ifp->nbr_connected, node, nnode, + nbr_connected)) { + listnode_delete(ifp->nbr_connected, nbr_connected); + nbr_connected_free(nbr_connected); + } +} + +void if_nhg_dependents_add(struct interface *ifp, struct nhg_hash_entry *nhe) +{ + if (ifp->info) { + struct zebra_if *zif = (struct zebra_if *)ifp->info; + + nhg_connected_tree_add_nhe(&zif->nhg_dependents, nhe); + } +} + +void if_nhg_dependents_del(struct 
interface *ifp, struct nhg_hash_entry *nhe) +{ + if (ifp->info) { + struct zebra_if *zif = (struct zebra_if *)ifp->info; + + nhg_connected_tree_del_nhe(&zif->nhg_dependents, nhe); + } +} + +unsigned int if_nhg_dependents_count(const struct interface *ifp) +{ + if (ifp->info) { + struct zebra_if *zif = (struct zebra_if *)ifp->info; + + return nhg_connected_tree_count(&zif->nhg_dependents); + } + + return 0; +} + + +bool if_nhg_dependents_is_empty(const struct interface *ifp) +{ + if (ifp->info) { + struct zebra_if *zif = (struct zebra_if *)ifp->info; + + return nhg_connected_tree_is_empty(&zif->nhg_dependents); + } + + return false; +} + +/* Interface is up. */ +void if_up(struct interface *ifp, bool install_connected) +{ + struct zebra_if *zif; + struct interface *link_if; + + zif = ifp->info; + zif->up_count++; + frr_timestamp(2, zif->up_last, sizeof(zif->up_last)); + + /* Notify the protocol daemons. */ + if (ifp->ptm_enable && (ifp->ptm_status == ZEBRA_PTM_STATUS_DOWN)) { + flog_warn(EC_ZEBRA_PTM_NOT_READY, + "%s: interface %s hasn't passed ptm check", + __func__, ifp->name); + return; + } + zebra_interface_up_update(ifp); + + if_nbr_ipv6ll_to_ipv4ll_neigh_add_all(ifp); + + rtadv_if_up(zif); + + /* Install connected routes to the kernel. */ + if (install_connected) + if_install_connected(ifp); + + /* Handle interface up for specific types for EVPN. Non-VxLAN interfaces + * are checked to see if (remote) neighbor entries need to be installed + * on them for ARP suppression. 
+ */ + if (IS_ZEBRA_IF_VXLAN(ifp)) + zebra_vxlan_if_up(ifp); + else if (IS_ZEBRA_IF_BRIDGE(ifp)) { + link_if = ifp; + zebra_vxlan_svi_up(ifp, link_if); + } else if (IS_ZEBRA_IF_VLAN(ifp)) { + link_if = zif->link; + if (link_if) + zebra_vxlan_svi_up(ifp, link_if); + } else if (IS_ZEBRA_IF_MACVLAN(ifp)) { + zebra_vxlan_macvlan_up(ifp); + } + + if (zif->es_info.es) + zebra_evpn_es_if_oper_state_change(zif, true /*up*/); + + if (zif->flags & ZIF_FLAG_EVPN_MH_UPLINK) + zebra_evpn_mh_uplink_oper_update(zif); + + event_add_timer(zrouter.master, if_zebra_speed_update, ifp, 0, + &zif->speed_update); + event_ignore_late_timer(zif->speed_update); +} + +/* Interface goes down. We have to manage different behavior of based + OS. */ +void if_down(struct interface *ifp) +{ + struct zebra_if *zif; + struct interface *link_if; + + zif = ifp->info; + zif->down_count++; + frr_timestamp(2, zif->down_last, sizeof(zif->down_last)); + + if_down_nhg_dependents(ifp); + + /* Handle interface down for specific types for EVPN. Non-VxLAN + * interfaces + * are checked to see if (remote) neighbor entries need to be purged + * for ARP suppression. + */ + if (IS_ZEBRA_IF_VXLAN(ifp)) + zebra_vxlan_if_down(ifp); + else if (IS_ZEBRA_IF_BRIDGE(ifp)) { + link_if = ifp; + zebra_vxlan_svi_down(ifp, link_if); + } else if (IS_ZEBRA_IF_VLAN(ifp)) { + link_if = zif->link; + if (link_if) + zebra_vxlan_svi_down(ifp, link_if); + } else if (IS_ZEBRA_IF_MACVLAN(ifp)) { + zebra_vxlan_macvlan_down(ifp); + } + + if (zif->es_info.es) + zebra_evpn_es_if_oper_state_change(zif, false /*up*/); + + if (zif->flags & ZIF_FLAG_EVPN_MH_UPLINK) + zebra_evpn_mh_uplink_oper_update(zif); + + /* Notify to the protocol daemons. */ + zebra_interface_down_update(ifp); + + /* Uninstall connected routes from the kernel. 
*/ + if_uninstall_connected(ifp); + + if_nbr_ipv6ll_to_ipv4ll_neigh_del_all(ifp); + + /* Delete all neighbor addresses learnt through IPv6 RA */ + if_down_del_nbr_connected(ifp); +} + +void if_refresh(struct interface *ifp) +{ +#ifndef GNU_LINUX + if_get_flags(ifp); +#endif +} + +void zebra_if_update_link(struct interface *ifp, ifindex_t link_ifindex, + ns_id_t ns_id) +{ + struct zebra_if *zif; + + zif = (struct zebra_if *)ifp->info; + zif->link_nsid = ns_id; + zif->link_ifindex = link_ifindex; + zif->link = if_lookup_by_index_per_ns(zebra_ns_lookup(ns_id), + link_ifindex); +} + +/* + * during initial link dump kernel does not order lower devices before + * upper devices so we need to fixup link dependencies at the end of dump + */ +void zebra_if_update_all_links(struct zebra_ns *zns) +{ + struct route_node *rn; + struct interface *ifp; + struct zebra_if *zif; + + if (IS_ZEBRA_DEBUG_KERNEL) + zlog_info("fixup link dependencies"); + + for (rn = route_top(zns->if_table); rn; rn = route_next(rn)) { + ifp = (struct interface *)rn->info; + if (!ifp) + continue; + zif = ifp->info; + /* update bond-member to bond linkages */ + if ((IS_ZEBRA_IF_BOND_SLAVE(ifp)) + && (zif->bondslave_info.bond_ifindex != IFINDEX_INTERNAL) + && !zif->bondslave_info.bond_if) { + if (IS_ZEBRA_DEBUG_EVPN_MH_ES || IS_ZEBRA_DEBUG_KERNEL) + zlog_debug("bond mbr %s map to bond %d", + zif->ifp->name, + zif->bondslave_info.bond_ifindex); + zebra_l2_map_slave_to_bond(zif, ifp->vrf->vrf_id); + } + + /* update SVI linkages */ + if ((zif->link_ifindex != IFINDEX_INTERNAL) && !zif->link) { + zif->link = if_lookup_by_index_per_nsid( + zif->link_nsid, zif->link_ifindex); + if (IS_ZEBRA_DEBUG_KERNEL) + zlog_debug("interface %s/%d's lower fixup to %s/%d", + ifp->name, ifp->ifindex, + zif->link?zif->link->name:"unk", + zif->link_ifindex); + } + + /* Update VLAN<=>SVI map */ + if (IS_ZEBRA_IF_VLAN(ifp)) + zebra_evpn_acc_bd_svi_set(zif, NULL, + !!if_is_operative(ifp)); + } +} + +static bool 
if_ignore_set_protodown(const struct interface *ifp, bool new_down, + uint32_t new_protodown_rc) +{ + struct zebra_if *zif; + bool old_down, old_set_down, old_unset_down; + + zif = ifp->info; + + /* + * FRR does not have enough data to make this request + */ + if (ifp->ifindex == IFINDEX_INTERNAL) + return true; + + /* Current state as we know it */ + old_down = !!(ZEBRA_IF_IS_PROTODOWN(zif)); + old_set_down = !!CHECK_FLAG(zif->flags, ZIF_FLAG_SET_PROTODOWN); + old_unset_down = !!CHECK_FLAG(zif->flags, ZIF_FLAG_UNSET_PROTODOWN); + + if (new_protodown_rc == zif->protodown_rc) { + /* Early return if already down & reason bitfield matches */ + if (new_down == old_down) { + if (IS_ZEBRA_DEBUG_KERNEL) + zlog_debug( + "Ignoring request to set protodown %s for interface %s (%u): protodown %s is already set (reason bitfield: old 0x%x new 0x%x)", + new_down ? "on" : "off", ifp->name, + ifp->ifindex, new_down ? "on" : "off", + zif->protodown_rc, new_protodown_rc); + + return true; + } + + /* Early return if already set queued & reason bitfield matches + */ + if (new_down && old_set_down) { + if (IS_ZEBRA_DEBUG_KERNEL) + zlog_debug( + "Ignoring request to set protodown %s for interface %s (%u): protodown %s is already queued to dplane (reason bitfield: old 0x%x new 0x%x)", + new_down ? "on" : "off", ifp->name, + ifp->ifindex, new_down ? "on" : "off", + zif->protodown_rc, new_protodown_rc); + + return true; + } + + /* Early return if already unset queued & reason bitfield + * matches */ + if (!new_down && old_unset_down) { + if (IS_ZEBRA_DEBUG_KERNEL) + zlog_debug( + "Ignoring request to set protodown %s for interface %s (%u): protodown %s is already queued to dplane (reason bitfield: old 0x%x new 0x%x)", + new_down ? "on" : "off", ifp->name, + ifp->ifindex, new_down ? 
"on" : "off", + zif->protodown_rc, new_protodown_rc); + + return true; + } + } + + return false; +} + +int zebra_if_update_protodown_rc(struct interface *ifp, bool new_down, + uint32_t new_protodown_rc) +{ + struct zebra_if *zif; + + zif = ifp->info; + + /* Check if we already have this state or it's queued */ + if (if_ignore_set_protodown(ifp, new_down, new_protodown_rc)) + return 1; + + zlog_info( + "Setting protodown %s - interface %s (%u): reason bitfield change from 0x%x --> 0x%x", + new_down ? "on" : "off", ifp->name, ifp->ifindex, + zif->protodown_rc, new_protodown_rc); + + zif->protodown_rc = new_protodown_rc; + + if (new_down) + SET_FLAG(zif->flags, ZIF_FLAG_SET_PROTODOWN); + else + SET_FLAG(zif->flags, ZIF_FLAG_UNSET_PROTODOWN); + +#ifdef HAVE_NETLINK + dplane_intf_update(ifp); +#else + zlog_warn("Protodown is not supported on this platform"); +#endif + return 0; +} + +int zebra_if_set_protodown(struct interface *ifp, bool new_down, + enum protodown_reasons new_reason) +{ + struct zebra_if *zif; + uint32_t new_protodown_rc; + + zif = ifp->info; + + if (new_down) + new_protodown_rc = zif->protodown_rc | new_reason; + else + new_protodown_rc = zif->protodown_rc & ~new_reason; + + return zebra_if_update_protodown_rc(ifp, new_down, new_protodown_rc); +} + +/* + * Handle an interface events based on info in a dplane context object. + * This runs in the main pthread, using the info in the context object to + * modify an interface. 
+ */ +static void zebra_if_addr_update_ctx(struct zebra_dplane_ctx *ctx, + struct interface *ifp) +{ + uint8_t flags = 0; + const char *label = NULL; + uint32_t metric = METRIC_MAX; + const struct prefix *addr, *dest = NULL; + enum dplane_op_e op; + + if (!ifp) + return; + + op = dplane_ctx_get_op(ctx); + addr = dplane_ctx_get_intf_addr(ctx); + + if (IS_ZEBRA_DEBUG_KERNEL) + zlog_debug("%s: %s: ifindex %s(%u), addr %pFX", __func__, + dplane_op2str(dplane_ctx_get_op(ctx)), ifp->name, + ifp->ifindex, addr); + + /* Is there a peer or broadcast address? */ + dest = dplane_ctx_get_intf_dest(ctx); + if (dest->prefixlen == 0) + dest = NULL; + + if (dplane_ctx_intf_is_connected(ctx)) + SET_FLAG(flags, ZEBRA_IFA_PEER); + + /* Flags. */ + if (dplane_ctx_intf_is_secondary(ctx)) + SET_FLAG(flags, ZEBRA_IFA_SECONDARY); + + /* Label? */ + if (dplane_ctx_intf_has_label(ctx)) + label = dplane_ctx_get_intf_label(ctx); + + if (label && strcmp(ifp->name, label) == 0) + label = NULL; + + metric = dplane_ctx_get_intf_metric(ctx); + + /* Register interface address to the interface. */ + if (addr->family == AF_INET) { + if (op == DPLANE_OP_INTF_ADDR_ADD) + connected_add_ipv4( + ifp, flags, &addr->u.prefix4, addr->prefixlen, + dest ? &dest->u.prefix4 : NULL, label, metric); + else if (CHECK_FLAG(flags, ZEBRA_IFA_PEER)) { + /* Delete with a peer address */ + connected_delete_ipv4(ifp, flags, &addr->u.prefix4, + addr->prefixlen, + &dest->u.prefix4); + } else + connected_delete_ipv4(ifp, flags, &addr->u.prefix4, + addr->prefixlen, NULL); + } + + if (addr->family == AF_INET6) { + if (op == DPLANE_OP_INTF_ADDR_ADD) { + connected_add_ipv6(ifp, flags, &addr->u.prefix6, + dest ? &dest->u.prefix6 : NULL, + addr->prefixlen, label, metric); + } else + connected_delete_ipv6(ifp, &addr->u.prefix6, NULL, + addr->prefixlen); + } + + /* + * Linux kernel does not send route delete on interface down/addr del + * so we have to re-process routes it owns (i.e. 
kernel routes) + */ + if (op != DPLANE_OP_INTF_ADDR_ADD) + rib_update(RIB_UPDATE_KERNEL); +} + +static void zebra_if_update_ctx(struct zebra_dplane_ctx *ctx, + struct interface *ifp) +{ + enum zebra_dplane_result dp_res; + struct zebra_if *zif; + bool pd_reason_val; + bool down; + + if (!ifp) { + if (IS_ZEBRA_DEBUG_KERNEL) + zlog_debug("%s: Can't find ifp", __func__); + + return; + } + + dp_res = dplane_ctx_get_status(ctx); + pd_reason_val = dplane_ctx_get_intf_pd_reason_val(ctx); + down = dplane_ctx_intf_is_protodown(ctx); + + if (IS_ZEBRA_DEBUG_KERNEL) + zlog_debug("%s: %s: if %s(%u) ctx-protodown %s ctx-reason %d", + __func__, dplane_op2str(dplane_ctx_get_op(ctx)), + ifp->name, ifp->ifindex, down ? "on" : "off", + pd_reason_val); + + zif = ifp->info; + if (!zif) { + if (IS_ZEBRA_DEBUG_KERNEL) + zlog_debug("%s: if %s(%u) zebra info pointer is NULL", + __func__, ifp->name, ifp->ifindex); + return; + } + + if (dp_res != ZEBRA_DPLANE_REQUEST_SUCCESS) { + if (IS_ZEBRA_DEBUG_KERNEL) + zlog_debug("%s: if %s(%u) dplane update failed", + __func__, ifp->name, ifp->ifindex); + goto done; + } + + /* Update our info */ + COND_FLAG(zif->flags, ZIF_FLAG_PROTODOWN, down); + +done: + /* Clear our dplane flags */ + UNSET_FLAG(zif->flags, ZIF_FLAG_SET_PROTODOWN); + UNSET_FLAG(zif->flags, ZIF_FLAG_UNSET_PROTODOWN); +} + +/* + * Handle netconf change from a dplane context object; runs in the main + * pthread so it can update zebra data structs. 
+ */ +static void zebra_if_netconf_update_ctx(struct zebra_dplane_ctx *ctx, + struct interface *ifp, + ifindex_t ifindex) +{ + struct zebra_if *zif = NULL; + afi_t afi; + enum dplane_netconf_status_e mpls, mcast_on, linkdown; + bool *mcast_set, *linkdown_set; + + if (!ifp && ifindex != -1 && ifindex != -2) { + if (IS_ZEBRA_DEBUG_KERNEL) + zlog_debug("%s: Can't find ifp(%u)", __func__, ifindex); + + return; + } + + afi = dplane_ctx_get_afi(ctx); + mpls = dplane_ctx_get_netconf_mpls(ctx); + linkdown = dplane_ctx_get_netconf_linkdown(ctx); + mcast_on = dplane_ctx_get_netconf_mcast(ctx); + + if (ifindex == DPLANE_NETCONF_IFINDEX_ALL) { + if (afi == AFI_IP) { + mcast_set = &zrouter.all_mc_forwardingv4; + linkdown_set = &zrouter.all_linkdownv4; + } else { + mcast_set = &zrouter.all_mc_forwardingv6; + linkdown_set = &zrouter.all_linkdownv6; + } + } else if (ifindex == DPLANE_NETCONF_IFINDEX_DEFAULT) { + if (afi == AFI_IP) { + mcast_set = &zrouter.default_mc_forwardingv4; + linkdown_set = &zrouter.default_linkdownv4; + } else { + mcast_set = &zrouter.default_mc_forwardingv6; + linkdown_set = &zrouter.default_linkdownv6; + } + } else { + zif = ifp->info; + if (!zif) { + if (IS_ZEBRA_DEBUG_KERNEL) + zlog_debug( + "%s: if %s(%u) zebra info pointer is NULL", + __func__, ifp ? ifp->name : "(null)", + ifp ? ifp->ifindex : ifindex); + return; + } + if (afi == AFI_IP) { + mcast_set = &zif->v4mcast_on; + linkdown_set = &zif->linkdown; + } else { + mcast_set = &zif->v6mcast_on; + linkdown_set = &zif->linkdownv6; + } + + /* + * mpls netconf data is neither v4 or v6 it's AF_MPLS! 
+ */ + if (mpls == DPLANE_NETCONF_STATUS_ENABLED) { + zif->mpls = true; + zebra_mpls_turned_on(); + } else if (mpls == DPLANE_NETCONF_STATUS_DISABLED) + zif->mpls = false; + } + + if (linkdown == DPLANE_NETCONF_STATUS_ENABLED) + *linkdown_set = true; + else if (linkdown == DPLANE_NETCONF_STATUS_DISABLED) + *linkdown_set = false; + + if (mcast_on == DPLANE_NETCONF_STATUS_ENABLED) + *mcast_set = true; + else if (mcast_on == DPLANE_NETCONF_STATUS_DISABLED) + *mcast_set = false; + + if (IS_ZEBRA_DEBUG_KERNEL) + zlog_debug( + "%s: afi: %d if %s, ifindex %d, mpls %s mc_forwarding: %s linkdown %s", + __func__, afi, ifp ? ifp->name : "Global", + ifp ? ifp->ifindex : ifindex, + (zif ? (zif->mpls ? "ON" : "OFF") : "OFF"), + (*mcast_set ? "ON" : "OFF"), + (*linkdown_set ? "ON" : "OFF")); +} + +static void interface_vrf_change(enum dplane_op_e op, ifindex_t ifindex, + const char *name, uint32_t tableid, + ns_id_t ns_id) +{ + struct vrf *vrf; + struct zebra_vrf *zvrf = NULL; + + if (op == DPLANE_OP_INTF_DELETE) { + if (IS_ZEBRA_DEBUG_DPLANE) + zlog_debug("DPLANE_OP_INTF_DELETE for VRF %s(%u)", name, + ifindex); + + vrf = vrf_lookup_by_id((vrf_id_t)ifindex); + if (!vrf) { + flog_warn(EC_ZEBRA_VRF_NOT_FOUND, + "%s(%u): vrf not found", name, ifindex); + return; + } + + vrf_delete(vrf); + } else { + if (IS_ZEBRA_DEBUG_DPLANE) + zlog_debug( + "DPLANE_OP_INTF_UPDATE for VRF %s(%u) table %u", + name, ifindex, tableid); + + if (!vrf_lookup_by_id((vrf_id_t)ifindex)) { + vrf_id_t exist_id; + + exist_id = zebra_vrf_lookup_by_table(tableid, ns_id); + if (exist_id != VRF_DEFAULT) { + vrf = vrf_lookup_by_id(exist_id); + + if (vrf) + flog_err(EC_ZEBRA_VRF_MISCONFIGURED, + "VRF %s id %u table id overlaps existing vrf %s(%d), misconfiguration exiting", + name, ifindex, vrf->name, + vrf->vrf_id); + else + flog_err(EC_ZEBRA_VRF_NOT_FOUND, + "VRF %s id %u does not exist", + name, ifindex); + + exit(-1); + } + } + + vrf = vrf_update((vrf_id_t)ifindex, name); + if (!vrf) { + 
flog_err(EC_LIB_INTERFACE, "VRF %s id %u not created", + name, ifindex); + return; + } + + /* + * This is the only place that we get the actual kernel table_id + * being used. We need it to set the table_id of the routes + * we are passing to the kernel.... And to throw some totally + * awesome parties. that too. + * + * At this point we *must* have a zvrf because the vrf_create + * callback creates one. We *must* set the table id + * before the vrf_enable because of( at the very least ) + * static routes being delayed for installation until + * during the vrf_enable callbacks. + */ + zvrf = (struct zebra_vrf *)vrf->info; + zvrf->table_id = tableid; + + /* Enable the created VRF. */ + if (!vrf_enable(vrf)) { + flog_err(EC_LIB_INTERFACE, + "Failed to enable VRF %s id %u", name, + ifindex); + return; + } + } +} + +/* + * Note: on netlink systems, there should be a 1-to-1 mapping + * between interface names and ifindex values. + */ +static void set_ifindex(struct interface *ifp, ifindex_t ifi_index, + struct zebra_ns *zns) +{ + struct interface *oifp; + + oifp = if_lookup_by_index_per_ns(zns, ifi_index); + if ((oifp != NULL) && (oifp != ifp)) { + if (ifi_index == IFINDEX_INTERNAL) + flog_err( + EC_LIB_INTERFACE, + "Netlink is setting interface %s ifindex to reserved internal value %u", + ifp->name, ifi_index); + else { + if (IS_ZEBRA_DEBUG_KERNEL) + zlog_debug( + "interface index %d was renamed from %s to %s", + ifi_index, oifp->name, ifp->name); + if (if_is_up(oifp)) + flog_err( + EC_LIB_INTERFACE, + "interface rename detected on up interface: index %d was renamed from %s to %s, results are uncertain!", + ifi_index, oifp->name, ifp->name); + if_delete_update(&oifp); + } + } + if_set_index(ifp, ifi_index); +} + +static inline void zebra_if_set_ziftype(struct interface *ifp, + enum zebra_iftype zif_type, + enum zebra_slave_iftype zif_slave_type) +{ + struct zebra_if *zif; + + zif = (struct zebra_if *)ifp->info; + zif->zif_slave_type = zif_slave_type; + + if 
(zif->zif_type != zif_type) { + zif->zif_type = zif_type; + /* If the if_type has been set to bond initialize ES info + * against it. XXX - note that we don't handle the case where + * a zif changes from bond to non-bond; it is really + * an unexpected/error condition. + */ + zebra_evpn_if_init(zif); + } +} + +static void interface_update_hw_addr(struct zebra_dplane_ctx *ctx, + struct interface *ifp) +{ + int i; + + ifp->hw_addr_len = dplane_ctx_get_ifp_hw_addr_len(ctx); + memcpy(ifp->hw_addr, dplane_ctx_get_ifp_hw_addr(ctx), ifp->hw_addr_len); + + for (i = 0; i < ifp->hw_addr_len; i++) + if (ifp->hw_addr[i] != 0) + break; + + if (i == ifp->hw_addr_len) + ifp->hw_addr_len = 0; +} + +static void interface_update_l2info(struct zebra_dplane_ctx *ctx, + struct interface *ifp, + enum zebra_iftype zif_type, int add, + ns_id_t link_nsid) +{ + const struct zebra_l2info_vxlan *vxlan_info; + const struct zebra_l2info_gre *gre_info; + + switch (zif_type) { + case ZEBRA_IF_BRIDGE: + zebra_l2_bridge_add_update(ifp, + dplane_ctx_get_ifp_bridge_info(ctx)); + break; + case ZEBRA_IF_VLAN: + zebra_l2_vlanif_update(ifp, dplane_ctx_get_ifp_vlan_info(ctx)); + zebra_evpn_acc_bd_svi_set(ifp->info, NULL, + !!if_is_operative(ifp)); + break; + case ZEBRA_IF_VXLAN: + vxlan_info = dplane_ctx_get_ifp_vxlan_info(ctx); + zebra_l2_vxlanif_add_update(ifp, vxlan_info, add); + if (link_nsid != NS_UNKNOWN && vxlan_info->ifindex_link) + zebra_if_update_link(ifp, vxlan_info->ifindex_link, + link_nsid); + break; + case ZEBRA_IF_GRE: + gre_info = dplane_ctx_get_ifp_gre_info(ctx); + zebra_l2_greif_add_update(ifp, gre_info, add); + if (link_nsid != NS_UNKNOWN && gre_info->ifindex_link) + zebra_if_update_link(ifp, gre_info->ifindex_link, + link_nsid); + break; + case ZEBRA_IF_OTHER: + case ZEBRA_IF_VRF: + case ZEBRA_IF_MACVLAN: + case ZEBRA_IF_VETH: + case ZEBRA_IF_BOND: + break; + } +} + +static bool is_if_protodown_reason_only_frr(uint32_t rc_bitfield) +{ + uint8_t frr_protodown_r_bit = 
if_netlink_get_frr_protodown_r_bit(); + + return (rc_bitfield == (((uint32_t)1) << frr_protodown_r_bit)); +} + +static void interface_if_protodown(struct interface *ifp, bool protodown, + uint32_t rc_bitfield) +{ + struct zebra_if *zif = ifp->info; + bool old_protodown; + + /* + * Set our reason code to note it wasn't us. + * If the reason we got from the kernel is ONLY frr though, don't + * set it. + */ + COND_FLAG(zif->protodown_rc, ZEBRA_PROTODOWN_EXTERNAL, + protodown && rc_bitfield && + !is_if_protodown_reason_only_frr(rc_bitfield)); + + + old_protodown = !!ZEBRA_IF_IS_PROTODOWN(zif); + if (protodown == old_protodown) + return; + + if (IS_ZEBRA_DEBUG_EVPN_MH_ES || IS_ZEBRA_DEBUG_DPLANE) + zlog_debug("interface %s dplane change, protodown %s", + ifp->name, protodown ? "on" : "off"); + + /* Set protodown, respectively */ + COND_FLAG(zif->flags, ZIF_FLAG_PROTODOWN, protodown); + + if (zebra_evpn_is_es_bond_member(ifp)) { + /* Check it's not already being sent to the dplane first */ + if (protodown && + CHECK_FLAG(zif->flags, ZIF_FLAG_SET_PROTODOWN)) { + if (IS_ZEBRA_DEBUG_EVPN_MH_ES || IS_ZEBRA_DEBUG_KERNEL) + zlog_debug( + "bond mbr %s protodown on recv'd but already sent protodown on to the dplane", + ifp->name); + return; + } + + if (!protodown && + CHECK_FLAG(zif->flags, ZIF_FLAG_UNSET_PROTODOWN)) { + if (IS_ZEBRA_DEBUG_EVPN_MH_ES || IS_ZEBRA_DEBUG_KERNEL) + zlog_debug( + "bond mbr %s protodown off recv'd but already sent protodown off to the dplane", + ifp->name); + return; + } + + if (IS_ZEBRA_DEBUG_EVPN_MH_ES || IS_ZEBRA_DEBUG_KERNEL) + zlog_debug( + "bond mbr %s reinstate protodown %s in the dplane", + ifp->name, old_protodown ? 
"on" : "off"); + + if (old_protodown) + SET_FLAG(zif->flags, ZIF_FLAG_SET_PROTODOWN); + else + SET_FLAG(zif->flags, ZIF_FLAG_UNSET_PROTODOWN); + + dplane_intf_update(zif->ifp); + } +} + +static void if_sweep_protodown(struct zebra_if *zif) +{ + bool protodown; + + protodown = !!ZEBRA_IF_IS_PROTODOWN(zif); + + if (!protodown) + return; + + if (IS_ZEBRA_DEBUG_KERNEL) + zlog_debug("interface %s sweeping protodown %s reason 0x%x", + zif->ifp->name, protodown ? "on" : "off", + zif->protodown_rc); + + /* Only clear our reason codes, leave external if it was set */ + UNSET_FLAG(zif->protodown_rc, ZEBRA_PROTODOWN_ALL); + dplane_intf_update(zif->ifp); +} + +static void +interface_bridge_vxlan_vlan_vni_map_update(struct zebra_dplane_ctx *ctx, + struct interface *ifp) +{ + const struct zebra_vxlan_vni_array *vniarray = + dplane_ctx_get_ifp_vxlan_vni_array(ctx); + struct zebra_vxlan_vni vni_start, vni_end; + struct hash *vni_table = NULL; + struct zebra_vxlan_vni vni, *vnip; + vni_t vni_id; + vlanid_t vid; + int i; + + memset(&vni_start, 0, sizeof(vni_start)); + memset(&vni_end, 0, sizeof(vni_end)); + + for (i = 0; i < vniarray->count; i++) { + uint16_t flags = vniarray->vnis[i].flags; + + if (flags & DPLANE_BRIDGE_VLAN_INFO_RANGE_BEGIN) { + vni_start = vniarray->vnis[i]; + continue; + } + + if (flags & DPLANE_BRIDGE_VLAN_INFO_RANGE_END) + vni_end = vniarray->vnis[i]; + + if (!(flags & DPLANE_BRIDGE_VLAN_INFO_RANGE_END)) { + vni_start = vniarray->vnis[i]; + vni_end = vniarray->vnis[i]; + } + + if (IS_ZEBRA_DEBUG_DPLANE) + zlog_debug( + "Vlan-Vni(%d:%d-%d:%d) update for VxLAN IF %s(%u)", + vni_start.access_vlan, vni_end.access_vlan, + vni_start.vni, vni_end.vni, ifp->name, + ifp->ifindex); + + if (!vni_table) { + vni_table = zebra_vxlan_vni_table_create(); + if (!vni_table) + return; + } + + for (vid = vni_start.access_vlan, vni_id = vni_start.vni; + vid <= vni_end.access_vlan; vid++, vni_id++) { + + memset(&vni, 0, sizeof(vni)); + vni.vni = vni_id; + vni.access_vlan = vid; + 
vnip = hash_get(vni_table, &vni, zebra_vxlan_vni_alloc); + if (!vnip) + return; + } + + memset(&vni_start, 0, sizeof(vni_start)); + memset(&vni_end, 0, sizeof(vni_end)); + } + + if (vni_table) + zebra_vxlan_if_vni_table_add_update(ifp, vni_table); +} + +static void interface_bridge_vxlan_update(struct zebra_dplane_ctx *ctx, + struct interface *ifp) +{ + struct zebra_if *zif = ifp->info; + const struct zebra_dplane_bridge_vlan_info *bvinfo; + + if (dplane_ctx_get_ifp_no_afspec(ctx)) + return; + + if (IS_ZEBRA_VXLAN_IF_SVD(zif)) + interface_bridge_vxlan_vlan_vni_map_update(ctx, ifp); + + if (dplane_ctx_get_ifp_no_bridge_vlan_info(ctx)) + return; + + bvinfo = dplane_ctx_get_ifp_bridge_vlan_info(ctx); + + if (!(bvinfo->flags & DPLANE_BRIDGE_VLAN_INFO_PVID)) + return; + + if (IS_ZEBRA_DEBUG_DPLANE) + zlog_debug("Access VLAN %u for VxLAN IF %s(%u)", bvinfo->vid, + ifp->name, ifp->ifindex); + + zebra_l2_vxlanif_update_access_vlan(ifp, bvinfo->vid); +} + +static void interface_bridge_vlan_update(struct zebra_dplane_ctx *ctx, + struct interface *ifp) +{ + struct zebra_if *zif = ifp->info; + const struct zebra_dplane_bridge_vlan_info_array *bvarray; + struct zebra_dplane_bridge_vlan_info bvinfo; + bitfield_t old_vlan_bitmap; + uint16_t vid_range_start = 0; + int32_t i; + + /* cache the old bitmap addrs */ + old_vlan_bitmap = zif->vlan_bitmap; + /* create a new bitmap space for re-eval */ + bf_init(zif->vlan_bitmap, IF_VLAN_BITMAP_MAX); + + /* Could we have multiple bridge vlan infos? 
*/ + bvarray = dplane_ctx_get_ifp_bridge_vlan_info_array(ctx); + if (!bvarray) + return; + + for (i = 0; i < bvarray->count; i++) { + bvinfo = bvarray->array[i]; + + if (bvinfo.flags & DPLANE_BRIDGE_VLAN_INFO_RANGE_BEGIN) { + vid_range_start = bvinfo.vid; + continue; + } + + if (!(bvinfo.flags & DPLANE_BRIDGE_VLAN_INFO_RANGE_END)) + vid_range_start = bvinfo.vid; + + zebra_vlan_bitmap_compute(ifp, vid_range_start, bvinfo.vid); + } + + zebra_vlan_mbr_re_eval(ifp, old_vlan_bitmap); + bf_free(old_vlan_bitmap); +} + +static void interface_bridge_handling(struct zebra_dplane_ctx *ctx, + struct interface *ifp, + enum zebra_iftype zif_type) +{ + struct zebra_if *zif; + + if (!ifp) { + zlog_warn("Cannot find bridge if %s(%u)", + dplane_ctx_get_ifname(ctx), + dplane_ctx_get_ifindex(ctx)); + return; + } + + if (IS_ZEBRA_IF_VXLAN(ifp)) + return interface_bridge_vxlan_update(ctx, ifp); + + /* + * build vlan bitmap associated with this interface if that + * device type is interested in the vlans + */ + zif = ifp->info; + if (bf_is_inited(zif->vlan_bitmap)) + interface_bridge_vlan_update(ctx, ifp); +} + +static void zebra_if_dplane_ifp_handling(struct zebra_dplane_ctx *ctx) +{ + enum dplane_op_e op = dplane_ctx_get_op(ctx); + const char *name = dplane_ctx_get_ifname(ctx); + ns_id_t ns_id = dplane_ctx_get_ns_id(ctx); + ifindex_t ifindex = dplane_ctx_get_ifindex(ctx); + ifindex_t bond_ifindex = dplane_ctx_get_ifp_bond_ifindex(ctx); + uint32_t tableid = dplane_ctx_get_ifp_table_id(ctx); + enum zebra_iftype zif_type = dplane_ctx_get_ifp_zif_type(ctx); + struct interface *ifp; + struct zebra_ns *zns; + + zns = zebra_ns_lookup(ns_id); + if (!zns) { + zlog_err("Where is our namespace?"); + return; + } + + if (IS_ZEBRA_DEBUG_DPLANE) + zlog_debug("%s for %s(%u)", dplane_op2str(op), name, ifindex); + + ifp = if_lookup_by_name_per_ns(zns, name); + if (op == DPLANE_OP_INTF_DELETE) { + /* Delete interface notification from kernel */ + if (ifp == NULL) { + if (IS_ZEBRA_DEBUG_EVENT) + 
zlog_debug( + "Delete LINK received for unknown interface %s(%u)", + name, ifindex); + return; + } + + if (IS_ZEBRA_IF_BOND(ifp)) + zebra_l2if_update_bond(ifp, false); + if (IS_ZEBRA_IF_BOND_SLAVE(ifp)) + zebra_l2if_update_bond_slave(ifp, bond_ifindex, false); + /* Special handling for bridge or VxLAN interfaces. */ + if (IS_ZEBRA_IF_BRIDGE(ifp)) + zebra_l2_bridge_del(ifp); + else if (IS_ZEBRA_IF_VXLAN(ifp)) + zebra_l2_vxlanif_del(ifp); + + if_delete_update(&ifp); + + if (zif_type == ZEBRA_IF_VRF && !vrf_is_backend_netns()) + interface_vrf_change(op, ifindex, name, tableid, ns_id); + } else { + ifindex_t master_ifindex, bridge_ifindex, bond_ifindex, + link_ifindex; + enum zebra_slave_iftype zif_slave_type; + uint8_t bypass; + uint64_t flags; + vrf_id_t vrf_id; + uint32_t mtu; + ns_id_t link_nsid; + struct zebra_if *zif; + bool protodown, protodown_set, startup; + uint32_t rc_bitfield; + uint8_t old_hw_addr[INTERFACE_HWADDR_MAX]; + char *desc; + uint8_t family; + + /* If VRF, create or update the VRF structure itself. 
*/ + if (zif_type == ZEBRA_IF_VRF && !vrf_is_backend_netns()) + interface_vrf_change(op, ifindex, name, tableid, ns_id); + + master_ifindex = dplane_ctx_get_ifp_master_ifindex(ctx); + zif_slave_type = dplane_ctx_get_ifp_zif_slave_type(ctx); + bridge_ifindex = dplane_ctx_get_ifp_bridge_ifindex(ctx); + bond_ifindex = dplane_ctx_get_ifp_bond_ifindex(ctx); + bypass = dplane_ctx_get_ifp_bypass(ctx); + flags = dplane_ctx_get_ifp_flags(ctx); + vrf_id = dplane_ctx_get_ifp_vrf_id(ctx); + mtu = dplane_ctx_get_ifp_mtu(ctx); + link_ifindex = dplane_ctx_get_ifp_link_ifindex(ctx); + link_nsid = dplane_ctx_get_ifp_link_nsid(ctx); + protodown_set = dplane_ctx_get_ifp_protodown_set(ctx); + protodown = dplane_ctx_get_ifp_protodown(ctx); + rc_bitfield = dplane_ctx_get_ifp_rc_bitfield(ctx); + startup = dplane_ctx_get_ifp_startup(ctx); + desc = dplane_ctx_get_ifp_desc(ctx); + family = dplane_ctx_get_ifp_family(ctx); + +#ifndef AF_BRIDGE + /* + * Work around to make free bsd happy at the moment + */ +#define AF_BRIDGE 7 +#endif + if (family == AF_BRIDGE) + return interface_bridge_handling(ctx, ifp, zif_type); + + if (ifp == NULL || + !CHECK_FLAG(ifp->status, ZEBRA_INTERFACE_ACTIVE)) { + /* Add interface notification from kernel */ + if (IS_ZEBRA_DEBUG_KERNEL) + zlog_debug( + "RTM_NEWLINK ADD for %s(%u) vrf_id %u type %d sl_type %d master %u", + name, ifindex, vrf_id, zif_type, + zif_slave_type, master_ifindex); + + if (ifp == NULL) { + /* unknown interface */ + ifp = if_get_by_name(name, vrf_id, NULL); + } else { + /* pre-configured interface, learnt now */ + if (ifp->vrf->vrf_id != vrf_id) + if_update_to_new_vrf(ifp, vrf_id); + } + + zif = ifp->info; + + /* Update interface information. 
*/ + set_ifindex(ifp, ifindex, zns); + ifp->flags = flags; + ifp->mtu6 = ifp->mtu = mtu; + ifp->metric = 0; + ifp->speed = kernel_get_speed(ifp, NULL); + ifp->ptm_status = ZEBRA_PTM_STATUS_UNKNOWN; + ifp->txqlen = dplane_ctx_get_intf_txqlen(ctx); + + /* Set interface type */ + zebra_if_set_ziftype(ifp, zif_type, zif_slave_type); + if (IS_ZEBRA_IF_VRF(ifp)) + SET_FLAG(ifp->status, + ZEBRA_INTERFACE_VRF_LOOPBACK); + + /* Update link. */ + zebra_if_update_link(ifp, link_ifindex, link_nsid); + + ifp->ll_type = dplane_ctx_get_ifp_zltype(ctx); + interface_update_hw_addr(ctx, ifp); + + /* Inform clients, install any configured addresses. */ + if_add_update(ifp); + + /* + * Extract and save L2 interface information, take + * additional actions. + */ + interface_update_l2info(ctx, ifp, zif_type, 1, + link_nsid); + if (IS_ZEBRA_IF_BOND(ifp)) + zebra_l2if_update_bond(ifp, true); + if (IS_ZEBRA_IF_BRIDGE_SLAVE(ifp)) + zebra_l2if_update_bridge_slave( + ifp, bridge_ifindex, ns_id, + ZEBRA_BRIDGE_NO_ACTION); + else if (IS_ZEBRA_IF_BOND_SLAVE(ifp)) + zebra_l2if_update_bond_slave(ifp, bond_ifindex, + !!bypass); + + if (protodown_set) { + interface_if_protodown(ifp, protodown, + rc_bitfield); + if (startup) + if_sweep_protodown(zif); + } + + if (IS_ZEBRA_IF_BRIDGE(ifp)) { + if (IS_ZEBRA_DEBUG_KERNEL) + zlog_debug( + "RTM_NEWLINK ADD for %s(%u), vlan-aware %d", + name, ifp->ifindex, + IS_ZEBRA_IF_BRIDGE_VLAN_AWARE( + zif)); + } + } else if (ifp->vrf->vrf_id != vrf_id) { + /* VRF change for an interface. */ + if (IS_ZEBRA_DEBUG_KERNEL) + zlog_debug( + "RTM_NEWLINK vrf-change for %s(%u) vrf_id %u -> %u", + name, ifp->ifindex, ifp->vrf->vrf_id, + vrf_id); + + if_handle_vrf_change(ifp, vrf_id); + } else { + bool was_bridge_slave, was_bond_slave; + uint8_t chgflags = ZEBRA_BRIDGE_NO_ACTION; + + zif = ifp->info; + + /* Interface update. 
*/ + if (IS_ZEBRA_DEBUG_KERNEL) + zlog_debug( + "RTM_NEWLINK update for %s(%u) sl_type %d master %u", + name, ifp->ifindex, zif_slave_type, + master_ifindex); + + set_ifindex(ifp, ifindex, zns); + ifp->mtu6 = ifp->mtu = mtu; + ifp->metric = 0; + ifp->txqlen = dplane_ctx_get_intf_txqlen(ctx); + + /* + * Update interface type - NOTE: Only slave_type can + * change. + */ + was_bridge_slave = IS_ZEBRA_IF_BRIDGE_SLAVE(ifp); + was_bond_slave = IS_ZEBRA_IF_BOND_SLAVE(ifp); + zebra_if_set_ziftype(ifp, zif_type, zif_slave_type); + + memcpy(old_hw_addr, ifp->hw_addr, INTERFACE_HWADDR_MAX); + + /* Update link. */ + zebra_if_update_link(ifp, link_ifindex, link_nsid); + + ifp->ll_type = dplane_ctx_get_ifp_zltype(ctx); + interface_update_hw_addr(ctx, ifp); + + if (protodown_set) + interface_if_protodown(ifp, protodown, + rc_bitfield); + + if (if_is_no_ptm_operative(ifp)) { + bool is_up = if_is_operative(ifp); + + ifp->flags = flags; + if (!if_is_no_ptm_operative(ifp) || + CHECK_FLAG(zif->flags, + ZIF_FLAG_PROTODOWN)) { + if (IS_ZEBRA_DEBUG_KERNEL) + zlog_debug( + "Intf %s(%u) has gone DOWN", + name, ifp->ifindex); + if_down(ifp); + rib_update(RIB_UPDATE_KERNEL); + } else if (if_is_operative(ifp)) { + bool mac_updated = false; + + /* + * Must notify client daemons of new + * interface status. 
+ */ + if (IS_ZEBRA_DEBUG_KERNEL) + zlog_debug( + "Intf %s(%u) PTM up, notifying clients", + name, ifp->ifindex); + if_up(ifp, !is_up); + + /* + * Update EVPN VNI when SVI MAC change + */ + if (memcmp(old_hw_addr, ifp->hw_addr, + INTERFACE_HWADDR_MAX)) + mac_updated = true; + if (IS_ZEBRA_IF_VLAN(ifp) && + mac_updated) { + struct interface *link_if; + + link_if = if_lookup_by_index_per_ns( + zebra_ns_lookup( + NS_DEFAULT), + link_ifindex); + if (link_if) + zebra_vxlan_svi_up( + ifp, link_if); + } else if (mac_updated && + IS_ZEBRA_IF_BRIDGE(ifp)) { + zlog_debug( + "Intf %s(%u) bridge changed MAC address", + name, ifp->ifindex); + chgflags = + ZEBRA_BRIDGE_MASTER_MAC_CHANGE; + } + } + } else { + ifp->flags = flags; + if (if_is_operative(ifp) && + !CHECK_FLAG(zif->flags, + ZIF_FLAG_PROTODOWN)) { + if (IS_ZEBRA_DEBUG_KERNEL) + zlog_debug( + "Intf %s(%u) has come UP", + name, ifp->ifindex); + if_up(ifp, true); + if (IS_ZEBRA_IF_BRIDGE(ifp)) + chgflags = + ZEBRA_BRIDGE_MASTER_UP; + } else { + if (IS_ZEBRA_DEBUG_KERNEL) + zlog_debug( + "Intf %s(%u) has gone DOWN", + name, ifp->ifindex); + if_down(ifp); + rib_update(RIB_UPDATE_KERNEL); + } + } + + /* + * Extract and save L2 interface information, take + * additional actions. 
+ */ + interface_update_l2info(ctx, ifp, zif_type, 0, + link_nsid); + if (IS_ZEBRA_IF_BRIDGE(ifp)) + zebra_l2if_update_bridge(ifp, chgflags); + if (IS_ZEBRA_IF_BOND(ifp)) + zebra_l2if_update_bond(ifp, true); + if (IS_ZEBRA_IF_BRIDGE_SLAVE(ifp) || was_bridge_slave) + zebra_l2if_update_bridge_slave( + ifp, bridge_ifindex, ns_id, chgflags); + else if (IS_ZEBRA_IF_BOND_SLAVE(ifp) || was_bond_slave) + zebra_l2if_update_bond_slave(ifp, bond_ifindex, + !!bypass); + if (IS_ZEBRA_IF_BRIDGE(ifp)) { + if (IS_ZEBRA_DEBUG_KERNEL) + zlog_debug( + "RTM_NEWLINK update for %s(%u), vlan-aware %d", + name, ifp->ifindex, + IS_ZEBRA_IF_BRIDGE_VLAN_AWARE( + zif)); + } + } + + zif = ifp->info; + if (zif) { + XFREE(MTYPE_ZIF_DESC, zif->desc); + if (desc[0]) + zif->desc = XSTRDUP(MTYPE_ZIF_DESC, desc); + } + } +} + +void zebra_if_dplane_result(struct zebra_dplane_ctx *ctx) +{ + struct zebra_ns *zns; + struct interface *ifp; + ns_id_t ns_id; + enum dplane_op_e op; + enum zebra_dplane_result dp_res; + ifindex_t ifindex; + + ns_id = dplane_ctx_get_ns_id(ctx); + dp_res = dplane_ctx_get_status(ctx); + op = dplane_ctx_get_op(ctx); + ifindex = dplane_ctx_get_ifindex(ctx); + + if (IS_ZEBRA_DEBUG_DPLANE_DETAIL || IS_ZEBRA_DEBUG_KERNEL) + zlog_debug("Intf dplane ctx %p, op %s, ifindex (%u), result %s", + ctx, dplane_op2str(op), ifindex, + dplane_res2str(dp_res)); + + zns = zebra_ns_lookup(ns_id); + if (zns == NULL) { + /* No ns - deleted maybe? 
*/ + if (IS_ZEBRA_DEBUG_KERNEL) + zlog_debug("%s: can't find zns id %u", __func__, ns_id); + + return; + } + + ifp = if_lookup_by_index_per_ns(zns, ifindex); + + switch (op) { + case DPLANE_OP_INTF_ADDR_ADD: + case DPLANE_OP_INTF_ADDR_DEL: + zebra_if_addr_update_ctx(ctx, ifp); + break; + + case DPLANE_OP_INTF_INSTALL: + case DPLANE_OP_INTF_UPDATE: + case DPLANE_OP_INTF_DELETE: + /* + * Queued from the dplane means it is something + * that we need to handle( create/delete the + * interface as needed ) + */ + if (dp_res == ZEBRA_DPLANE_REQUEST_QUEUED) + zebra_if_dplane_ifp_handling(ctx); + else + zebra_if_update_ctx(ctx, ifp); + break; + + case DPLANE_OP_INTF_NETCONFIG: + zebra_if_netconf_update_ctx(ctx, ifp, ifindex); + break; + + case DPLANE_OP_ROUTE_INSTALL: + case DPLANE_OP_ROUTE_UPDATE: + case DPLANE_OP_ROUTE_DELETE: + case DPLANE_OP_NH_DELETE: + case DPLANE_OP_NH_INSTALL: + case DPLANE_OP_NH_UPDATE: + case DPLANE_OP_ROUTE_NOTIFY: + case DPLANE_OP_LSP_INSTALL: + case DPLANE_OP_LSP_UPDATE: + case DPLANE_OP_LSP_DELETE: + case DPLANE_OP_LSP_NOTIFY: + case DPLANE_OP_PW_INSTALL: + case DPLANE_OP_PW_UNINSTALL: + case DPLANE_OP_SYS_ROUTE_ADD: + case DPLANE_OP_SYS_ROUTE_DELETE: + case DPLANE_OP_ADDR_INSTALL: + case DPLANE_OP_ADDR_UNINSTALL: + case DPLANE_OP_MAC_INSTALL: + case DPLANE_OP_MAC_DELETE: + case DPLANE_OP_NEIGH_INSTALL: + case DPLANE_OP_NEIGH_UPDATE: + case DPLANE_OP_NEIGH_DELETE: + case DPLANE_OP_NEIGH_IP_INSTALL: + case DPLANE_OP_NEIGH_IP_DELETE: + case DPLANE_OP_VTEP_ADD: + case DPLANE_OP_VTEP_DELETE: + case DPLANE_OP_RULE_ADD: + case DPLANE_OP_RULE_DELETE: + case DPLANE_OP_RULE_UPDATE: + case DPLANE_OP_NEIGH_DISCOVER: + case DPLANE_OP_BR_PORT_UPDATE: + case DPLANE_OP_NONE: + case DPLANE_OP_IPTABLE_ADD: + case DPLANE_OP_IPTABLE_DELETE: + case DPLANE_OP_IPSET_ADD: + case DPLANE_OP_IPSET_DELETE: + case DPLANE_OP_IPSET_ENTRY_ADD: + case DPLANE_OP_IPSET_ENTRY_DELETE: + case DPLANE_OP_NEIGH_TABLE_UPDATE: + case DPLANE_OP_GRE_SET: + case 
DPLANE_OP_TC_QDISC_INSTALL: + case DPLANE_OP_TC_QDISC_UNINSTALL: + case DPLANE_OP_TC_CLASS_ADD: + case DPLANE_OP_TC_CLASS_DELETE: + case DPLANE_OP_TC_CLASS_UPDATE: + case DPLANE_OP_TC_FILTER_ADD: + case DPLANE_OP_TC_FILTER_DELETE: + case DPLANE_OP_TC_FILTER_UPDATE: + case DPLANE_OP_STARTUP_STAGE: + break; /* should never hit here */ + } +} + +/* Dump if address information to vty. */ +static void connected_dump_vty(struct vty *vty, json_object *json, + struct connected *connected) +{ + struct prefix *p; + json_object *json_addr = NULL; + + /* Print interface address. */ + p = connected->address; + + if (json) { + json_addr = json_object_new_object(); + json_object_array_add(json, json_addr); + json_object_string_addf(json_addr, "address", "%pFX", p); + } else { + vty_out(vty, " %s %pFX", prefix_family_str(p), p); + } + + /* If there is destination address, print it. */ + if (CONNECTED_PEER(connected) && connected->destination) { + if (json) { + json_object_string_addf(json_addr, "peer", "%pFX", + connected->destination); + } else { + vty_out(vty, " peer %pFX", connected->destination); + } + } + + if (json) + json_object_boolean_add( + json_addr, "secondary", + CHECK_FLAG(connected->flags, ZEBRA_IFA_SECONDARY)); + else if (CHECK_FLAG(connected->flags, ZEBRA_IFA_SECONDARY)) + vty_out(vty, " secondary"); + + if (json) + json_object_boolean_add( + json_addr, "unnumbered", + CHECK_FLAG(connected->flags, ZEBRA_IFA_UNNUMBERED)); + else if (CHECK_FLAG(connected->flags, ZEBRA_IFA_UNNUMBERED)) + vty_out(vty, " unnumbered"); + + if (connected->label) { + if (json) + json_object_string_add(json_addr, "label", + connected->label); + else + vty_out(vty, " %s", connected->label); + } + + if (!json) + vty_out(vty, "\n"); +} + +/* Dump interface neighbor address information to vty. */ +static void nbr_connected_dump_vty(struct vty *vty, json_object *json, + struct nbr_connected *connected) +{ + struct prefix *p; + char buf[PREFIX2STR_BUFFER]; + + /* Print interface address. 
*/ + p = connected->address; + if (json) + json_array_string_add(json, prefix2str(p, buf, sizeof(buf))); + else + vty_out(vty, " %s %pFX\n", prefix_family_str(p), p); +} + +static const char * +zebra_zifslavetype_2str(enum zebra_slave_iftype zif_slave_type) +{ + switch (zif_slave_type) { + case ZEBRA_IF_SLAVE_BRIDGE: + return "Bridge"; + case ZEBRA_IF_SLAVE_VRF: + return "Vrf"; + case ZEBRA_IF_SLAVE_BOND: + return "Bond"; + case ZEBRA_IF_SLAVE_OTHER: + return "Other"; + case ZEBRA_IF_SLAVE_NONE: + return "None"; + } + return "None"; +} + +static const char *zebra_ziftype_2str(enum zebra_iftype zif_type) +{ + switch (zif_type) { + case ZEBRA_IF_OTHER: + return "Other"; + + case ZEBRA_IF_BRIDGE: + return "Bridge"; + + case ZEBRA_IF_VLAN: + return "Vlan"; + + case ZEBRA_IF_VXLAN: + return "Vxlan"; + + case ZEBRA_IF_VRF: + return "VRF"; + + case ZEBRA_IF_VETH: + return "VETH"; + + case ZEBRA_IF_BOND: + return "bond"; + + case ZEBRA_IF_MACVLAN: + return "macvlan"; + + case ZEBRA_IF_GRE: + return "GRE"; + + default: + return "Unknown"; + } +} + +/* Interface's brief information print out to vty interface. 
*/ +static void ifs_dump_brief_vty(struct vty *vty, struct vrf *vrf) +{ + struct connected *connected; + struct listnode *node; + struct route_node *rn; + struct zebra_if *zebra_if; + struct prefix *p; + struct interface *ifp; + bool print_header = true; + + FOR_ALL_INTERFACES (vrf, ifp) { + bool first_pfx_printed = false; + + if (print_header) { + vty_out(vty, "%-16s%-8s%-16s%s\n", "Interface", + "Status", "VRF", "Addresses"); + vty_out(vty, "%-16s%-8s%-16s%s\n", "---------", + "------", "---", "---------"); + print_header = false; /* We have at least 1 iface */ + } + zebra_if = ifp->info; + + vty_out(vty, "%-16s", ifp->name); + + if (if_is_up(ifp)) + vty_out(vty, "%-8s", "up"); + else + vty_out(vty, "%-8s", "down"); + + vty_out(vty, "%-16s", vrf->name); + + for (rn = route_top(zebra_if->ipv4_subnets); rn; + rn = route_next(rn)) { + if (!rn->info) + continue; + uint32_t list_size = listcount((struct list *)rn->info); + + for (ALL_LIST_ELEMENTS_RO((struct list *)rn->info, node, + connected)) { + if (!CHECK_FLAG(connected->flags, + ZEBRA_IFA_SECONDARY)) { + p = connected->address; + if (first_pfx_printed) { + /* padding to prepare row only + * for ip addr */ + vty_out(vty, "%-40s", ""); + if (list_size > 1) + vty_out(vty, "+ "); + vty_out(vty, "%pFX\n", p); + } else { + if (list_size > 1) + vty_out(vty, "+ "); + vty_out(vty, "%pFX\n", p); + } + first_pfx_printed = true; + break; + } + } + } + + uint32_t v6_list_size = 0; + for (ALL_LIST_ELEMENTS_RO(ifp->connected, node, connected)) { + if (CHECK_FLAG(connected->conf, ZEBRA_IFC_REAL) + && (connected->address->family == AF_INET6)) + v6_list_size++; + } + for (ALL_LIST_ELEMENTS_RO(ifp->connected, node, connected)) { + if (CHECK_FLAG(connected->conf, ZEBRA_IFC_REAL) + && !CHECK_FLAG(connected->flags, + ZEBRA_IFA_SECONDARY) + && (connected->address->family == AF_INET6)) { + p = connected->address; + /* Don't print link local pfx */ + if (!IN6_IS_ADDR_LINKLOCAL(&p->u.prefix6)) { + if (first_pfx_printed) { + /* padding to 
prepare row only + * for ip addr */ + vty_out(vty, "%-40s", ""); + if (v6_list_size > 1) + vty_out(vty, "+ "); + vty_out(vty, "%pFX\n", p); + } else { + if (v6_list_size > 1) + vty_out(vty, "+ "); + vty_out(vty, "%pFX\n", p); + } + first_pfx_printed = true; + break; + } + } + } + if (!first_pfx_printed) + vty_out(vty, "\n"); + } + vty_out(vty, "\n"); +} + +static void ifs_dump_brief_vty_json(json_object *json, struct vrf *vrf) +{ + struct connected *connected; + struct listnode *node; + struct interface *ifp; + + FOR_ALL_INTERFACES (vrf, ifp) { + json_object *json_if; + json_object *json_addrs; + + json_if = json_object_new_object(); + json_object_object_add(json, ifp->name, json_if); + + json_object_string_add(json_if, "status", + if_is_up(ifp) ? "up" : "down"); + json_object_string_add(json_if, "vrfName", vrf->name); + + json_addrs = json_object_new_array(); + json_object_object_add(json_if, "addresses", json_addrs); + for (ALL_LIST_ELEMENTS_RO(ifp->connected, node, connected)) { + if (CHECK_FLAG(connected->conf, ZEBRA_IFC_REAL) + && !CHECK_FLAG(connected->flags, + ZEBRA_IFA_SECONDARY) + && !(connected->address->family == AF_INET6 + && IN6_IS_ADDR_LINKLOCAL( + &connected->address->u.prefix6))) { + char buf[PREFIX2STR_BUFFER]; + + json_array_string_add( + json_addrs, + prefix2str(connected->address, buf, + sizeof(buf))); + } + } + } +} + +const char *zebra_protodown_rc_str(uint32_t protodown_rc, char *pd_buf, + uint32_t pd_buf_len) +{ + pd_buf[0] = '\0'; + size_t len; + + strlcat(pd_buf, "(", pd_buf_len); + + if (CHECK_FLAG(protodown_rc, ZEBRA_PROTODOWN_EXTERNAL)) + strlcat(pd_buf, "external,", pd_buf_len); + + if (CHECK_FLAG(protodown_rc, ZEBRA_PROTODOWN_EVPN_STARTUP_DELAY)) + strlcat(pd_buf, "startup-delay,", pd_buf_len); + + if (CHECK_FLAG(protodown_rc, ZEBRA_PROTODOWN_EVPN_UPLINK_DOWN)) + strlcat(pd_buf, "uplinks-down,", pd_buf_len); + + if (CHECK_FLAG(protodown_rc, ZEBRA_PROTODOWN_VRRP)) + strlcat(pd_buf, "vrrp,", pd_buf_len); + + if (CHECK_FLAG(protodown_rc, 
ZEBRA_PROTODOWN_SHARP)) + strlcat(pd_buf, "sharp,", pd_buf_len); + + len = strnlen(pd_buf, pd_buf_len); + + /* Remove trailing comma */ + if (pd_buf[len - 1] == ',') + pd_buf[len - 1] = '\0'; + + strlcat(pd_buf, ")", pd_buf_len); + + return pd_buf; +} + +static inline bool if_is_protodown_applicable(struct interface *ifp) +{ + if (IS_ZEBRA_IF_BOND(ifp)) + return false; + + return true; +} + +static void zebra_vxlan_if_vni_dump_vty(struct vty *vty, + struct zebra_vxlan_vni *vni) +{ + char str[INET6_ADDRSTRLEN]; + + vty_out(vty, " VxLAN Id %u", vni->vni); + if (vni->access_vlan) + vty_out(vty, " Access VLAN Id %u\n", vni->access_vlan); + + if (vni->mcast_grp.s_addr != INADDR_ANY) + vty_out(vty, " Mcast Group %s", + inet_ntop(AF_INET, &vni->mcast_grp, str, sizeof(str))); +} + +static void zebra_vxlan_if_vni_hash_dump_vty(struct hash_bucket *bucket, + void *ctxt) +{ + struct vty *vty; + struct zebra_vxlan_vni *vni; + + vni = (struct zebra_vxlan_vni *)bucket->data; + vty = (struct vty *)ctxt; + + zebra_vxlan_if_vni_dump_vty(vty, vni); +} + +static void zebra_vxlan_if_dump_vty(struct vty *vty, struct zebra_if *zebra_if) +{ + struct zebra_l2info_vxlan *vxlan_info; + struct zebra_vxlan_vni_info *vni_info; + + vxlan_info = &zebra_if->l2info.vxl; + vni_info = &vxlan_info->vni_info; + + if (vxlan_info->vtep_ip.s_addr != INADDR_ANY) + vty_out(vty, " VTEP IP: %pI4", &vxlan_info->vtep_ip); + + if (vxlan_info->ifindex_link && (vxlan_info->link_nsid != NS_UNKNOWN)) { + struct interface *ifp; + + ifp = if_lookup_by_index_per_ns( + zebra_ns_lookup(vxlan_info->link_nsid), + vxlan_info->ifindex_link); + vty_out(vty, " Link Interface %s", + ifp == NULL ? "Unknown" : ifp->name); + } + + if (IS_ZEBRA_VXLAN_IF_VNI(zebra_if)) { + zebra_vxlan_if_vni_dump_vty(vty, &vni_info->vni); + } else { + hash_iterate(vni_info->vni_table, + zebra_vxlan_if_vni_hash_dump_vty, vty); + } + + vty_out(vty, "\n"); +} + +/* Interface's information print out to vty interface. 
*/ +static void if_dump_vty(struct vty *vty, struct interface *ifp) +{ + struct connected *connected; + struct nbr_connected *nbr_connected; + struct listnode *node; + struct route_node *rn; + struct zebra_if *zebra_if; + char pd_buf[ZEBRA_PROTODOWN_RC_STR_LEN]; + + zebra_if = ifp->info; + + vty_out(vty, "Interface %s is ", ifp->name); + if (if_is_up(ifp)) { + vty_out(vty, "up, line protocol "); + + if (CHECK_FLAG(ifp->status, ZEBRA_INTERFACE_LINKDETECTION)) { + if (if_is_running(ifp)) + vty_out(vty, "is up\n"); + else + vty_out(vty, "is down\n"); + } else { + vty_out(vty, "detection is disabled\n"); + } + } else { + vty_out(vty, "down\n"); + } + + vty_out(vty, " Link ups: %5u last: %s\n", zebra_if->up_count, + zebra_if->up_last[0] ? zebra_if->up_last : "(never)"); + vty_out(vty, " Link downs: %5u last: %s\n", zebra_if->down_count, + zebra_if->down_last[0] ? zebra_if->down_last : "(never)"); + + zebra_ptm_show_status(vty, NULL, ifp); + + vty_out(vty, " vrf: %s\n", ifp->vrf->name); + + if (ifp->desc) + vty_out(vty, " Description: %s\n", ifp->desc); + if (zebra_if->desc) + vty_out(vty, " OS Description: %s\n", zebra_if->desc); + + if (ifp->ifindex == IFINDEX_INTERNAL) { + vty_out(vty, " pseudo interface\n"); + return; + } else if (!CHECK_FLAG(ifp->status, ZEBRA_INTERFACE_ACTIVE)) { + vty_out(vty, " index %d inactive interface\n", ifp->ifindex); + return; + } + + vty_out(vty, " index %d metric %d mtu %d speed %u txqlen %u", + ifp->ifindex, ifp->metric, ifp->mtu, ifp->speed, ifp->txqlen); + if (ifp->mtu6 != ifp->mtu) + vty_out(vty, "mtu6 %d ", ifp->mtu6); + vty_out(vty, "\n flags: %s\n", if_flag_dump(ifp->flags)); + + if (zebra_if->mpls) + vty_out(vty, " MPLS enabled\n"); + + if (zebra_if->linkdown) + vty_out(vty, " Ignore all v4 routes with linkdown\n"); + if (zebra_if->linkdownv6) + vty_out(vty, " Ignore all v6 routes with linkdown\n"); + + if (zebra_if->v4mcast_on) + vty_out(vty, " v4 Multicast forwarding is on\n"); + if (zebra_if->v6mcast_on) + vty_out(vty, " v6 
Multicast forwarding is on\n"); + + /* Hardware address. */ + vty_out(vty, " Type: %s\n", if_link_type_str(ifp->ll_type)); + if (ifp->hw_addr_len != 0) { + int i; + + vty_out(vty, " HWaddr: "); + for (i = 0; i < ifp->hw_addr_len; i++) + vty_out(vty, "%s%02x", i == 0 ? "" : ":", + ifp->hw_addr[i]); + vty_out(vty, "\n"); + } + + /* Bandwidth in Mbps */ + if (ifp->bandwidth != 0) { + vty_out(vty, " bandwidth %u Mbps", ifp->bandwidth); + vty_out(vty, "\n"); + } + + for (rn = route_top(zebra_if->ipv4_subnets); rn; rn = route_next(rn)) { + if (!rn->info) + continue; + + for (ALL_LIST_ELEMENTS_RO((struct list *)rn->info, node, + connected)) + connected_dump_vty(vty, NULL, connected); + } + + for (ALL_LIST_ELEMENTS_RO(ifp->connected, node, connected)) { + if (CHECK_FLAG(connected->conf, ZEBRA_IFC_REAL) + && (connected->address->family == AF_INET6)) + connected_dump_vty(vty, NULL, connected); + } + + vty_out(vty, " Interface Type %s\n", + zebra_ziftype_2str(zebra_if->zif_type)); + vty_out(vty, " Interface Slave Type %s\n", + zebra_zifslavetype_2str(zebra_if->zif_slave_type)); + + if (IS_ZEBRA_IF_BRIDGE(ifp)) { + vty_out(vty, " Bridge VLAN-aware: %s\n", + IS_ZEBRA_IF_BRIDGE_VLAN_AWARE(zebra_if) ? 
"yes" : "no"); + } else if (IS_ZEBRA_IF_VLAN(ifp)) { + struct zebra_l2info_vlan *vlan_info; + + vlan_info = &zebra_if->l2info.vl; + vty_out(vty, " VLAN Id %u\n", vlan_info->vid); + } else if (IS_ZEBRA_IF_VXLAN(ifp)) { + zebra_vxlan_if_dump_vty(vty, zebra_if); + } else if (IS_ZEBRA_IF_GRE(ifp)) { + struct zebra_l2info_gre *gre_info; + + gre_info = &zebra_if->l2info.gre; + if (gre_info->vtep_ip.s_addr != INADDR_ANY) { + vty_out(vty, " VTEP IP: %pI4", &gre_info->vtep_ip); + if (gre_info->vtep_ip_remote.s_addr != INADDR_ANY) + vty_out(vty, " , remote %pI4", + &gre_info->vtep_ip_remote); + vty_out(vty, "\n"); + } + if (gre_info->ifindex_link && + (gre_info->link_nsid != NS_UNKNOWN)) { + struct interface *ifp; + + ifp = if_lookup_by_index_per_ns( + zebra_ns_lookup(gre_info->link_nsid), + gre_info->ifindex_link); + vty_out(vty, " Link Interface %s\n", + ifp == NULL ? "Unknown" : + ifp->name); + } + } + + if (IS_ZEBRA_IF_BRIDGE_SLAVE(ifp)) { + struct zebra_l2info_brslave *br_slave; + + br_slave = &zebra_if->brslave_info; + if (br_slave->bridge_ifindex != IFINDEX_INTERNAL) { + if (br_slave->br_if) + vty_out(vty, " Master interface: %s\n", + br_slave->br_if->name); + else + vty_out(vty, " Master ifindex: %u\n", + br_slave->bridge_ifindex); + } + } + + if (IS_ZEBRA_IF_BOND_SLAVE(ifp)) { + struct zebra_l2info_bondslave *bond_slave; + + bond_slave = &zebra_if->bondslave_info; + if (bond_slave->bond_ifindex != IFINDEX_INTERNAL) { + if (bond_slave->bond_if) + vty_out(vty, " Master interface: %s\n", + bond_slave->bond_if->name); + else + vty_out(vty, " Master ifindex: %u\n", + bond_slave->bond_ifindex); + } + } + + if (zebra_if->flags & ZIF_FLAG_LACP_BYPASS) + vty_out(vty, " LACP bypass: on\n"); + + zebra_evpn_if_es_print(vty, NULL, zebra_if); + vty_out(vty, " protodown: %s %s\n", + (ZEBRA_IF_IS_PROTODOWN(zebra_if)) ? "on" : "off", + if_is_protodown_applicable(ifp) ? 
"" : "(n/a)"); + if (zebra_if->protodown_rc) + vty_out(vty, " protodown reasons: %s\n", + zebra_protodown_rc_str(zebra_if->protodown_rc, pd_buf, + sizeof(pd_buf))); + + if (zebra_if->link_ifindex != IFINDEX_INTERNAL) { + if (zebra_if->link) + vty_out(vty, " Parent interface: %s\n", zebra_if->link->name); + else + vty_out(vty, " Parent ifindex: %d\n", zebra_if->link_ifindex); + } + + if (HAS_LINK_PARAMS(ifp)) { + int i; + struct if_link_params *iflp = ifp->link_params; + vty_out(vty, " Traffic Engineering Link Parameters:\n"); + if (IS_PARAM_SET(iflp, LP_TE_METRIC)) + vty_out(vty, " TE metric %u\n", iflp->te_metric); + if (IS_PARAM_SET(iflp, LP_MAX_BW)) + vty_out(vty, " Maximum Bandwidth %g (Byte/s)\n", + iflp->max_bw); + if (IS_PARAM_SET(iflp, LP_MAX_RSV_BW)) + vty_out(vty, + " Maximum Reservable Bandwidth %g (Byte/s)\n", + iflp->max_rsv_bw); + if (IS_PARAM_SET(iflp, LP_UNRSV_BW)) { + vty_out(vty, + " Unreserved Bandwidth per Class Type in Byte/s:\n"); + for (i = 0; i < MAX_CLASS_TYPE; i += 2) + vty_out(vty, + " [%d]: %g (Bytes/sec),\t[%d]: %g (Bytes/sec)\n", + i, iflp->unrsv_bw[i], i + 1, + iflp->unrsv_bw[i + 1]); + } + + if (IS_PARAM_SET(iflp, LP_ADM_GRP)) + vty_out(vty, " Administrative Group:%u\n", + iflp->admin_grp); + if (IS_PARAM_SET(iflp, LP_DELAY)) { + vty_out(vty, " Link Delay Average: %u (micro-sec.)", + iflp->av_delay); + if (IS_PARAM_SET(iflp, LP_MM_DELAY)) { + vty_out(vty, " Min: %u (micro-sec.)", + iflp->min_delay); + vty_out(vty, " Max: %u (micro-sec.)", + iflp->max_delay); + } + vty_out(vty, "\n"); + } + if (IS_PARAM_SET(iflp, LP_DELAY_VAR)) + vty_out(vty, + " Link Delay Variation %u (micro-sec.)\n", + iflp->delay_var); + if (IS_PARAM_SET(iflp, LP_PKT_LOSS)) + vty_out(vty, " Link Packet Loss %g (in %%)\n", + iflp->pkt_loss); + if (IS_PARAM_SET(iflp, LP_AVA_BW)) + vty_out(vty, " Available Bandwidth %g (Byte/s)\n", + iflp->ava_bw); + if (IS_PARAM_SET(iflp, LP_RES_BW)) + vty_out(vty, " Residual Bandwidth %g (Byte/s)\n", + iflp->res_bw); + if 
(IS_PARAM_SET(iflp, LP_USE_BW)) + vty_out(vty, " Utilized Bandwidth %g (Byte/s)\n", + iflp->use_bw); + if (IS_PARAM_SET(iflp, LP_RMT_AS)) + vty_out(vty, " Neighbor ASBR IP: %pI4 AS: %u \n", + &iflp->rmt_ip, iflp->rmt_as); + } + + hook_call(zebra_if_extra_info, vty, ifp); + + if (listhead(ifp->nbr_connected)) + vty_out(vty, " Neighbor address(s):\n"); + for (ALL_LIST_ELEMENTS_RO(ifp->nbr_connected, node, nbr_connected)) + nbr_connected_dump_vty(vty, NULL, nbr_connected); + +#ifdef HAVE_PROC_NET_DEV + /* Statistics print out using proc file system. */ + vty_out(vty, + " %lu input packets (%lu multicast), %lu bytes, %lu dropped\n", + ifp->stats.rx_packets, ifp->stats.rx_multicast, + ifp->stats.rx_bytes, ifp->stats.rx_dropped); + + vty_out(vty, + " %lu input errors, %lu length, %lu overrun, %lu CRC, %lu frame\n", + ifp->stats.rx_errors, ifp->stats.rx_length_errors, + ifp->stats.rx_over_errors, ifp->stats.rx_crc_errors, + ifp->stats.rx_frame_errors); + + vty_out(vty, " %lu fifo, %lu missed\n", ifp->stats.rx_fifo_errors, + ifp->stats.rx_missed_errors); + + vty_out(vty, " %lu output packets, %lu bytes, %lu dropped\n", + ifp->stats.tx_packets, ifp->stats.tx_bytes, + ifp->stats.tx_dropped); + + vty_out(vty, + " %lu output errors, %lu aborted, %lu carrier, %lu fifo, %lu heartbeat\n", + ifp->stats.tx_errors, ifp->stats.tx_aborted_errors, + ifp->stats.tx_carrier_errors, ifp->stats.tx_fifo_errors, + ifp->stats.tx_heartbeat_errors); + + vty_out(vty, " %lu window, %lu collisions\n", + ifp->stats.tx_window_errors, ifp->stats.collisions); +#endif /* HAVE_PROC_NET_DEV */ + +#ifdef HAVE_NET_RT_IFLIST + /* Statistics print out using sysctl (). 
*/ + vty_out(vty, + " input packets %llu, bytes %llu, dropped %llu, multicast packets %llu\n", + (unsigned long long)ifp->stats.ifi_ipackets, + (unsigned long long)ifp->stats.ifi_ibytes, + (unsigned long long)ifp->stats.ifi_iqdrops, + (unsigned long long)ifp->stats.ifi_imcasts); + + vty_out(vty, " input errors %llu\n", + (unsigned long long)ifp->stats.ifi_ierrors); + + vty_out(vty, + " output packets %llu, bytes %llu, multicast packets %llu\n", + (unsigned long long)ifp->stats.ifi_opackets, + (unsigned long long)ifp->stats.ifi_obytes, + (unsigned long long)ifp->stats.ifi_omcasts); + + vty_out(vty, " output errors %llu\n", + (unsigned long long)ifp->stats.ifi_oerrors); + + vty_out(vty, " collisions %llu\n", + (unsigned long long)ifp->stats.ifi_collisions); +#endif /* HAVE_NET_RT_IFLIST */ +} + +static void zebra_vxlan_if_vni_dump_vty_json(json_object *json_if, + struct zebra_vxlan_vni *vni) +{ + json_object_int_add(json_if, "vxlanId", vni->vni); + if (vni->access_vlan) + json_object_int_add(json_if, "accessVlanId", vni->access_vlan); + if (vni->mcast_grp.s_addr != INADDR_ANY) + json_object_string_addf(json_if, "mcastGroup", "%pI4", + &vni->mcast_grp); +} + +static void zebra_vxlan_if_vni_hash_dump_vty_json(struct hash_bucket *bucket, + void *ctxt) +{ + json_object *json_if; + struct zebra_vxlan_vni *vni; + + vni = (struct zebra_vxlan_vni *)bucket->data; + json_if = (json_object *)ctxt; + + zebra_vxlan_if_vni_dump_vty_json(json_if, vni); +} + +static void zebra_vxlan_if_dump_vty_json(json_object *json_if, + struct zebra_if *zebra_if) +{ + struct zebra_l2info_vxlan *vxlan_info; + struct zebra_vxlan_vni_info *vni_info; + + vxlan_info = &zebra_if->l2info.vxl; + vni_info = &vxlan_info->vni_info; + + if (vxlan_info->vtep_ip.s_addr != INADDR_ANY) + json_object_string_addf(json_if, "vtepIp", "%pI4", + &vxlan_info->vtep_ip); + + if (vxlan_info->ifindex_link && (vxlan_info->link_nsid != NS_UNKNOWN)) { + struct interface *ifp; + + ifp = if_lookup_by_index_per_ns( + 
zebra_ns_lookup(vxlan_info->link_nsid), + vxlan_info->ifindex_link); + json_object_string_add(json_if, "linkInterface", + ifp == NULL ? "Unknown" : ifp->name); + } + if (IS_ZEBRA_VXLAN_IF_VNI(zebra_if)) { + zebra_vxlan_if_vni_dump_vty_json(json_if, &vni_info->vni); + } else { + hash_iterate(vni_info->vni_table, + zebra_vxlan_if_vni_hash_dump_vty_json, json_if); + } +} + +static void if_dump_vty_json(struct vty *vty, struct interface *ifp, + json_object *json) +{ + struct connected *connected; + struct nbr_connected *nbr_connected; + struct listnode *node; + struct route_node *rn; + struct zebra_if *zebra_if; + char pd_buf[ZEBRA_PROTODOWN_RC_STR_LEN]; + char buf[BUFSIZ]; + json_object *json_if; + json_object *json_addrs; + + json_if = json_object_new_object(); + json_object_object_add(json, ifp->name, json_if); + + if (if_is_up(ifp)) { + json_object_string_add(json_if, "administrativeStatus", "up"); + + if (CHECK_FLAG(ifp->status, ZEBRA_INTERFACE_LINKDETECTION)) { + json_object_string_add(json_if, "operationalStatus", + if_is_running(ifp) ? 
"up" + : "down"); + json_object_boolean_add(json_if, "linkDetection", true); + } else { + json_object_boolean_add(json_if, "linkDetection", + false); + } + } else { + json_object_string_add(json_if, "administrativeStatus", "down"); + } + + zebra_if = ifp->info; + + json_object_int_add(json_if, "linkUps", zebra_if->up_count); + json_object_int_add(json_if, "linkDowns", zebra_if->down_count); + if (zebra_if->up_last[0]) + json_object_string_add(json_if, "lastLinkUp", + zebra_if->up_last); + if (zebra_if->down_last[0]) + json_object_string_add(json_if, "lastLinkDown", + zebra_if->down_last); + + zebra_ptm_show_status(vty, json_if, ifp); + + json_object_string_add(json_if, "vrfName", ifp->vrf->name); + + if (ifp->desc) + json_object_string_add(json_if, "description", ifp->desc); + if (zebra_if->desc) + json_object_string_add(json_if, "OsDescription", + zebra_if->desc); + + json_object_boolean_add(json_if, "mplsEnabled", zebra_if->mpls); + json_object_boolean_add(json_if, "linkDown", zebra_if->linkdown); + json_object_boolean_add(json_if, "linkDownV6", zebra_if->linkdownv6); + json_object_boolean_add(json_if, "mcForwardingV4", + zebra_if->v4mcast_on); + json_object_boolean_add(json_if, "mcForwardingV6", + zebra_if->v6mcast_on); + + if (ifp->ifindex == IFINDEX_INTERNAL) { + json_object_boolean_add(json_if, "pseudoInterface", true); + return; + } else if (!CHECK_FLAG(ifp->status, ZEBRA_INTERFACE_ACTIVE)) { + json_object_int_add(json_if, "index", ifp->ifindex); + return; + } + + json_object_boolean_add(json_if, "pseudoInterface", false); + json_object_int_add(json_if, "index", ifp->ifindex); + json_object_int_add(json_if, "metric", ifp->metric); + json_object_int_add(json_if, "mtu", ifp->mtu); + if (ifp->mtu6 != ifp->mtu) + json_object_int_add(json_if, "mtu6", ifp->mtu6); + json_object_int_add(json_if, "speed", ifp->speed); + json_object_int_add(json_if, "txqlen", ifp->txqlen); + json_object_string_add(json_if, "flags", if_flag_dump(ifp->flags)); + + /* Hardware address. 
*/ + json_object_string_add(json_if, "type", if_link_type_str(ifp->ll_type)); + if (ifp->hw_addr_len != 0) { + char hwbuf[BUFSIZ]; + + hwbuf[0] = '\0'; + for (int i = 0; i < ifp->hw_addr_len; i++) { + snprintf(buf, sizeof(buf), "%s%02x", i == 0 ? "" : ":", + ifp->hw_addr[i]); + strlcat(hwbuf, buf, sizeof(hwbuf)); + } + json_object_string_add(json_if, "hardwareAddress", hwbuf); + } + + /* Bandwidth in Mbps */ + if (ifp->bandwidth != 0) + json_object_int_add(json_if, "bandwidth", ifp->bandwidth); + + + /* IP addresses. */ + json_addrs = json_object_new_array(); + json_object_object_add(json_if, "ipAddresses", json_addrs); + + for (rn = route_top(zebra_if->ipv4_subnets); rn; rn = route_next(rn)) { + if (!rn->info) + continue; + + for (ALL_LIST_ELEMENTS_RO((struct list *)rn->info, node, + connected)) + connected_dump_vty(vty, json_addrs, connected); + } + + for (ALL_LIST_ELEMENTS_RO(ifp->connected, node, connected)) { + if (CHECK_FLAG(connected->conf, ZEBRA_IFC_REAL) + && (connected->address->family == AF_INET6)) + connected_dump_vty(vty, json_addrs, connected); + } + + json_object_string_add(json_if, "interfaceType", + zebra_ziftype_2str(zebra_if->zif_type)); + json_object_string_add( + json_if, "interfaceSlaveType", + zebra_zifslavetype_2str(zebra_if->zif_slave_type)); + + if (IS_ZEBRA_IF_BRIDGE(ifp)) { + struct zebra_l2info_bridge *bridge_info; + + bridge_info = &zebra_if->l2info.br; + json_object_boolean_add(json_if, "bridgeVlanAware", + bridge_info->bridge.vlan_aware); + } else if (IS_ZEBRA_IF_VLAN(ifp)) { + struct zebra_l2info_vlan *vlan_info; + + vlan_info = &zebra_if->l2info.vl; + json_object_int_add(json_if, "vlanId", vlan_info->vid); + } else if (IS_ZEBRA_IF_VXLAN(ifp)) { + zebra_vxlan_if_dump_vty_json(json_if, zebra_if); + + } else if (IS_ZEBRA_IF_GRE(ifp)) { + struct zebra_l2info_gre *gre_info; + + gre_info = &zebra_if->l2info.gre; + if (gre_info->vtep_ip.s_addr != INADDR_ANY) { + json_object_string_addf(json_if, "vtepIp", "%pI4", + &gre_info->vtep_ip); + 
if (gre_info->vtep_ip_remote.s_addr != INADDR_ANY) + json_object_string_addf( + json_if, "vtepRemoteIp", "%pI4", + &gre_info->vtep_ip_remote); + } + if (gre_info->ifindex_link + && (gre_info->link_nsid != NS_UNKNOWN)) { + struct interface *ifp; + + ifp = if_lookup_by_index_per_ns( + zebra_ns_lookup(gre_info->link_nsid), + gre_info->ifindex_link); + json_object_string_add(json_if, "linkInterface", + ifp == NULL ? "Unknown" + : ifp->name); + } + } + + if (IS_ZEBRA_IF_BRIDGE_SLAVE(ifp)) { + struct zebra_l2info_brslave *br_slave; + + br_slave = &zebra_if->brslave_info; + if (br_slave->bridge_ifindex != IFINDEX_INTERNAL) { + if (br_slave->br_if) + json_object_string_add(json_if, + "masterInterface", + br_slave->br_if->name); + else + json_object_int_add(json_if, "masterIfindex", + br_slave->bridge_ifindex); + } + } + + if (IS_ZEBRA_IF_BOND_SLAVE(ifp)) { + struct zebra_l2info_bondslave *bond_slave; + + bond_slave = &zebra_if->bondslave_info; + if (bond_slave->bond_ifindex != IFINDEX_INTERNAL) { + if (bond_slave->bond_if) + json_object_string_add( + json_if, "masterInterface", + bond_slave->bond_if->name); + else + json_object_int_add(json_if, "masterIfindex", + bond_slave->bond_ifindex); + } + } + + json_object_boolean_add( + json_if, "lacpBypass", + CHECK_FLAG(zebra_if->flags, ZIF_FLAG_LACP_BYPASS)); + + zebra_evpn_if_es_print(vty, json_if, zebra_if); + + if (if_is_protodown_applicable(ifp)) { + json_object_string_add( + json_if, "protodown", + (ZEBRA_IF_IS_PROTODOWN(zebra_if)) ? 
"on" : "off"); + if (zebra_if->protodown_rc) + json_object_string_add( + json_if, "protodownReason", + zebra_protodown_rc_str(zebra_if->protodown_rc, + pd_buf, sizeof(pd_buf))); + } + + if (zebra_if->link_ifindex != IFINDEX_INTERNAL) { + if (zebra_if->link) + json_object_string_add(json_if, "parentInterface", + zebra_if->link->name); + else + json_object_int_add(json_if, "parentIfindex", + zebra_if->link_ifindex); + } + + if (HAS_LINK_PARAMS(ifp)) { + struct if_link_params *iflp = ifp->link_params; + json_object *json_te; + + json_te = json_object_new_object(); + json_object_object_add( + json_if, "trafficEngineeringLinkParameters", json_te); + + if (IS_PARAM_SET(iflp, LP_TE_METRIC)) + json_object_int_add(json_te, "teMetric", + iflp->te_metric); + if (IS_PARAM_SET(iflp, LP_MAX_BW)) + json_object_double_add(json_te, "maximumBandwidth", + iflp->max_bw); + if (IS_PARAM_SET(iflp, LP_MAX_RSV_BW)) + json_object_double_add(json_te, + "maximumReservableBandwidth", + iflp->max_rsv_bw); + if (IS_PARAM_SET(iflp, LP_UNRSV_BW)) { + json_object *json_bws; + + json_bws = json_object_new_object(); + json_object_object_add(json_te, "unreservedBandwidth", + json_bws); + for (unsigned int i = 0; i < MAX_CLASS_TYPE; ++i) { + char buf_ct[64]; + + snprintf(buf_ct, sizeof(buf_ct), "classType%u", + i); + json_object_double_add(json_bws, buf_ct, + iflp->unrsv_bw[i]); + } + } + + if (IS_PARAM_SET(iflp, LP_ADM_GRP)) + json_object_int_add(json_te, "administrativeGroup", + iflp->admin_grp); + if (IS_PARAM_SET(iflp, LP_DELAY)) { + json_object_int_add(json_te, "linkDelayAverage", + iflp->av_delay); + if (IS_PARAM_SET(iflp, LP_MM_DELAY)) { + json_object_int_add(json_te, "linkDelayMinimum", + iflp->min_delay); + json_object_int_add(json_te, "linkDelayMaximum", + iflp->max_delay); + } + } + if (IS_PARAM_SET(iflp, LP_DELAY_VAR)) + json_object_int_add(json_te, "linkDelayVariation", + iflp->delay_var); + if (IS_PARAM_SET(iflp, LP_PKT_LOSS)) + json_object_double_add(json_te, "linkPacketLoss", + 
iflp->pkt_loss); + if (IS_PARAM_SET(iflp, LP_AVA_BW)) + json_object_double_add(json_te, "availableBandwidth", + iflp->ava_bw); + if (IS_PARAM_SET(iflp, LP_RES_BW)) + json_object_double_add(json_te, "residualBandwidth", + iflp->res_bw); + if (IS_PARAM_SET(iflp, LP_USE_BW)) + json_object_double_add(json_te, "utilizedBandwidth", + iflp->use_bw); + if (IS_PARAM_SET(iflp, LP_RMT_AS)) + json_object_string_addf(json_te, "neighborAsbrIp", + "%pI4", &iflp->rmt_ip); + json_object_int_add(json_te, "neighborAsbrAs", iflp->rmt_as); + } + + if (listhead(ifp->nbr_connected)) { + json_object *json_nbr_addrs; + + json_nbr_addrs = json_object_new_array(); + json_object_object_add(json_if, "neighborIpAddresses", + json_nbr_addrs); + + for (ALL_LIST_ELEMENTS_RO(ifp->nbr_connected, node, + nbr_connected)) + nbr_connected_dump_vty(vty, json_nbr_addrs, + nbr_connected); + } + +#ifdef HAVE_PROC_NET_DEV + json_object_int_add(json_if, "inputPackets", stats.rx_packets); + json_object_int_add(json_if, "inputBytes", ifp->stats.rx_bytes); + json_object_int_add(json_if, "inputDropped", ifp->stats.rx_dropped); + json_object_int_add(json_if, "inputMulticastPackets", + ifp->stats.rx_multicast); + json_object_int_add(json_if, "inputErrors", ifp->stats.rx_errors); + json_object_int_add(json_if, "inputLengthErrors", + ifp->stats.rx_length_errors); + json_object_int_add(json_if, "inputOverrunErrors", + ifp->stats.rx_over_errors); + json_object_int_add(json_if, "inputCrcErrors", + ifp->stats.rx_crc_errors); + json_object_int_add(json_if, "inputFrameErrors", + ifp->stats.rx_frame_errors); + json_object_int_add(json_if, "inputFifoErrors", + ifp->stats.rx_fifo_errors); + json_object_int_add(json_if, "inputMissedErrors", + ifp->stats.rx_missed_errors); + json_object_int_add(json_if, "outputPackets", ifp->stats.tx_packets); + json_object_int_add(json_if, "outputBytes", ifp->stats.tx_bytes); + json_object_int_add(json_if, "outputDroppedPackets", + ifp->stats.tx_dropped); + json_object_int_add(json_if, 
"outputErrors", ifp->stats.tx_errors); + json_object_int_add(json_if, "outputAbortedErrors", + ifp->stats.tx_aborted_errors); + json_object_int_add(json_if, "outputCarrierErrors", + ifp->stats.tx_carrier_errors); + json_object_int_add(json_if, "outputFifoErrors", + ifp->stats.tx_fifo_errors); + json_object_int_add(json_if, "outputHeartbeatErrors", + ifp->stats.tx_heartbeat_errors); + json_object_int_add(json_if, "outputWindowErrors", + ifp->stats.tx_window_errors); + json_object_int_add(json_if, "collisions", ifp->stats.collisions); +#endif /* HAVE_PROC_NET_DEV */ + +#ifdef HAVE_NET_RT_IFLIST + json_object_int_add(json_if, "inputPackets", ifp->stats.ifi_ipackets); + json_object_int_add(json_if, "inputBytes", ifp->stats.ifi_ibytes); + json_object_int_add(json_if, "inputDropd", ifp->stats.ifi_iqdrops); + json_object_int_add(json_if, "inputMulticastPackets", + ifp->stats.ifi_imcasts); + json_object_int_add(json_if, "inputErrors", ifp->stats.ifi_ierrors); + json_object_int_add(json_if, "outputPackets", ifp->stats.ifi_opackets); + json_object_int_add(json_if, "outputBytes", ifp->stats.ifi_obytes); + json_object_int_add(json_if, "outputMulticastPackets", + ifp->stats.ifi_omcasts); + json_object_int_add(json_if, "outputErrors", ifp->stats.ifi_oerrors); + json_object_int_add(json_if, "collisions", ifp->stats.ifi_collisions); +#endif /* HAVE_NET_RT_IFLIST */ +} + +static void interface_update_stats(void) +{ +#ifdef HAVE_PROC_NET_DEV + /* If system has interface statistics via proc file system, update + statistics. */ + ifstat_update_proc(); +#endif /* HAVE_PROC_NET_DEV */ +#ifdef HAVE_NET_RT_IFLIST + ifstat_update_sysctl(); +#endif /* HAVE_NET_RT_IFLIST */ +} + +#include "zebra/interface_clippy.c" +/* Show all interfaces to vty. 
*/ +DEFPY(show_interface, show_interface_cmd, + "show interface vrf NAME$vrf_name [brief$brief] [json$uj]", + SHOW_STR + "Interface status and configuration\n" + VRF_CMD_HELP_STR + "Interface status and configuration summary\n" + JSON_STR) +{ + struct vrf *vrf; + struct interface *ifp; + json_object *json = NULL; + + interface_update_stats(); + + vrf = vrf_lookup_by_name(vrf_name); + if (!vrf) { + if (uj) + vty_out(vty, "{}\n"); + else + vty_out(vty, "%% VRF %s not found\n", vrf_name); + return CMD_WARNING; + } + + if (uj) + json = json_object_new_object(); + + if (brief) { + if (json) + ifs_dump_brief_vty_json(json, vrf); + else + ifs_dump_brief_vty(vty, vrf); + } else { + FOR_ALL_INTERFACES (vrf, ifp) { + if (json) + if_dump_vty_json(vty, ifp, json); + else + if_dump_vty(vty, ifp); + } + } + + if (json) + vty_json(vty, json); + + return CMD_SUCCESS; +} + + +/* Show all interfaces to vty. */ +DEFPY (show_interface_vrf_all, + show_interface_vrf_all_cmd, + "show interface [vrf all] [brief$brief] [json$uj]", + SHOW_STR + "Interface status and configuration\n" + VRF_ALL_CMD_HELP_STR + "Interface status and configuration summary\n" + JSON_STR) +{ + struct vrf *vrf; + struct interface *ifp; + json_object *json = NULL; + + interface_update_stats(); + + if (uj) + json = json_object_new_object(); + + /* All interface print. */ + RB_FOREACH (vrf, vrf_name_head, &vrfs_by_name) { + if (brief) { + if (json) + ifs_dump_brief_vty_json(json, vrf); + else + ifs_dump_brief_vty(vty, vrf); + } else { + FOR_ALL_INTERFACES (vrf, ifp) { + if (json) + if_dump_vty_json(vty, ifp, json); + else + if_dump_vty(vty, ifp); + } + } + } + + if (json) + vty_json(vty, json); + + return CMD_SUCCESS; +} + +/* Show specified interface to vty. 
*/ + +DEFPY (show_interface_name_vrf, + show_interface_name_vrf_cmd, + "show interface IFNAME$ifname vrf NAME$vrf_name [json$uj]", + SHOW_STR + "Interface status and configuration\n" + "Interface name\n" + VRF_CMD_HELP_STR + JSON_STR) +{ + struct interface *ifp; + struct vrf *vrf; + json_object *json = NULL; + + interface_update_stats(); + + vrf = vrf_lookup_by_name(vrf_name); + if (!vrf) { + if (uj) + vty_out(vty, "{}\n"); + else + vty_out(vty, "%% VRF %s not found\n", vrf_name); + return CMD_WARNING; + } + + ifp = if_lookup_by_name_vrf(ifname, vrf); + if (ifp == NULL) { + if (uj) + vty_out(vty, "{}\n"); + else + vty_out(vty, "%% Can't find interface %s\n", ifname); + return CMD_WARNING; + } + + if (uj) + json = json_object_new_object(); + + if (json) + if_dump_vty_json(vty, ifp, json); + else + if_dump_vty(vty, ifp); + + if (json) + vty_json(vty, json); + + return CMD_SUCCESS; +} + +/* Show specified interface to vty. */ +DEFPY (show_interface_name_vrf_all, + show_interface_name_vrf_all_cmd, + "show interface IFNAME$ifname [vrf all] [json$uj]", + SHOW_STR + "Interface status and configuration\n" + "Interface name\n" + VRF_ALL_CMD_HELP_STR + JSON_STR) +{ + struct interface *ifp = NULL; + struct interface *ifptmp; + struct vrf *vrf; + json_object *json = NULL; + int count = 0; + + interface_update_stats(); + + RB_FOREACH (vrf, vrf_name_head, &vrfs_by_name) { + ifptmp = if_lookup_by_name_vrf(ifname, vrf); + if (ifptmp) { + ifp = ifptmp; + count++; + if (!vrf_is_backend_netns()) + break; + } + } + + if (ifp == NULL) { + if (uj) + vty_out(vty, "{}\n"); + else + vty_out(vty, "%% Can't find interface %s\n", ifname); + return CMD_WARNING; + } + if (count > 1) { + if (uj) { + vty_out(vty, "{}\n"); + } else { + vty_out(vty, + "%% There are multiple interfaces with name %s\n", + ifname); + vty_out(vty, "%% You must specify the VRF name\n"); + } + return CMD_WARNING; + } + + if (uj) + json = json_object_new_object(); + + if (json) + if_dump_vty_json(vty, ifp, json); + else + 
if_dump_vty(vty, ifp); + + if (json) + vty_json(vty, json); + + return CMD_SUCCESS; +} + +static void if_show_description(struct vty *vty, struct vrf *vrf) +{ + struct interface *ifp; + + vty_out(vty, "Interface Status Protocol Description\n"); + FOR_ALL_INTERFACES (vrf, ifp) { + int len; + struct zebra_if *zif; + bool intf_desc; + + intf_desc = false; + + len = vty_out(vty, "%s", ifp->name); + vty_out(vty, "%*s", (16 - len), " "); + + if (if_is_up(ifp)) { + vty_out(vty, "up "); + if (CHECK_FLAG(ifp->status, + ZEBRA_INTERFACE_LINKDETECTION)) { + if (if_is_running(ifp)) + vty_out(vty, "up "); + else + vty_out(vty, "down "); + } else { + vty_out(vty, "unknown "); + } + } else { + vty_out(vty, "down down "); + } + + if (ifp->desc) { + intf_desc = true; + vty_out(vty, "%s", ifp->desc); + } + zif = ifp->info; + if (zif && zif->desc) { + vty_out(vty, "%s%s", + intf_desc + ? "\n " + : "", + zif->desc); + } + + vty_out(vty, "\n"); + } +} + +DEFUN (show_interface_desc, + show_interface_desc_cmd, + "show interface description vrf NAME", + SHOW_STR + "Interface status and configuration\n" + "Interface description\n" + VRF_CMD_HELP_STR) +{ + struct vrf *vrf; + + vrf = vrf_lookup_by_name(argv[4]->arg); + if (!vrf) { + vty_out(vty, "%% VRF %s not found\n", argv[4]->arg); + return CMD_WARNING; + } + + if_show_description(vty, vrf); + + return CMD_SUCCESS; +} + + +DEFUN (show_interface_desc_vrf_all, + show_interface_desc_vrf_all_cmd, + "show interface description [vrf all]", + SHOW_STR + "Interface status and configuration\n" + "Interface description\n" + VRF_ALL_CMD_HELP_STR) +{ + struct vrf *vrf; + + RB_FOREACH (vrf, vrf_name_head, &vrfs_by_name) + if (!RB_EMPTY(if_name_head, &vrf->ifaces_by_name)) { + vty_out(vty, "\n\tVRF %s(%u)\n\n", VRF_LOGNAME(vrf), + vrf->vrf_id); + if_show_description(vty, vrf); + } + + return CMD_SUCCESS; +} + +int if_multicast_set(struct interface *ifp) +{ + struct zebra_if *if_data; + + if (CHECK_FLAG(ifp->status, ZEBRA_INTERFACE_ACTIVE)) { + if 
(if_set_flags(ifp, IFF_MULTICAST) < 0) { + zlog_debug("Can't set multicast flag on interface %s", + ifp->name); + return -1; + } + if_refresh(ifp); + } + if_data = ifp->info; + if_data->multicast = IF_ZEBRA_DATA_ON; + + return 0; +} + +DEFUN (multicast, + multicast_cmd, + "multicast", + "Set multicast flag to interface\n") +{ + VTY_DECLVAR_CONTEXT(interface, ifp); + int ret; + struct zebra_if *if_data; + + if (CHECK_FLAG(ifp->status, ZEBRA_INTERFACE_ACTIVE)) { + ret = if_set_flags(ifp, IFF_MULTICAST); + if (ret < 0) { + vty_out(vty, "Can't set multicast flag\n"); + return CMD_WARNING_CONFIG_FAILED; + } + if_refresh(ifp); + } + if_data = ifp->info; + if_data->multicast = IF_ZEBRA_DATA_ON; + + return CMD_SUCCESS; +} + +DEFPY (mpls, + mpls_cmd, + "[no] mpls ", + NO_STR + MPLS_STR + "Set mpls to be on for the interface\n" + "Set mpls to be off for the interface\n") +{ + if (!no) + nb_cli_enqueue_change(vty, "./frr-zebra:zebra/mpls", + NB_OP_CREATE, on ? "true" : "false"); + else + nb_cli_enqueue_change(vty, "./frr-zebra:zebra/mpls", + NB_OP_DESTROY, NULL); + + return nb_cli_apply_changes(vty, NULL); +} + +int if_multicast_unset(struct interface *ifp) +{ + struct zebra_if *if_data; + + if (CHECK_FLAG(ifp->status, ZEBRA_INTERFACE_ACTIVE)) { + if (if_unset_flags(ifp, IFF_MULTICAST) < 0) { + zlog_debug("Can't unset multicast flag on interface %s", + ifp->name); + return -1; + } + if_refresh(ifp); + } + if_data = ifp->info; + if_data->multicast = IF_ZEBRA_DATA_OFF; + + return 0; +} + +DEFUN (no_multicast, + no_multicast_cmd, + "no multicast", + NO_STR + "Unset multicast flag to interface\n") +{ + VTY_DECLVAR_CONTEXT(interface, ifp); + int ret; + struct zebra_if *if_data; + + if (CHECK_FLAG(ifp->status, ZEBRA_INTERFACE_ACTIVE)) { + ret = if_unset_flags(ifp, IFF_MULTICAST); + if (ret < 0) { + vty_out(vty, "Can't unset multicast flag\n"); + return CMD_WARNING_CONFIG_FAILED; + } + if_refresh(ifp); + } + if_data = ifp->info; + if_data->multicast = IF_ZEBRA_DATA_OFF; + + return 
CMD_SUCCESS; +} + +int if_linkdetect(struct interface *ifp, bool detect) +{ + int if_was_operative; + + if_was_operative = if_is_no_ptm_operative(ifp); + if (detect) { + SET_FLAG(ifp->status, ZEBRA_INTERFACE_LINKDETECTION); + + /* When linkdetection is enabled, if might come down */ + if (!if_is_no_ptm_operative(ifp) && if_was_operative) + if_down(ifp); + } else { + UNSET_FLAG(ifp->status, ZEBRA_INTERFACE_LINKDETECTION); + + /* Interface may come up after disabling link detection */ + if (if_is_operative(ifp) && !if_was_operative) + if_up(ifp, true); + } + /* FIXME: Will defer status change forwarding if interface + does not come down! */ + return 0; +} + +DEFUN(linkdetect, linkdetect_cmd, "link-detect", + "Enable link detection on interface\n") +{ + VTY_DECLVAR_CONTEXT(interface, ifp); + + if_linkdetect(ifp, true); + + return CMD_SUCCESS; +} + + +DEFUN (no_linkdetect, + no_linkdetect_cmd, + "no link-detect", + NO_STR + "Disable link detection on interface\n") +{ + VTY_DECLVAR_CONTEXT(interface, ifp); + + if_linkdetect(ifp, false); + + return CMD_SUCCESS; +} + +int if_shutdown(struct interface *ifp) +{ + struct zebra_if *if_data; + + if (ifp->ifindex != IFINDEX_INTERNAL) { + /* send RA lifetime of 0 before stopping. rfc4861/6.2.5 */ + rtadv_stop_ra(ifp); + if (if_unset_flags(ifp, IFF_UP) < 0) { + zlog_debug("Can't shutdown interface %s", ifp->name); + return -1; + } + if_refresh(ifp); + } + if_data = ifp->info; + if_data->shutdown = IF_ZEBRA_DATA_ON; + + return 0; +} + +DEFUN (shutdown_if, + shutdown_if_cmd, + "shutdown", + "Shutdown the selected interface\n") +{ + VTY_DECLVAR_CONTEXT(interface, ifp); + int ret; + struct zebra_if *if_data; + + if (ifp->ifindex != IFINDEX_INTERNAL) { + /* send RA lifetime of 0 before stopping. 
rfc4861/6.2.5 */ + rtadv_stop_ra(ifp); + ret = if_unset_flags(ifp, IFF_UP); + if (ret < 0) { + vty_out(vty, "Can't shutdown interface\n"); + return CMD_WARNING_CONFIG_FAILED; + } + if_refresh(ifp); + } + if_data = ifp->info; + if_data->shutdown = IF_ZEBRA_DATA_ON; + + return CMD_SUCCESS; +} + +int if_no_shutdown(struct interface *ifp) +{ + struct zebra_if *if_data; + + if (ifp->ifindex != IFINDEX_INTERNAL) { + if (if_set_flags(ifp, IFF_UP | IFF_RUNNING) < 0) { + zlog_debug("Can't up interface %s", ifp->name); + return -1; + } + if_refresh(ifp); + + /* Some addresses (in particular, IPv6 addresses on Linux) get + * removed when the interface goes down. They need to be + * readded. + */ + if_addr_wakeup(ifp); + } + + if_data = ifp->info; + if_data->shutdown = IF_ZEBRA_DATA_OFF; + + return 0; +} + +DEFUN (no_shutdown_if, + no_shutdown_if_cmd, + "no shutdown", + NO_STR + "Shutdown the selected interface\n") +{ + VTY_DECLVAR_CONTEXT(interface, ifp); + int ret; + struct zebra_if *if_data; + + if (ifp->ifindex != IFINDEX_INTERNAL) { + ret = if_set_flags(ifp, IFF_UP | IFF_RUNNING); + if (ret < 0) { + vty_out(vty, "Can't up interface\n"); + return CMD_WARNING_CONFIG_FAILED; + } + if_refresh(ifp); + + /* Some addresses (in particular, IPv6 addresses on Linux) get + * removed when the interface goes down. They need to be + * readded. 
+ */ + if_addr_wakeup(ifp); + } + + if_data = ifp->info; + if_data->shutdown = IF_ZEBRA_DATA_OFF; + + return CMD_SUCCESS; +} + +DEFUN (bandwidth_if, + bandwidth_if_cmd, + "bandwidth (1-100000)", + "Set bandwidth informational parameter\n" + "Bandwidth in megabits\n") +{ + int idx_number = 1; + VTY_DECLVAR_CONTEXT(interface, ifp); + unsigned int bandwidth; + + bandwidth = strtol(argv[idx_number]->arg, NULL, 10); + + /* bandwidth range is <1-100000> */ + if (bandwidth < 1 || bandwidth > 100000) { + vty_out(vty, "Bandwidth is invalid\n"); + return CMD_WARNING_CONFIG_FAILED; + } + + ifp->bandwidth = bandwidth; + + /* force protocols to recalculate routes due to cost change */ + if (if_is_operative(ifp)) + zebra_interface_up_update(ifp); + + return CMD_SUCCESS; +} + +DEFUN (no_bandwidth_if, + no_bandwidth_if_cmd, + "no bandwidth [(1-100000)]", + NO_STR + "Set bandwidth informational parameter\n" + "Bandwidth in megabits\n") +{ + VTY_DECLVAR_CONTEXT(interface, ifp); + + ifp->bandwidth = 0; + + /* force protocols to recalculate routes due to cost change */ + if (if_is_operative(ifp)) + zebra_interface_up_update(ifp); + + return CMD_SUCCESS; +} + + +struct cmd_node link_params_node = { + .name = "link-params", + .node = LINK_PARAMS_NODE, + .parent_node = INTERFACE_NODE, + .prompt = "%s(config-link-params)# ", + .no_xpath = true, +}; + +static void link_param_cmd_set_uint32(struct interface *ifp, uint32_t *field, + uint32_t type, uint32_t value) +{ + /* Update field as needed */ + if (IS_PARAM_UNSET(ifp->link_params, type) || *field != value) { + *field = value; + SET_PARAM(ifp->link_params, type); + + /* force protocols to update LINK STATE due to parameters change + */ + if (if_is_operative(ifp)) + zebra_interface_parameters_update(ifp); + } +} +static void link_param_cmd_set_float(struct interface *ifp, float *field, + uint32_t type, float value) +{ + + /* Update field as needed */ + if (IS_PARAM_UNSET(ifp->link_params, type) || *field != value) { + *field = value; + 
SET_PARAM(ifp->link_params, type); + + /* force protocols to update LINK STATE due to parameters change + */ + if (if_is_operative(ifp)) + zebra_interface_parameters_update(ifp); + } +} + +static void link_param_cmd_unset(struct interface *ifp, uint32_t type) +{ + if (ifp->link_params == NULL) + return; + + /* Unset field */ + UNSET_PARAM(ifp->link_params, type); + + /* force protocols to update LINK STATE due to parameters change */ + if (if_is_operative(ifp)) + zebra_interface_parameters_update(ifp); +} + +DEFUN_NOSH (link_params, + link_params_cmd, + "link-params", + LINK_PARAMS_STR) +{ + /* vty->qobj_index stays the same @ interface pointer */ + vty->node = LINK_PARAMS_NODE; + + return CMD_SUCCESS; +} + +DEFUN_NOSH (exit_link_params, + exit_link_params_cmd, + "exit-link-params", + "Exit from Link Params configuration mode\n") +{ + if (vty->node == LINK_PARAMS_NODE) + vty->node = INTERFACE_NODE; + return CMD_SUCCESS; +} + +/* Specific Traffic Engineering parameters commands */ +DEFUN (link_params_enable, + link_params_enable_cmd, + "enable", + "Activate link parameters on this interface\n") +{ + VTY_DECLVAR_CONTEXT(interface, ifp); + + /* This command could be issue at startup, when activate MPLS TE */ + /* on a new interface or after a ON / OFF / ON toggle */ + /* In all case, TE parameters are reset to their default factory */ + if (IS_ZEBRA_DEBUG_EVENT || IS_ZEBRA_DEBUG_MPLS) + zlog_debug( + "Link-params: enable TE link parameters on interface %s", + ifp->name); + + if (!if_link_params_get(ifp)) + if_link_params_enable(ifp); + + /* force protocols to update LINK STATE due to parameters change */ + if (if_is_operative(ifp)) + zebra_interface_parameters_update(ifp); + + return CMD_SUCCESS; +} + +DEFUN (no_link_params_enable, + no_link_params_enable_cmd, + "no enable", + NO_STR + "Disable link parameters on this interface\n") +{ + char xpath[XPATH_MAXLEN]; + int ret; + VTY_DECLVAR_CONTEXT(interface, ifp); + + if (IS_ZEBRA_DEBUG_EVENT || IS_ZEBRA_DEBUG_MPLS) + 
zlog_debug("MPLS-TE: disable TE link parameters on interface %s", + ifp->name); + + if_link_params_free(ifp); + + snprintf( + xpath, sizeof(xpath), + "/frr-interface:lib/interface[name='%s']/frr-zebra:zebra/link-params/affinities", + ifp->name); + if (yang_dnode_exists(running_config->dnode, xpath)) + nb_cli_enqueue_change(vty, xpath, NB_OP_DESTROY, NULL); + + ret = nb_cli_apply_changes(vty, NULL); + + if (ret != CMD_SUCCESS) + return ret; + + /* force protocols to update LINK STATE due to parameters change */ + if (if_is_operative(ifp)) + zebra_interface_parameters_update(ifp); + + return CMD_SUCCESS; +} + +/* STANDARD TE metrics */ +DEFUN (link_params_metric, + link_params_metric_cmd, + "metric (0-4294967295)", + "Link metric for MPLS-TE purpose\n" + "Metric value in decimal\n") +{ + int idx_number = 1; + VTY_DECLVAR_CONTEXT(interface, ifp); + struct if_link_params *iflp = if_link_params_get(ifp); + uint32_t metric; + + metric = strtoul(argv[idx_number]->arg, NULL, 10); + + if (!iflp) + iflp = if_link_params_enable(ifp); + + /* Update TE metric if needed */ + link_param_cmd_set_uint32(ifp, &iflp->te_metric, LP_TE_METRIC, metric); + + return CMD_SUCCESS; +} + +DEFUN (no_link_params_metric, + no_link_params_metric_cmd, + "no metric", + NO_STR + "Disable Link Metric on this interface\n") +{ + VTY_DECLVAR_CONTEXT(interface, ifp); + + /* Unset TE Metric */ + link_param_cmd_unset(ifp, LP_TE_METRIC); + + return CMD_SUCCESS; +} + +DEFUN (link_params_maxbw, + link_params_maxbw_cmd, + "max-bw BANDWIDTH", + "Maximum bandwidth that can be used\n" + "Bytes/second (IEEE floating point format)\n") +{ + int idx_bandwidth = 1; + VTY_DECLVAR_CONTEXT(interface, ifp); + struct if_link_params *iflp = if_link_params_get(ifp); + + float bw; + + if (sscanf(argv[idx_bandwidth]->arg, "%g", &bw) != 1) { + vty_out(vty, "link_params_maxbw: fscanf: %s\n", + safe_strerror(errno)); + return CMD_WARNING_CONFIG_FAILED; + } + + /* Check that Maximum bandwidth is not lower than other bandwidth + * 
parameters */ + if (iflp && ((bw <= iflp->max_rsv_bw) || (bw <= iflp->unrsv_bw[0]) || + (bw <= iflp->unrsv_bw[1]) || (bw <= iflp->unrsv_bw[2]) || + (bw <= iflp->unrsv_bw[3]) || (bw <= iflp->unrsv_bw[4]) || + (bw <= iflp->unrsv_bw[5]) || (bw <= iflp->unrsv_bw[6]) || + (bw <= iflp->unrsv_bw[7]) || (bw <= iflp->ava_bw) || + (bw <= iflp->res_bw) || (bw <= iflp->use_bw))) { + vty_out(vty, + "Maximum Bandwidth could not be lower than others bandwidth\n"); + return CMD_WARNING_CONFIG_FAILED; + } + + if (!iflp) + iflp = if_link_params_enable(ifp); + + /* Update Maximum Bandwidth if needed */ + link_param_cmd_set_float(ifp, &iflp->max_bw, LP_MAX_BW, bw); + + return CMD_SUCCESS; +} + +DEFUN (link_params_max_rsv_bw, + link_params_max_rsv_bw_cmd, + "max-rsv-bw BANDWIDTH", + "Maximum bandwidth that may be reserved\n" + "Bytes/second (IEEE floating point format)\n") +{ + int idx_bandwidth = 1; + VTY_DECLVAR_CONTEXT(interface, ifp); + struct if_link_params *iflp = if_link_params_get(ifp); + float bw; + + if (sscanf(argv[idx_bandwidth]->arg, "%g", &bw) != 1) { + vty_out(vty, "link_params_max_rsv_bw: fscanf: %s\n", + safe_strerror(errno)); + return CMD_WARNING_CONFIG_FAILED; + } + + /* Check that bandwidth is not greater than maximum bandwidth parameter + */ + if (iflp && bw > iflp->max_bw) { + vty_out(vty, + "Maximum Reservable Bandwidth could not be greater than Maximum Bandwidth (%g)\n", + iflp->max_bw); + return CMD_WARNING_CONFIG_FAILED; + } + + if (!iflp) + iflp = if_link_params_enable(ifp); + + /* Update Maximum Reservable Bandwidth if needed */ + link_param_cmd_set_float(ifp, &iflp->max_rsv_bw, LP_MAX_RSV_BW, bw); + + return CMD_SUCCESS; +} + +DEFUN (link_params_unrsv_bw, + link_params_unrsv_bw_cmd, + "unrsv-bw (0-7) BANDWIDTH", + "Unreserved bandwidth at each priority level\n" + "Priority\n" + "Bytes/second (IEEE floating point format)\n") +{ + int idx_number = 1; + int idx_bandwidth = 2; + VTY_DECLVAR_CONTEXT(interface, ifp); + struct if_link_params *iflp = 
if_link_params_get(ifp); + int priority; + float bw; + + /* We don't have to consider about range check here. */ + if (sscanf(argv[idx_number]->arg, "%d", &priority) != 1) { + vty_out(vty, "link_params_unrsv_bw: fscanf: %s\n", + safe_strerror(errno)); + return CMD_WARNING_CONFIG_FAILED; + } + + if (sscanf(argv[idx_bandwidth]->arg, "%g", &bw) != 1) { + vty_out(vty, "link_params_unrsv_bw: fscanf: %s\n", + safe_strerror(errno)); + return CMD_WARNING_CONFIG_FAILED; + } + + /* Check that bandwidth is not greater than maximum bandwidth parameter + */ + if (iflp && bw > iflp->max_bw) { + vty_out(vty, + "UnReserved Bandwidth could not be greater than Maximum Bandwidth (%g)\n", + iflp->max_bw); + return CMD_WARNING_CONFIG_FAILED; + } + + if (!iflp) + iflp = if_link_params_enable(ifp); + + /* Update Unreserved Bandwidth if needed */ + link_param_cmd_set_float(ifp, &iflp->unrsv_bw[priority], LP_UNRSV_BW, + bw); + + return CMD_SUCCESS; +} + +DEFPY_YANG(link_params_admin_grp, link_params_admin_grp_cmd, + "admin-grp BITPATTERN", + "Administrative group membership\n" + "32-bit Hexadecimal value (e.g. 
0xa1)\n") +{ + char xpath[XPATH_MAXLEN]; + int idx_bitpattern = 1; + unsigned long value; + char value_str[11]; + + VTY_DECLVAR_CONTEXT(interface, ifp); + + snprintf( + xpath, sizeof(xpath), + "/frr-interface:lib/interface[name='%s']/frr-zebra:zebra/link-params/affinities", + ifp->name); + if (yang_dnode_exists(running_config->dnode, xpath)) { + vty_out(vty, + "cannot use the admin-grp command when affinity is set\n"); + return CMD_WARNING_CONFIG_FAILED; + } + + if (sscanf(argv[idx_bitpattern]->arg, "0x%lx", &value) != 1) { + vty_out(vty, "link_params_admin_grp: fscanf: %s\n", + safe_strerror(errno)); + return CMD_WARNING_CONFIG_FAILED; + } + + if (value > 0xFFFFFFFF) { + vty_out(vty, "value must be not be superior to 0xFFFFFFFF\n"); + return CMD_WARNING_CONFIG_FAILED; + } + + snprintf(value_str, sizeof(value_str), "%ld", value); + + nb_cli_enqueue_change( + vty, "./frr-zebra:zebra/link-params/legacy-admin-group", + NB_OP_MODIFY, value_str); + + return nb_cli_apply_changes(vty, NULL); +} + +DEFPY_YANG(no_link_params_admin_grp, no_link_params_admin_grp_cmd, + "no admin-grp", + NO_STR "Disable Administrative group membership on this interface\n") +{ + nb_cli_enqueue_change( + vty, "./frr-zebra:zebra/link-params/legacy-admin-group", + NB_OP_DESTROY, NULL); + + return nb_cli_apply_changes(vty, NULL); +} + +/* RFC5392 & RFC5316: INTER-AS */ +DEFUN (link_params_inter_as, + link_params_inter_as_cmd, + "neighbor A.B.C.D as (1-4294967295)", + "Configure remote ASBR information (Neighbor IP address and AS number)\n" + "Remote IP address in dot decimal A.B.C.D\n" + "Remote AS number\n" + "AS number in the range <1-4294967295>\n") +{ + int idx_ipv4 = 1; + int idx_number = 3; + + VTY_DECLVAR_CONTEXT(interface, ifp); + struct if_link_params *iflp = if_link_params_get(ifp); + struct in_addr addr; + uint32_t as; + + if (!inet_aton(argv[idx_ipv4]->arg, &addr)) { + vty_out(vty, "Please specify Router-Addr by A.B.C.D\n"); + return CMD_WARNING_CONFIG_FAILED; + } + + if (!iflp) + iflp 
= if_link_params_enable(ifp); + + as = strtoul(argv[idx_number]->arg, NULL, 10); + + /* Update Remote IP and Remote AS fields if needed */ + if (IS_PARAM_UNSET(iflp, LP_RMT_AS) || iflp->rmt_as != as + || iflp->rmt_ip.s_addr != addr.s_addr) { + + iflp->rmt_as = as; + iflp->rmt_ip.s_addr = addr.s_addr; + SET_PARAM(iflp, LP_RMT_AS); + + /* force protocols to update LINK STATE due to parameters change + */ + if (if_is_operative(ifp)) + zebra_interface_parameters_update(ifp); + } + return CMD_SUCCESS; +} + +DEFUN (no_link_params_inter_as, + no_link_params_inter_as_cmd, + "no neighbor", + NO_STR + "Remove Neighbor IP address and AS number for Inter-AS TE\n") +{ + VTY_DECLVAR_CONTEXT(interface, ifp); + struct if_link_params *iflp = if_link_params_get(ifp); + + if (!iflp) + return CMD_SUCCESS; + + /* Reset Remote IP and AS neighbor */ + iflp->rmt_as = 0; + iflp->rmt_ip.s_addr = 0; + UNSET_PARAM(iflp, LP_RMT_AS); + + /* force protocols to update LINK STATE due to parameters change */ + if (if_is_operative(ifp)) + zebra_interface_parameters_update(ifp); + + return CMD_SUCCESS; +} + +/* RFC7471: OSPF Traffic Engineering (TE) Metric extensions & + * draft-ietf-isis-metric-extensions-07.txt */ +DEFUN (link_params_delay, + link_params_delay_cmd, + "delay (0-16777215) [min (0-16777215) max (0-16777215)]", + "Unidirectional Average Link Delay\n" + "Average delay in micro-second as decimal (0...16777215)\n" + "Minimum delay\n" + "Minimum delay in micro-second as decimal (0...16777215)\n" + "Maximum delay\n" + "Maximum delay in micro-second as decimal (0...16777215)\n") +{ + /* Get and Check new delay values */ + uint32_t delay = 0, low = 0, high = 0; + delay = strtoul(argv[1]->arg, NULL, 10); + if (argc == 6) { + low = strtoul(argv[3]->arg, NULL, 10); + high = strtoul(argv[5]->arg, NULL, 10); + } + + VTY_DECLVAR_CONTEXT(interface, ifp); + struct if_link_params *iflp = if_link_params_get(ifp); + uint8_t update = 0; + + if (argc == 2) { + /* + * Check new delay value against old Min 
and Max delays if set + * + * RFC 7471 Section 4.2.7: + * It is possible for min delay and max delay to be + * the same value. + * + * Therefore, it is also allowed that the average + * delay be equal to the min delay or max delay. + */ + if (iflp && IS_PARAM_SET(iflp, LP_MM_DELAY) && + (delay < iflp->min_delay || delay > iflp->max_delay)) { + vty_out(vty, + "Average delay should be in range Min (%d) - Max (%d) delay\n", + iflp->min_delay, iflp->max_delay); + return CMD_WARNING_CONFIG_FAILED; + } + + if (!iflp) + iflp = if_link_params_enable(ifp); + + /* Update delay if value is not set or change */ + if (IS_PARAM_UNSET(iflp, LP_DELAY) || iflp->av_delay != delay) { + iflp->av_delay = delay; + SET_PARAM(iflp, LP_DELAY); + update = 1; + } + /* Unset Min and Max delays if already set */ + if (IS_PARAM_SET(iflp, LP_MM_DELAY)) { + iflp->min_delay = 0; + iflp->max_delay = 0; + UNSET_PARAM(iflp, LP_MM_DELAY); + update = 1; + } + } else { + /* + * Check new delays value coherency. See above note + * regarding average delay equal to min/max allowed + */ + if (delay < low || delay > high) { + vty_out(vty, + "Average delay should be in range Min (%d) - Max (%d) delay\n", + low, high); + return CMD_WARNING_CONFIG_FAILED; + } + + if (!iflp) + iflp = if_link_params_enable(ifp); + + /* Update Delays if needed */ + if (IS_PARAM_UNSET(iflp, LP_DELAY) + || IS_PARAM_UNSET(iflp, LP_MM_DELAY) + || iflp->av_delay != delay || iflp->min_delay != low + || iflp->max_delay != high) { + iflp->av_delay = delay; + SET_PARAM(iflp, LP_DELAY); + iflp->min_delay = low; + iflp->max_delay = high; + SET_PARAM(iflp, LP_MM_DELAY); + update = 1; + } + } + + /* force protocols to update LINK STATE due to parameters change */ + if (update == 1 && if_is_operative(ifp)) + zebra_interface_parameters_update(ifp); + + return CMD_SUCCESS; +} + +DEFUN (no_link_params_delay, + no_link_params_delay_cmd, + "no delay", + NO_STR + "Disable Unidirectional Average, Min & Max Link Delay on this interface\n") +{ + 
VTY_DECLVAR_CONTEXT(interface, ifp); + struct if_link_params *iflp = if_link_params_get(ifp); + + if (!iflp) + return CMD_SUCCESS; + + /* Unset Delays */ + iflp->av_delay = 0; + UNSET_PARAM(iflp, LP_DELAY); + iflp->min_delay = 0; + iflp->max_delay = 0; + UNSET_PARAM(iflp, LP_MM_DELAY); + + /* force protocols to update LINK STATE due to parameters change */ + if (if_is_operative(ifp)) + zebra_interface_parameters_update(ifp); + + return CMD_SUCCESS; +} + +DEFUN (link_params_delay_var, + link_params_delay_var_cmd, + "delay-variation (0-16777215)", + "Unidirectional Link Delay Variation\n" + "delay variation in micro-second as decimal (0...16777215)\n") +{ + int idx_number = 1; + VTY_DECLVAR_CONTEXT(interface, ifp); + struct if_link_params *iflp = if_link_params_get(ifp); + uint32_t value; + + value = strtoul(argv[idx_number]->arg, NULL, 10); + + if (!iflp) + iflp = if_link_params_enable(ifp); + + /* Update Delay Variation if needed */ + link_param_cmd_set_uint32(ifp, &iflp->delay_var, LP_DELAY_VAR, value); + + return CMD_SUCCESS; +} + +DEFUN (no_link_params_delay_var, + no_link_params_delay_var_cmd, + "no delay-variation", + NO_STR + "Disable Unidirectional Delay Variation on this interface\n") +{ + VTY_DECLVAR_CONTEXT(interface, ifp); + + /* Unset Delay Variation */ + link_param_cmd_unset(ifp, LP_DELAY_VAR); + + return CMD_SUCCESS; +} + +DEFUN (link_params_pkt_loss, + link_params_pkt_loss_cmd, + "packet-loss PERCENTAGE", + "Unidirectional Link Packet Loss\n" + "percentage of total traffic by 0.000003% step and less than 50.331642%\n") +{ + int idx_percentage = 1; + VTY_DECLVAR_CONTEXT(interface, ifp); + struct if_link_params *iflp = if_link_params_get(ifp); + float fval; + + if (sscanf(argv[idx_percentage]->arg, "%g", &fval) != 1) { + vty_out(vty, "link_params_pkt_loss: fscanf: %s\n", + safe_strerror(errno)); + return CMD_WARNING_CONFIG_FAILED; + } + + if (fval > MAX_PKT_LOSS) + fval = MAX_PKT_LOSS; + + if (!iflp) + iflp = if_link_params_enable(ifp); + + /* Update 
Packet Loss if needed */ + link_param_cmd_set_float(ifp, &iflp->pkt_loss, LP_PKT_LOSS, fval); + + return CMD_SUCCESS; +} + +DEFUN (no_link_params_pkt_loss, + no_link_params_pkt_loss_cmd, + "no packet-loss", + NO_STR + "Disable Unidirectional Link Packet Loss on this interface\n") +{ + VTY_DECLVAR_CONTEXT(interface, ifp); + + /* Unset Packet Loss */ + link_param_cmd_unset(ifp, LP_PKT_LOSS); + + return CMD_SUCCESS; +} + +DEFUN (link_params_res_bw, + link_params_res_bw_cmd, + "res-bw BANDWIDTH", + "Unidirectional Residual Bandwidth\n" + "Bytes/second (IEEE floating point format)\n") +{ + int idx_bandwidth = 1; + VTY_DECLVAR_CONTEXT(interface, ifp); + struct if_link_params *iflp = if_link_params_get(ifp); + float bw; + + if (sscanf(argv[idx_bandwidth]->arg, "%g", &bw) != 1) { + vty_out(vty, "link_params_res_bw: fscanf: %s\n", + safe_strerror(errno)); + return CMD_WARNING_CONFIG_FAILED; + } + + /* Check that bandwidth is not greater than maximum bandwidth parameter + */ + if (iflp && bw > iflp->max_bw) { + vty_out(vty, + "Residual Bandwidth could not be greater than Maximum Bandwidth (%g)\n", + iflp->max_bw); + return CMD_WARNING_CONFIG_FAILED; + } + + if (!iflp) + iflp = if_link_params_enable(ifp); + + /* Update Residual Bandwidth if needed */ + link_param_cmd_set_float(ifp, &iflp->res_bw, LP_RES_BW, bw); + + return CMD_SUCCESS; +} + +DEFUN (no_link_params_res_bw, + no_link_params_res_bw_cmd, + "no res-bw", + NO_STR + "Disable Unidirectional Residual Bandwidth on this interface\n") +{ + VTY_DECLVAR_CONTEXT(interface, ifp); + + /* Unset Residual Bandwidth */ + link_param_cmd_unset(ifp, LP_RES_BW); + + return CMD_SUCCESS; +} + +DEFUN (link_params_ava_bw, + link_params_ava_bw_cmd, + "ava-bw BANDWIDTH", + "Unidirectional Available Bandwidth\n" + "Bytes/second (IEEE floating point format)\n") +{ + int idx_bandwidth = 1; + VTY_DECLVAR_CONTEXT(interface, ifp); + struct if_link_params *iflp = if_link_params_get(ifp); + float bw; + + if (sscanf(argv[idx_bandwidth]->arg, "%g", 
&bw) != 1) { + vty_out(vty, "link_params_ava_bw: fscanf: %s\n", + safe_strerror(errno)); + return CMD_WARNING_CONFIG_FAILED; + } + + /* Check that bandwidth is not greater than maximum bandwidth parameter + */ + if (iflp && bw > iflp->max_bw) { + vty_out(vty, + "Available Bandwidth could not be greater than Maximum Bandwidth (%g)\n", + iflp->max_bw); + return CMD_WARNING_CONFIG_FAILED; + } + + if (!iflp) + iflp = if_link_params_enable(ifp); + + /* Update Residual Bandwidth if needed */ + link_param_cmd_set_float(ifp, &iflp->ava_bw, LP_AVA_BW, bw); + + return CMD_SUCCESS; +} + +DEFUN (no_link_params_ava_bw, + no_link_params_ava_bw_cmd, + "no ava-bw", + NO_STR + "Disable Unidirectional Available Bandwidth on this interface\n") +{ + VTY_DECLVAR_CONTEXT(interface, ifp); + + /* Unset Available Bandwidth */ + link_param_cmd_unset(ifp, LP_AVA_BW); + + return CMD_SUCCESS; +} + +DEFUN (link_params_use_bw, + link_params_use_bw_cmd, + "use-bw BANDWIDTH", + "Unidirectional Utilised Bandwidth\n" + "Bytes/second (IEEE floating point format)\n") +{ + int idx_bandwidth = 1; + VTY_DECLVAR_CONTEXT(interface, ifp); + struct if_link_params *iflp = if_link_params_get(ifp); + float bw; + + if (sscanf(argv[idx_bandwidth]->arg, "%g", &bw) != 1) { + vty_out(vty, "link_params_use_bw: fscanf: %s\n", + safe_strerror(errno)); + return CMD_WARNING_CONFIG_FAILED; + } + + /* Check that bandwidth is not greater than maximum bandwidth parameter + */ + if (iflp && bw > iflp->max_bw) { + vty_out(vty, + "Utilised Bandwidth could not be greater than Maximum Bandwidth (%g)\n", + iflp->max_bw); + return CMD_WARNING_CONFIG_FAILED; + } + + if (!iflp) + iflp = if_link_params_enable(ifp); + + /* Update Utilized Bandwidth if needed */ + link_param_cmd_set_float(ifp, &iflp->use_bw, LP_USE_BW, bw); + + return CMD_SUCCESS; +} + +DEFUN (no_link_params_use_bw, + no_link_params_use_bw_cmd, + "no use-bw", + NO_STR + "Disable Unidirectional Utilised Bandwidth on this interface\n") +{ + VTY_DECLVAR_CONTEXT(interface, 
ifp); + + /* Unset Utilised Bandwidth */ + link_param_cmd_unset(ifp, LP_USE_BW); + + return CMD_SUCCESS; +} + +static int ag_change(struct vty *vty, int argc, struct cmd_token **argv, + const char *xpath, bool no, int start_idx) +{ + for (int i = start_idx; i < argc; i++) + nb_cli_enqueue_change(vty, xpath, + no ? NB_OP_DESTROY : NB_OP_CREATE, + argv[i]->arg); + return nb_cli_apply_changes(vty, NULL); +} + +/* + * XPath: + * /frr-interface:lib/interface/frr-zebra:zebra/link-params/affinities/affinity + */ +DEFPY_YANG(link_params_affinity, link_params_affinity_cmd, + "[no] affinity NAME...", + NO_STR + "Interface affinities\n" + "Affinity names\n") +{ + VTY_DECLVAR_CONTEXT(interface, ifp); + char xpath[XPATH_MAXLEN]; + + snprintf( + xpath, sizeof(xpath), + "/frr-interface:lib/interface[name='%s']/frr-zebra:zebra/link-params/legacy-admin-group", + ifp->name); + if (yang_dnode_exists(running_config->dnode, xpath)) { + vty_out(vty, + "cannot use the affinity command when admin-grp is set\n"); + return CMD_WARNING_CONFIG_FAILED; + } + + return ag_change(vty, argc, argv, + "./frr-zebra:zebra/link-params/affinities/affinity", + no, no ? 
2 : 1); +} + + +/* + * XPath: + * /frr-interface:lib/interface/frr-zebra:zebra/link-params/affinities/affinity-mode + */ +DEFPY_YANG(link_params_affinity_mode, link_params_affinity_mode_cmd, + "affinity-mode $affmode", + "Interface affinity mode\n" + "Standard Admin-Group only RFC3630,5305,5329 (default)\n" + "Extended Admin-Group only RFC7308\n" + "Standard and extended Admin-Group format\n") +{ + const char *xpath = "./frr-zebra:zebra/link-params/affinity-mode"; + + nb_cli_enqueue_change(vty, xpath, NB_OP_MODIFY, affmode); + + return nb_cli_apply_changes(vty, NULL); +} + +DEFPY_YANG(no_link_params_affinity_mode, no_link_params_affinity_mode_cmd, + "no affinity-mode []", + NO_STR + "Interface affinity mode\n" + "Standard Admin-Group only RFC3630,5305,5329 (default)\n" + "Extended Admin-Group only RFC7308\n" + "Standard and extended Admin-Group format\n") +{ + const char *xpath = "./frr-zebra:zebra/link-params/affinity-mode"; + + nb_cli_enqueue_change(vty, xpath, NB_OP_MODIFY, "standard"); + + return nb_cli_apply_changes(vty, NULL); +} + +static int ag_iter_cb(const struct lyd_node *dnode, void *arg) +{ + struct vty *vty = (struct vty *)arg; + + vty_out(vty, " %s", yang_dnode_get_string(dnode, ".")); + return YANG_ITER_CONTINUE; +} + +void cli_show_legacy_admin_group(struct vty *vty, const struct lyd_node *dnode, + bool show_defaults) +{ + if (!yang_dnode_exists(dnode, "./legacy-admin-group")) + return; + + vty_out(vty, " admin-group 0x%x\n", + yang_dnode_get_uint32(dnode, "./legacy-admin-group")); +} + +void cli_show_affinity_mode(struct vty *vty, const struct lyd_node *dnode, + bool show_defaults) +{ + enum affinity_mode affinity_mode = yang_dnode_get_enum(dnode, "."); + + if (affinity_mode == AFFINITY_MODE_STANDARD) + vty_out(vty, " affinity-mode standard\n"); + else if (affinity_mode == AFFINITY_MODE_BOTH) + vty_out(vty, " affinity-mode both\n"); +} + +void cli_show_affinity(struct vty *vty, const struct lyd_node *dnode, + bool show_defaults) +{ + if 
(!yang_dnode_exists(dnode, "./affinity")) + return; + + vty_out(vty, " affinity"); + yang_dnode_iterate(ag_iter_cb, vty, dnode, "./affinity"); + vty_out(vty, "\n"); +} + +int if_ip_address_install(struct interface *ifp, struct prefix *prefix, + const char *label, struct prefix *pp) +{ + struct zebra_if *if_data; + struct prefix_ipv4 lp; + struct prefix_ipv4 *p; + struct connected *ifc; + enum zebra_dplane_result dplane_res; + + if_data = ifp->info; + + lp.family = prefix->family; + lp.prefix = prefix->u.prefix4; + lp.prefixlen = prefix->prefixlen; + apply_mask_ipv4(&lp); + + ifc = connected_check_ptp(ifp, &lp, pp ? pp : NULL); + if (!ifc) { + ifc = connected_new(); + ifc->ifp = ifp; + + /* Address. */ + p = prefix_ipv4_new(); + *p = lp; + ifc->address = (struct prefix *)p; + + if (pp) { + SET_FLAG(ifc->flags, ZEBRA_IFA_PEER); + p = prefix_ipv4_new(); + *p = *(struct prefix_ipv4 *)pp; + ifc->destination = (struct prefix *)p; + } + + /* Label. */ + if (label) + ifc->label = XSTRDUP(MTYPE_CONNECTED_LABEL, label); + + /* Add to linked list. */ + listnode_add(ifp->connected, ifc); + } + + /* This address is configured from zebra. */ + if (!CHECK_FLAG(ifc->conf, ZEBRA_IFC_CONFIGURED)) + SET_FLAG(ifc->conf, ZEBRA_IFC_CONFIGURED); + + /* In case of this route need to install kernel. */ + if (!CHECK_FLAG(ifc->conf, ZEBRA_IFC_QUEUED) && + CHECK_FLAG(ifp->status, ZEBRA_INTERFACE_ACTIVE) && + !(if_data && if_data->shutdown == IF_ZEBRA_DATA_ON)) { + /* Some system need to up the interface to set IP address. */ + if (!if_is_up(ifp)) { + if_set_flags(ifp, IFF_UP | IFF_RUNNING); + if_refresh(ifp); + } + + dplane_res = dplane_intf_addr_set(ifp, ifc); + if (dplane_res == ZEBRA_DPLANE_REQUEST_FAILURE) { + zlog_debug( + "dplane can't set interface IP address: %s.", + dplane_res2str(dplane_res)); + return NB_ERR; + } + + SET_FLAG(ifc->conf, ZEBRA_IFC_QUEUED); + /* The address will be advertised to zebra clients when the + * notification + * from the kernel has been received. 
+ * It will also be added to the subnet chain list, then. */ + } + + return 0; +} + +static int ip_address_install(struct vty *vty, struct interface *ifp, + const char *addr_str, const char *peer_str, + const char *label) +{ + struct zebra_if *if_data; + struct prefix_ipv4 lp, pp; + struct connected *ifc; + struct prefix_ipv4 *p; + int ret; + enum zebra_dplane_result dplane_res; + + if_data = ifp->info; + + ret = str2prefix_ipv4(addr_str, &lp); + if (ret <= 0) { + vty_out(vty, "%% Malformed address \n"); + return CMD_WARNING_CONFIG_FAILED; + } + + if (ipv4_martian(&lp.prefix)) { + vty_out(vty, "%% Invalid address\n"); + return CMD_WARNING_CONFIG_FAILED; + } + + if (peer_str) { + if (lp.prefixlen != IPV4_MAX_BITLEN) { + vty_out(vty, + "%% Local prefix length for P-t-P address must be /32\n"); + return CMD_WARNING_CONFIG_FAILED; + } + + ret = str2prefix_ipv4(peer_str, &pp); + if (ret <= 0) { + vty_out(vty, "%% Malformed peer address\n"); + return CMD_WARNING_CONFIG_FAILED; + } + } + + ifc = connected_check_ptp(ifp, &lp, peer_str ? &pp : NULL); + if (!ifc) { + ifc = connected_new(); + ifc->ifp = ifp; + + /* Address. */ + p = prefix_ipv4_new(); + *p = lp; + ifc->address = (struct prefix *)p; + + if (peer_str) { + SET_FLAG(ifc->flags, ZEBRA_IFA_PEER); + p = prefix_ipv4_new(); + *p = pp; + ifc->destination = (struct prefix *)p; + } + + /* Label. */ + if (label) + ifc->label = XSTRDUP(MTYPE_CONNECTED_LABEL, label); + + /* Add to linked list. */ + listnode_add(ifp->connected, ifc); + } + + /* This address is configured from zebra. */ + if (!CHECK_FLAG(ifc->conf, ZEBRA_IFC_CONFIGURED)) + SET_FLAG(ifc->conf, ZEBRA_IFC_CONFIGURED); + + /* In case of this route need to install kernel. */ + if (!CHECK_FLAG(ifc->conf, ZEBRA_IFC_QUEUED) && + CHECK_FLAG(ifp->status, ZEBRA_INTERFACE_ACTIVE) && + !(if_data && if_data->shutdown == IF_ZEBRA_DATA_ON)) { + /* Some system need to up the interface to set IP address. 
*/ + if (!if_is_up(ifp)) { + if_set_flags(ifp, IFF_UP | IFF_RUNNING); + if_refresh(ifp); + } + + dplane_res = dplane_intf_addr_set(ifp, ifc); + if (dplane_res == ZEBRA_DPLANE_REQUEST_FAILURE) { + vty_out(vty, "%% Can't set interface IP address: %s.\n", + dplane_res2str(dplane_res)); + return CMD_WARNING_CONFIG_FAILED; + } + + SET_FLAG(ifc->conf, ZEBRA_IFC_QUEUED); + /* The address will be advertised to zebra clients when the + * notification + * from the kernel has been received. + * It will also be added to the subnet chain list, then. */ + } + + return CMD_SUCCESS; +} + +int if_ip_address_uinstall(struct interface *ifp, struct prefix *prefix) +{ + struct connected *ifc = NULL; + enum zebra_dplane_result dplane_res; + + if (prefix->family == AF_INET) { + /* Check current interface address. */ + ifc = connected_check_ptp(ifp, prefix, NULL); + if (!ifc) { + zlog_debug("interface %s Can't find address", + ifp->name); + return -1; + } + + } else if (prefix->family == AF_INET6) { + /* Check current interface address. */ + ifc = connected_check(ifp, prefix); + } + + if (!ifc) { + zlog_debug("interface %s Can't find address", ifp->name); + return -1; + } + UNSET_FLAG(ifc->conf, ZEBRA_IFC_CONFIGURED); + + /* This is not real address or interface is not active. */ + if (!CHECK_FLAG(ifc->conf, ZEBRA_IFC_QUEUED) + || !CHECK_FLAG(ifp->status, ZEBRA_INTERFACE_ACTIVE)) { + listnode_delete(ifp->connected, ifc); + connected_free(&ifc); + return CMD_WARNING_CONFIG_FAILED; + } + + /* This is real route. 
*/ + dplane_res = dplane_intf_addr_unset(ifp, ifc); + if (dplane_res == ZEBRA_DPLANE_REQUEST_FAILURE) { + zlog_debug("Can't unset interface IP address: %s.", + dplane_res2str(dplane_res)); + return -1; + } + UNSET_FLAG(ifc->conf, ZEBRA_IFC_QUEUED); + + return 0; +} + +static int ip_address_uninstall(struct vty *vty, struct interface *ifp, + const char *addr_str, const char *peer_str, + const char *label) +{ + struct prefix_ipv4 lp, pp; + struct connected *ifc; + int ret; + enum zebra_dplane_result dplane_res; + + /* Convert to prefix structure. */ + ret = str2prefix_ipv4(addr_str, &lp); + if (ret <= 0) { + vty_out(vty, "%% Malformed address \n"); + return CMD_WARNING_CONFIG_FAILED; + } + + if (peer_str) { + if (lp.prefixlen != IPV4_MAX_BITLEN) { + vty_out(vty, + "%% Local prefix length for P-t-P address must be /32\n"); + return CMD_WARNING_CONFIG_FAILED; + } + + ret = str2prefix_ipv4(peer_str, &pp); + if (ret <= 0) { + vty_out(vty, "%% Malformed peer address\n"); + return CMD_WARNING_CONFIG_FAILED; + } + } + + /* Check current interface address. */ + ifc = connected_check_ptp(ifp, &lp, peer_str ? &pp : NULL); + if (!ifc) { + vty_out(vty, "%% Can't find address\n"); + return CMD_WARNING_CONFIG_FAILED; + } + + /* This is not configured address. */ + if (!CHECK_FLAG(ifc->conf, ZEBRA_IFC_CONFIGURED)) + return CMD_WARNING_CONFIG_FAILED; + + UNSET_FLAG(ifc->conf, ZEBRA_IFC_CONFIGURED); + + /* This is not real address or interface is not active. */ + if (!CHECK_FLAG(ifc->conf, ZEBRA_IFC_QUEUED) + || !CHECK_FLAG(ifp->status, ZEBRA_INTERFACE_ACTIVE)) { + listnode_delete(ifp->connected, ifc); + connected_free(&ifc); + return CMD_WARNING_CONFIG_FAILED; + } + + /* This is real route. 
*/ + dplane_res = dplane_intf_addr_unset(ifp, ifc); + if (dplane_res == ZEBRA_DPLANE_REQUEST_FAILURE) { + vty_out(vty, "%% Can't unset interface IP address: %s.\n", + dplane_res2str(dplane_res)); + return CMD_WARNING_CONFIG_FAILED; + } + UNSET_FLAG(ifc->conf, ZEBRA_IFC_QUEUED); + /* we will receive a kernel notification about this route being removed. + * this will trigger its removal from the connected list. */ + return CMD_SUCCESS; +} + +DEFUN (ip_address, + ip_address_cmd, + "ip address A.B.C.D/M", + "Interface Internet Protocol config commands\n" + "Set the IP address of an interface\n" + "IP address (e.g. 10.0.0.1/8)\n") +{ + int idx_ipv4_prefixlen = 2; + VTY_DECLVAR_CONTEXT(interface, ifp); + return ip_address_install(vty, ifp, argv[idx_ipv4_prefixlen]->arg, NULL, + NULL); +} + +DEFUN (no_ip_address, + no_ip_address_cmd, + "no ip address A.B.C.D/M", + NO_STR + "Interface Internet Protocol config commands\n" + "Set the IP address of an interface\n" + "IP Address (e.g. 10.0.0.1/8)\n") +{ + int idx_ipv4_prefixlen = 3; + VTY_DECLVAR_CONTEXT(interface, ifp); + return ip_address_uninstall(vty, ifp, argv[idx_ipv4_prefixlen]->arg, + NULL, NULL); +} + +DEFUN(ip_address_peer, + ip_address_peer_cmd, + "ip address A.B.C.D peer A.B.C.D/M", + "Interface Internet Protocol config commands\n" + "Set the IP address of an interface\n" + "Local IP (e.g. 10.0.0.1) for P-t-P address\n" + "Specify P-t-P address\n" + "Peer IP address (e.g. 10.0.0.1/8)\n") +{ + VTY_DECLVAR_CONTEXT(interface, ifp); + return ip_address_install(vty, ifp, argv[2]->arg, argv[4]->arg, NULL); +} + +DEFUN(no_ip_address_peer, + no_ip_address_peer_cmd, + "no ip address A.B.C.D peer A.B.C.D/M", + NO_STR + "Interface Internet Protocol config commands\n" + "Set the IP address of an interface\n" + "Local IP (e.g. 10.0.0.1) for P-t-P address\n" + "Specify P-t-P address\n" + "Peer IP address (e.g. 
10.0.0.1/8)\n") +{ + VTY_DECLVAR_CONTEXT(interface, ifp); + return ip_address_uninstall(vty, ifp, argv[3]->arg, argv[5]->arg, NULL); +} + +#ifdef HAVE_NETLINK +DEFUN (ip_address_label, + ip_address_label_cmd, + "ip address A.B.C.D/M label LINE", + "Interface Internet Protocol config commands\n" + "Set the IP address of an interface\n" + "IP address (e.g. 10.0.0.1/8)\n" + "Label of this address\n" + "Label\n") +{ + int idx_ipv4_prefixlen = 2; + int idx_line = 4; + VTY_DECLVAR_CONTEXT(interface, ifp); + return ip_address_install(vty, ifp, argv[idx_ipv4_prefixlen]->arg, NULL, + argv[idx_line]->arg); +} + +DEFUN (no_ip_address_label, + no_ip_address_label_cmd, + "no ip address A.B.C.D/M label LINE", + NO_STR + "Interface Internet Protocol config commands\n" + "Set the IP address of an interface\n" + "IP address (e.g. 10.0.0.1/8)\n" + "Label of this address\n" + "Label\n") +{ + int idx_ipv4_prefixlen = 3; + int idx_line = 5; + VTY_DECLVAR_CONTEXT(interface, ifp); + return ip_address_uninstall(vty, ifp, argv[idx_ipv4_prefixlen]->arg, + NULL, argv[idx_line]->arg); +} +#endif /* HAVE_NETLINK */ + +int if_ipv6_address_install(struct interface *ifp, struct prefix *prefix, + const char *label) +{ + struct zebra_if *if_data; + struct prefix_ipv6 cp; + struct connected *ifc; + struct prefix_ipv6 *p; + enum zebra_dplane_result dplane_res; + + if_data = ifp->info; + + cp.family = prefix->family; + cp.prefixlen = prefix->prefixlen; + cp.prefix = prefix->u.prefix6; + apply_mask_ipv6(&cp); + + ifc = connected_check(ifp, (struct prefix *)&cp); + if (!ifc) { + ifc = connected_new(); + ifc->ifp = ifp; + + /* Address. */ + p = prefix_ipv6_new(); + *p = cp; + ifc->address = (struct prefix *)p; + + /* Label. */ + if (label) + ifc->label = XSTRDUP(MTYPE_CONNECTED_LABEL, label); + + /* Add to linked list. */ + listnode_add(ifp->connected, ifc); + } + + /* This address is configured from zebra. 
*/ + if (!CHECK_FLAG(ifc->conf, ZEBRA_IFC_CONFIGURED)) + SET_FLAG(ifc->conf, ZEBRA_IFC_CONFIGURED); + + /* In case of this route need to install kernel. */ + if (!CHECK_FLAG(ifc->conf, ZEBRA_IFC_QUEUED) && + CHECK_FLAG(ifp->status, ZEBRA_INTERFACE_ACTIVE) && + !(if_data && if_data->shutdown == IF_ZEBRA_DATA_ON)) { + /* Some system need to up the interface to set IP address. */ + if (!if_is_up(ifp)) { + if_set_flags(ifp, IFF_UP | IFF_RUNNING); + if_refresh(ifp); + } + + dplane_res = dplane_intf_addr_set(ifp, ifc); + if (dplane_res == ZEBRA_DPLANE_REQUEST_FAILURE) { + zlog_debug( + "dplane can't set interface IP address: %s.", + dplane_res2str(dplane_res)); + return NB_ERR; + } + + SET_FLAG(ifc->conf, ZEBRA_IFC_QUEUED); + /* The address will be advertised to zebra clients when the + * notification + * from the kernel has been received. */ + } + + return 0; +} + +static int ipv6_address_install(struct vty *vty, struct interface *ifp, + const char *addr_str, const char *peer_str, + const char *label) +{ + struct zebra_if *if_data; + struct prefix_ipv6 cp; + struct connected *ifc; + struct prefix_ipv6 *p; + int ret; + enum zebra_dplane_result dplane_res; + + if_data = ifp->info; + + ret = str2prefix_ipv6(addr_str, &cp); + if (ret <= 0) { + vty_out(vty, "%% Malformed address \n"); + return CMD_WARNING_CONFIG_FAILED; + } + + if (ipv6_martian(&cp.prefix)) { + vty_out(vty, "%% Invalid address\n"); + return CMD_WARNING_CONFIG_FAILED; + } + + ifc = connected_check(ifp, (struct prefix *)&cp); + if (!ifc) { + ifc = connected_new(); + ifc->ifp = ifp; + + /* Address. */ + p = prefix_ipv6_new(); + *p = cp; + ifc->address = (struct prefix *)p; + + /* Label. */ + if (label) + ifc->label = XSTRDUP(MTYPE_CONNECTED_LABEL, label); + + /* Add to linked list. */ + listnode_add(ifp->connected, ifc); + } + + /* This address is configured from zebra. 
*/ + if (!CHECK_FLAG(ifc->conf, ZEBRA_IFC_CONFIGURED)) + SET_FLAG(ifc->conf, ZEBRA_IFC_CONFIGURED); + + /* In case of this route need to install kernel. */ + if (!CHECK_FLAG(ifc->conf, ZEBRA_IFC_QUEUED) && + CHECK_FLAG(ifp->status, ZEBRA_INTERFACE_ACTIVE) && + !(if_data && if_data->shutdown == IF_ZEBRA_DATA_ON)) { + /* Some system need to up the interface to set IP address. */ + if (!if_is_up(ifp)) { + if_set_flags(ifp, IFF_UP | IFF_RUNNING); + if_refresh(ifp); + } + + dplane_res = dplane_intf_addr_set(ifp, ifc); + if (dplane_res == ZEBRA_DPLANE_REQUEST_FAILURE) { + vty_out(vty, "%% Can't set interface IP address: %s.\n", + dplane_res2str(dplane_res)); + return CMD_WARNING_CONFIG_FAILED; + } + + SET_FLAG(ifc->conf, ZEBRA_IFC_QUEUED); + /* The address will be advertised to zebra clients when the + * notification + * from the kernel has been received. */ + } + + return CMD_SUCCESS; +} + +/* Return true if an ipv6 address is configured on ifp */ +int ipv6_address_configured(struct interface *ifp) +{ + struct connected *connected; + struct listnode *node; + + for (ALL_LIST_ELEMENTS_RO(ifp->connected, node, connected)) + if (CHECK_FLAG(connected->conf, ZEBRA_IFC_REAL) + && (connected->address->family == AF_INET6)) + return 1; + + return 0; +} + +static int ipv6_address_uninstall(struct vty *vty, struct interface *ifp, + const char *addr_str, const char *peer_str, + const char *label) +{ + struct prefix_ipv6 cp; + struct connected *ifc; + int ret; + enum zebra_dplane_result dplane_res; + + /* Convert to prefix structure. */ + ret = str2prefix_ipv6(addr_str, &cp); + if (ret <= 0) { + vty_out(vty, "%% Malformed address \n"); + return CMD_WARNING_CONFIG_FAILED; + } + + /* Check current interface address. */ + ifc = connected_check(ifp, (struct prefix *)&cp); + if (!ifc) { + vty_out(vty, "%% Can't find address\n"); + return CMD_WARNING_CONFIG_FAILED; + } + + /* This is not configured address. 
*/ + if (!CHECK_FLAG(ifc->conf, ZEBRA_IFC_CONFIGURED)) + return CMD_WARNING_CONFIG_FAILED; + + UNSET_FLAG(ifc->conf, ZEBRA_IFC_CONFIGURED); + + /* This is not real address or interface is not active. */ + if (!CHECK_FLAG(ifc->conf, ZEBRA_IFC_QUEUED) + || !CHECK_FLAG(ifp->status, ZEBRA_INTERFACE_ACTIVE)) { + listnode_delete(ifp->connected, ifc); + connected_free(&ifc); + return CMD_WARNING_CONFIG_FAILED; + } + + /* This is real route. */ + dplane_res = dplane_intf_addr_unset(ifp, ifc); + if (dplane_res == ZEBRA_DPLANE_REQUEST_FAILURE) { + vty_out(vty, "%% Can't unset interface IP address: %s.\n", + dplane_res2str(dplane_res)); + return CMD_WARNING_CONFIG_FAILED; + } + + UNSET_FLAG(ifc->conf, ZEBRA_IFC_QUEUED); + /* This information will be propagated to the zclients when the + * kernel notification is received. */ + return CMD_SUCCESS; +} + +DEFUN (ipv6_address, + ipv6_address_cmd, + "ipv6 address X:X::X:X/M", + "Interface IPv6 config commands\n" + "Set the IP address of an interface\n" + "IPv6 address (e.g. 3ffe:506::1/48)\n") +{ + int idx_ipv6_prefixlen = 2; + VTY_DECLVAR_CONTEXT(interface, ifp); + return ipv6_address_install(vty, ifp, argv[idx_ipv6_prefixlen]->arg, + NULL, NULL); +} + +DEFUN (no_ipv6_address, + no_ipv6_address_cmd, + "no ipv6 address X:X::X:X/M", + NO_STR + "Interface IPv6 config commands\n" + "Set the IP address of an interface\n" + "IPv6 address (e.g. 
/*
 * Write the "link-params" configuration section of one interface to vty.
 *
 * Returns -1 without writing anything when ifp is NULL or has no link
 * parameters, 0 otherwise. The section is bracketed by the "link-params" /
 * "exit-link-params" lines; lines in between are emitted only for
 * parameters whose LP_* bit is set.
 */
static int link_params_config_write(struct vty *vty, struct interface *ifp)
{
	const struct lyd_node *dnode;
	char xpath[XPATH_MAXLEN];
	int i;

	if ((ifp == NULL) || !HAS_LINK_PARAMS(ifp))
		return -1;

	struct if_link_params *iflp = ifp->link_params;

	vty_out(vty, " link-params\n");
	vty_out(vty, " enable\n");
	/* Metric and bandwidths are skipped while they still equal the
	 * interface metric / default bandwidth.
	 */
	if (IS_PARAM_SET(iflp, LP_TE_METRIC) && iflp->te_metric != ifp->metric)
		vty_out(vty, " metric %u\n", iflp->te_metric);
	if (IS_PARAM_SET(iflp, LP_MAX_BW) && iflp->max_bw != iflp->default_bw)
		vty_out(vty, " max-bw %g\n", iflp->max_bw);
	if (IS_PARAM_SET(iflp, LP_MAX_RSV_BW)
	    && iflp->max_rsv_bw != iflp->default_bw)
		vty_out(vty, " max-rsv-bw %g\n", iflp->max_rsv_bw);
	if (IS_PARAM_SET(iflp, LP_UNRSV_BW)) {
		/* One line per unrsv_bw slot (indices 0..7) that differs from
		 * the default.
		 */
		for (i = 0; i < 8; i++)
			if (iflp->unrsv_bw[i] != iflp->default_bw)
				vty_out(vty, " unrsv-bw %d %g\n", i,
					iflp->unrsv_bw[i]);
	}

	/* Let the northbound CLI callbacks print whatever is stored under
	 * this interface's link-params node in the running configuration.
	 */
	snprintf(
		xpath, sizeof(xpath),
		"/frr-interface:lib/interface[name='%s']/frr-zebra:zebra/link-params",
		ifp->name);
	dnode = yang_dnode_get(running_config->dnode, xpath);
	if (dnode)
		nb_cli_show_dnode_cmds(vty, dnode, false);

	if (IS_PARAM_SET(iflp, LP_DELAY)) {
		/* The min/max pair is appended to the same "delay" line. */
		vty_out(vty, " delay %u", iflp->av_delay);
		if (IS_PARAM_SET(iflp, LP_MM_DELAY)) {
			vty_out(vty, " min %u", iflp->min_delay);
			vty_out(vty, " max %u", iflp->max_delay);
		}
		vty_out(vty, "\n");
	}
	if (IS_PARAM_SET(iflp, LP_DELAY_VAR))
		vty_out(vty, " delay-variation %u\n", iflp->delay_var);
	if (IS_PARAM_SET(iflp, LP_PKT_LOSS))
		vty_out(vty, " packet-loss %g\n", iflp->pkt_loss);
	if (IS_PARAM_SET(iflp, LP_AVA_BW))
		vty_out(vty, " ava-bw %g\n", iflp->ava_bw);
	if (IS_PARAM_SET(iflp, LP_RES_BW))
		vty_out(vty, " res-bw %g\n", iflp->res_bw);
	if (IS_PARAM_SET(iflp, LP_USE_BW))
		vty_out(vty, " use-bw %g\n", iflp->use_bw);
	if (IS_PARAM_SET(iflp, LP_RMT_AS))
		vty_out(vty, " neighbor %pI4 as %u\n", &iflp->rmt_ip,
			iflp->rmt_as);

	vty_out(vty, " exit-link-params\n");
	return 0;
}
"" + : "no "); + + if (if_data->mpls_config == IF_ZEBRA_DATA_ON) + vty_out(vty, " mpls enable\n"); + else if (if_data->mpls_config == + IF_ZEBRA_DATA_OFF) + vty_out(vty, " mpls disable\n"); + } + + hook_call(zebra_if_config_wr, vty, ifp); + zebra_evpn_mh_if_write(vty, ifp); + link_params_config_write(vty, ifp); + + if_vty_config_end(vty); + } + return 0; +} + +/* Allocate and initialize interface vector. */ +void zebra_if_init(void) +{ + /* Initialize interface and new hook. */ + hook_register_prio(if_add, 0, if_zebra_new_hook); + hook_register_prio(if_del, 0, if_zebra_delete_hook); + + /* Install configuration write function. */ + if_cmd_init(if_config_write); + install_node(&link_params_node); + /* + * This is *intentionally* setting this to NULL, signaling + * that interface creation for zebra acts differently + */ + if_zapi_callbacks(NULL, NULL, NULL, NULL); + + install_element(VIEW_NODE, &show_interface_cmd); + install_element(VIEW_NODE, &show_interface_vrf_all_cmd); + install_element(VIEW_NODE, &show_interface_name_vrf_cmd); + install_element(VIEW_NODE, &show_interface_name_vrf_all_cmd); + + install_element(ENABLE_NODE, &show_interface_desc_cmd); + install_element(ENABLE_NODE, &show_interface_desc_vrf_all_cmd); + install_element(INTERFACE_NODE, &multicast_cmd); + install_element(INTERFACE_NODE, &no_multicast_cmd); + install_element(INTERFACE_NODE, &mpls_cmd); + install_element(INTERFACE_NODE, &linkdetect_cmd); + install_element(INTERFACE_NODE, &no_linkdetect_cmd); + install_element(INTERFACE_NODE, &shutdown_if_cmd); + install_element(INTERFACE_NODE, &no_shutdown_if_cmd); + install_element(INTERFACE_NODE, &bandwidth_if_cmd); + install_element(INTERFACE_NODE, &no_bandwidth_if_cmd); + install_element(INTERFACE_NODE, &ip_address_cmd); + install_element(INTERFACE_NODE, &no_ip_address_cmd); + install_element(INTERFACE_NODE, &ip_address_peer_cmd); + install_element(INTERFACE_NODE, &no_ip_address_peer_cmd); + install_element(INTERFACE_NODE, &ipv6_address_cmd); + 
install_element(INTERFACE_NODE, &no_ipv6_address_cmd); +#ifdef HAVE_NETLINK + install_element(INTERFACE_NODE, &ip_address_label_cmd); + install_element(INTERFACE_NODE, &no_ip_address_label_cmd); +#endif /* HAVE_NETLINK */ + install_element(INTERFACE_NODE, &link_params_cmd); + install_default(LINK_PARAMS_NODE); + install_element(LINK_PARAMS_NODE, &link_params_enable_cmd); + install_element(LINK_PARAMS_NODE, &no_link_params_enable_cmd); + install_element(LINK_PARAMS_NODE, &link_params_metric_cmd); + install_element(LINK_PARAMS_NODE, &no_link_params_metric_cmd); + install_element(LINK_PARAMS_NODE, &link_params_maxbw_cmd); + install_element(LINK_PARAMS_NODE, &link_params_max_rsv_bw_cmd); + install_element(LINK_PARAMS_NODE, &link_params_unrsv_bw_cmd); + install_element(LINK_PARAMS_NODE, &link_params_admin_grp_cmd); + install_element(LINK_PARAMS_NODE, &no_link_params_admin_grp_cmd); + install_element(LINK_PARAMS_NODE, &link_params_inter_as_cmd); + install_element(LINK_PARAMS_NODE, &no_link_params_inter_as_cmd); + install_element(LINK_PARAMS_NODE, &link_params_delay_cmd); + install_element(LINK_PARAMS_NODE, &no_link_params_delay_cmd); + install_element(LINK_PARAMS_NODE, &link_params_delay_var_cmd); + install_element(LINK_PARAMS_NODE, &no_link_params_delay_var_cmd); + install_element(LINK_PARAMS_NODE, &link_params_pkt_loss_cmd); + install_element(LINK_PARAMS_NODE, &no_link_params_pkt_loss_cmd); + install_element(LINK_PARAMS_NODE, &link_params_ava_bw_cmd); + install_element(LINK_PARAMS_NODE, &no_link_params_ava_bw_cmd); + install_element(LINK_PARAMS_NODE, &link_params_res_bw_cmd); + install_element(LINK_PARAMS_NODE, &no_link_params_res_bw_cmd); + install_element(LINK_PARAMS_NODE, &link_params_use_bw_cmd); + install_element(LINK_PARAMS_NODE, &no_link_params_use_bw_cmd); + install_element(LINK_PARAMS_NODE, &link_params_affinity_cmd); + install_element(LINK_PARAMS_NODE, &link_params_affinity_mode_cmd); + install_element(LINK_PARAMS_NODE, &no_link_params_affinity_mode_cmd); + 
/* Zebra "slave" interface type: the kind of master device, if any, that an
 * interface is enslaved to.
 */
enum zebra_slave_iftype {
	ZEBRA_IF_SLAVE_NONE,   /* Not a slave */
	ZEBRA_IF_SLAVE_VRF,    /* Member of a VRF */
	ZEBRA_IF_SLAVE_BRIDGE, /* Member of a bridge */
	ZEBRA_IF_SLAVE_BOND,   /* Bond member */
	ZEBRA_IF_SLAVE_OTHER,  /* Something else - none of the above */
};
/* Per-interface state bits kept in struct zebra_if's "flags" member.
 * Each enumerator is a distinct single-bit mask, so values can be OR-ed
 * together and tested with a bitwise AND.
 */
enum zebra_if_flags {
	/* device has been configured as an uplink for
	 * EVPN multihoming
	 */
	ZIF_FLAG_EVPN_MH_UPLINK = (1 << 0),
	ZIF_FLAG_EVPN_MH_UPLINK_OPER_UP = (1 << 1),

	/* Dataplane protodown-on */
	ZIF_FLAG_PROTODOWN = (1 << 2),
	/* Dataplane protodown-on Queued to the dplane */
	ZIF_FLAG_SET_PROTODOWN = (1 << 3),
	/* Dataplane protodown-off Queued to the dplane */
	ZIF_FLAG_UNSET_PROTODOWN = (1 << 4),

	/* LACP bypass state is set by the dataplane on a bond member
	 * and inherited by the bond (if one or more bond members are in
	 * a bypass state the bond is placed in a bypass state)
	 */
	ZIF_FLAG_LACP_BYPASS = (1 << 5)
};
+ */ + struct nhg_connected_tree_head nhg_dependents; + + /* Information about up/down changes */ + unsigned int up_count; + char up_last[FRR_TIMESTAMP_LEN]; + unsigned int down_count; + char down_last[FRR_TIMESTAMP_LEN]; + + struct rtadvconf rtadv; + unsigned int ra_sent, ra_rcvd; + + struct irdp_interface *irdp; + +#ifdef HAVE_STRUCT_SOCKADDR_DL + union { + /* note that sdl_storage is never accessed, it only exists to + * make space. + * all actual uses refer to sdl - but use sizeof(sdl_storage)! + * this fits + * best with C aliasing rules. */ + struct sockaddr_dl sdl; + struct sockaddr_storage sdl_storage; + }; +#endif + + /* ptm enable configuration */ + uint8_t ptm_enable; + + /* Zebra interface and "slave" interface type */ + enum zebra_iftype zif_type; + enum zebra_slave_iftype zif_slave_type; + + /* Additional L2 info, depends on zif_type */ + union zebra_l2if_info l2info; + + /* For members of a bridge, link to bridge. */ + /* Note: If additional fields become necessary, this can be modified to + * be a pointer to a dynamically allocd struct. + */ + struct zebra_l2info_brslave brslave_info; + + struct zebra_l2info_bondslave bondslave_info; + struct zebra_l2info_bond bond_info; + + /* ethernet segment */ + struct zebra_es_if_info es_info; + + /* bitmap of vlans associated with this interface */ + bitfield_t vlan_bitmap; + + /* An interface can be error-disabled if a protocol (such as EVPN or + * VRRP) detects a problem with keeping it operationally-up. + * If any of the protodown bits are set protodown-on is programmed + * in the dataplane. This results in a carrier/L1 down on the + * physical device. + */ + uint32_t protodown_rc; + + /* list of zebra_mac entries using this interface as destination */ + struct list *mac_list; + + /* Link fields - for sub-interfaces. 
*/ + ns_id_t link_nsid; + ifindex_t link_ifindex; + struct interface *link; + + uint8_t speed_update_count; + struct event *speed_update; + + /* + * Does this interface have a v6 to v4 ll neighbor entry + * for bgp unnumbered? + */ + bool v6_2_v4_ll_neigh_entry; + char neigh_mac[6]; + struct in6_addr v6_2_v4_ll_addr6; + + /* The description of the interface */ + char *desc; +}; + +DECLARE_HOOK(zebra_if_extra_info, (struct vty * vty, struct interface *ifp), + (vty, ifp)); +DECLARE_HOOK(zebra_if_config_wr, (struct vty * vty, struct interface *ifp), + (vty, ifp)); + +#define IS_ZEBRA_IF_VRF(ifp) \ + (((struct zebra_if *)(ifp->info))->zif_type == ZEBRA_IF_VRF) + +#define IS_ZEBRA_IF_BRIDGE(ifp) \ + (((struct zebra_if *)(ifp->info))->zif_type == ZEBRA_IF_BRIDGE) + +#define IS_ZEBRA_IF_VLAN(ifp) \ + (((struct zebra_if *)(ifp->info))->zif_type == ZEBRA_IF_VLAN) + +#define IS_ZEBRA_IF_VXLAN(ifp) \ + (((struct zebra_if *)(ifp->info))->zif_type == ZEBRA_IF_VXLAN) + +#define IS_ZEBRA_IF_MACVLAN(ifp) \ + (((struct zebra_if *)(ifp->info))->zif_type == ZEBRA_IF_MACVLAN) + +#define IS_ZEBRA_IF_VETH(ifp) \ + (((struct zebra_if *)(ifp->info))->zif_type == ZEBRA_IF_VETH) + +#define IS_ZEBRA_IF_BOND(ifp) \ + (((struct zebra_if *)(ifp->info))->zif_type == ZEBRA_IF_BOND) + +#define IS_ZEBRA_IF_GRE(ifp) \ + (((struct zebra_if *)(ifp->info))->zif_type == ZEBRA_IF_GRE) + +#define IS_ZEBRA_IF_BRIDGE_SLAVE(ifp) \ + (((struct zebra_if *)(ifp->info))->zif_slave_type \ + == ZEBRA_IF_SLAVE_BRIDGE) + +#define IS_ZEBRA_IF_VRF_SLAVE(ifp) \ + (((struct zebra_if *)(ifp->info))->zif_slave_type == ZEBRA_IF_SLAVE_VRF) + +#define IS_ZEBRA_IF_BOND_SLAVE(ifp) \ + (((struct zebra_if *)(ifp->info))->zif_slave_type \ + == ZEBRA_IF_SLAVE_BOND) + +extern void zebra_if_init(void); + +extern struct interface *if_lookup_by_index_per_ns(struct zebra_ns *, uint32_t); +extern struct interface *if_lookup_by_name_per_ns(struct zebra_ns *, + const char *); +extern struct interface *if_link_per_ns(struct zebra_ns *, 
struct interface *); +extern struct interface *if_lookup_by_index_per_nsid(ns_id_t nsid, + uint32_t ifindex); +extern const char *ifindex2ifname_per_ns(struct zebra_ns *, unsigned int); + +extern void if_unlink_per_ns(struct interface *); +extern void if_nbr_mac_to_ipv4ll_neigh_update(struct interface *fip, + char mac[6], + struct in6_addr *address, + int add); +extern void if_nbr_ipv6ll_to_ipv4ll_neigh_update(struct interface *ifp, + struct in6_addr *address, + int add); +extern void if_nbr_ipv6ll_to_ipv4ll_neigh_del_all(struct interface *ifp); +extern void if_delete_update(struct interface **ifp); +extern void if_add_update(struct interface *ifp); +extern void if_up(struct interface *ifp, bool install_connected); +extern void if_down(struct interface *); +extern void if_refresh(struct interface *); +extern void if_flags_update(struct interface *, uint64_t); +extern int if_subnet_add(struct interface *, struct connected *); +extern int if_subnet_delete(struct interface *, struct connected *); +extern int ipv6_address_configured(struct interface *ifp); +extern void if_handle_vrf_change(struct interface *ifp, vrf_id_t vrf_id); +extern void zebra_if_update_link(struct interface *ifp, ifindex_t link_ifindex, + ns_id_t ns_id); +extern void zebra_if_update_all_links(struct zebra_ns *zns); +/** + * Directly update entire protodown & reason code bitfield. + */ +extern int zebra_if_update_protodown_rc(struct interface *ifp, bool new_down, + uint32_t new_protodown_rc); + +extern void cli_show_legacy_admin_group(struct vty *vty, + const struct lyd_node *dnode, + bool show_defaults); +extern void cli_show_affinity_mode(struct vty *vty, + const struct lyd_node *dnode, + bool show_defaults); +extern void cli_show_affinity(struct vty *vty, const struct lyd_node *dnode, + bool show_defaults); + +/** + * Set protodown with single reason. 
+ */ +extern int zebra_if_set_protodown(struct interface *ifp, bool down, + enum protodown_reasons new_reason); +extern int if_ip_address_install(struct interface *ifp, struct prefix *prefix, + const char *label, struct prefix *pp); +extern int if_ipv6_address_install(struct interface *ifp, struct prefix *prefix, + const char *label); +extern int if_ip_address_uinstall(struct interface *ifp, struct prefix *prefix); +extern int if_shutdown(struct interface *ifp); +extern int if_no_shutdown(struct interface *ifp); +extern int if_multicast_set(struct interface *ifp); +extern int if_multicast_unset(struct interface *ifp); +extern int if_linkdetect(struct interface *ifp, bool detect); +extern void if_addr_wakeup(struct interface *ifp); + +/* Nexthop group connected functions */ +extern void if_nhg_dependents_add(struct interface *ifp, + struct nhg_hash_entry *nhe); +extern void if_nhg_dependents_del(struct interface *ifp, + struct nhg_hash_entry *nhe); +extern unsigned int if_nhg_dependents_count(const struct interface *ifp); +extern bool if_nhg_dependents_is_empty(const struct interface *ifp); + +extern void vrf_add_update(struct vrf *vrfp); +extern void zebra_l2_map_slave_to_bond(struct zebra_if *zif, vrf_id_t vrf); +extern void zebra_l2_unmap_slave_from_bond(struct zebra_if *zif); +extern const char *zebra_protodown_rc_str(uint32_t protodown_rc, char *pd_buf, + uint32_t pd_buf_len); +void zebra_if_dplane_result(struct zebra_dplane_ctx *ctx); + +#ifdef HAVE_PROC_NET_DEV +extern void ifstat_update_proc(void); +#endif /* HAVE_PROC_NET_DEV */ +#ifdef HAVE_NET_RT_IFLIST +extern void ifstat_update_sysctl(void); + +#endif /* HAVE_NET_RT_IFLIST */ +#ifdef HAVE_PROC_NET_DEV +extern int interface_list_proc(void); +#endif /* HAVE_PROC_NET_DEV */ +#ifdef HAVE_PROC_NET_IF_INET6 +extern int ifaddr_proc_ipv6(void); +#endif /* HAVE_PROC_NET_IF_INET6 */ + +#ifdef __cplusplus +} +#endif + +#endif /* _ZEBRA_INTERFACE_H */ diff --git a/zebra/ioctl.c b/zebra/ioctl.c new file mode 100644 
index 0000000..8da1ae3 --- /dev/null +++ b/zebra/ioctl.c @@ -0,0 +1,643 @@ +// SPDX-License-Identifier: GPL-2.0-or-later +/* + * Common ioctl functions. + * Copyright (C) 1997, 98 Kunihiro Ishiguro + */ + +#include + +#include "linklist.h" +#include "if.h" +#include "prefix.h" +#include "ioctl.h" +#include "log.h" +#include "privs.h" +#include "lib_errors.h" + +#include "vty.h" +#include "zebra/rib.h" +#include "zebra/rt.h" +#include "zebra/interface.h" +#include "zebra/zebra_errors.h" +#include "zebra/debug.h" + +#ifdef HAVE_BSD_LINK_DETECT +#include +#endif /* HAVE_BSD_LINK_DETECT*/ + +extern struct zebra_privs_t zserv_privs; + +/* clear and set interface name string */ +void ifreq_set_name(struct ifreq *ifreq, struct interface *ifp) +{ + strlcpy(ifreq->ifr_name, ifp->name, sizeof(ifreq->ifr_name)); +} + +#ifndef HAVE_NETLINK +/* call ioctl system call */ +int if_ioctl(unsigned long request, caddr_t buffer) +{ + int sock; + int ret; + int err = 0; + + frr_with_privs(&zserv_privs) { + sock = socket(AF_INET, SOCK_DGRAM, 0); + if (sock < 0) { + zlog_err("Cannot create UDP socket: %s", + safe_strerror(errno)); + exit(1); + } + if ((ret = ioctl(sock, request, buffer)) < 0) + err = errno; + } + close(sock); + + if (ret < 0) { + errno = err; + return ret; + } + return 0; +} +#endif + +/* call ioctl system call */ +int vrf_if_ioctl(unsigned long request, caddr_t buffer, vrf_id_t vrf_id) +{ + int sock; + int ret; + int err = 0; + + frr_with_privs(&zserv_privs) { + sock = vrf_socket(AF_INET, SOCK_DGRAM, 0, vrf_id, NULL); + if (sock < 0) { + zlog_err("Cannot create UDP socket: %s", + safe_strerror(errno)); + exit(1); + } + ret = vrf_ioctl(vrf_id, sock, request, buffer); + if (ret < 0) + err = errno; + } + close(sock); + + if (ret < 0) { + errno = err; + return ret; + } + return 0; +} + +#ifndef HAVE_NETLINK +static int if_ioctl_ipv6(unsigned long request, caddr_t buffer) +{ + int sock; + int ret; + int err = 0; + + frr_with_privs(&zserv_privs) { + sock = socket(AF_INET6, 
SOCK_DGRAM, 0); + if (sock < 0) { + zlog_err("Cannot create IPv6 datagram socket: %s", + safe_strerror(errno)); + exit(1); + } + + if ((ret = ioctl(sock, request, buffer)) < 0) + err = errno; + } + close(sock); + + if (ret < 0) { + errno = err; + return ret; + } + return 0; +} + +/* + * get interface metric; a reported metric of 0 is stored as 1, and a failed ioctl leaves ifp->metric unchanged + * -- if SIOCGIFMETRIC is not available on this platform set -1 + */ +void if_get_metric(struct interface *ifp) +{ +#ifdef SIOCGIFMETRIC + struct ifreq ifreq = {}; + + ifreq_set_name(&ifreq, ifp); + + if (vrf_if_ioctl(SIOCGIFMETRIC, (caddr_t)&ifreq, ifp->vrf->vrf_id) < 0) + return; + ifp->metric = ifreq.ifr_metric; + if (ifp->metric == 0) + ifp->metric = 1; +#else /* SIOCGIFMETRIC */ + ifp->metric = -1; +#endif /* SIOCGIFMETRIC */ +} + +/* get interface MTU; sets both ifp->mtu and ifp->mtu6, or -1 when the lookup fails or is unsupported */ +void if_get_mtu(struct interface *ifp) +{ + struct ifreq ifreq = {}; + + ifreq_set_name(&ifreq, ifp); + +#if defined(SIOCGIFMTU) + if (vrf_if_ioctl(SIOCGIFMTU, (caddr_t)&ifreq, ifp->vrf->vrf_id) < 0) { + zlog_info("Can't lookup mtu by ioctl(SIOCGIFMTU) for %s(%u)", + ifp->name, ifp->vrf->vrf_id); + ifp->mtu6 = ifp->mtu = -1; + return; + } + + ifp->mtu6 = ifp->mtu = ifreq.ifr_mtu; + + /* propagate */ + zebra_interface_up_update(ifp); + +#else + zlog_info("Can't lookup mtu on this system for %s(%u)", ifp->name, + ifp->vrf->vrf_id); + ifp->mtu6 = ifp->mtu = -1; +#endif +} +#endif /* ! HAVE_NETLINK */ + +/* + * Handler for interface address programming via the zebra dplane, + * for non-netlink platforms. This handler dispatches to per-platform + * helpers, based on the operation requested. + */ +#ifndef HAVE_NETLINK + +/* Prototypes: these are placed in this block so that they're only seen + * on non-netlink platforms. 
+ */ +static int if_set_prefix_ctx(const struct zebra_dplane_ctx *ctx); +static int if_unset_prefix_ctx(const struct zebra_dplane_ctx *ctx); +static int if_set_prefix6_ctx(const struct zebra_dplane_ctx *ctx); +static int if_unset_prefix6_ctx(const struct zebra_dplane_ctx *ctx); + +enum zebra_dplane_result kernel_address_update_ctx( + struct zebra_dplane_ctx *ctx) +{ + int ret = -1; + const struct prefix *p; + + p = dplane_ctx_get_intf_addr(ctx); + + if (dplane_ctx_get_op(ctx) == DPLANE_OP_ADDR_INSTALL) { + if (p->family == AF_INET) + ret = if_set_prefix_ctx(ctx); + else + ret = if_set_prefix6_ctx(ctx); + } else if (dplane_ctx_get_op(ctx) == DPLANE_OP_ADDR_UNINSTALL) { + if (p->family == AF_INET) + ret = if_unset_prefix_ctx(ctx); + else + ret = if_unset_prefix6_ctx(ctx); + } else { + if (IS_ZEBRA_DEBUG_DPLANE) + zlog_debug("Invalid op in interface-addr install"); + } + + return (ret == 0 ? + ZEBRA_DPLANE_REQUEST_SUCCESS : ZEBRA_DPLANE_REQUEST_FAILURE); +} + +#ifdef HAVE_STRUCT_IFALIASREQ + +/* + * Helper for interface-addr install, non-netlink: builds an ifaliasreq (address, optional peer, netmask) and applies it with SIOCAIFADDR; returns 0 or the negative if_ioctl() result + */ +static int if_set_prefix_ctx(const struct zebra_dplane_ctx *ctx) +{ + int ret; + struct ifaliasreq addreq; + struct sockaddr_in addr, mask, peer; + struct prefix_ipv4 *p; + + p = (struct prefix_ipv4 *)dplane_ctx_get_intf_addr(ctx); + + memset(&addreq, 0, sizeof(addreq)); + strlcpy((char *)&addreq.ifra_name, dplane_ctx_get_ifname(ctx), + sizeof(addreq.ifra_name)); + + memset(&addr, 0, sizeof(addr)); + addr.sin_addr = p->prefix; + addr.sin_family = p->family; +#ifdef HAVE_STRUCT_SOCKADDR_IN_SIN_LEN + addr.sin_len = sizeof(struct sockaddr_in); +#endif + memcpy(&addreq.ifra_addr, &addr, sizeof(struct sockaddr_in)); + + if (dplane_ctx_intf_is_connected(ctx)) { + p = (struct prefix_ipv4 *)dplane_ctx_get_intf_dest(ctx); + memset(&peer, 0, sizeof(peer)); /* zero peer so its port/padding bytes are not stack garbage when copied into addreq */ + peer.sin_addr = p->prefix; + peer.sin_family = p->family; +#ifdef HAVE_STRUCT_SOCKADDR_IN_SIN_LEN + peer.sin_len = sizeof(struct sockaddr_in); +#endif + 
memcpy(&addreq.ifra_broadaddr, &peer, + sizeof(struct sockaddr_in)); + } + + memset(&mask, 0, sizeof(mask)); + masklen2ip(p->prefixlen, &mask.sin_addr); + mask.sin_family = p->family; +#ifdef HAVE_STRUCT_SOCKADDR_IN_SIN_LEN + mask.sin_len = sizeof(struct sockaddr_in); +#endif + memcpy(&addreq.ifra_mask, &mask, sizeof(struct sockaddr_in)); + + ret = if_ioctl(SIOCAIFADDR, (caddr_t)&addreq); + if (ret < 0) + return ret; + return 0; + +} + +/* + * Helper for interface-addr un-install, non-netlink: builds an ifaliasreq (address, optional peer, netmask) and removes it with SIOCDIFADDR; returns 0 or the negative if_ioctl() result + */ +static int if_unset_prefix_ctx(const struct zebra_dplane_ctx *ctx) +{ + int ret; + struct ifaliasreq addreq; + struct sockaddr_in addr, mask, peer; + struct prefix_ipv4 *p; + + p = (struct prefix_ipv4 *)dplane_ctx_get_intf_addr(ctx); + + memset(&addreq, 0, sizeof(addreq)); + strlcpy((char *)&addreq.ifra_name, dplane_ctx_get_ifname(ctx), + sizeof(addreq.ifra_name)); + + memset(&addr, 0, sizeof(addr)); + addr.sin_addr = p->prefix; + addr.sin_family = p->family; +#ifdef HAVE_STRUCT_SOCKADDR_IN_SIN_LEN + addr.sin_len = sizeof(struct sockaddr_in); +#endif + memcpy(&addreq.ifra_addr, &addr, sizeof(struct sockaddr_in)); + + if (dplane_ctx_intf_is_connected(ctx)) { + p = (struct prefix_ipv4 *)dplane_ctx_get_intf_dest(ctx); + memset(&peer, 0, sizeof(peer)); /* zero peer so its port/padding bytes are not stack garbage when copied into addreq */ + peer.sin_addr = p->prefix; + peer.sin_family = p->family; +#ifdef HAVE_STRUCT_SOCKADDR_IN_SIN_LEN + peer.sin_len = sizeof(struct sockaddr_in); +#endif + memcpy(&addreq.ifra_broadaddr, &peer, + sizeof(struct sockaddr_in)); + } + + memset(&mask, 0, sizeof(mask)); + masklen2ip(p->prefixlen, &mask.sin_addr); + mask.sin_family = p->family; +#ifdef HAVE_STRUCT_SOCKADDR_IN_SIN_LEN + mask.sin_len = sizeof(struct sockaddr_in); +#endif + memcpy(&addreq.ifra_mask, &mask, sizeof(struct sockaddr_in)); + + ret = if_ioctl(SIOCDIFADDR, (caddr_t)&addreq); + if (ret < 0) + return ret; + return 0; +} +#else +/* Set up interface's address, netmask (and broadcast? ). Linux or + Solaris uses ifname:number semantics to set IP address aliases. 
*/ +int if_set_prefix_ctx(const struct zebra_dplane_ctx *ctx) +{ + int ret; + struct ifreq ifreq = {}; /* zero-initialised: the whole struct is handed to ioctl() */ + struct sockaddr_in addr = {}; /* zeroed so port/padding bytes are defined when copied into ifreq */ + struct sockaddr_in broad = {}; + struct sockaddr_in mask = {}; + struct prefix_ipv4 ifaddr; + struct prefix_ipv4 *p; + + p = (struct prefix_ipv4 *)dplane_ctx_get_intf_addr(ctx); + + ifaddr = *p; + + strlcpy(ifreq.ifr_name, dplane_ctx_get_ifname(ctx), + sizeof(ifreq.ifr_name)); + + addr.sin_addr = p->prefix; + addr.sin_family = p->family; + memcpy(&ifreq.ifr_addr, &addr, sizeof(struct sockaddr_in)); + ret = if_ioctl(SIOCSIFADDR, (caddr_t)&ifreq); + if (ret < 0) + return ret; + + /* We need the mask to make the broadcast addr. */ + masklen2ip(p->prefixlen, &mask.sin_addr); + + if (dplane_ctx_intf_is_broadcast(ctx)) { + apply_mask_ipv4(&ifaddr); + addr.sin_addr = ifaddr.prefix; + + broad.sin_addr.s_addr = + (addr.sin_addr.s_addr | ~mask.sin_addr.s_addr); + broad.sin_family = p->family; + + memcpy(&ifreq.ifr_broadaddr, &broad, + sizeof(struct sockaddr_in)); + ret = if_ioctl(SIOCSIFBRDADDR, (caddr_t)&ifreq); + if (ret < 0) + return ret; + } + + mask.sin_family = p->family; + memcpy(&ifreq.ifr_addr, &mask, sizeof(struct sockaddr_in)); + ret = if_ioctl(SIOCSIFNETMASK, (caddr_t)&ifreq); + if (ret < 0) + return ret; + + return 0; +} + +/* Unset the interface's address by writing an all-zero address with SIOCSIFADDR. Linux or + Solaris uses ifname:number semantics to set IP address aliases. 
*/ +int if_unset_prefix_ctx(const struct zebra_dplane_ctx *ctx) +{ + int ret; + struct ifreq ifreq = {}; /* zero-initialised: the whole struct is handed to ioctl() */ + struct sockaddr_in addr; + struct prefix_ipv4 *p; + + p = (struct prefix_ipv4 *)dplane_ctx_get_intf_addr(ctx); + + strlcpy(ifreq.ifr_name, dplane_ctx_get_ifname(ctx), + sizeof(ifreq.ifr_name)); + + memset(&addr, 0, sizeof(addr)); + addr.sin_family = p->family; + memcpy(&ifreq.ifr_addr, &addr, sizeof(struct sockaddr_in)); + ret = if_ioctl(SIOCSIFADDR, (caddr_t)&ifreq); + if (ret < 0) + return ret; + + return 0; +} +#endif /* HAVE_STRUCT_IFALIASREQ */ +#endif /* HAVE_NETLINK */ + +/* get interface flags */ +void if_get_flags(struct interface *ifp) +{ + int ret; + struct ifreq ifreqflags = {}; + struct ifreq ifreqdata = {}; + + ifreq_set_name(&ifreqflags, ifp); + ifreq_set_name(&ifreqdata, ifp); + + ret = vrf_if_ioctl(SIOCGIFFLAGS, (caddr_t)&ifreqflags, + ifp->vrf->vrf_id); + if (ret < 0) { + flog_err_sys(EC_LIB_SYSTEM_CALL, + "vrf_if_ioctl(SIOCGIFFLAGS %s) failed: %s", + ifp->name, safe_strerror(errno)); + return; + } + + if (!CHECK_FLAG(ifp->status, ZEBRA_INTERFACE_LINKDETECTION)) + goto out; + + /* Per-default, IFF_RUNNING is held high, unless link-detect + * says otherwise - we abuse IFF_RUNNING inside zebra as a + * link-state flag, following practice on Linux and Solaris + * kernels + */ + +#ifdef SIOCGIFDATA + /* + * BSD gets link state from ifi_link_state in struct if_data. + * All BSD's have this in getifaddrs(3) ifa_data for AF_LINK + * addresses. We can also access it via SIOCGIFDATA. 
+ */ + +#ifdef __NetBSD__ + struct ifdatareq ifdr = {.ifdr_data.ifi_link_state = 0}; + struct if_data *ifdata = &ifdr.ifdr_data; + + strlcpy(ifdr.ifdr_name, ifp->name, sizeof(ifdr.ifdr_name)); + ret = vrf_if_ioctl(SIOCGIFDATA, (caddr_t)&ifdr, ifp->vrf->vrf_id); +#else + struct if_data ifd = {.ifi_link_state = 0}; + struct if_data *ifdata = &ifd; + + ifreqdata.ifr_data = (caddr_t)ifdata; + ret = vrf_if_ioctl(SIOCGIFDATA, (caddr_t)&ifreqdata, ifp->vrf->vrf_id); +#endif + + if (ret == -1) + /* Very unlikely. Did the interface disappear? */ + flog_err_sys(EC_LIB_SYSTEM_CALL, + "if_ioctl(SIOCGIFDATA %s) failed: %s", ifp->name, + safe_strerror(errno)); + else { + if (ifdata->ifi_link_state >= LINK_STATE_UP) + SET_FLAG(ifreqflags.ifr_flags, IFF_RUNNING); + else if (ifdata->ifi_link_state == LINK_STATE_UNKNOWN) + /* BSD traditionally treats UNKNOWN as UP */ + SET_FLAG(ifreqflags.ifr_flags, IFF_RUNNING); + else + UNSET_FLAG(ifreqflags.ifr_flags, IFF_RUNNING); + } + +#elif defined(HAVE_BSD_LINK_DETECT) + /* + * This is only needed for FreeBSD older than FreeBSD-13. + * Valid and active media generally means the link state is + * up, but this is not always the case. + * For example, some BSD's with a net80211 interface in MONITOR + * mode will treat the media as valid and active but the + * link state is down - because we cannot send anything. + * Also, virtual interfaces such as PPP, VLAN, etc generally + * don't support media at all, so the ioctl will just fail. 
+ */ + struct ifmediareq ifmr = {.ifm_status = 0}; + + strlcpy(ifmr.ifm_name, ifp->name, sizeof(ifmr.ifm_name)); + + if (if_ioctl(SIOCGIFMEDIA, (caddr_t)&ifmr) == -1) { + if (errno != EINVAL) + flog_err_sys(EC_LIB_SYSTEM_CALL, + "if_ioctl(SIOCGIFMEDIA %s) failed: %s", + ifp->name, safe_strerror(errno)); + } else if (ifmr.ifm_status & IFM_AVALID) { /* media state is valid */ + if (ifmr.ifm_status & IFM_ACTIVE) /* media is active */ + SET_FLAG(ifreqflags.ifr_flags, IFF_RUNNING); + else + UNSET_FLAG(ifreqflags.ifr_flags, IFF_RUNNING); + } +#endif /* HAVE_BSD_LINK_DETECT */ + +out: + if_flags_update(ifp, (ifreqflags.ifr_flags & 0x0000ffff)); +} + +/* Set interface flags */ +int if_set_flags(struct interface *ifp, uint64_t flags) +{ + int ret; + struct ifreq ifreq; + + memset(&ifreq, 0, sizeof(ifreq)); + ifreq_set_name(&ifreq, ifp); + + ifreq.ifr_flags = ifp->flags; + ifreq.ifr_flags |= flags; + + ret = vrf_if_ioctl(SIOCSIFFLAGS, (caddr_t)&ifreq, ifp->vrf->vrf_id); + + if (ret < 0) { + zlog_info("can't set interface %s(%u) flags %" PRIu64, + ifp->name, ifp->vrf->vrf_id, flags); + return ret; + } + return 0; +} + +/* Unset interface's flag. 
*/ +int if_unset_flags(struct interface *ifp, uint64_t flags) +{ + int ret; + struct ifreq ifreq; + + memset(&ifreq, 0, sizeof(ifreq)); + ifreq_set_name(&ifreq, ifp); + + ifreq.ifr_flags = ifp->flags; + ifreq.ifr_flags &= ~flags; + + ret = vrf_if_ioctl(SIOCSIFFLAGS, (caddr_t)&ifreq, ifp->vrf->vrf_id); + + if (ret < 0) { + zlog_warn("can't unset interface %s(%u) flags %" PRIu64, + ifp->name, ifp->vrf->vrf_id, flags); + return ret; + } + return 0; +} + +#ifndef LINUX_IPV6 /* Netlink has its own code */ + +#ifdef HAVE_STRUCT_IN6_ALIASREQ +#ifndef ND6_INFINITE_LIFETIME +#define ND6_INFINITE_LIFETIME 0xffffffffL +#endif /* ND6_INFINITE_LIFETIME */ + +/* + * Helper for interface-addr install, non-netlink + */ +static int if_set_prefix6_ctx(const struct zebra_dplane_ctx *ctx) +{ + int ret; + struct in6_aliasreq addreq; + struct sockaddr_in6 addr; + struct sockaddr_in6 mask; + struct prefix_ipv6 *p; + + p = (struct prefix_ipv6 *)dplane_ctx_get_intf_addr(ctx); + + memset(&addreq, 0, sizeof(addreq)); + strlcpy((char *)&addreq.ifra_name, + dplane_ctx_get_ifname(ctx), sizeof(addreq.ifra_name)); + + memset(&addr, 0, sizeof(addr)); + addr.sin6_addr = p->prefix; + addr.sin6_family = p->family; +#ifdef HAVE_STRUCT_SOCKADDR_IN_SIN_LEN + addr.sin6_len = sizeof(struct sockaddr_in6); +#endif + memcpy(&addreq.ifra_addr, &addr, sizeof(struct sockaddr_in6)); + + memset(&mask, 0, sizeof(mask)); + masklen2ip6(p->prefixlen, &mask.sin6_addr); + mask.sin6_family = p->family; +#ifdef HAVE_STRUCT_SOCKADDR_IN_SIN_LEN + mask.sin6_len = sizeof(struct sockaddr_in6); +#endif + memcpy(&addreq.ifra_prefixmask, &mask, sizeof(struct sockaddr_in6)); + + addreq.ifra_lifetime.ia6t_vltime = 0xffffffff; + addreq.ifra_lifetime.ia6t_pltime = 0xffffffff; + +#ifdef HAVE_STRUCT_IF6_ALIASREQ_IFRA_LIFETIME + addreq.ifra_lifetime.ia6t_pltime = ND6_INFINITE_LIFETIME; + addreq.ifra_lifetime.ia6t_vltime = ND6_INFINITE_LIFETIME; +#endif + + ret = if_ioctl_ipv6(SIOCAIFADDR_IN6, (caddr_t)&addreq); + if (ret < 0) + return 
ret; + return 0; +} + +/* + * Helper for interface-addr un-install, non-netlink + */ +static int if_unset_prefix6_ctx(const struct zebra_dplane_ctx *ctx) +{ + int ret; + struct in6_aliasreq addreq; + struct sockaddr_in6 addr; + struct sockaddr_in6 mask; + struct prefix_ipv6 *p; + + p = (struct prefix_ipv6 *)dplane_ctx_get_intf_addr(ctx); + + memset(&addreq, 0, sizeof(addreq)); + strlcpy((char *)&addreq.ifra_name, + dplane_ctx_get_ifname(ctx), sizeof(addreq.ifra_name)); + + memset(&addr, 0, sizeof(addr)); + addr.sin6_addr = p->prefix; + addr.sin6_family = p->family; +#ifdef HAVE_STRUCT_SOCKADDR_IN_SIN_LEN + addr.sin6_len = sizeof(struct sockaddr_in6); +#endif + memcpy(&addreq.ifra_addr, &addr, sizeof(struct sockaddr_in6)); + + memset(&mask, 0, sizeof(mask)); + masklen2ip6(p->prefixlen, &mask.sin6_addr); + mask.sin6_family = p->family; +#ifdef HAVE_STRUCT_SOCKADDR_IN_SIN_LEN + mask.sin6_len = sizeof(struct sockaddr_in6); +#endif + memcpy(&addreq.ifra_prefixmask, &mask, sizeof(struct sockaddr_in6)); + +#ifdef HAVE_STRUCT_IF6_ALIASREQ_IFRA_LIFETIME + addreq.ifra_lifetime.ia6t_pltime = ND6_INFINITE_LIFETIME; + addreq.ifra_lifetime.ia6t_vltime = ND6_INFINITE_LIFETIME; +#endif + + ret = if_ioctl_ipv6(SIOCDIFADDR_IN6, (caddr_t)&addreq); + if (ret < 0) + return ret; + return 0; +} +#else +/* The old, pre-dataplane code here just returned, so we're retaining that + * choice. + */ +static int if_set_prefix6_ctx(const struct zebra_dplane_ctx *ctx) +{ + return 0; +} + +static int if_unset_prefix6_ctx(const struct zebra_dplane_ctx *ctx) +{ + return 0; +} +#endif /* HAVE_STRUCT_IN6_ALIASREQ */ + +#endif /* LINUX_IPV6 */ diff --git a/zebra/ioctl.h b/zebra/ioctl.h new file mode 100644 index 0000000..8a97cbc --- /dev/null +++ b/zebra/ioctl.h @@ -0,0 +1,32 @@ +// SPDX-License-Identifier: GPL-2.0-or-later +/* + * Common ioctl functions. 
+ * Copyright (C) 1998 Kunihiro Ishiguro + */ + +#ifndef _ZEBRA_IOCTL_H +#define _ZEBRA_IOCTL_H + +#ifdef __cplusplus +extern "C" { +#endif + +/* Prototypes. */ +extern void ifreq_set_name(struct ifreq *, struct interface *); +extern int if_ioctl(unsigned long, caddr_t); +extern int vrf_if_ioctl(unsigned long request, caddr_t buffer, vrf_id_t vrf_id); + +extern int if_set_flags(struct interface *, uint64_t); +extern int if_unset_flags(struct interface *, uint64_t); +extern void if_get_flags(struct interface *); + +extern void if_get_metric(struct interface *); +extern void if_get_mtu(struct interface *); + +#define AF_IOCTL(af, request, buffer) if_ioctl(request, buffer) + +#ifdef __cplusplus +} +#endif + +#endif /* _ZEBRA_IOCTL_H */ diff --git a/zebra/ipforward.h b/zebra/ipforward.h new file mode 100644 index 0000000..e56e042 --- /dev/null +++ b/zebra/ipforward.h @@ -0,0 +1,25 @@ +// SPDX-License-Identifier: GPL-2.0-or-later +/* IP forward settings. + * Copyright (C) 1999 Kunihiro Ishiguro + */ + +#ifndef _ZEBRA_IPFORWARD_H +#define _ZEBRA_IPFORWARD_H + +#ifdef __cplusplus +extern "C" { +#endif + +extern int ipforward(void); +extern int ipforward_on(void); +extern int ipforward_off(void); + +extern int ipforward_ipv6(void); +extern int ipforward_ipv6_on(void); +extern int ipforward_ipv6_off(void); + +#ifdef __cplusplus +} +#endif + +#endif /* _ZEBRA_IPFORWARD_H */ diff --git a/zebra/ipforward_proc.c b/zebra/ipforward_proc.c new file mode 100644 index 0000000..08fbfed --- /dev/null +++ b/zebra/ipforward_proc.c @@ -0,0 +1,169 @@ +// SPDX-License-Identifier: GPL-2.0-or-later +/* + * Fetch ipforward value by reading /proc filesystem. 
+ * Copyright (C) 1997 Kunihiro Ishiguro + */ + +#include + +#ifdef GNU_LINUX + +#include "log.h" +#include "privs.h" + +#include "zebra/ipforward.h" + +extern struct zebra_privs_t zserv_privs; + +static const char proc_net_snmp[] = "/proc/net/snmp"; + +static void dropline(FILE *fp) +{ + for (int c = getc(fp); c != EOF && c != '\n'; c = getc(fp)) + ; /* discard through end-of-line; also stop at EOF/read error so a short file cannot spin forever */ +} + +int ipforward(void) +{ + int ret = 0; + FILE *fp; + int ipforwarding = 0; + char buf[10]; + + fp = fopen(proc_net_snmp, "r"); + + if (fp == NULL) + return -1; + + /* We don't care about the first line. */ + dropline(fp); + + /* Get ip_statistics.IpForwarding : + 1 => ip forwarding enabled + 2 => ip forwarding off. */ + if (fgets(buf, 6, fp)) + ret = sscanf(buf, "Ip: %d", &ipforwarding); + + fclose(fp); + + if (ret == 1 && ipforwarding == 1) + return 1; + + return 0; +} + +/* char proc_ipv4_forwarding[] = "/proc/sys/net/ipv4/conf/all/forwarding"; */ +static const char proc_ipv4_forwarding[] = "/proc/sys/net/ipv4/ip_forward"; + +int ipforward_on(void) +{ + FILE *fp; + + frr_with_privs(&zserv_privs) { + + fp = fopen(proc_ipv4_forwarding, "w"); + + if (fp == NULL) { + return -1; + } + + fprintf(fp, "1\n"); + + fclose(fp); + + } + + return ipforward(); +} + +int ipforward_off(void) +{ + FILE *fp; + + frr_with_privs(&zserv_privs) { + + fp = fopen(proc_ipv4_forwarding, "w"); + + if (fp == NULL) { + return -1; + } + + fprintf(fp, "0\n"); + + fclose(fp); + + } + + return ipforward(); +} + +static const char proc_ipv6_forwarding[] = + "/proc/sys/net/ipv6/conf/all/forwarding"; + +int ipforward_ipv6(void) +{ + int ret = 0; + FILE *fp; + char buf[5]; + int ipforwarding = 0; + + fp = fopen(proc_ipv6_forwarding, "r"); + + if (fp == NULL) + return -1; + + if (fgets(buf, 2, fp)) + ret = sscanf(buf, "%d", &ipforwarding); + + fclose(fp); + + if (ret != 1) + return 0; + + return ipforwarding; +} + +int ipforward_ipv6_on(void) +{ + FILE *fp; + + frr_with_privs(&zserv_privs) { + + fp = fopen(proc_ipv6_forwarding, "w"); + + if (fp == NULL) { + return -1; + } + + 
fprintf(fp, "1\n"); + + fclose(fp); + + } + + return ipforward_ipv6(); +} + + +int ipforward_ipv6_off(void) +{ + FILE *fp; + + frr_with_privs(&zserv_privs) { + + fp = fopen(proc_ipv6_forwarding, "w"); + + if (fp == NULL) { + return -1; + } + + fprintf(fp, "0\n"); + + fclose(fp); + + } + + return ipforward_ipv6(); +} + +#endif /* GNU_LINUX */ diff --git a/zebra/ipforward_sysctl.c b/zebra/ipforward_sysctl.c new file mode 100644 index 0000000..bc9d12b --- /dev/null +++ b/zebra/ipforward_sysctl.c @@ -0,0 +1,129 @@ +// SPDX-License-Identifier: GPL-2.0-or-later +/* IP forward control by sysctl function. + * Copyright (C) 1997, 1999 Kunihiro Ishiguro + */ + +#include + +#if !defined(GNU_LINUX) + +#include "privs.h" +#include "zebra/ipforward.h" +#include "zebra/zebra_errors.h" + +#include "log.h" +#include "lib_errors.h" + +#define MIB_SIZ 4 + +extern struct zebra_privs_t zserv_privs; + +/* IPv4 forwarding control MIB. */ +int mib[MIB_SIZ] = {CTL_NET, PF_INET, IPPROTO_IP, IPCTL_FORWARDING}; + +int ipforward(void) +{ + size_t len; + int ipforwarding = 0; + + len = sizeof(ipforwarding); + if (sysctl(mib, MIB_SIZ, &ipforwarding, &len, 0, 0) < 0) { + flog_err_sys(EC_LIB_SYSTEM_CALL, + "Can't get ipforwarding value"); + return -1; + } + return ipforwarding; +} + +int ipforward_on(void) +{ + size_t len; + int ipforwarding = 1; /* value written to the MIB; also returned on success */ + + len = sizeof(ipforwarding); + frr_with_privs(&zserv_privs) { + if (sysctl(mib, MIB_SIZ, NULL, NULL, &ipforwarding, len) < 0) { + flog_err_sys(EC_LIB_SYSTEM_CALL, + "Can't set ipforwarding on"); + return -1; + } + } + return ipforwarding; +} + +int ipforward_off(void) +{ + size_t len; + int ipforwarding = 0; /* value written to the MIB; also returned on success */ + + len = sizeof(ipforwarding); + frr_with_privs(&zserv_privs) { + if (sysctl(mib, MIB_SIZ, NULL, NULL, &ipforwarding, len) < 0) { + flog_err_sys(EC_LIB_SYSTEM_CALL, + "Can't set ipforwarding off"); + return -1; + } + } + return ipforwarding; +} + +/* IPv6 forwarding control MIB. 
*/ +int mib_ipv6[MIB_SIZ] = {CTL_NET, PF_INET6, +#if defined(BSD_V6_SYSCTL) + IPPROTO_IPV6, IPV6CTL_FORWARDING +#else /* NOT BSD_V6_SYSCTL */ + IPPROTO_IP, IP6CTL_FORWARDING +#endif /* BSD_V6_SYSCTL */ +}; + +int ipforward_ipv6(void) +{ + size_t len; + int ip6forwarding = 0; + + len = sizeof(ip6forwarding); + frr_with_privs(&zserv_privs) { + if (sysctl(mib_ipv6, MIB_SIZ, &ip6forwarding, &len, 0, 0) < 0) { + flog_err_sys(EC_LIB_SYSTEM_CALL, + "can't get ip6forwarding value"); + return -1; + } + } + return ip6forwarding; +} + +int ipforward_ipv6_on(void) +{ + size_t len; + int ip6forwarding = 1; /* value written to the MIB; also returned on success */ + + len = sizeof(ip6forwarding); + frr_with_privs(&zserv_privs) { + if (sysctl(mib_ipv6, MIB_SIZ, NULL, NULL, &ip6forwarding, len) + < 0) { + flog_err_sys(EC_LIB_SYSTEM_CALL, + "can't set ip6forwarding on"); + return -1; + } + } + return ip6forwarding; +} + +int ipforward_ipv6_off(void) +{ + size_t len; + int ip6forwarding = 0; /* value written to the MIB; also returned on success */ + + len = sizeof(ip6forwarding); + frr_with_privs(&zserv_privs) { + if (sysctl(mib_ipv6, MIB_SIZ, NULL, NULL, &ip6forwarding, len) + < 0) { + flog_err_sys(EC_LIB_SYSTEM_CALL, + "can't set ip6forwarding off"); + return -1; + } + } + return ip6forwarding; +} + +#endif /* !defined(GNU_LINUX) */ diff --git a/zebra/irdp.h b/zebra/irdp.h new file mode 100644 index 0000000..4330734 --- /dev/null +++ b/zebra/irdp.h @@ -0,0 +1,143 @@ +// SPDX-License-Identifier: GPL-2.0-or-later +/* ICMP Router Discovery Messages + * Copyright (C) 1997, 2000 Kunihiro Ishiguro + */ + +/* + * This file is modified and completed for the Zebra IRDP implementation + * by Robert Olsson, Swedish University of Agricultural Sciences + */ + +#ifndef _IRDP_H +#define _IRDP_H + +#include "lib/vty.h" + +#ifdef __cplusplus +extern "C" { +#endif + +/* ICMP Messages */ +#ifndef ICMP_ROUTERADVERT +#define ICMP_ROUTERADVERT 9 +#endif /* ICMP_ROUTERADVERT */ + +#ifndef ICMP_ROUTERSOLICIT +#define ICMP_ROUTERSOLICIT 10 +#endif /* ICMP_ROUTERSOLICIT */ + +/* Multicast groups */ +#ifndef 
INADDR_ALLHOSTS_GROUP +#define INADDR_ALLHOSTS_GROUP 0xe0000001U /* 224.0.0.1 */ +#endif /* INADDR_ALLHOSTS_GROUP */ + +#ifndef INADDR_ALLRTRS_GROUP +#define INADDR_ALLRTRS_GROUP 0xe0000002U /* 224.0.0.2 */ +#endif /* INADDR_ALLRTRS_GROUP */ + +/* Default irdp packet interval */ +#define IRDP_DEFAULT_INTERVAL 300 + +/* Router constants from RFC1256 */ +#define MAX_INITIAL_ADVERT_INTERVAL 16 +#define MAX_INITIAL_ADVERTISEMENTS 3 +#define MAX_RESPONSE_DELAY 2 + +#define IRDP_MAXADVERTINTERVAL 600 +#define IRDP_MINADVERTINTERVAL 450 /* 0.75*600 */ +#define IRDP_LIFETIME 1350 /* 3*450 */ +#define IRDP_PREFERENCE 0 + +#define ICMP_MINLEN 8 + +#define IRDP_LAST_ADVERT_MESSAGES 2 /* The last adverts with Holdtime 0 */ + +#define IRDP_RX_BUF 1500 + +/* + Comments comes from RFC1256 ICMP Router Discovery Messages. + + The IP destination address to be used for multicast Router + Advertisements sent from the interface. The only permissible + values are the all-systems multicast address, 224.0.0.1, or the + limited-broadcast address, 255.255.255.255. (The all-systems + address is preferred wherever possible, i.e., on any link where + all listening hosts support IP multicast.) + + Default: 224.0.0.1 if the router supports IP multicast on the + interface, else 255.255.255.255 + + The maximum time allowed between sending multicast Router + Advertisements from the interface, in seconds. Must be no less + than 4 seconds and no greater than 1800 seconds. + + Default: 600 seconds + + The minimum time allowed between sending unsolicited multicast + Router Advertisements from the interface, in seconds. Must be no + less than 3 seconds and no greater than MaxAdvertisementInterval. + + Default: 0.75 * MaxAdvertisementInterval + + The value to be placed in the Lifetime field of Router + Advertisements sent from the interface, in seconds. Must be no + less than MaxAdvertisementInterval and no greater than 9000 + seconds. 
+ + Default: 3 * MaxAdvertisementInterval + + The preferability of the address as a default router address, + relative to other router addresses on the same subnet. A 32-bit, + signed, twos-complement integer, with higher values meaning more + preferable. The minimum value (hex 80000000) is used to indicate + that the address, even though it may be advertised, is not to be + used by neighboring hosts as a default router address. + + Default: 0 +*/ + +struct irdp_interface { + bool started; + + unsigned long MaxAdvertInterval; + unsigned long MinAdvertInterval; + unsigned long Preference; + + uint32_t flags; + +#define IF_ACTIVE (1<<0) /* ICMP Active */ +#define IF_BROADCAST (1<<1) /* 255.255.255.255 */ +#define IF_SOLICIT (1<<2) /* Solicit active */ +#define IF_DEBUG_MESSAGES (1<<3) +#define IF_DEBUG_PACKET (1<<4) +#define IF_DEBUG_MISC (1<<5) +#define IF_SHUTDOWN (1<<6) + + struct interface *ifp; + struct event *t_advertise; + unsigned long irdp_sent; + uint16_t Lifetime; + + struct list *AdvPrefList; +}; + +struct Adv { + struct in_addr ip; + int pref; +}; + +extern void irdp_if_init(void); +extern int irdp_sock_init(void); +extern int irdp_config_write(struct vty *, struct interface *); +extern void irdp_send_thread(struct event *t_advert); +extern void irdp_advert_off(struct interface *ifp); +extern void process_solicit(struct interface *ifp); +extern void irdp_read_raw(struct event *r); +extern void send_packet(struct interface *ifp, struct stream *s, uint32_t dst, + struct prefix *p, uint32_t ttl); + +#ifdef __cplusplus +} +#endif + +#endif /* _IRDP_H */ diff --git a/zebra/irdp_interface.c b/zebra/irdp_interface.c new file mode 100644 index 0000000..253e6a8 --- /dev/null +++ b/zebra/irdp_interface.c @@ -0,0 +1,716 @@ +// SPDX-License-Identifier: GPL-2.0-or-later +/* + * + * Copyright (C) 1997, 2000 + * Portions: + * Swedish University of Agricultural Sciences + * Robert Olsson + * Kunihiro Ishiguro + * + * Thanks to Jens Laas at Swedish University of 
/*
 * Format an IPv4 address held in network byte order (the layout of
 * in_addr.s_addr) as dotted-quad text.
 *
 * a      address, network byte order
 * b      output buffer
 * b_len  size of b in bytes; output is truncated by snprintf() if needed
 *
 * Returns b.
 *
 * The octets are read in memory order instead of being shifted out of the
 * integer, so the text is the same on big- and little-endian hosts.
 */
static const char *inet_2a(uint32_t a, char *b, size_t b_len)
{
	const uint8_t *octet = (const uint8_t *)&a;

	snprintf(b, b_len, "%u.%u.%u.%u", octet[0], octet[1], octet[2],
		 octet[3]);
	return b;
}
*/ +static int if_group(struct interface *ifp, int sock, uint32_t group, + int add_leave) +{ + struct ip_mreq m; + struct prefix *p; + int ret; + char b1[INET_ADDRSTRLEN]; + + memset(&m, 0, sizeof(m)); + m.imr_multiaddr.s_addr = htonl(group); + p = irdp_get_prefix(ifp); + + if (!p) { + flog_warn(EC_ZEBRA_NO_IFACE_ADDR, + "IRDP: can't get address for %s", ifp->name); + return 1; + } + + m.imr_interface = p->u.prefix4; + + ret = setsockopt(sock, IPPROTO_IP, add_leave, (char *)&m, + sizeof(struct ip_mreq)); + if (ret < 0) + flog_err_sys(EC_LIB_SOCKET, "IRDP: %s can't setsockopt %s: %s", + add_leave == IP_ADD_MEMBERSHIP ? "join group" + : "leave group", + inet_2a(group, b1, sizeof(b1)), + safe_strerror(errno)); + + return ret; +} + +static int if_add_group(struct interface *ifp) +{ + struct zebra_if *zi = ifp->info; + struct irdp_interface *irdp = zi->irdp; + int ret; + char b1[INET_ADDRSTRLEN]; + + if (!irdp) + return -1; + + ret = if_group(ifp, irdp_sock, INADDR_ALLRTRS_GROUP, IP_ADD_MEMBERSHIP); + if (ret < 0) { + return ret; + } + + if (irdp->flags & IF_DEBUG_MISC) + zlog_debug("IRDP: Adding group %s for %s", + inet_2a(htonl(INADDR_ALLRTRS_GROUP), b1, sizeof(b1)), + ifp->name); + return 0; +} + +static int if_drop_group(struct interface *ifp) +{ + struct zebra_if *zi = ifp->info; + struct irdp_interface *irdp = zi->irdp; + int ret; + char b1[INET_ADDRSTRLEN]; + + if (!irdp) + return -1; + + ret = if_group(ifp, irdp_sock, INADDR_ALLRTRS_GROUP, + IP_DROP_MEMBERSHIP); + if (ret < 0) + return ret; + + if (irdp->flags & IF_DEBUG_MISC) + zlog_debug("IRDP: Leaving group %s for %s", + inet_2a(htonl(INADDR_ALLRTRS_GROUP), b1, sizeof(b1)), + ifp->name); + return 0; +} + +static void if_set_defaults(struct irdp_interface *irdp) +{ + irdp->MaxAdvertInterval = IRDP_MAXADVERTINTERVAL; + irdp->MinAdvertInterval = IRDP_MINADVERTINTERVAL; + irdp->Preference = IRDP_PREFERENCE; + irdp->Lifetime = IRDP_LIFETIME; +} + + +static struct Adv *Adv_new(void) +{ + return 
XCALLOC(MTYPE_IRDP_IF, sizeof(struct Adv)); +} + +static void Adv_free(struct Adv *adv) +{ + XFREE(MTYPE_IRDP_IF, adv); +} + +static void irdp_if_start(struct interface *ifp, int multicast, + int set_defaults) +{ + struct zebra_if *zi = ifp->info; + struct irdp_interface *irdp = zi->irdp; + struct listnode *node; + struct connected *ifc; + uint32_t timer, seed; + + assert(irdp); + + irdp->started = true; + if (irdp->flags & IF_ACTIVE) { + zlog_debug("IRDP: Interface is already active %s", ifp->name); + return; + } + if ((irdp_sock < 0) && ((irdp_sock = irdp_sock_init()) < 0)) { + flog_warn(EC_ZEBRA_IRDP_CANNOT_ACTIVATE_IFACE, + "IRDP: Cannot activate interface %s (cannot create IRDP socket)", + ifp->name); + return; + } + irdp->flags |= IF_ACTIVE; + + if (!multicast) + irdp->flags |= IF_BROADCAST; + + if_add_update(ifp); + + if (!(ifp->flags & IFF_UP)) { + flog_warn(EC_ZEBRA_IRDP_IFACE_DOWN, + "IRDP: Interface is down %s", ifp->name); + } + + /* Shall we cancel if_start if if_add_group fails? */ + + if (multicast) { + if_add_group(ifp); + + if (!(ifp->flags & (IFF_MULTICAST | IFF_ALLMULTI))) { + flog_warn(EC_ZEBRA_IRDP_IFACE_MCAST_DISABLED, + "IRDP: Interface not multicast enabled %s", + ifp->name); + } + } + + if (set_defaults) + if_set_defaults(irdp); + + irdp->irdp_sent = 0; + + /* The spec suggests this for randomness */ + + seed = 0; + if (ifp->connected) + for (ALL_LIST_ELEMENTS_RO(ifp->connected, node, ifc)) { + seed = ifc->address->u.prefix4.s_addr; + break; + } + + srandom(seed); + timer = (frr_weak_random() % IRDP_DEFAULT_INTERVAL) + 1; + + irdp->AdvPrefList = list_new(); + irdp->AdvPrefList->del = (void (*)(void *))Adv_free; /* Destructor */ + + + /* And this for startup. Speed limit from 1991 :-). 
But it's OK*/ + + if (irdp->irdp_sent < MAX_INITIAL_ADVERTISEMENTS + && timer > MAX_INITIAL_ADVERT_INTERVAL) + timer = MAX_INITIAL_ADVERT_INTERVAL; + + + if (irdp->flags & IF_DEBUG_MISC) + zlog_debug("IRDP: Init timer for %s set to %u", ifp->name, + timer); + + irdp->t_advertise = NULL; + event_add_timer(zrouter.master, irdp_send_thread, ifp, timer, + &irdp->t_advertise); +} + +static void irdp_if_stop(struct interface *ifp) +{ + struct zebra_if *zi = ifp->info; + struct irdp_interface *irdp = zi->irdp; + + if (irdp == NULL) { + zlog_debug("Interface %s structure is NULL", ifp->name); + return; + } + + if (!(irdp->flags & IF_ACTIVE)) { + zlog_debug("Interface is not active %s", ifp->name); + return; + } + + if (!(irdp->flags & IF_BROADCAST)) + if_drop_group(ifp); + + irdp_advert_off(ifp); + + list_delete(&irdp->AdvPrefList); + + irdp->flags = 0; +} + + +static void irdp_if_shutdown(struct interface *ifp) +{ + struct zebra_if *zi = ifp->info; + struct irdp_interface *irdp = zi->irdp; + + if (!irdp) + return; + + if (irdp->flags & IF_SHUTDOWN) { + zlog_debug("IRDP: Interface is already shutdown %s", ifp->name); + return; + } + + irdp->flags |= IF_SHUTDOWN; + irdp->flags &= ~IF_ACTIVE; + + if (!(irdp->flags & IF_BROADCAST)) + if_drop_group(ifp); + + /* Tell the hosts we are out of service */ + irdp_advert_off(ifp); +} + +static void irdp_if_no_shutdown(struct interface *ifp) +{ + struct irdp_interface *irdp = irdp_if_get(ifp); + + if (!irdp) + return; + + if (!(irdp->flags & IF_SHUTDOWN)) { + zlog_debug("IRDP: Interface is not shutdown %s", ifp->name); + return; + } + + irdp->flags &= ~IF_SHUTDOWN; + + irdp_if_start(ifp, irdp->flags & IF_BROADCAST ? 
false : true, false); +} + + +/* Write configuration to user */ + +int irdp_config_write(struct vty *vty, struct interface *ifp) +{ + struct zebra_if *zi = ifp->info; + struct irdp_interface *irdp = zi->irdp; + struct Adv *adv; + struct listnode *node; + char b1[INET_ADDRSTRLEN]; + + if (!irdp) + return 0; + + if (irdp->flags & IF_ACTIVE || irdp->flags & IF_SHUTDOWN) { + + if (irdp->flags & IF_SHUTDOWN) + vty_out(vty, " ip irdp shutdown \n"); + + if (irdp->flags & IF_BROADCAST) + vty_out(vty, " ip irdp broadcast\n"); + else + vty_out(vty, " ip irdp multicast\n"); + + vty_out(vty, " ip irdp preference %ld\n", irdp->Preference); + + for (ALL_LIST_ELEMENTS_RO(irdp->AdvPrefList, node, adv)) + vty_out(vty, " ip irdp address %s preference %d\n", + inet_2a(adv->ip.s_addr, b1, sizeof(b1)), + adv->pref); + + vty_out(vty, " ip irdp holdtime %d\n", irdp->Lifetime); + + vty_out(vty, " ip irdp minadvertinterval %ld\n", + irdp->MinAdvertInterval); + + vty_out(vty, " ip irdp maxadvertinterval %ld\n", + irdp->MaxAdvertInterval); + } + return 0; +} + + +DEFUN (ip_irdp_multicast, + ip_irdp_multicast_cmd, + "ip irdp multicast", + IP_STR + "ICMP Router discovery on this interface\n" + "Use multicast mode\n") +{ + VTY_DECLVAR_CONTEXT(interface, ifp); + irdp_if_get(ifp); + + irdp_if_start(ifp, true, true); + return CMD_SUCCESS; +} + +DEFUN (ip_irdp_broadcast, + ip_irdp_broadcast_cmd, + "ip irdp broadcast", + IP_STR + "ICMP Router discovery on this interface\n" + "Use broadcast mode\n") +{ + VTY_DECLVAR_CONTEXT(interface, ifp); + irdp_if_get(ifp); + + irdp_if_start(ifp, false, true); + return CMD_SUCCESS; +} + +DEFUN (no_ip_irdp, + no_ip_irdp_cmd, + "no ip irdp", + NO_STR + IP_STR + "Disable ICMP Router discovery on this interface\n") +{ + VTY_DECLVAR_CONTEXT(interface, ifp); + + irdp_if_stop(ifp); + return CMD_SUCCESS; +} + +DEFUN (ip_irdp_shutdown, + ip_irdp_shutdown_cmd, + "ip irdp shutdown", + IP_STR + "ICMP Router discovery on this interface\n" + "ICMP Router discovery shutdown on 
this interface\n") +{ + VTY_DECLVAR_CONTEXT(interface, ifp); + + irdp_if_shutdown(ifp); + return CMD_SUCCESS; +} + +DEFUN (no_ip_irdp_shutdown, + no_ip_irdp_shutdown_cmd, + "no ip irdp shutdown", + NO_STR + IP_STR + "ICMP Router discovery on this interface\n" + "ICMP Router discovery no shutdown on this interface\n") +{ + VTY_DECLVAR_CONTEXT(interface, ifp); + + irdp_if_no_shutdown(ifp); + return CMD_SUCCESS; +} + +DEFUN (ip_irdp_holdtime, + ip_irdp_holdtime_cmd, + "ip irdp holdtime (0-9000)", + IP_STR + "ICMP Router discovery on this interface\n" + "Set holdtime value\n" + "Holdtime value in seconds. Default is 1800 seconds\n") +{ + int idx_number = 3; + VTY_DECLVAR_CONTEXT(interface, ifp); + struct irdp_interface *irdp = irdp_if_get(ifp); + + IRDP_CONFIGED; + + irdp->Lifetime = atoi(argv[idx_number]->arg); + return CMD_SUCCESS; +} + +DEFUN (ip_irdp_minadvertinterval, + ip_irdp_minadvertinterval_cmd, + "ip irdp minadvertinterval (3-1800)", + IP_STR + "ICMP Router discovery on this interface\n" + "Set minimum time between advertisement\n" + "Minimum advertisement interval in seconds\n") +{ + int idx_number = 3; + VTY_DECLVAR_CONTEXT(interface, ifp); + struct irdp_interface *irdp = irdp_if_get(ifp); + + IRDP_CONFIGED; + + if ((unsigned)atoi(argv[idx_number]->arg) <= irdp->MaxAdvertInterval) { + irdp->MinAdvertInterval = atoi(argv[idx_number]->arg); + return CMD_SUCCESS; + } else { + vty_out(vty, + "%% MinAdvertInterval must be less than or equal to MaxAdvertInterval\n"); + return CMD_WARNING_CONFIG_FAILED; + } +} + +DEFUN (ip_irdp_maxadvertinterval, + ip_irdp_maxadvertinterval_cmd, + "ip irdp maxadvertinterval (4-1800)", + IP_STR + "ICMP Router discovery on this interface\n" + "Set maximum time between advertisement\n" + "Maximum advertisement interval in seconds\n") +{ + int idx_number = 3; + VTY_DECLVAR_CONTEXT(interface, ifp); + struct irdp_interface *irdp = irdp_if_get(ifp); + + IRDP_CONFIGED; + + if (irdp->MinAdvertInterval <= 
(unsigned)atoi(argv[idx_number]->arg)) { + irdp->MaxAdvertInterval = atoi(argv[idx_number]->arg); + return CMD_SUCCESS; + } else { + vty_out(vty, + "%% MaxAdvertInterval must be greater than or equal to MinAdvertInterval\n"); + return CMD_WARNING_CONFIG_FAILED; + } +} + +/* DEFUN needs to be fixed for negative ranages... + * "ip irdp preference <-2147483648-2147483647>", + * Be positive for now. :-) + */ + +DEFUN (ip_irdp_preference, + ip_irdp_preference_cmd, + "ip irdp preference (0-2147483647)", + IP_STR + "ICMP Router discovery on this interface\n" + "Set default preference level for this interface\n" + "Preference level\n") +{ + int idx_number = 3; + VTY_DECLVAR_CONTEXT(interface, ifp); + struct irdp_interface *irdp = irdp_if_get(ifp); + + IRDP_CONFIGED; + + irdp->Preference = atoi(argv[idx_number]->arg); + return CMD_SUCCESS; +} + +DEFUN (ip_irdp_address_preference, + ip_irdp_address_preference_cmd, + "ip irdp address A.B.C.D preference (0-2147483647)", + IP_STR + "Alter ICMP Router discovery preference on this interface\n" + "Set IRDP address for advertise\n" + "IPv4 address\n" + "Specify IRDP non-default preference to advertise\n" + "Preference level\n") +{ + int idx_ipv4 = 3; + int idx_number = 5; + VTY_DECLVAR_CONTEXT(interface, ifp); + struct irdp_interface *irdp = irdp_if_get(ifp); + struct listnode *node; + struct in_addr ip; + int pref; + int ret; + struct Adv *adv; + + IRDP_CONFIGED; + + ret = inet_aton(argv[idx_ipv4]->arg, &ip); + if (!ret) + return CMD_WARNING_CONFIG_FAILED; + + pref = atoi(argv[idx_number]->arg); + + for (ALL_LIST_ELEMENTS_RO(irdp->AdvPrefList, node, adv)) + if (adv->ip.s_addr == ip.s_addr) + return CMD_SUCCESS; + + adv = Adv_new(); + adv->ip = ip; + adv->pref = pref; + listnode_add(irdp->AdvPrefList, adv); + + return CMD_SUCCESS; +} + +DEFUN (no_ip_irdp_address_preference, + no_ip_irdp_address_preference_cmd, + "no ip irdp address A.B.C.D preference (0-2147483647)", + NO_STR + IP_STR + "Alter ICMP Router discovery preference on 
this interface\n" + "Select IRDP address\n" + "IPv4 address\n" + "Reset ICMP Router discovery preference on this interface\n" + "Old preference level\n") +{ + int idx_ipv4 = 4; + VTY_DECLVAR_CONTEXT(interface, ifp); + struct irdp_interface *irdp = irdp_if_get(ifp); + struct listnode *node, *nnode; + struct in_addr ip; + int ret; + struct Adv *adv; + + IRDP_CONFIGED; + + ret = inet_aton(argv[idx_ipv4]->arg, &ip); + if (!ret) + return CMD_WARNING_CONFIG_FAILED; + + for (ALL_LIST_ELEMENTS(irdp->AdvPrefList, node, nnode, adv)) { + if (adv->ip.s_addr == ip.s_addr) { + listnode_delete(irdp->AdvPrefList, adv); + break; + } + } + + return CMD_SUCCESS; +} + +DEFUN (ip_irdp_debug_messages, + ip_irdp_debug_messages_cmd, + "ip irdp debug messages", + IP_STR + "ICMP Router discovery debug Averts. and Solicits (short)\n" + "IRDP debugging options\n" + "Enable debugging for IRDP messages\n") +{ + VTY_DECLVAR_CONTEXT(interface, ifp); + struct irdp_interface *irdp = irdp_if_get(ifp); + + IRDP_CONFIGED; + + irdp->flags |= IF_DEBUG_MESSAGES; + + return CMD_SUCCESS; +} + +DEFUN (ip_irdp_debug_misc, + ip_irdp_debug_misc_cmd, + "ip irdp debug misc", + IP_STR + "ICMP Router discovery debug Averts. and Solicits (short)\n" + "IRDP debugging options\n" + "Enable debugging for miscellaneous IRDP events\n") +{ + VTY_DECLVAR_CONTEXT(interface, ifp); + struct irdp_interface *irdp = irdp_if_get(ifp); + + IRDP_CONFIGED; + + irdp->flags |= IF_DEBUG_MISC; + + return CMD_SUCCESS; +} + +DEFUN (ip_irdp_debug_packet, + ip_irdp_debug_packet_cmd, + "ip irdp debug packet", + IP_STR + "ICMP Router discovery debug Averts. 
and Solicits (short)\n" + "IRDP debugging options\n" + "Enable debugging for IRDP packets\n") +{ + VTY_DECLVAR_CONTEXT(interface, ifp); + struct irdp_interface *irdp = irdp_if_get(ifp); + + IRDP_CONFIGED; + + irdp->flags |= IF_DEBUG_PACKET; + + return CMD_SUCCESS; +} + + +DEFUN (ip_irdp_debug_disable, + ip_irdp_debug_disable_cmd, + "ip irdp debug disable", + IP_STR + "ICMP Router discovery debug Averts. and Solicits (short)\n" + "IRDP debugging options\n" + "Disable debugging for all IRDP events\n") +{ + VTY_DECLVAR_CONTEXT(interface, ifp); + struct irdp_interface *irdp = irdp_if_get(ifp); + + IRDP_CONFIGED; + + irdp->flags &= ~IF_DEBUG_PACKET; + irdp->flags &= ~IF_DEBUG_MESSAGES; + irdp->flags &= ~IF_DEBUG_MISC; + + return CMD_SUCCESS; +} + +void irdp_if_init(void) +{ + hook_register(zebra_if_config_wr, irdp_config_write); + hook_register(if_del, irdp_if_delete); + + install_element(INTERFACE_NODE, &ip_irdp_broadcast_cmd); + install_element(INTERFACE_NODE, &ip_irdp_multicast_cmd); + install_element(INTERFACE_NODE, &no_ip_irdp_cmd); + install_element(INTERFACE_NODE, &ip_irdp_shutdown_cmd); + install_element(INTERFACE_NODE, &no_ip_irdp_shutdown_cmd); + install_element(INTERFACE_NODE, &ip_irdp_holdtime_cmd); + install_element(INTERFACE_NODE, &ip_irdp_maxadvertinterval_cmd); + install_element(INTERFACE_NODE, &ip_irdp_minadvertinterval_cmd); + install_element(INTERFACE_NODE, &ip_irdp_preference_cmd); + install_element(INTERFACE_NODE, &ip_irdp_address_preference_cmd); + install_element(INTERFACE_NODE, &no_ip_irdp_address_preference_cmd); + + install_element(INTERFACE_NODE, &ip_irdp_debug_messages_cmd); + install_element(INTERFACE_NODE, &ip_irdp_debug_misc_cmd); + install_element(INTERFACE_NODE, &ip_irdp_debug_packet_cmd); + install_element(INTERFACE_NODE, &ip_irdp_debug_disable_cmd); +} diff --git a/zebra/irdp_main.c b/zebra/irdp_main.c new file mode 100644 index 0000000..6548790 --- /dev/null +++ b/zebra/irdp_main.c @@ -0,0 +1,335 @@ +// SPDX-License-Identifier: 
GPL-2.0-or-later +/* + * + * Copyright (C) 2000 Robert Olsson. + * Swedish University of Agricultural Sciences + */ + +/* + * This work includes work with the following copywrite: + * + * Copyright (C) 1997, 2000 Kunihiro Ishiguro + * + */ + +/* + * Thanks to Jens Laas at Swedish University of Agricultural Sciences + * for reviewing and tests. + */ + + +#include + +#include "if.h" +#include "vty.h" +#include "sockunion.h" +#include "sockopt.h" +#include "prefix.h" +#include "command.h" +#include "memory.h" +#include "stream.h" +#include "ioctl.h" +#include "connected.h" +#include "log.h" +#include "zclient.h" +#include "frrevent.h" +#include "privs.h" +#include "libfrr.h" +#include "lib_errors.h" +#include "lib/version.h" +#include "zebra/interface.h" +#include "zebra/rtadv.h" +#include "zebra/rib.h" +#include "zebra/zebra_router.h" +#include "zebra/redistribute.h" +#include "zebra/irdp.h" +#include "zebra/zebra_errors.h" +#include + +#include "checksum.h" +#include "if.h" +#include "sockunion.h" +#include "log.h" +#include "network.h" + +/* GLOBAL VARS */ + +extern struct zebra_privs_t zserv_privs; + +struct event *t_irdp_raw; + +/* Timer interval of irdp. 
*/ +int irdp_timer_interval = IRDP_DEFAULT_INTERVAL; + +int irdp_sock_init(void) +{ + int ret, i; + int save_errno; + int sock; + + frr_with_privs(&zserv_privs) { + + sock = socket(AF_INET, SOCK_RAW, IPPROTO_ICMP); + save_errno = errno; + + } + + if (sock < 0) { + flog_err_sys(EC_LIB_SOCKET, "IRDP: can't create irdp socket %s", + safe_strerror(save_errno)); + return sock; + }; + + i = 1; + ret = setsockopt(sock, IPPROTO_IP, IP_TTL, (void *)&i, sizeof(i)); + if (ret < 0) { + flog_err_sys(EC_LIB_SOCKET, "IRDP: can't do irdp sockopt %s", + safe_strerror(errno)); + close(sock); + return ret; + }; + + ret = setsockopt_ifindex(AF_INET, sock, 1); + if (ret < 0) { + flog_err_sys(EC_LIB_SOCKET, "IRDP: can't do irdp sockopt %s", + safe_strerror(errno)); + close(sock); + return ret; + }; + + event_add_read(zrouter.master, irdp_read_raw, NULL, sock, &t_irdp_raw); + + return sock; +} + + +static int get_pref(struct irdp_interface *irdp, struct prefix *p) +{ + struct listnode *node; + struct Adv *adv; + + /* Use default preference or use the override pref */ + + if (irdp->AdvPrefList == NULL) + return irdp->Preference; + + for (ALL_LIST_ELEMENTS_RO(irdp->AdvPrefList, node, adv)) + if (p->u.prefix4.s_addr == adv->ip.s_addr) + return adv->pref; + + return irdp->Preference; +} + +/* Make ICMP Router Advertisement Message. */ +static int make_advertisement_packet(struct interface *ifp, struct prefix *p, + struct stream *s) +{ + struct zebra_if *zi = ifp->info; + struct irdp_interface *irdp = zi->irdp; + int size; + int pref; + uint16_t checksum; + + pref = get_pref(irdp, p); + + stream_putc(s, ICMP_ROUTERADVERT); /* Type. */ + stream_putc(s, 0); /* Code. */ + stream_putw(s, 0); /* Checksum. */ + stream_putc(s, 1); /* Num address. */ + stream_putc(s, 2); /* Address Entry Size. */ + + if (irdp->flags & IF_SHUTDOWN) + stream_putw(s, 0); + else + stream_putw(s, irdp->Lifetime); + + stream_putl(s, htonl(p->u.prefix4.s_addr)); /* Router address. 
*/ + stream_putl(s, pref); + + /* in_cksum return network byte order value */ + size = 16; + checksum = in_cksum(s->data, size); + stream_putw_at(s, 2, htons(checksum)); + + return size; +} + +static void irdp_send(struct interface *ifp, struct prefix *p, struct stream *s) +{ + struct zebra_if *zi = ifp->info; + struct irdp_interface *irdp = zi->irdp; + uint32_t dst; + uint32_t ttl = 1; + + if (!irdp) + return; + if (!(ifp->flags & IFF_UP)) + return; + + if (irdp->flags & IF_BROADCAST) + dst = INADDR_BROADCAST; + else + dst = htonl(INADDR_ALLHOSTS_GROUP); + + if (irdp->flags & IF_DEBUG_MESSAGES) + zlog_debug( + "IRDP: TX Advert on %s %pFX Holdtime=%d Preference=%d", + ifp->name, p, + irdp->flags & IF_SHUTDOWN ? 0 : irdp->Lifetime, + get_pref(irdp, p)); + + send_packet(ifp, s, dst, p, ttl); +} + +static void irdp_advertisement(struct interface *ifp, struct prefix *p) +{ + struct stream *s; + s = stream_new(128); + make_advertisement_packet(ifp, p, s); + irdp_send(ifp, p, s); + stream_free(s); +} + +void irdp_send_thread(struct event *t_advert) +{ + uint32_t timer, tmp; + struct interface *ifp = EVENT_ARG(t_advert); + struct zebra_if *zi = ifp->info; + struct irdp_interface *irdp = zi->irdp; + struct prefix *p; + struct listnode *node, *nnode; + struct connected *ifc; + + if (!irdp) + return; + + irdp->flags &= ~IF_SOLICIT; + + if (ifp->connected) + for (ALL_LIST_ELEMENTS(ifp->connected, node, nnode, ifc)) { + p = ifc->address; + + if (p->family != AF_INET) + continue; + + irdp_advertisement(ifp, p); + irdp->irdp_sent++; + } + + tmp = irdp->MaxAdvertInterval - irdp->MinAdvertInterval; + timer = frr_weak_random() % (tmp + 1); + timer = irdp->MinAdvertInterval + timer; + + if (irdp->irdp_sent < MAX_INITIAL_ADVERTISEMENTS + && timer > MAX_INITIAL_ADVERT_INTERVAL) + timer = MAX_INITIAL_ADVERT_INTERVAL; + + if (irdp->flags & IF_DEBUG_MISC) + zlog_debug("IRDP: New timer for %s set to %u", ifp->name, + timer); + + irdp->t_advertise = NULL; + event_add_timer(zrouter.master, 
irdp_send_thread, ifp, timer, + &irdp->t_advertise); +} + +void irdp_advert_off(struct interface *ifp) +{ + struct zebra_if *zi = ifp->info; + struct irdp_interface *irdp = zi->irdp; + struct listnode *node, *nnode; + int i; + struct connected *ifc; + struct prefix *p; + + if (!irdp) + return; + + EVENT_OFF(irdp->t_advertise); + + if (ifp->connected) + for (ALL_LIST_ELEMENTS(ifp->connected, node, nnode, ifc)) { + p = ifc->address; + + /* Output some packets with Lifetime 0 + we should add a wait... + */ + + for (i = 0; i < IRDP_LAST_ADVERT_MESSAGES; i++) { + irdp->irdp_sent++; + irdp_advertisement(ifp, p); + } + } +} + + +void process_solicit(struct interface *ifp) +{ + struct zebra_if *zi = ifp->info; + struct irdp_interface *irdp = zi->irdp; + uint32_t timer; + + if (!irdp) + return; + + /* When SOLICIT is active we reject further incoming solicits + this keeps down the answering rate so we don't have think + about DoS attacks here. */ + + if (irdp->flags & IF_SOLICIT) + return; + + irdp->flags |= IF_SOLICIT; + EVENT_OFF(irdp->t_advertise); + + timer = (frr_weak_random() % MAX_RESPONSE_DELAY) + 1; + + irdp->t_advertise = NULL; + event_add_timer(zrouter.master, irdp_send_thread, ifp, timer, + &irdp->t_advertise); +} + +static int irdp_finish(void) +{ + struct vrf *vrf; + struct interface *ifp; + struct zebra_if *zi; + struct irdp_interface *irdp; + + zlog_info("IRDP: Received shutdown notification."); + + RB_FOREACH (vrf, vrf_id_head, &vrfs_by_id) + FOR_ALL_INTERFACES (vrf, ifp) { + zi = ifp->info; + + if (!zi) + continue; + irdp = zi->irdp; + if (!irdp) + continue; + + if (irdp->flags & IF_ACTIVE) { + irdp->flags |= IF_SHUTDOWN; + irdp_advert_off(ifp); + } + } + return 0; +} + +static int irdp_init(struct event_loop *master) +{ + irdp_if_init(); + + hook_register(frr_early_fini, irdp_finish); + return 0; +} + +static int irdp_module_init(void) +{ + hook_register(frr_late_init, irdp_init); + return 0; +} + +FRR_MODULE_SETUP(.name = "zebra_irdp", .version = 
FRR_VERSION, + .description = "zebra IRDP module", .init = irdp_module_init, +); diff --git a/zebra/irdp_packet.c b/zebra/irdp_packet.c new file mode 100644 index 0000000..2dfc027 --- /dev/null +++ b/zebra/irdp_packet.c @@ -0,0 +1,352 @@ +// SPDX-License-Identifier: GPL-2.0-or-later +/* + * + * Copyright (C) 2000 Robert Olsson. + * Swedish University of Agricultural Sciences + */ + +/* + * This work includes work with the following copywrite: + * + * Copyright (C) 1997, 2000 Kunihiro Ishiguro + * + */ + +/* + * Thanks to Jens Laas at Swedish University of Agricultural Sciences + * for reviewing and tests. + */ + + +#include +#include + +#include "checksum.h" +#include "command.h" +#include "connected.h" +#include "if.h" +#include "ioctl.h" +#include "log.h" +#include "log.h" +#include "memory.h" +#include "prefix.h" +#include "sockopt.h" +#include "sockunion.h" +#include "sockunion.h" +#include "stream.h" +#include "frrevent.h" +#include "vty.h" +#include "zclient.h" +#include "lib_errors.h" + +#include "zebra/interface.h" +#include "zebra/rtadv.h" +#include "zebra/rib.h" +#include "zebra/zebra_router.h" +#include "zebra/redistribute.h" +#include "zebra/irdp.h" +#include "zebra/zebra_errors.h" + + +/* GLOBAL VARS */ + +int irdp_sock = -1; + +extern struct event *t_irdp_raw; + +static void parse_irdp_packet(char *p, int len, struct interface *ifp) +{ + struct ip *ip = (struct ip *)p; + struct icmphdr *icmp; + struct in_addr src; + int ip_hlen, iplen, datalen; + struct zebra_if *zi; + struct irdp_interface *irdp; + uint16_t saved_chksum; + char buf[PREFIX_STRLEN]; + + zi = ifp->info; + if (!zi) + return; + + irdp = zi->irdp; + if (!irdp) + return; + + ip_hlen = ip->ip_hl << 2; + + sockopt_iphdrincl_swab_systoh(ip); + + iplen = ip->ip_len; + datalen = len - ip_hlen; + src = ip->ip_src; + + if (len != iplen) { + flog_err(EC_ZEBRA_IRDP_LEN_MISMATCH, + "IRDP: RX length doesn't match IP length"); + return; + } + + if (iplen < ICMP_MINLEN) { + 
flog_err(EC_ZEBRA_IRDP_LEN_MISMATCH, + "IRDP: RX ICMP packet too short from %pI4", + &src); + return; + } + + /* XXX: RAW doesn't receive link-layer, surely? ??? */ + /* Check so we don't checksum packets longer than oure RX_BUF - (ethlen + + + len of IP-header) 14+20 */ + if (iplen > IRDP_RX_BUF - 34) { + flog_err(EC_ZEBRA_IRDP_LEN_MISMATCH, + "IRDP: RX ICMP packet too long from %pI4", + &src); + return; + } + + icmp = (struct icmphdr *)(p + ip_hlen); + + saved_chksum = icmp->checksum; + icmp->checksum = 0; + /* check icmp checksum */ + if (in_cksum(icmp, datalen) != saved_chksum) { + flog_warn( + EC_ZEBRA_IRDP_BAD_CHECKSUM, + "IRDP: RX ICMP packet from %pI4 Bad checksum, silently ignored", + &src); + return; + } + + /* Handle just only IRDP */ + if (!(icmp->type == ICMP_ROUTERADVERT + || icmp->type == ICMP_ROUTERSOLICIT)) + return; + + if (icmp->code != 0) { + flog_warn( + EC_ZEBRA_IRDP_BAD_TYPE_CODE, + "IRDP: RX packet type %d from %pI4 Bad ICMP type code, silently ignored", + icmp->type, &src); + return; + } + + if (!((ntohl(ip->ip_dst.s_addr) == INADDR_BROADCAST) + && (irdp->flags & IF_BROADCAST)) + || (ntohl(ip->ip_dst.s_addr) == INADDR_ALLRTRS_GROUP + && !(irdp->flags & IF_BROADCAST))) { + flog_warn( + EC_ZEBRA_IRDP_BAD_RX_FLAGS, + "IRDP: RX illegal from %pI4 to %s while %s operates in %s; Please correct settings", + &src, + ntohl(ip->ip_dst.s_addr) == INADDR_ALLRTRS_GROUP + ? "multicast" + : inet_ntop(AF_INET, &ip->ip_dst, + buf, sizeof(buf)), + ifp->name, + irdp->flags & IF_BROADCAST ? 
"broadcast" : "multicast");
+		return;
+	}
+
+	switch (icmp->type) {
+	case ICMP_ROUTERADVERT:
+		break;
+
+	case ICMP_ROUTERSOLICIT:
+
+		if (irdp->flags & IF_DEBUG_MESSAGES)
+			zlog_debug("IRDP: RX Solicit on %s from %pI4",
+				   ifp->name, &src);
+
+		process_solicit(ifp);
+		break;
+
+	default:
+		flog_warn(
+			EC_ZEBRA_IRDP_BAD_TYPE_CODE,
+			"IRDP: RX packet type %d from %pI4 Bad ICMP type code, silently ignored",
+			icmp->type, &src);
+	}
+}
+
+/*
+ * Receive one packet from 'sock' into buf (at most 'size' bytes).
+ *
+ * On a clean read, *ifindex is set from the packet's ancillary data and the
+ * byte count is returned.  On a read error the negative recvmsg() result is
+ * returned.  When the payload or the control data was truncated the byte
+ * count is returned but *ifindex is left untouched, so callers must
+ * pre-initialize it.
+ */
+static int irdp_recvmsg(int sock, uint8_t *buf, int size, int *ifindex)
+{
+	struct msghdr msg;
+	struct iovec iov;
+	char adata[CMSG_SPACE(SOPT_SIZE_CMSG_PKTINFO_IPV4())];
+	int ret;
+
+	memset(&msg, 0, sizeof(msg));
+	msg.msg_name = (void *)0;
+	msg.msg_namelen = 0;
+	msg.msg_iov = &iov;
+	msg.msg_iovlen = 1;
+	msg.msg_control = (void *)adata;
+	msg.msg_controllen = sizeof(adata);
+
+	iov.iov_base = buf;
+	iov.iov_len = size;
+
+	ret = recvmsg(sock, &msg, 0);
+	if (ret < 0) {
+		flog_warn(EC_LIB_SOCKET, "IRDP: recvmsg: read error %s",
+			  safe_strerror(errno));
+		return ret;
+	}
+
+	if (msg.msg_flags & MSG_TRUNC) {
+		flog_warn(EC_LIB_SOCKET, "IRDP: recvmsg: truncated message");
+		return ret;
+	}
+	if (msg.msg_flags & MSG_CTRUNC) {
+		flog_warn(EC_LIB_SOCKET,
+			  "IRDP: recvmsg: truncated control message");
+		return ret;
+	}
+
+	*ifindex = getsockopt_ifindex(AF_INET, &msg);
+
+	return ret;
+}
+
+/*
+ * Read event handler for the raw IRDP socket.
+ *
+ * Re-arms itself first, then reads one packet, maps it to an interface via
+ * the received ifindex and hands it to parse_irdp_packet().  Packets are
+ * dropped when the read fails, when the interface (or its zebra/IRDP state)
+ * cannot be found, or when IRDP is not active on the interface.
+ */
+void irdp_read_raw(struct event *r)
+{
+	struct interface *ifp;
+	struct zebra_if *zi;
+	struct irdp_interface *irdp;
+	char buf[IRDP_RX_BUF];
+	int ret, ifindex = 0;
+
+	int irdp_sock = EVENT_FD(r);
+	event_add_read(zrouter.master, irdp_read_raw, NULL, irdp_sock,
+		       &t_irdp_raw);
+
+	ret = irdp_recvmsg(irdp_sock, (uint8_t *)buf, IRDP_RX_BUF, &ifindex);
+
+	if (ret < 0) {
+		flog_warn(EC_LIB_SOCKET, "IRDP: RX Error length = %d", ret);
+		/* Nothing was received: never pass a negative length on. */
+		return;
+	}
+
+	ifp = if_lookup_by_index(ifindex, VRF_DEFAULT);
+	if (!ifp)
+		return;
+
+	zi = ifp->info;
+	if (!zi)
+		return;
+
+	irdp = zi->irdp;
+	if (!irdp)
+		return;
+
+	if (!(irdp->flags & IF_ACTIVE)) {
+
+		if (irdp->flags & IF_DEBUG_MISC)
+			zlog_debug("IRDP: RX ICMP for disabled interface %s",
+				   ifp->name);
+		return;
+	}
+
+	if (irdp->flags & IF_DEBUG_PACKET) {
+		int i;
+		zlog_debug("IRDP: RX (idx %d) ", ifindex);
+		for (i = 0; i < ret; i++)
+			zlog_debug("IRDP: RX %x ", buf[i] & 0xFF);
+	}
+
+	parse_irdp_packet(buf, ret, ifp);
+}
+
+/*
+ * Build an IP header in front of the ICMP payload held in stream 's' and
+ * send it on the raw IRDP socket out of interface 'ifp'.
+ *
+ * dst is stored into the header and the destination sockaddr unchanged;
+ * p, when non-NULL, supplies the source address; ttl is the IP TTL.
+ * Nothing is sent when the interface is not IFF_UP.  Socket option and
+ * sendmsg() failures are logged, not returned.
+ */
+void send_packet(struct interface *ifp, struct stream *s, uint32_t dst,
+		 struct prefix *p, uint32_t ttl)
+{
+	static struct sockaddr_in sockdst = {AF_INET};
+	struct ip *ip;
+	struct icmphdr *icmp;
+	struct msghdr *msg;
+	struct cmsghdr *cmsg;
+	struct iovec iovector;
+	char msgbuf[256];
+	char buf[256];
+	struct in_pktinfo *pktinfo;
+	unsigned long src;
+	uint8_t on;
+
+	if (!(ifp->flags & IFF_UP))
+		return;
+
+	if (p)
+		src = ntohl(p->u.prefix4.s_addr);
+	else
+		src = 0; /* Is filled in */
+
+	ip = (struct ip *)buf;
+	ip->ip_hl = sizeof(struct ip) >> 2;
+	ip->ip_v = IPVERSION;
+	ip->ip_tos = 0xC0;
+	ip->ip_off = 0L;
+	ip->ip_p = 1; /* IP_ICMP */
+	ip->ip_ttl = ttl;
+	ip->ip_src.s_addr = src;
+	ip->ip_dst.s_addr = dst;
+	icmp = (struct icmphdr *)(buf + sizeof(struct ip));
+
+	/* Merge IP header with icmp packet */
+	assert(stream_get_endp(s) < (sizeof(buf) - sizeof(struct ip)));
+	stream_get(icmp, s, stream_get_endp(s));
+
+	/* icmp->checksum is already calculated */
+	ip->ip_len = sizeof(struct ip) + stream_get_endp(s);
+
+	on = 1;
+	if (setsockopt(irdp_sock, IPPROTO_IP, IP_HDRINCL, (char *)&on,
+		       sizeof(on))
+	    < 0)
+		flog_err(EC_LIB_SOCKET,
+			 "IRDP: Cannot set IP_HDRINCLU %s(%d) on %s",
+			 safe_strerror(errno), errno, ifp->name);
+
+
+	if (dst == INADDR_BROADCAST) {
+		uint32_t bon = 1;
+
+		if (setsockopt(irdp_sock, SOL_SOCKET, SO_BROADCAST, &bon,
+			       sizeof(bon))
+		    < 0)
+			flog_err(EC_LIB_SOCKET,
+				 "IRDP: Cannot set SO_BROADCAST %s(%d) on %s",
+				 safe_strerror(errno), errno, ifp->name);
+	}
+
+	if (dst != INADDR_BROADCAST)
+		setsockopt_ipv4_multicast_loop(irdp_sock, 0);
+
+	memset(&sockdst, 0, sizeof(sockdst));
+	sockdst.sin_family = AF_INET;
+	sockdst.sin_addr.s_addr = dst;
+
+	/* msgbuf holds the msghdr followed directly by one IP_PKTINFO cmsg */
+	cmsg = (struct cmsghdr *)(msgbuf + sizeof(struct msghdr));
+	cmsg->cmsg_len = sizeof(struct cmsghdr) + sizeof(struct in_pktinfo);
+	cmsg->cmsg_level = SOL_IP;
+	cmsg->cmsg_type = IP_PKTINFO;
+	pktinfo = (struct in_pktinfo *)CMSG_DATA(cmsg);
+	pktinfo->ipi_ifindex = ifp->ifindex;
+	pktinfo->ipi_spec_dst.s_addr = src;
+	pktinfo->ipi_addr.s_addr = src;
+
+	iovector.iov_base = (void *)buf;
+	iovector.iov_len = ip->ip_len;
+	msg = (struct msghdr *)msgbuf;
+	msg->msg_name = &sockdst;
+	msg->msg_namelen = sizeof(sockdst);
+	msg->msg_iov = &iovector;
+	msg->msg_iovlen = 1;
+	msg->msg_control = cmsg;
+	msg->msg_controllen = cmsg->cmsg_len;
+
+	sockopt_iphdrincl_swab_htosys(ip);
+
+	if (sendmsg(irdp_sock, msg, 0) < 0)
+		flog_err(EC_LIB_SOCKET,
+			 "IRDP: sendmsg send failure %s(%d) on %s",
+			 safe_strerror(errno), errno, ifp->name);
+}
diff --git a/zebra/kernel_netlink.c b/zebra/kernel_netlink.c
new file mode 100644
index 0000000..7c934ed
--- /dev/null
+++ b/zebra/kernel_netlink.c
@@ -0,0 +1,1946 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
+/* Kernel communication using netlink interface.
+ * Copyright (C) 1999 Kunihiro Ishiguro
+ */
+
+/* NOTE(review): the header name after the next #include is missing in this
+ * copy - presumably <zebra.h>; confirm against upstream.
+ */
+#include
+
+#ifdef HAVE_NETLINK
+
+#include "linklist.h"
+#include "if.h"
+#include "log.h"
+#include "prefix.h"
+#include "connected.h"
+#include "table.h"
+#include "memory.h"
+#include "rib.h"
+#include "frrevent.h"
+#include "privs.h"
+#include "nexthop.h"
+#include "vrf.h"
+#include "mpls.h"
+#include "lib_errors.h"
+#include "hash.h"
+
+#include "zebra/zebra_router.h"
+#include "zebra/zebra_ns.h"
+#include "zebra/zebra_vrf.h"
+#include "zebra/rt.h"
+#include "zebra/debug.h"
+#include "zebra/kernel_netlink.h"
+#include "zebra/rt_netlink.h"
+#include "zebra/if_netlink.h"
+#include "zebra/rule_netlink.h"
+#include "zebra/tc_netlink.h"
+#include "zebra/netconf_netlink.h"
+#include "zebra/zebra_errors.h"
+
+#ifndef SO_RCVBUFFORCE
+#define SO_RCVBUFFORCE (33)
+#endif
+
+/* Hack for GNU libc version 2.
*/
+#ifndef MSG_TRUNC
+#define MSG_TRUNC      0x20
+#endif /* MSG_TRUNC */
+
+/* Address just past the (aligned) end of netlink message 'nmsg'. */
+#ifndef NLMSG_TAIL
+#define NLMSG_TAIL(nmsg)                                                       \
+	((struct rtattr *)(((uint8_t *)(nmsg))                                 \
+			   + NLMSG_ALIGN((nmsg)->nlmsg_len)))
+#endif
+
+/* Address just past the (aligned) end of route attribute 'rta'. */
+#ifndef RTA_TAIL
+#define RTA_TAIL(rta)                                                          \
+	((struct rtattr *)(((uint8_t *)(rta)) + RTA_ALIGN((rta)->rta_len)))
+#endif
+
+/* Fallback definitions for headers that lack these constants. */
+#ifndef RTNL_FAMILY_IP6MR
+#define RTNL_FAMILY_IP6MR 129
+#endif
+
+#ifndef RTPROT_MROUTED
+#define RTPROT_MROUTED 17
+#endif
+
+/* Default batch buffer size: 16 netlink packet buffers. */
+#define NL_DEFAULT_BATCH_BUFSIZE (16 * NL_PKT_BUF_SIZE)
+
+/*
+ * We limit the batch's size to a number smaller than the length of the
+ * underlying buffer since the last message that wouldn't fit the batch would go
+ * over the upper boundary and then it would have to be encoded again into a new
+ * buffer. If the difference between the limit and the length of the buffer is
+ * big enough (bigger than the biggest Netlink message) then this situation
+ * won't occur.
+ */
+#define NL_DEFAULT_BATCH_SEND_THRESHOLD (15 * NL_PKT_BUF_SIZE)
+
+/* Netlink message type -> name; consumed by nl_msg_type_to_str(). */
+static const struct message nlmsg_str[] = {{RTM_NEWROUTE, "RTM_NEWROUTE"},
+					   {RTM_DELROUTE, "RTM_DELROUTE"},
+					   {RTM_GETROUTE, "RTM_GETROUTE"},
+					   {RTM_NEWLINK, "RTM_NEWLINK"},
+					   {RTM_SETLINK, "RTM_SETLINK"},
+					   {RTM_DELLINK, "RTM_DELLINK"},
+					   {RTM_GETLINK, "RTM_GETLINK"},
+					   {RTM_NEWADDR, "RTM_NEWADDR"},
+					   {RTM_DELADDR, "RTM_DELADDR"},
+					   {RTM_GETADDR, "RTM_GETADDR"},
+					   {RTM_NEWNEIGH, "RTM_NEWNEIGH"},
+					   {RTM_DELNEIGH, "RTM_DELNEIGH"},
+					   {RTM_GETNEIGH, "RTM_GETNEIGH"},
+					   {RTM_NEWRULE, "RTM_NEWRULE"},
+					   {RTM_DELRULE, "RTM_DELRULE"},
+					   {RTM_GETRULE, "RTM_GETRULE"},
+					   {RTM_NEWNEXTHOP, "RTM_NEWNEXTHOP"},
+					   {RTM_DELNEXTHOP, "RTM_DELNEXTHOP"},
+					   {RTM_GETNEXTHOP, "RTM_GETNEXTHOP"},
+					   {RTM_NEWNETCONF, "RTM_NEWNETCONF"},
+					   {RTM_DELNETCONF, "RTM_DELNETCONF"},
+					   {RTM_NEWTUNNEL, "RTM_NEWTUNNEL"},
+					   {RTM_DELTUNNEL, "RTM_DELTUNNEL"},
+					   {RTM_GETTUNNEL, "RTM_GETTUNNEL"},
+					   {RTM_NEWQDISC, "RTM_NEWQDISC"},
+					   {RTM_DELQDISC, "RTM_DELQDISC"},
+					   {RTM_GETQDISC, "RTM_GETQDISC"},
+					   {RTM_NEWTCLASS, "RTM_NEWTCLASS"},
+					   {RTM_DELTCLASS, "RTM_DELTCLASS"},
+					   {RTM_GETTCLASS, "RTM_GETTCLASS"},
+					   {RTM_NEWTFILTER, "RTM_NEWTFILTER"},
+					   {RTM_DELTFILTER, "RTM_DELTFILTER"},
+					   {RTM_GETTFILTER, "RTM_GETTFILTER"},
+					   {RTM_NEWVLAN, "RTM_NEWVLAN"},
+					   {RTM_DELVLAN, "RTM_DELVLAN"},
+					   {RTM_GETVLAN, "RTM_GETVLAN"},
+					   {0}};
+
+/* Route protocol (origin) -> name; consumed by nl_rtproto_to_str(). */
+static const struct message rtproto_str[] = {
+	{RTPROT_REDIRECT, "redirect"},
+	{RTPROT_KERNEL, "kernel"},
+	{RTPROT_BOOT, "boot"},
+	{RTPROT_STATIC, "static"},
+	{RTPROT_GATED, "GateD"},
+	{RTPROT_RA, "router advertisement"},
+	{RTPROT_MRT, "MRT"},
+	{RTPROT_ZEBRA, "Zebra"},
+#ifdef RTPROT_BIRD
+	{RTPROT_BIRD, "BIRD"},
+#endif /* RTPROT_BIRD */
+	{RTPROT_MROUTED, "mroute"},
+	{RTPROT_BGP, "BGP"},
+	{RTPROT_OSPF, "OSPF"},
+	{RTPROT_ISIS, "IS-IS"},
+	{RTPROT_RIP, "RIP"},
+	{RTPROT_RIPNG, "RIPNG"},
+	{RTPROT_ZSTATIC, "static"},
+	{0}};
+
+/* Address family -> name; consumed by nl_family_to_str(). */
+static const struct message family_str[] = {{AF_INET, "ipv4"},
+					    {AF_INET6, "ipv6"},
+					    {AF_BRIDGE, "bridge"},
+					    {RTNL_FAMILY_IPMR, "ipv4MR"},
+					    {RTNL_FAMILY_IP6MR, "ipv6MR"},
+					    {0}};
+
+/* Route type -> name; consumed by nl_rttype_to_str(). */
+static const struct message rttype_str[] = {{RTN_UNSPEC, "none"},
+					    {RTN_UNICAST, "unicast"},
+					    {RTN_LOCAL, "local"},
+					    {RTN_BROADCAST, "broadcast"},
+					    {RTN_ANYCAST, "anycast"},
+					    {RTN_MULTICAST, "multicast"},
+					    {RTN_BLACKHOLE, "blackhole"},
+					    {RTN_UNREACHABLE, "unreachable"},
+					    {RTN_PROHIBIT, "prohibited"},
+					    {RTN_THROW, "throw"},
+					    {RTN_NAT, "nat"},
+					    {RTN_XRESOLVE, "resolver"},
+					    {0}};
+
+extern struct event_loop *master;
+
+extern struct zebra_privs_t zserv_privs;
+
+DEFINE_MTYPE_STATIC(ZEBRA, NL_BUF, "Zebra Netlink buffers");
+
+/* Hashtable and mutex to allow lookup of nlsock structs by socket/fd value.
+ * We have both the main and dplane pthreads using these structs, so we have
+ * to protect the hash with a lock.
+ */
+static struct hash *nlsock_hash;
+pthread_mutex_t nlsock_mutex;
+
+/* Lock and unlock wrappers for nlsock hash */
+#define NLSOCK_LOCK() pthread_mutex_lock(&nlsock_mutex)
+#define NLSOCK_UNLOCK() pthread_mutex_unlock(&nlsock_mutex)
+
+size_t nl_batch_tx_bufsize;
+char *nl_batch_tx_buf;
+
+/* Configurable batch limits; read and written with relaxed atomics below. */
+_Atomic uint32_t nl_batch_bufsize = NL_DEFAULT_BATCH_BUFSIZE;
+_Atomic uint32_t nl_batch_send_threshold = NL_DEFAULT_BATCH_SEND_THRESHOLD;
+
+/* State of one batch of netlink messages being assembled for sending. */
+struct nl_batch {
+	void *buf;
+	size_t bufsiz;
+	size_t limit;
+
+	void *buf_head;
+	size_t curlen;
+	size_t msgcnt;
+
+	const struct zebra_dplane_info *zns;
+
+	struct dplane_ctx_list_head ctx_list;
+
+	/*
+	 * Pointer to the queue of completed contexts outbound back
+	 * towards the dataplane module.
+	 */
+	struct dplane_ctx_list_head *ctx_out_q;
+};
+
+/*
+ * Write the netlink-related configuration lines to 'vty': the batch buffer
+ * settings when either differs from its default, and the protodown reason
+ * bit when it has been set.  Always returns 0.
+ */
+int netlink_config_write_helper(struct vty *vty)
+{
+	uint32_t size =
+		atomic_load_explicit(&nl_batch_bufsize, memory_order_relaxed);
+	uint32_t threshold = atomic_load_explicit(&nl_batch_send_threshold,
+						  memory_order_relaxed);
+
+	if (size != NL_DEFAULT_BATCH_BUFSIZE
+	    || threshold != NL_DEFAULT_BATCH_SEND_THRESHOLD)
+		vty_out(vty, "zebra kernel netlink batch-tx-buf %u %u\n", size,
+			threshold);
+
+	if (if_netlink_frr_protodown_r_bit_is_set())
+		vty_out(vty, "zebra protodown reason-bit %u\n",
+			if_netlink_get_frr_protodown_r_bit());
+
+	return 0;
+}
+
+/*
+ * Store new batch buffer size and send threshold.  When 'set' is false the
+ * passed values are ignored and both are reset to their defaults.
+ */
+void netlink_set_batch_buffer_size(uint32_t size, uint32_t threshold, bool set)
+{
+	if (!set) {
+		size = NL_DEFAULT_BATCH_BUFSIZE;
+		threshold = NL_DEFAULT_BATCH_SEND_THRESHOLD;
+	}
+
+	atomic_store_explicit(&nl_batch_bufsize, size, memory_order_relaxed);
+	atomic_store_explicit(&nl_batch_send_threshold, threshold,
+			      memory_order_relaxed);
+}
+
+/*
+ * Filter callback that handles nothing: it logs the message type at debug
+ * level and returns 0.  'startup' is unused.
+ */
+int netlink_talk_filter(struct nlmsghdr *h, ns_id_t ns_id, int startup)
+{
+	/*
+	 * This is an error condition that must be handled during
+	 * development.
+	 *
+	 * The netlink_talk_filter function is used for communication
+	 * down the netlink_cmd pipe and we are expecting
+	 * an ack being received.  So if we get here
+	 * then we did not receive the ack and instead
+	 * received some other message in an unexpected
+	 * way.
+	 */
+	zlog_debug("%s: ignoring message type 0x%04x(%s) NS %u", __func__,
+		   h->nlmsg_type, nl_msg_type_to_str(h->nlmsg_type), ns_id);
+	return 0;
+}
+
+/*
+ * Raise the receive buffer size of nl's socket.
+ *
+ * Returns 0 on success, -1 when any get/setsockopt call fails.
+ *
+ * NOTE(review): the size that is applied is 'rcvbufsize', which is not
+ * declared in this function (presumably a global - verify); the 'newsize'
+ * argument is only overwritten by the final getsockopt().  'oldsize' is
+ * read but not used afterwards.
+ */
+static int netlink_recvbuf(struct nlsock *nl, uint32_t newsize)
+{
+	uint32_t oldsize;
+	socklen_t newlen = sizeof(newsize);
+	socklen_t oldlen = sizeof(oldsize);
+	int ret;
+
+	ret = getsockopt(nl->sock, SOL_SOCKET, SO_RCVBUF, &oldsize, &oldlen);
+	if (ret < 0) {
+		flog_err_sys(EC_LIB_SOCKET,
+			     "Can't get %s receive buffer size: %s", nl->name,
+			     safe_strerror(errno));
+		return -1;
+	}
+
+	/* Try force option (linux >= 2.6.14) and fall back to normal set */
+	frr_with_privs(&zserv_privs) {
+		ret = setsockopt(nl->sock, SOL_SOCKET, SO_RCVBUFFORCE,
+				 &rcvbufsize, sizeof(rcvbufsize));
+	}
+	if (ret < 0)
+		ret = setsockopt(nl->sock, SOL_SOCKET, SO_RCVBUF, &rcvbufsize,
+				 sizeof(rcvbufsize));
+	if (ret < 0) {
+		flog_err_sys(EC_LIB_SOCKET,
+			     "Can't set %s receive buffer size: %s", nl->name,
+			     safe_strerror(errno));
+		return -1;
+	}
+
+	/* Read back the size now in effect on the socket */
+	ret = getsockopt(nl->sock, SOL_SOCKET, SO_RCVBUF, &newsize, &newlen);
+	if (ret < 0) {
+		flog_err_sys(EC_LIB_SOCKET,
+			     "Can't get %s receive buffer size: %s", nl->name,
+			     safe_strerror(errno));
+		return -1;
+	}
+	return 0;
+}
+
+/* Name of an extended multicast group for log messages; only
+ * RTNLGRP_TUNNEL is known, anything else yields "UNKNOWN".
+ */
+static const char *group2str(uint32_t group)
+{
+	switch (group) {
+	case RTNLGRP_TUNNEL:
+		return "RTNLGRP_TUNNEL";
+	default:
+		return "UNKNOWN";
+	}
+}
+
+/* Make socket for Linux netlink interface.
*/
+/*
+ * Opens an AF_NETLINK/NETLINK_ROUTE socket in namespace ns_id, tries to join
+ * each of the ext_group_size groups in ext_groups[] (failures are only
+ * logged), binds it with the 'groups' mask and stores the socket, the bound
+ * address and a freshly allocated receive buffer in 'nl'.
+ *
+ * Returns -1 when the socket cannot be opened or bound, or when its name
+ * cannot be read back; otherwise the (zero) getsockname() result.
+ */
+static int netlink_socket(struct nlsock *nl, unsigned long groups,
+			  uint32_t ext_groups[], uint8_t ext_group_size,
+			  ns_id_t ns_id)
+{
+	int ret;
+	struct sockaddr_nl snl;
+	int sock;
+	/* getsockname() takes a socklen_t *, so use the real type, not int */
+	socklen_t namelen;
+
+	frr_with_privs(&zserv_privs) {
+		sock = ns_socket(AF_NETLINK, SOCK_RAW, NETLINK_ROUTE, ns_id);
+		if (sock < 0) {
+			zlog_err("Can't open %s socket: %s", nl->name,
+				 safe_strerror(errno));
+			return -1;
+		}
+
+		memset(&snl, 0, sizeof(snl));
+		snl.nl_family = AF_NETLINK;
+		snl.nl_groups = groups;
+
+		if (ext_group_size) {
+			uint8_t i;
+
+			for (i = 0; i < ext_group_size; i++) {
+#if defined SOL_NETLINK
+				ret = setsockopt(sock, SOL_NETLINK,
+						 NETLINK_ADD_MEMBERSHIP,
+						 &ext_groups[i],
+						 sizeof(ext_groups[i]));
+				if (ret < 0) {
+					zlog_notice(
+						"can't setsockopt NETLINK_ADD_MEMBERSHIP for group %s(%u), this linux kernel does not support it: %s(%d)",
+						group2str(ext_groups[i]),
+						ext_groups[i],
+						safe_strerror(errno), errno);
+				}
+#else
+				zlog_notice(
+					"Unable to use NETLINK_ADD_MEMBERSHIP via SOL_NETLINK for %s(%u) since the linux kernel does not support the socket option",
+					group2str(ext_groups[i]),
+					ext_groups[i]);
+#endif
+			}
+		}
+
+		/* Bind the socket to the netlink structure for anything. */
+		ret = bind(sock, (struct sockaddr *)&snl, sizeof(snl));
+	}
+
+	if (ret < 0) {
+		zlog_err("Can't bind %s socket to group 0x%x: %s", nl->name,
+			 snl.nl_groups, safe_strerror(errno));
+		close(sock);
+		return -1;
+	}
+
+	/* multiple netlink sockets will have different nl_pid */
+	namelen = sizeof(snl);
+	ret = getsockname(sock, (struct sockaddr *)&snl, &namelen);
+	if (ret < 0 || namelen != sizeof(snl)) {
+		flog_err_sys(EC_LIB_SOCKET, "Can't get %s socket name: %s",
+			     nl->name, safe_strerror(errno));
+		close(sock);
+		return -1;
+	}
+
+	nl->snl = snl;
+	nl->sock = sock;
+	nl->buflen = NL_RCV_PKT_BUF_SIZE;
+	nl->buf = XMALLOC(MTYPE_NL_BUF, nl->buflen);
+
+	return ret;
+}
+
+/*
+ * Dispatch an incoming netlink message; used by the zebra main pthread's
+ * netlink event reader.
+ *
+ * Returns the handler's result for the types handled here and 0 for types
+ * that are ignored, handled by the dplane pthread, or unknown (the latter
+ * are logged as an error).
+ */
+static int netlink_information_fetch(struct nlmsghdr *h, ns_id_t ns_id,
+				     int startup)
+{
+	/*
+	 * When we handle new message types here
+	 * because we are starting to install them
+	 * then lets check the netlink_install_filter
+	 * and see if we should add the corresponding
+	 * allow through entry there.
+	 * Probably not needed to do but please
+	 * think about it.
+	 */
+	switch (h->nlmsg_type) {
+	case RTM_NEWROUTE:
+		return netlink_route_change(h, ns_id, startup);
+	case RTM_DELROUTE:
+		return netlink_route_change(h, ns_id, startup);
+	case RTM_NEWLINK:
+		return netlink_link_change(h, ns_id, startup);
+	case RTM_DELLINK:
+		return 0;
+	case RTM_NEWNEIGH:
+	case RTM_DELNEIGH:
+	case RTM_GETNEIGH:
+		return netlink_neigh_change(h, ns_id);
+	case RTM_NEWRULE:
+		return netlink_rule_change(h, ns_id, startup);
+	case RTM_DELRULE:
+		return netlink_rule_change(h, ns_id, startup);
+	case RTM_NEWNEXTHOP:
+		return netlink_nexthop_change(h, ns_id, startup);
+	case RTM_DELNEXTHOP:
+		return netlink_nexthop_change(h, ns_id, startup);
+	case RTM_NEWQDISC:
+	case RTM_DELQDISC:
+		return netlink_qdisc_change(h, ns_id, startup);
+	case RTM_NEWTCLASS:
+	case RTM_DELTCLASS:
+		return netlink_tclass_change(h, ns_id, startup);
+	case RTM_NEWTFILTER:
+	case RTM_DELTFILTER:
+		return netlink_tfilter_change(h, ns_id, startup);
+	case RTM_NEWVLAN:
+		return netlink_vlan_change(h, ns_id, startup);
+	case RTM_DELVLAN:
+		return netlink_vlan_change(h, ns_id, startup);
+
+	/* Messages we may receive, but ignore */
+	case RTM_NEWCHAIN:
+	case RTM_DELCHAIN:
+	case RTM_GETCHAIN:
+		return 0;
+
+	/* Messages handled in the dplane thread */
+	case RTM_NEWADDR:
+	case RTM_DELADDR:
+	case RTM_NEWNETCONF:
+	case RTM_DELNETCONF:
+	case RTM_NEWTUNNEL:
+	case RTM_DELTUNNEL:
+	case RTM_GETTUNNEL:
+		return 0;
+	default:
+		/*
+		 * If we have received this message then
+		 * we have made a mistake during development
+		 * and we need to write some code to handle
+		 * this message type or not ask for
+		 * it to be sent up to us
+		 */
+		flog_err(EC_ZEBRA_UNKNOWN_NLMSG,
+			 "Unknown netlink nlmsg_type %s(%d) vrf %u",
+			 nl_msg_type_to_str(h->nlmsg_type), h->nlmsg_type,
+			 ns_id);
+		break;
+	}
+	return 0;
+}
+
+/*
+ * Dispatch an incoming netlink message; used by the dataplane pthread's
+ * netlink event reader code.
+ */
+/* Returns the handler's result for address, netconf and link messages and
+ * 0 for every other message type.
+ */
+static int dplane_netlink_information_fetch(struct nlmsghdr *h, ns_id_t ns_id,
+					    int startup)
+{
+	/*
+	 * Dispatch the incoming messages that the dplane pthread handles
+	 */
+	switch (h->nlmsg_type) {
+	case RTM_NEWADDR:
+	case RTM_DELADDR:
+		return netlink_interface_addr_dplane(h, ns_id, startup);
+
+	case RTM_NEWNETCONF:
+	case RTM_DELNETCONF:
+		return netlink_netconf_change(h, ns_id, startup);
+
+	/* TODO -- other messages for the dplane socket and pthread */
+
+	case RTM_NEWLINK:
+	case RTM_DELLINK:
+		return netlink_link_change(h, ns_id, startup);
+
+	default:
+		break;
+	}
+
+	return 0;
+}
+
+/*
+ * Read event handler for a namespace's listener netlink socket: parses up
+ * to 5 receives' worth of messages with netlink_information_fetch(), then
+ * re-arms the read event on the same socket.
+ */
+static void kernel_read(struct event *thread)
+{
+	struct zebra_ns *zns = (struct zebra_ns *)EVENT_ARG(thread);
+	struct zebra_dplane_info dp_info;
+
+	/* Capture key info from ns struct */
+	zebra_dplane_info_from_zns(&dp_info, zns, false);
+
+	netlink_parse_info(netlink_information_fetch, &zns->netlink, &dp_info,
+			   5, false);
+
+	event_add_read(zrouter.master, kernel_read, zns, zns->netlink.sock,
+		       &zns->t_netlink);
+}
+
+/*
+ * Called by the dplane pthread to read incoming OS messages and dispatch them.
+ *
+ * The nlsock is looked up from info->sock; the result of the parse is not
+ * propagated - this always returns 0.
+ */
+int kernel_dplane_read(struct zebra_dplane_info *info)
+{
+	struct nlsock *nl = kernel_netlink_nlsock_lookup(info->sock);
+
+	netlink_parse_info(dplane_netlink_information_fetch, nl, info, 5,
+			   false);
+
+	return 0;
+}
+
+/*
+ * Filter out messages from self that occur on listener socket,
+ * caused by our actions on the command socket(s)
+ *
+ * When we add new Netlink message types we probably
+ * do not need to add them here as that we are filtering
+ * on the routes we actually care to receive( which is rarer
+ * then the normal course of operations).  We are intentionally
+ * allowing some messages from ourselves through
+ * ( I'm looking at you Interface based netlink messages )
+ * so that we only have to write one way to handle incoming
+ * address add/delete and xxxNETCONF changes.
+ */
+/* Attaches the BPF program below to 'sock'; a failure to attach is logged
+ * and otherwise ignored.
+ */
+static void netlink_install_filter(int sock, uint32_t pid, uint32_t dplane_pid)
+{
+	/*
+	 * BPF_JUMP instructions and where you jump to are based upon
+	 * 0 as being the next statement.  So count from 0.  Writing
+	 * this down because every time I look at this I have to
+	 * re-remember it.
+	 */
+	struct sock_filter filter[] = {
+		/*
+		 * Logic:
+		 *  if (nlmsg_pid == pid ||
+		 *      nlmsg_pid == dplane_pid) {
+		 *      if (the incoming nlmsg_type ==
+		 *          RTM_NEWADDR || RTM_DELADDR || RTM_NEWNETCONF ||
+		 *          RTM_DELNETCONF)
+		 *          keep this message
+		 *      else
+		 *          skip this message
+		 *  } else
+		 *      keep this netlink message
+		 */
+		/*
+		 * 0: Load the nlmsg_pid into the BPF register
+		 */
+		BPF_STMT(BPF_LD | BPF_ABS | BPF_W,
+			 offsetof(struct nlmsghdr, nlmsg_pid)),
+		/*
+		 * 1: Compare to pid
+		 */
+		BPF_JUMP(BPF_JMP | BPF_JEQ | BPF_K, htonl(pid), 1, 0),
+		/*
+		 * 2: Compare to dplane pid
+		 */
+		BPF_JUMP(BPF_JMP | BPF_JEQ | BPF_K, htonl(dplane_pid), 0, 6),
+		/*
+		 * 3: Load the nlmsg_type into BPF register
+		 */
+		BPF_STMT(BPF_LD | BPF_ABS | BPF_H,
+			 offsetof(struct nlmsghdr, nlmsg_type)),
+		/*
+		 * 4: Compare to RTM_NEWADDR
+		 */
+		BPF_JUMP(BPF_JMP | BPF_JEQ | BPF_K, htons(RTM_NEWADDR), 4, 0),
+		/*
+		 * 5: Compare to RTM_DELADDR
+		 */
+		BPF_JUMP(BPF_JMP | BPF_JEQ | BPF_K, htons(RTM_DELADDR), 3, 0),
+		/*
+		 * 6: Compare to RTM_NEWNETCONF
+		 */
+		BPF_JUMP(BPF_JMP | BPF_JEQ | BPF_K, htons(RTM_NEWNETCONF), 2,
+			 0),
+		/*
+		 * 7: Compare to RTM_DELNETCONF
+		 */
+		BPF_JUMP(BPF_JMP | BPF_JEQ | BPF_K, htons(RTM_DELNETCONF), 1,
+			 0),
+		/*
+		 * 8: This is the end state of we want to skip the
+		 *    message
+		 */
+		BPF_STMT(BPF_RET | BPF_K, 0),
+		/* 9: This is the end state of we want to keep
+		 *     the message
+		 */
+		BPF_STMT(BPF_RET | BPF_K, 0xffff),
+	};
+
+	struct sock_fprog prog = {
+		.len = array_size(filter), .filter = filter,
+	};
+
+	if (setsockopt(sock, SOL_SOCKET, SO_ATTACH_FILTER, &prog, sizeof(prog))
+	    < 0)
+		flog_err_sys(EC_LIB_SOCKET, "Can't install socket filter: %s",
+			     safe_strerror(errno));
+}
+
+/*
+ * Index the attributes found in the 'len' bytes at 'rta' into tb[0..max].
+ * The bits in 'flags' are masked out of each attribute type before it is
+ * used as index; for duplicate types the FIRST occurrence is kept.
+ */
+void netlink_parse_rtattr_flags(struct rtattr **tb, int max, struct rtattr *rta,
+				int len, unsigned short flags)
+{
+	unsigned short type;
+
+	memset(tb, 0, sizeof(struct rtattr *) * (max + 1));
+	while (RTA_OK(rta, len)) {
+		type = rta->rta_type & ~flags;
+		if ((type <= max) && (!tb[type]))
+			tb[type] = rta;
+		rta = RTA_NEXT(rta, len);
+	}
+}
+
+/*
+ * Index the attributes found in the 'len' bytes at 'rta' into tb[0..max];
+ * for duplicate types the LAST occurrence wins.  Types above 'max' are
+ * skipped.
+ */
+void netlink_parse_rtattr(struct rtattr **tb, int max, struct rtattr *rta,
+			  int len)
+{
+	memset(tb, 0, sizeof(struct rtattr *) * (max + 1));
+	while (RTA_OK(rta, len)) {
+		if (rta->rta_type <= max)
+			tb[rta->rta_type] = rta;
+		rta = RTA_NEXT(rta, len);
+	}
+}
+
+/**
+ * netlink_parse_rtattr_nested() - Parses a nested route attribute
+ * @tb:         Pointer to array for storing rtattr in.
+ * @max:        Max number to store.
+ * @rta:        Pointer to rtattr to look for nested items in.
+ */
+void netlink_parse_rtattr_nested(struct rtattr **tb, int max,
+				 struct rtattr *rta)
+{
+	netlink_parse_rtattr(tb, max, RTA_DATA(rta), RTA_PAYLOAD(rta));
+}
+
+/*
+ * Append 'len' raw bytes to message 'n', zero-padding up to netlink
+ * alignment.  Returns false (and logs) when the result would exceed
+ * 'maxlen'; the message is left unchanged in that case.
+ */
+bool nl_addraw_l(struct nlmsghdr *n, unsigned int maxlen, const void *data,
+		 unsigned int len)
+{
+	if (NLMSG_ALIGN(n->nlmsg_len) + NLMSG_ALIGN(len) > maxlen) {
+		zlog_err("ERROR message exceeded bound of %d", maxlen);
+		return false;
+	}
+
+	memcpy(NLMSG_TAIL(n), data, len);
+	memset((uint8_t *)NLMSG_TAIL(n) + len, 0, NLMSG_ALIGN(len) - len);
+	n->nlmsg_len = NLMSG_ALIGN(n->nlmsg_len) + NLMSG_ALIGN(len);
+
+	return true;
+}
+
+/*
+ * Append an attribute of 'type' with 'alen' bytes of payload to message 'n'.
+ * 'data' may be NULL only when alen is 0 (asserted).  Returns false without
+ * touching the message when it would grow past 'maxlen'.
+ */
+bool nl_attr_put(struct nlmsghdr *n, unsigned int maxlen, int type,
+		 const void *data, unsigned int alen)
+{
+	int len;
+	struct rtattr *rta;
+
+	len = RTA_LENGTH(alen);
+
+	if (NLMSG_ALIGN(n->nlmsg_len) + RTA_ALIGN(len) > maxlen)
+		return false;
+
+	rta = (struct rtattr *)(((char *)n) + NLMSG_ALIGN(n->nlmsg_len));
+	rta->rta_type = type;
+	rta->rta_len = len;
+
+	if (data)
+		memcpy(RTA_DATA(rta), data, alen);
+	else
+		assert(alen == 0);
+
+	n->nlmsg_len = NLMSG_ALIGN(n->nlmsg_len) + RTA_ALIGN(len);
+
+	return true;
+}
+
+/* Fixed-width convenience wrappers around nl_attr_put() */
+bool nl_attr_put8(struct nlmsghdr *n, unsigned int maxlen, int type,
+		  uint8_t data)
+{
+	return nl_attr_put(n, maxlen, type, &data, sizeof(uint8_t));
+}
+
+bool nl_attr_put16(struct nlmsghdr *n, unsigned int maxlen, int type,
+		   uint16_t data)
+{
+	return nl_attr_put(n, maxlen, type, &data, sizeof(uint16_t));
+}
+
+bool nl_attr_put32(struct nlmsghdr *n, unsigned int maxlen, int type,
+		   uint32_t data)
+{
+	return nl_attr_put(n, maxlen, type, &data, sizeof(uint32_t));
+}
+
+bool nl_attr_put64(struct nlmsghdr *n, unsigned int maxlen, int type,
+		   uint64_t data)
+{
+	return nl_attr_put(n, maxlen, type, &data, sizeof(uint64_t));
+}
+
+/*
+ * Start a nested attribute: appends an empty attribute of 'type' with
+ * NLA_F_NESTED set and returns it, or NULL when it does not fit.  The
+ * length is fixed up later by nl_attr_nest_end().
+ */
+struct rtattr *nl_attr_nest(struct nlmsghdr *n, unsigned int maxlen, int type)
+{
+	struct rtattr *nest = NLMSG_TAIL(n);
+
+	if (!nl_attr_put(n, maxlen, type, NULL, 0))
+		return NULL;
+
+	nest->rta_type |= NLA_F_NESTED;
+	return nest;
+}
+
+/* Close a nested attribute: its length becomes everything appended since
+ * 'nest'.  Returns the current message length.
+ */
+int nl_attr_nest_end(struct nlmsghdr *n, struct rtattr *nest)
+{
+	nest->rta_len = (uint8_t *)NLMSG_TAIL(n) - (uint8_t *)nest;
+	return n->nlmsg_len;
+}
+
+/*
+ * Append a zeroed struct rtnexthop to message 'n' and return it, or NULL
+ * when it does not fit in 'maxlen'.
+ * NOTE(review): the bound check uses RTNH_ALIGN while the length update
+ * uses RTA_ALIGN - presumably equivalent alignments; confirm.
+ */
+struct rtnexthop *nl_attr_rtnh(struct nlmsghdr *n, unsigned int maxlen)
+{
+	struct rtnexthop *rtnh = (struct rtnexthop *)NLMSG_TAIL(n);
+
+	if (NLMSG_ALIGN(n->nlmsg_len) + RTNH_ALIGN(sizeof(struct rtnexthop))
+	    > maxlen)
+		return NULL;
+
+	memset(rtnh, 0, sizeof(struct rtnexthop));
+	n->nlmsg_len =
+		NLMSG_ALIGN(n->nlmsg_len) + RTA_ALIGN(sizeof(struct rtnexthop));
+
+	return rtnh;
+}
+
+/* Close a nexthop entry: its length becomes everything appended since
+ * 'rtnh'.
+ */
+void nl_attr_rtnh_end(struct nlmsghdr *n, struct rtnexthop *rtnh)
+{
+	rtnh->rtnh_len = (uint8_t *)NLMSG_TAIL(n) - (uint8_t *)rtnh;
+}
+
+/* Name lookups over the tables above; "" is passed as the fallback string */
+const char *nl_msg_type_to_str(uint16_t msg_type)
+{
+	return lookup_msg(nlmsg_str, msg_type, "");
+}
+
+const char *nl_rtproto_to_str(uint8_t rtproto)
+{
+	return lookup_msg(rtproto_str, rtproto, "");
+}
+
+const char *nl_family_to_str(uint8_t family)
+{
+	return lookup_msg(family_str, family, "");
+}
+
+const char *nl_rttype_to_str(uint8_t rttype)
+{
+	return lookup_msg(rttype_str, rttype, "");
+}
+
+/* struct nlattr counterparts of the RTA_* walking macros */
+#define NLA_OK(nla, len)                                                       \
+	((len) >= (int)sizeof(struct nlattr)                                   \
+	 && (nla)->nla_len >= sizeof(struct nlattr)                            \
+	 && (nla)->nla_len <= (len))
+#define NLA_NEXT(nla, attrlen)                                                 \
+	((attrlen) -= NLA_ALIGN((nla)->nla_len),                               \
+	 (struct nlattr *)(((char *)(nla)) + NLA_ALIGN((nla)->nla_len)))
+#define NLA_LENGTH(len) (NLA_ALIGN(sizeof(struct nlattr)) + (len))
+#define NLA_DATA(nla) ((struct nlattr *)(((char *)(nla)) + NLA_LENGTH(0)))
+
+/* First attribute following the nlmsgerr and 'inner_len' bytes of the
+ * echoed request.
+ */
+#define ERR_NLA(err, inner_len)                                                \
+	((struct nlattr *)(((char *)(err))                                     \
+			   + NLMSG_ALIGN(sizeof(struct nlmsgerr))              \
+			   + NLMSG_ALIGN((inner_len))))
+
+/* Index the nlattrs in the 'len' bytes at 'nla' into tb[0..max].  Unlike
+ * the rtattr parsers above, tb is NOT cleared here - the caller must do it.
+ */
+static void netlink_parse_nlattr(struct nlattr **tb, int max,
+				 struct nlattr *nla, int len)
+{
+	while (NLA_OK(nla, len)) {
+		if (nla->nla_type <= max)
+			tb[nla->nla_type] = nla;
+		nla = NLA_NEXT(nla, len);
+	}
+}
+
+/*
+ * Parse the extended-ack attributes that follow the nlmsgerr in 'h' and log
+ * the message text, if any: as an error when the nlmsgerr carries a
+ * non-zero error code, as a warning otherwise.
+ */
+static void netlink_parse_extended_ack(struct nlmsghdr *h)
+{
+	struct nlattr *tb[NLMSGERR_ATTR_MAX + 1] = {};
+	const struct nlmsgerr *err = (const struct nlmsgerr *)NLMSG_DATA(h);
+	const struct nlmsghdr *err_nlh = NULL;
+	/* Length not including nlmsghdr */
+	uint32_t len = 0;
+	/* Inner error netlink message length */
+	uint32_t inner_len = 0;
+	const char *msg = NULL;
+	uint32_t off = 0;
+
+	/* Without NLM_F_CAPPED the payload of the failed request is echoed */
+	if (!(h->nlmsg_flags & NLM_F_CAPPED))
+		inner_len = (uint32_t)NLMSG_PAYLOAD(&err->msg, 0);
+
+	len = (uint32_t)(NLMSG_PAYLOAD(h, sizeof(struct nlmsgerr)) - inner_len);
+
+	netlink_parse_nlattr(tb, NLMSGERR_ATTR_MAX, ERR_NLA(err, inner_len),
+			     len);
+
+	if (tb[NLMSGERR_ATTR_MSG])
+		msg = (const char *)NLA_DATA(tb[NLMSGERR_ATTR_MSG]);
+
+	if (tb[NLMSGERR_ATTR_OFFS]) {
+		off = *(uint32_t *)NLA_DATA(tb[NLMSGERR_ATTR_OFFS]);
+
+		if (off > h->nlmsg_len) {
+			zlog_err("Invalid offset for NLMSGERR_ATTR_OFFS");
+		} else if (!(h->nlmsg_flags & NLM_F_CAPPED)) {
+			/*
+			 * Header of failed message
+			 * we are not doing anything currently with it
+			 * but noticing it for later.
+			 */
+			err_nlh = &err->msg;
+			zlog_debug("%s: Received %s extended Ack", __func__,
+				   nl_msg_type_to_str(err_nlh->nlmsg_type));
+		}
+	}
+
+	if (msg && *msg != '\0') {
+		bool is_err = !!err->error;
+
+		if (is_err)
+			zlog_err("Extended Error: %s", msg);
+		else
+			flog_warn(EC_ZEBRA_NETLINK_EXTENDED_WARNING,
+				  "Extended Warning: %s", msg);
+	}
+}
+
+/*
+ * netlink_send_msg - send a netlink message of a certain size.
+ *
+ * Returns -1 on error. Otherwise, it returns the number of bytes sent.
+ */
+static ssize_t netlink_send_msg(const struct nlsock *nl, void *buf,
+				size_t buflen)
+{
+	struct sockaddr_nl snl = {};
+	struct iovec iov = {};
+	struct msghdr msg = {};
+	ssize_t status;
+	int save_errno = 0;
+
+	iov.iov_base = buf;
+	iov.iov_len = buflen;
+	msg.msg_name = &snl;
+	msg.msg_namelen = sizeof(snl);
+	msg.msg_iov = &iov;
+	msg.msg_iovlen = 1;
+
+	snl.nl_family = AF_NETLINK;
+
+	/* Send message to netlink interface. */
+	frr_with_privs(&zserv_privs) {
+		status = sendmsg(nl->sock, &msg, 0);
+		/* capture errno before anything else can overwrite it */
+		save_errno = errno;
+	}
+
+	if (IS_ZEBRA_DEBUG_KERNEL_MSGDUMP_SEND) {
+		zlog_debug("%s: >> netlink message dump [sent]", __func__);
+#ifdef NETLINK_DEBUG
+		nl_dump(buf, buflen);
+#else
+		zlog_hexdump(buf, buflen);
+#endif /* NETLINK_DEBUG */
+	}
+
+	if (status == -1) {
+		flog_err_sys(EC_LIB_SOCKET, "%s error: %s", __func__,
+			     safe_strerror(save_errno));
+		return -1;
+	}
+
+	return status;
+}
+
+/*
+ * netlink_recv_msg - receive a netlink message.
+ *
+ * Returns -1 on error, 0 if read would block or the number of bytes received.
+ */
+/*
+ * The data is received into nl->buf, which is grown first when the pending
+ * datagram is larger than nl->buflen.  'msg' must have msg_name/msg_namelen
+ * set up by the caller; its msg_iov is only valid during this call and is
+ * cleared before returning.  A recvmsg() failure other than EINTR/EAGAIN/
+ * EWOULDBLOCK terminates the process.
+ */
+static int netlink_recv_msg(struct nlsock *nl, struct msghdr *msg)
+{
+	struct iovec iov;
+	int status;
+
+	iov.iov_base = nl->buf;
+	iov.iov_len = nl->buflen;
+	msg->msg_iov = &iov;
+	msg->msg_iovlen = 1;
+
+	do {
+		int bytes;
+
+		/* Peek the real datagram size so the buffer can be grown */
+		bytes = recv(nl->sock, NULL, 0, MSG_PEEK | MSG_TRUNC);
+
+		if (bytes >= 0 && (size_t)bytes > nl->buflen) {
+			nl->buf = XREALLOC(MTYPE_NL_BUF, nl->buf, bytes);
+			nl->buflen = bytes;
+			iov.iov_base = nl->buf;
+			iov.iov_len = nl->buflen;
+		}
+
+		status = recvmsg(nl->sock, msg, 0);
+	} while (status == -1 && errno == EINTR);
+
+	/*
+	 * 'iov' lives in this stack frame: do not leave the caller's msghdr
+	 * pointing at it once we return.  Plain stores, errno is untouched.
+	 */
+	msg->msg_iov = NULL;
+	msg->msg_iovlen = 0;
+
+	if (status == -1) {
+		if (errno == EWOULDBLOCK || errno == EAGAIN)
+			return 0;
+		flog_err(EC_ZEBRA_RECVMSG_OVERRUN, "%s recvmsg overrun: %s",
+			 nl->name, safe_strerror(errno));
+		/*
+		 * In this case we are screwed. There is no good way to recover
+		 * zebra at this point.
+		 */
+		exit(-1);
+	}
+
+	if (status == 0) {
+		flog_err_sys(EC_LIB_SOCKET, "%s EOF", nl->name);
+		return -1;
+	}
+
+	if (msg->msg_namelen != sizeof(struct sockaddr_nl)) {
+		flog_err(EC_ZEBRA_NETLINK_LENGTH_ERROR,
+			 "%s sender address length error: length %d", nl->name,
+			 msg->msg_namelen);
+		return -1;
+	}
+
+	if (IS_ZEBRA_DEBUG_KERNEL_MSGDUMP_RECV) {
+		zlog_debug("%s: << netlink message dump [recv]", __func__);
+#ifdef NETLINK_DEBUG
+		nl_dump(nl->buf, status);
+#else
+		zlog_hexdump(nl->buf, status);
+#endif /* NETLINK_DEBUG */
+	}
+
+	return status;
+}
+
+/*
+ * netlink_parse_error - parse a netlink error message
+ *
+ * Returns 1 if this message is acknowledgement, 0 if this error should be
+ * ignored, -1 otherwise.
+ */
+/*
+ * nl      -> socket the message arrived on (used for logging)
+ * h       -> the NLMSG_ERROR message
+ * is_cmd  -> true when the socket is a command socket; enables the
+ *            "expected failure" exemptions below
+ * startup -> suppresses the error log for RTM_GETNEXTHOP / RTM_GETVLAN
+ */
+static int netlink_parse_error(const struct nlsock *nl, struct nlmsghdr *h,
+			       bool is_cmd, bool startup)
+{
+	struct nlmsgerr *err = (struct nlmsgerr *)NLMSG_DATA(h);
+	int errnum;
+	int msg_type;
+
+	if (h->nlmsg_len < NLMSG_LENGTH(sizeof(struct nlmsgerr))) {
+		flog_err(EC_ZEBRA_NETLINK_LENGTH_ERROR,
+			 "%s error: message truncated", nl->name);
+		return -1;
+	}
+
+	/* Only look inside the nlmsgerr once its length has been validated */
+	errnum = err->error;
+	msg_type = err->msg.nlmsg_type;
+
+	/*
+	 * Parse the extended information before we actually handle it. At this
+	 * point in time we do not do anything other than report the issue.
+	 */
+	if (h->nlmsg_flags & NLM_F_ACK_TLVS)
+		netlink_parse_extended_ack(h);
+
+	/* If the error field is zero, then this is an ACK. */
+	if (err->error == 0) {
+		if (IS_ZEBRA_DEBUG_KERNEL) {
+			zlog_debug("%s: %s ACK: type=%s(%u), seq=%u, pid=%u",
+				   __func__, nl->name,
+				   nl_msg_type_to_str(err->msg.nlmsg_type),
+				   err->msg.nlmsg_type, err->msg.nlmsg_seq,
+				   err->msg.nlmsg_pid);
+		}
+
+		return 1;
+	}
+
+	/*
+	 * Deal with errors that occur because of races in link handling
+	 * or types are not supported in kernel.
+	 */
+	if (is_cmd &&
+	    ((msg_type == RTM_DELROUTE &&
+	      (-errnum == ENODEV || -errnum == ESRCH)) ||
+	     (msg_type == RTM_NEWROUTE &&
+	      (-errnum == ENETDOWN || -errnum == EEXIST)) ||
+	     ((msg_type == RTM_NEWTUNNEL || msg_type == RTM_DELTUNNEL ||
+	       msg_type == RTM_GETTUNNEL) &&
+	      (-errnum == EOPNOTSUPP)))) {
+		if (IS_ZEBRA_DEBUG_KERNEL)
+			zlog_debug("%s: error: %s type=%s(%u), seq=%u, pid=%u",
+				   nl->name, safe_strerror(-errnum),
+				   nl_msg_type_to_str(msg_type), msg_type,
+				   err->msg.nlmsg_seq, err->msg.nlmsg_pid);
+		return 0;
+	}
+
+	/*
+	 * We see RTM_DELNEIGH when shutting down an interface with an IPv4
+	 * link-local.  The kernel should have already deleted the neighbor so
+	 * do not log these as an error.
+	 */
+	if (msg_type == RTM_DELNEIGH
+	    || (is_cmd && msg_type == RTM_NEWROUTE
+		&& (-errnum == ESRCH || -errnum == ENETUNREACH))) {
+		/*
+		 * This is known to happen in some situations, don't log as
+		 * error.
+		 */
+		if (IS_ZEBRA_DEBUG_KERNEL)
+			zlog_debug("%s error: %s, type=%s(%u), seq=%u, pid=%u",
+				   nl->name, safe_strerror(-errnum),
+				   nl_msg_type_to_str(msg_type), msg_type,
+				   err->msg.nlmsg_seq, err->msg.nlmsg_pid);
+	} else {
+		if ((msg_type != RTM_GETNEXTHOP && msg_type != RTM_GETVLAN) ||
+		    !startup)
+			flog_err(EC_ZEBRA_UNEXPECTED_MESSAGE,
+				 "%s error: %s, type=%s(%u), seq=%u, pid=%u",
+				 nl->name, safe_strerror(-errnum),
+				 nl_msg_type_to_str(msg_type), msg_type,
+				 err->msg.nlmsg_seq, err->msg.nlmsg_pid);
+	}
+
+	return -1;
+}
+
+/*
+ * netlink_parse_info
+ *
+ * Receive message from netlink interface and pass those information
+ *  to the given function.
+ *
+ * filter  -> Function to call to read the results
+ * nl      -> netlink socket information
+ * zns     -> The zebra namespace data
+ * count   -> How many we should read in, 0 means as much as possible
+ * startup -> Are we reading in under startup conditions? passed to
+ *            the filter.
+ *
+ * Returns 0 once 'count' receives were processed or a non-multipart ACK was
+ * seen, -1 on a receive error or leftover bytes, the netlink_parse_error()
+ * result for a non-ACK error message, and otherwise the last negative
+ * filter result (0 if none).
+ */
+int netlink_parse_info(int (*filter)(struct nlmsghdr *, ns_id_t, int),
+		       struct nlsock *nl, const struct zebra_dplane_info *zns,
+		       int count, bool startup)
+{
+	int status;
+	int ret = 0;
+	int error;
+	int read_in = 0;
+
+	while (1) {
+		struct sockaddr_nl snl;
+		struct msghdr msg = {.msg_name = (void *)&snl,
+				     .msg_namelen = sizeof(snl)};
+		struct nlmsghdr *h;
+
+		if (count && read_in >= count)
+			return 0;
+
+		status = netlink_recv_msg(nl, &msg);
+		if (status == -1)
+			return -1;
+		else if (status == 0)
+			break;
+
+		read_in++;
+		/* NLMSG_NEXT decrements 'status' by each message's length */
+		for (h = (struct nlmsghdr *)nl->buf;
+		     (status >= 0 && NLMSG_OK(h, (unsigned int)status));
+		     h = NLMSG_NEXT(h, status)) {
+			/* Finish of reading. */
+			if (h->nlmsg_type == NLMSG_DONE)
+				return ret;
+
+			/* Error handling. */
+			if (h->nlmsg_type == NLMSG_ERROR) {
+				int err = netlink_parse_error(
+					nl, h, zns->is_cmd, startup);
+
+				if (err == 1) {
+					if (!(h->nlmsg_flags & NLM_F_MULTI))
+						return 0;
+					continue;
+				} else
+					return err;
+			}
+
+			/*
+			 * What is the right thing to do?  The kernel
+			 * is telling us that the dump request was interrupted
+			 * and we more than likely are out of luck and have
+			 * missed data from the kernel.  At this point in time
+			 * lets just note that this is happening.
+			 */
+			if (h->nlmsg_flags & NLM_F_DUMP_INTR)
+				flog_err(
+					EC_ZEBRA_NETLINK_BAD_SEQUENCE,
+					"netlink recvmsg: The Dump request was interrupted");
+
+			/* OK we got netlink message. */
+			if (IS_ZEBRA_DEBUG_KERNEL)
+				zlog_debug(
+					"%s: %s type %s(%u), len=%d, seq=%u, pid=%u",
+					__func__, nl->name,
+					nl_msg_type_to_str(h->nlmsg_type),
+					h->nlmsg_type, h->nlmsg_len,
+					h->nlmsg_seq, h->nlmsg_pid);
+
+			/*
+			 * Ignore messages that maybe sent from
+			 * other actors besides the kernel
+			 */
+			if (snl.nl_pid != 0) {
+				zlog_debug("Ignoring message from pid %u",
+					   snl.nl_pid);
+				continue;
+			}
+
+			error = (*filter)(h, zns->ns_id, startup);
+			if (error < 0) {
+				zlog_debug("%s filter function error",
+					   nl->name);
+				ret = error;
+			}
+		}
+
+		/* After error care. */
+		if (msg.msg_flags & MSG_TRUNC) {
+			flog_err(EC_ZEBRA_NETLINK_LENGTH_ERROR,
+				 "%s error: message truncated", nl->name);
+			continue;
+		}
+		if (status) {
+			flog_err(EC_ZEBRA_NETLINK_LENGTH_ERROR,
+				 "%s error: data remnant size %d", nl->name,
+				 status);
+			return -1;
+		}
+	}
+	return ret;
+}
+
+/*
+ * netlink_talk_info
+ *
+ * sendmsg() to netlink socket then recvmsg().
+ * Calls netlink_parse_info to parse returned data
+ *
+ * filter   -> The filter to read final results from kernel
+ * nlmsghdr -> The data to send to the kernel
+ * dp_info -> The dataplane and netlink socket information
+ * startup  -> Are we reading in under startup conditions
+ *             This is passed through eventually to filter.
+ */ +static int netlink_talk_info(int (*filter)(struct nlmsghdr *, ns_id_t, + int startup), + struct nlmsghdr *n, + struct zebra_dplane_info *dp_info, bool startup) +{ + struct nlsock *nl; + + nl = kernel_netlink_nlsock_lookup(dp_info->sock); + n->nlmsg_seq = dp_info->seq; + n->nlmsg_pid = nl->snl.nl_pid; + + if (IS_ZEBRA_DEBUG_KERNEL) + zlog_debug( + "netlink_talk: %s type %s(%u), len=%d seq=%u flags 0x%x", + nl->name, nl_msg_type_to_str(n->nlmsg_type), + n->nlmsg_type, n->nlmsg_len, n->nlmsg_seq, + n->nlmsg_flags); + + if (netlink_send_msg(nl, n, n->nlmsg_len) == -1) + return -1; + + /* + * Get reply from netlink socket. + * The reply should either be an acknowlegement or an error. + */ + return netlink_parse_info(filter, nl, dp_info, 0, startup); +} + +/* + * Synchronous version of netlink_talk_info. Converts args to suit the + * common version, which is suitable for both sync and async use. + */ +int netlink_talk(int (*filter)(struct nlmsghdr *, ns_id_t, int startup), + struct nlmsghdr *n, struct nlsock *nl, struct zebra_ns *zns, + bool startup) +{ + struct zebra_dplane_info dp_info; + + /* Increment sequence number before capturing snapshot of ns socket + * info. + */ + nl->seq++; + + /* Capture info in intermediate info struct */ + zebra_dplane_info_from_zns(&dp_info, zns, (nl == &(zns->netlink_cmd))); + + return netlink_talk_info(filter, n, &dp_info, startup); +} + +/* Issue request message to kernel via netlink socket. GET messages + * are issued through this interface. + */ +int netlink_request(struct nlsock *nl, void *req) +{ + struct nlmsghdr *n = (struct nlmsghdr *)req; + + /* Check netlink socket. */ + if (nl->sock < 0) { + flog_err_sys(EC_LIB_SOCKET, "%s socket isn't active.", + nl->name); + return -1; + } + + /* Fill common fields for all requests. 
*/ + n->nlmsg_pid = nl->snl.nl_pid; + n->nlmsg_seq = ++nl->seq; + + if (netlink_send_msg(nl, req, n->nlmsg_len) == -1) + return -1; + + return 0; +} + +static int nl_batch_read_resp(struct nl_batch *bth, struct nlsock *nl) +{ + struct nlmsghdr *h; + struct sockaddr_nl snl; + struct msghdr msg = {}; + int status, seq; + struct zebra_dplane_ctx *ctx; + bool ignore_msg; + + msg.msg_name = (void *)&snl; + msg.msg_namelen = sizeof(snl); + + /* + * The responses are not batched, so we need to read and process one + * message at a time. + */ + while (true) { + status = netlink_recv_msg(nl, &msg); + /* + * status == -1 is a full on failure somewhere + * since we don't know where the problem happened + * we must mark all as failed + * + * Else we mark everything as worked + * + */ + if (status == -1 || status == 0) { + while ((ctx = dplane_ctx_dequeue(&(bth->ctx_list))) != + NULL) { + if (status == -1) + dplane_ctx_set_status( + ctx, + ZEBRA_DPLANE_REQUEST_FAILURE); + dplane_ctx_enqueue_tail(bth->ctx_out_q, ctx); + } + return status; + } + + h = (struct nlmsghdr *)nl->buf; + ignore_msg = false; + seq = h->nlmsg_seq; + /* + * Find the corresponding context object. Received responses are + * in the same order as requests we sent, so we can simply + * iterate over the context list and match responses with + * requests at same time. + */ + while (true) { + ctx = dplane_ctx_get_head(&(bth->ctx_list)); + if (ctx == NULL) { + /* + * This is a situation where we have gotten + * into a bad spot. We need to know that + * this happens( does it? ) + */ + zlog_err( + "%s:WARNING Received netlink Response for an error and no Contexts to associate with it", + __func__); + break; + } + + /* + * 'update' context objects take two consecutive + * sequence numbers. + */ + if (dplane_ctx_is_update(ctx) && + dplane_ctx_get_ns(ctx)->seq + 1 == seq) { + /* + * This is the situation where we get a response + * to a message that should be ignored. 
+ */ + ignore_msg = true; + break; + } + + ctx = dplane_ctx_dequeue(&(bth->ctx_list)); + dplane_ctx_enqueue_tail(bth->ctx_out_q, ctx); + + /* We have found corresponding context object. */ + if (dplane_ctx_get_ns(ctx)->seq == seq) + break; + + if (dplane_ctx_get_ns(ctx)->seq > seq) + zlog_warn( + "%s:WARNING Received %u is less than any context on the queue ctx->seq %u", + __func__, seq, + dplane_ctx_get_ns(ctx)->seq); + } + + if (ignore_msg) { + /* + * If we ignore the message due to an update + * above we should still fricking decode the + * message for our operator to understand + * what is going on + */ + int err = netlink_parse_error(nl, h, bth->zns->is_cmd, + false); + + zlog_debug("%s: netlink error message seq=%d %d", + __func__, h->nlmsg_seq, err); + continue; + } + + /* + * We received a message with the sequence number that isn't + * associated with any dplane context object. + */ + if (ctx == NULL) { + if (IS_ZEBRA_DEBUG_KERNEL) + zlog_debug( + "%s: skipping unassociated response, seq number %d NS %u", + __func__, h->nlmsg_seq, + bth->zns->ns_id); + continue; + } + + if (h->nlmsg_type == NLMSG_ERROR) { + int err = netlink_parse_error(nl, h, bth->zns->is_cmd, + false); + + if (err == -1) + dplane_ctx_set_status( + ctx, ZEBRA_DPLANE_REQUEST_FAILURE); + + if (IS_ZEBRA_DEBUG_KERNEL) + zlog_debug("%s: netlink error message seq=%d ", + __func__, h->nlmsg_seq); + continue; + } + + /* + * If we get here then we did not receive neither the ack nor + * the error and instead received some other message in an + * unexpected way. 
+ */ + if (IS_ZEBRA_DEBUG_KERNEL) + zlog_debug("%s: ignoring message type 0x%04x(%s) NS %u", + __func__, h->nlmsg_type, + nl_msg_type_to_str(h->nlmsg_type), + bth->zns->ns_id); + } + + return 0; +} + +static void nl_batch_reset(struct nl_batch *bth) +{ + bth->buf_head = bth->buf; + bth->curlen = 0; + bth->msgcnt = 0; + bth->zns = NULL; + + dplane_ctx_q_init(&(bth->ctx_list)); +} + +static void nl_batch_init(struct nl_batch *bth, + struct dplane_ctx_list_head *ctx_out_q) +{ + /* + * If the size of the buffer has changed, free and then allocate a new + * one. + */ + size_t bufsize = + atomic_load_explicit(&nl_batch_bufsize, memory_order_relaxed); + if (bufsize != nl_batch_tx_bufsize) { + if (nl_batch_tx_buf) + XFREE(MTYPE_NL_BUF, nl_batch_tx_buf); + + nl_batch_tx_buf = XCALLOC(MTYPE_NL_BUF, bufsize); + nl_batch_tx_bufsize = bufsize; + } + + bth->buf = nl_batch_tx_buf; + bth->bufsiz = bufsize; + bth->limit = atomic_load_explicit(&nl_batch_send_threshold, + memory_order_relaxed); + + bth->ctx_out_q = ctx_out_q; + + nl_batch_reset(bth); +} + +static void nl_batch_send(struct nl_batch *bth) +{ + struct zebra_dplane_ctx *ctx; + bool err = false; + + if (bth->curlen != 0 && bth->zns != NULL) { + struct nlsock *nl = + kernel_netlink_nlsock_lookup(bth->zns->sock); + + if (IS_ZEBRA_DEBUG_KERNEL) + zlog_debug("%s: %s, batch size=%zu, msg cnt=%zu", + __func__, nl->name, bth->curlen, + bth->msgcnt); + + if (netlink_send_msg(nl, bth->buf, bth->curlen) == -1) + err = true; + + if (!err) { + if (nl_batch_read_resp(bth, nl) == -1) + err = true; + } + } + + /* Move remaining contexts to the outbound queue. 
*/ + while (true) { + ctx = dplane_ctx_dequeue(&(bth->ctx_list)); + if (ctx == NULL) + break; + + if (err) + dplane_ctx_set_status(ctx, + ZEBRA_DPLANE_REQUEST_FAILURE); + + dplane_ctx_enqueue_tail(bth->ctx_out_q, ctx); + } + + nl_batch_reset(bth); +} + +enum netlink_msg_status netlink_batch_add_msg( + struct nl_batch *bth, struct zebra_dplane_ctx *ctx, + ssize_t (*msg_encoder)(struct zebra_dplane_ctx *, void *, size_t), + bool ignore_res) +{ + int seq; + ssize_t size; + struct nlmsghdr *msgh; + struct nlsock *nl; + + size = (*msg_encoder)(ctx, bth->buf_head, bth->bufsiz - bth->curlen); + + /* + * If there was an error while encoding the message (other than buffer + * overflow) then return an error. + */ + if (size < 0) + return FRR_NETLINK_ERROR; + + /* + * If the message doesn't fit entirely in the buffer then send the batch + * and retry. + */ + if (size == 0) { + nl_batch_send(bth); + size = (*msg_encoder)(ctx, bth->buf_head, + bth->bufsiz - bth->curlen); + /* + * If the message doesn't fit in the empty buffer then just + * return an error. 
+ */ + if (size <= 0) + return FRR_NETLINK_ERROR; + } + + seq = dplane_ctx_get_ns(ctx)->seq; + nl = kernel_netlink_nlsock_lookup(dplane_ctx_get_ns_sock(ctx)); + + if (ignore_res) + seq++; + + msgh = (struct nlmsghdr *)bth->buf_head; + msgh->nlmsg_seq = seq; + msgh->nlmsg_pid = nl->snl.nl_pid; + + bth->zns = dplane_ctx_get_ns(ctx); + bth->buf_head = ((char *)bth->buf_head) + size; + bth->curlen += size; + bth->msgcnt++; + + return FRR_NETLINK_QUEUED; +} + +static enum netlink_msg_status nl_put_msg(struct nl_batch *bth, + struct zebra_dplane_ctx *ctx) +{ + if (dplane_ctx_is_skip_kernel(ctx)) + return FRR_NETLINK_SUCCESS; + + switch (dplane_ctx_get_op(ctx)) { + + case DPLANE_OP_ROUTE_INSTALL: + case DPLANE_OP_ROUTE_UPDATE: + case DPLANE_OP_ROUTE_DELETE: + return netlink_put_route_update_msg(bth, ctx); + + case DPLANE_OP_NH_INSTALL: + case DPLANE_OP_NH_UPDATE: + case DPLANE_OP_NH_DELETE: + return netlink_put_nexthop_update_msg(bth, ctx); + + case DPLANE_OP_LSP_INSTALL: + case DPLANE_OP_LSP_UPDATE: + case DPLANE_OP_LSP_DELETE: + return netlink_put_lsp_update_msg(bth, ctx); + + case DPLANE_OP_PW_INSTALL: + case DPLANE_OP_PW_UNINSTALL: + return netlink_put_pw_update_msg(bth, ctx); + + case DPLANE_OP_ADDR_INSTALL: + case DPLANE_OP_ADDR_UNINSTALL: + return netlink_put_address_update_msg(bth, ctx); + + case DPLANE_OP_MAC_INSTALL: + case DPLANE_OP_MAC_DELETE: + return netlink_put_mac_update_msg(bth, ctx); + + case DPLANE_OP_NEIGH_INSTALL: + case DPLANE_OP_NEIGH_UPDATE: + case DPLANE_OP_NEIGH_DELETE: + case DPLANE_OP_VTEP_ADD: + case DPLANE_OP_VTEP_DELETE: + case DPLANE_OP_NEIGH_DISCOVER: + case DPLANE_OP_NEIGH_IP_INSTALL: + case DPLANE_OP_NEIGH_IP_DELETE: + case DPLANE_OP_NEIGH_TABLE_UPDATE: + return netlink_put_neigh_update_msg(bth, ctx); + + case DPLANE_OP_RULE_ADD: + case DPLANE_OP_RULE_DELETE: + case DPLANE_OP_RULE_UPDATE: + return netlink_put_rule_update_msg(bth, ctx); + + case DPLANE_OP_SYS_ROUTE_ADD: + case DPLANE_OP_SYS_ROUTE_DELETE: + case DPLANE_OP_ROUTE_NOTIFY: + 
case DPLANE_OP_LSP_NOTIFY: + case DPLANE_OP_BR_PORT_UPDATE: + return FRR_NETLINK_SUCCESS; + + case DPLANE_OP_IPTABLE_ADD: + case DPLANE_OP_IPTABLE_DELETE: + case DPLANE_OP_IPSET_ADD: + case DPLANE_OP_IPSET_DELETE: + case DPLANE_OP_IPSET_ENTRY_ADD: + case DPLANE_OP_IPSET_ENTRY_DELETE: + case DPLANE_OP_STARTUP_STAGE: + return FRR_NETLINK_ERROR; + + case DPLANE_OP_GRE_SET: + return netlink_put_gre_set_msg(bth, ctx); + + case DPLANE_OP_INTF_ADDR_ADD: + case DPLANE_OP_INTF_ADDR_DEL: + case DPLANE_OP_NONE: + return FRR_NETLINK_ERROR; + + case DPLANE_OP_INTF_NETCONFIG: + return netlink_put_intf_netconfig(bth, ctx); + + case DPLANE_OP_INTF_INSTALL: + case DPLANE_OP_INTF_UPDATE: + case DPLANE_OP_INTF_DELETE: + return netlink_put_intf_update_msg(bth, ctx); + + case DPLANE_OP_TC_QDISC_INSTALL: + case DPLANE_OP_TC_QDISC_UNINSTALL: + return netlink_put_tc_qdisc_update_msg(bth, ctx); + case DPLANE_OP_TC_CLASS_ADD: + case DPLANE_OP_TC_CLASS_DELETE: + case DPLANE_OP_TC_CLASS_UPDATE: + return netlink_put_tc_class_update_msg(bth, ctx); + case DPLANE_OP_TC_FILTER_ADD: + case DPLANE_OP_TC_FILTER_DELETE: + case DPLANE_OP_TC_FILTER_UPDATE: + return netlink_put_tc_filter_update_msg(bth, ctx); + } + + return FRR_NETLINK_ERROR; +} + +void kernel_update_multi(struct dplane_ctx_list_head *ctx_list) +{ + struct nl_batch batch; + struct zebra_dplane_ctx *ctx; + struct dplane_ctx_list_head handled_list; + enum netlink_msg_status res; + + dplane_ctx_q_init(&handled_list); + nl_batch_init(&batch, &handled_list); + + while (true) { + ctx = dplane_ctx_dequeue(ctx_list); + if (ctx == NULL) + break; + + if (batch.zns != NULL + && batch.zns->ns_id != dplane_ctx_get_ns(ctx)->ns_id) + nl_batch_send(&batch); + + /* + * Assume all messages will succeed and then mark only the ones + * that failed. 
+ */ + dplane_ctx_set_status(ctx, ZEBRA_DPLANE_REQUEST_SUCCESS); + + res = nl_put_msg(&batch, ctx); + + dplane_ctx_enqueue_tail(&(batch.ctx_list), ctx); + if (res == FRR_NETLINK_ERROR) + dplane_ctx_set_status(ctx, + ZEBRA_DPLANE_REQUEST_FAILURE); + + if (batch.curlen > batch.limit) + nl_batch_send(&batch); + } + + nl_batch_send(&batch); + + dplane_ctx_q_init(ctx_list); + dplane_ctx_list_append(ctx_list, &handled_list); +} + +struct nlsock *kernel_netlink_nlsock_lookup(int sock) +{ + struct nlsock lookup, *retval; + + lookup.sock = sock; + + NLSOCK_LOCK(); + retval = hash_lookup(nlsock_hash, &lookup); + NLSOCK_UNLOCK(); + + return retval; +} + +/* Insert nlsock entry into hash */ +static void kernel_netlink_nlsock_insert(struct nlsock *nls) +{ + NLSOCK_LOCK(); + (void)hash_get(nlsock_hash, nls, hash_alloc_intern); + NLSOCK_UNLOCK(); +} + +/* Remove nlsock entry from hash */ +static void kernel_netlink_nlsock_remove(struct nlsock *nls) +{ + NLSOCK_LOCK(); + (void)hash_release(nlsock_hash, nls); + NLSOCK_UNLOCK(); +} + +static uint32_t kernel_netlink_nlsock_key(const void *arg) +{ + const struct nlsock *nl = arg; + + return nl->sock; +} + +static bool kernel_netlink_nlsock_hash_equal(const void *arg1, const void *arg2) +{ + const struct nlsock *nl1 = arg1; + const struct nlsock *nl2 = arg2; + + if (nl1->sock == nl2->sock) + return true; + + return false; +} + +/* Exported interface function. This function simply calls + netlink_socket (). */ +void kernel_init(struct zebra_ns *zns) +{ + uint32_t groups, dplane_groups, ext_groups; +#if defined SOL_NETLINK + int one, ret, grp; +#endif + + /* + * Initialize netlink sockets + * + * If RTMGRP_XXX exists use that, but at some point + * I think the kernel developers realized that + * keeping track of all the different values would + * lead to confusion, so we need to convert the + * RTNLGRP_XXX to a bit position for ourself + * + * + * NOTE: If the bit is >= 32, you must use setsockopt(). 
Those + * groups are added further below after SOL_NETLINK is verified to + * exist. + */ + groups = RTMGRP_IPV4_ROUTE | RTMGRP_IPV6_ROUTE | RTMGRP_IPV4_MROUTE | + RTMGRP_NEIGH | ((uint32_t)1 << (RTNLGRP_IPV4_RULE - 1)) | + ((uint32_t)1 << (RTNLGRP_IPV6_RULE - 1)) | + ((uint32_t)1 << (RTNLGRP_NEXTHOP - 1)) | + ((uint32_t)1 << (RTNLGRP_TC - 1)); + + dplane_groups = (RTMGRP_LINK | + RTMGRP_IPV4_IFADDR | + RTMGRP_IPV6_IFADDR | + ((uint32_t) 1 << (RTNLGRP_IPV4_NETCONF - 1)) | + ((uint32_t) 1 << (RTNLGRP_IPV6_NETCONF - 1)) | + ((uint32_t) 1 << (RTNLGRP_MPLS_NETCONF - 1))); + + /* Use setsockopt for > 31 group */ + ext_groups = RTNLGRP_TUNNEL; + + snprintf(zns->netlink.name, sizeof(zns->netlink.name), + "netlink-listen (NS %u)", zns->ns_id); + zns->netlink.sock = -1; + if (netlink_socket(&zns->netlink, groups, &ext_groups, 1, zns->ns_id) < + 0) { + zlog_err("Failure to create %s socket", + zns->netlink.name); + exit(-1); + } + + kernel_netlink_nlsock_insert(&zns->netlink); + + snprintf(zns->netlink_cmd.name, sizeof(zns->netlink_cmd.name), + "netlink-cmd (NS %u)", zns->ns_id); + zns->netlink_cmd.sock = -1; + if (netlink_socket(&zns->netlink_cmd, 0, 0, 0, zns->ns_id) < 0) { + zlog_err("Failure to create %s socket", + zns->netlink_cmd.name); + exit(-1); + } + + kernel_netlink_nlsock_insert(&zns->netlink_cmd); + + /* Outbound socket for dplane programming of the host OS. */ + snprintf(zns->netlink_dplane_out.name, + sizeof(zns->netlink_dplane_out.name), "netlink-dp (NS %u)", + zns->ns_id); + zns->netlink_dplane_out.sock = -1; + if (netlink_socket(&zns->netlink_dplane_out, 0, 0, 0, zns->ns_id) < 0) { + zlog_err("Failure to create %s socket", + zns->netlink_dplane_out.name); + exit(-1); + } + + kernel_netlink_nlsock_insert(&zns->netlink_dplane_out); + + /* Inbound socket for OS events coming to the dplane. 
*/ + snprintf(zns->netlink_dplane_in.name, + sizeof(zns->netlink_dplane_in.name), "netlink-dp-in (NS %u)", + zns->ns_id); + zns->netlink_dplane_in.sock = -1; + if (netlink_socket(&zns->netlink_dplane_in, dplane_groups, 0, 0, + zns->ns_id) < 0) { + zlog_err("Failure to create %s socket", + zns->netlink_dplane_in.name); + exit(-1); + } + + kernel_netlink_nlsock_insert(&zns->netlink_dplane_in); + + /* + * SOL_NETLINK is not available on all platforms yet + * apparently. It's in bits/socket.h which I am not + * sure that we want to pull into our build system. + */ +#if defined SOL_NETLINK + + /* + * setsockopt multicast group subscriptions that don't fit in nl_groups + */ + grp = RTNLGRP_BRVLAN; + ret = setsockopt(zns->netlink.sock, SOL_NETLINK, NETLINK_ADD_MEMBERSHIP, + &grp, sizeof(grp)); + + if (ret < 0) + zlog_notice( + "Registration for RTNLGRP_BRVLAN Membership failed : %d %s", + errno, safe_strerror(errno)); + /* + * Let's tell the kernel that we want to receive extended + * ACKS over our command socket(s) + */ + one = 1; + ret = setsockopt(zns->netlink_cmd.sock, SOL_NETLINK, NETLINK_EXT_ACK, + &one, sizeof(one)); + + if (ret < 0) + zlog_notice("Registration for extended cmd ACK failed : %d %s", + errno, safe_strerror(errno)); + + one = 1; + ret = setsockopt(zns->netlink_dplane_out.sock, SOL_NETLINK, + NETLINK_EXT_ACK, &one, sizeof(one)); + + if (ret < 0) + zlog_notice("Registration for extended dp ACK failed : %d %s", + errno, safe_strerror(errno)); + + /* + * Trim off the payload of the original netlink message in the + * acknowledgment. This option is available since Linux 4.2, so if + * setsockopt fails, ignore the error. + */ + one = 1; + ret = setsockopt(zns->netlink_dplane_out.sock, SOL_NETLINK, + NETLINK_CAP_ACK, &one, sizeof(one)); + if (ret < 0) + zlog_notice( + "Registration for reduced ACK packet size failed, probably running an early kernel"); +#endif + + /* Register kernel socket. 
*/ + if (fcntl(zns->netlink.sock, F_SETFL, O_NONBLOCK) < 0) + flog_err_sys(EC_LIB_SOCKET, "Can't set %s socket flags: %s", + zns->netlink.name, safe_strerror(errno)); + + if (fcntl(zns->netlink_cmd.sock, F_SETFL, O_NONBLOCK) < 0) + zlog_err("Can't set %s socket error: %s(%d)", + zns->netlink_cmd.name, safe_strerror(errno), errno); + + if (fcntl(zns->netlink_dplane_out.sock, F_SETFL, O_NONBLOCK) < 0) + zlog_err("Can't set %s socket error: %s(%d)", + zns->netlink_dplane_out.name, safe_strerror(errno), + errno); + + if (fcntl(zns->netlink_dplane_in.sock, F_SETFL, O_NONBLOCK) < 0) + zlog_err("Can't set %s socket error: %s(%d)", + zns->netlink_dplane_in.name, safe_strerror(errno), + errno); + + /* Set receive buffer size if it's set from command line */ + if (rcvbufsize) { + netlink_recvbuf(&zns->netlink, rcvbufsize); + netlink_recvbuf(&zns->netlink_cmd, rcvbufsize); + netlink_recvbuf(&zns->netlink_dplane_out, rcvbufsize); + netlink_recvbuf(&zns->netlink_dplane_in, rcvbufsize); + } + + /* Set filter for inbound sockets, to exclude events we've generated + * ourselves. 
+ */ + netlink_install_filter(zns->netlink.sock, zns->netlink_cmd.snl.nl_pid, + zns->netlink_dplane_out.snl.nl_pid); + + netlink_install_filter(zns->netlink_dplane_in.sock, + zns->netlink_cmd.snl.nl_pid, + zns->netlink_dplane_out.snl.nl_pid); + + zns->t_netlink = NULL; + + event_add_read(zrouter.master, kernel_read, zns, zns->netlink.sock, + &zns->t_netlink); + + rt_netlink_init(); +} + +/* Helper to clean up an nlsock */ +static void kernel_nlsock_fini(struct nlsock *nls) +{ + if (nls && nls->sock >= 0) { + kernel_netlink_nlsock_remove(nls); + close(nls->sock); + nls->sock = -1; + XFREE(MTYPE_NL_BUF, nls->buf); + nls->buflen = 0; + } +} + +void kernel_terminate(struct zebra_ns *zns, bool complete) +{ + EVENT_OFF(zns->t_netlink); + + kernel_nlsock_fini(&zns->netlink); + + kernel_nlsock_fini(&zns->netlink_cmd); + + kernel_nlsock_fini(&zns->netlink_dplane_in); + + /* During zebra shutdown, we need to leave the dataplane socket + * around until all work is done. + */ + if (complete) + kernel_nlsock_fini(&zns->netlink_dplane_out); +} + +/* + * Global init for platform-/OS-specific things + */ +void kernel_router_init(void) +{ + /* Init nlsock hash and lock */ + pthread_mutex_init(&nlsock_mutex, NULL); + nlsock_hash = hash_create_size(8, kernel_netlink_nlsock_key, + kernel_netlink_nlsock_hash_equal, + "Netlink Socket Hash"); +} + +/* + * Global deinit for platform-/OS-specific things + */ +void kernel_router_terminate(void) +{ + pthread_mutex_destroy(&nlsock_mutex); + + hash_free(nlsock_hash); + nlsock_hash = NULL; +} + +#endif /* HAVE_NETLINK */ diff --git a/zebra/kernel_netlink.h b/zebra/kernel_netlink.h new file mode 100644 index 0000000..e910f62 --- /dev/null +++ b/zebra/kernel_netlink.h @@ -0,0 +1,151 @@ +// SPDX-License-Identifier: GPL-2.0-or-later +/* Declarations and definitions for kernel interaction over netlink + * Copyright (C) 2016 Cumulus Networks, Inc. 
+ */ + +#ifndef _ZEBRA_KERNEL_NETLINK_H +#define _ZEBRA_KERNEL_NETLINK_H + +#ifdef __cplusplus +extern "C" { +#endif + +#ifdef HAVE_NETLINK + +#define RTM_NHA(h) \ + ((struct rtattr *)(((char *)(h)) + NLMSG_ALIGN(sizeof(struct nhmsg)))) + + +#define NL_RCV_PKT_BUF_SIZE (34 * 1024) +#define NL_PKT_BUF_SIZE 8192 + +/* + * nl_attr_put - add an attribute to the Netlink message. + * + * Returns true if the attribute could be added to the message (fits into the + * buffer), otherwise false is returned. + */ +extern bool nl_attr_put(struct nlmsghdr *n, unsigned int maxlen, int type, + const void *data, unsigned int alen); +extern bool nl_attr_put8(struct nlmsghdr *n, unsigned int maxlen, int type, + uint8_t data); +extern bool nl_attr_put16(struct nlmsghdr *n, unsigned int maxlen, int type, + uint16_t data); +extern bool nl_attr_put32(struct nlmsghdr *n, unsigned int maxlen, int type, + uint32_t data); +extern bool nl_attr_put64(struct nlmsghdr *n, unsigned int maxlen, int type, + uint64_t data); + +/* + * nl_attr_nest - start an attribute nest. + * + * Returns a valid pointer to the beginning of the nest if the attribute + * describing the nest could be added to the message (fits into the buffer), + * otherwise NULL is returned. + */ +extern struct rtattr *nl_attr_nest(struct nlmsghdr *n, unsigned int maxlen, + int type); + +/* + * nl_attr_nest_end - finalize nesting of attributes. + * + * Updates the length field of the attribute header to include the appended + * attributes. Returns the total length of the Netlink message. + */ +extern int nl_attr_nest_end(struct nlmsghdr *n, struct rtattr *nest); + +/* + * nl_attr_rtnh - append a rtnexthop record to the Netlink message. + * + * Returns a valid pointer to the rtnexthop struct if it could be added to + * the message (fits into the buffer), otherwise NULL is returned. + */ +extern struct rtnexthop *nl_attr_rtnh(struct nlmsghdr *n, unsigned int maxlen); + +/* + * nl_attr_rtnh_end - finalize adding a rtnexthop record. 
+ * + * Updates the length field of the rtnexthop to include the appended + * attributes. + */ +extern void nl_attr_rtnh_end(struct nlmsghdr *n, struct rtnexthop *rtnh); + +extern void netlink_parse_rtattr(struct rtattr **tb, int max, + struct rtattr *rta, int len); +extern void netlink_parse_rtattr_flags(struct rtattr **tb, int max, + struct rtattr *rta, int len, + unsigned short flags); +extern void netlink_parse_rtattr_nested(struct rtattr **tb, int max, + struct rtattr *rta); +/* + * nl_addraw_l copies raw data from the netlink message buffer into netlink + * message header pointer. It ensures the aligned data buffer does not + * overrun max length. + * return value is 0 if it's successful (NOTE(review): prototype returns bool - confirm) + */ +extern bool nl_addraw_l(struct nlmsghdr *n, unsigned int maxlen, + const void *data, unsigned int len); +extern const char *nl_msg_type_to_str(uint16_t msg_type); +extern const char *nl_rtproto_to_str(uint8_t rtproto); +extern const char *nl_family_to_str(uint8_t family); +extern const char *nl_rttype_to_str(uint8_t rttype); + +extern int netlink_parse_info(int (*filter)(struct nlmsghdr *, ns_id_t, int), + struct nlsock *nl, + const struct zebra_dplane_info *dp_info, + int count, bool startup); +extern int netlink_talk_filter(struct nlmsghdr *h, ns_id_t ns, int startup); +extern int netlink_talk(int (*filter)(struct nlmsghdr *, ns_id_t, int startup), + struct nlmsghdr *n, struct nlsock *nl, + struct zebra_ns *zns, bool startup); +extern int netlink_request(struct nlsock *nl, void *req); + +enum netlink_msg_status { + FRR_NETLINK_SUCCESS, + FRR_NETLINK_ERROR, + FRR_NETLINK_QUEUED, +}; + +struct nl_batch; + +/* + * netlink_batch_add_msg - add message to the netlink batch using dplane + * context object. + * + * @ctx: Dataplane context + * @msg_encoder: A function that encodes dplane context object into + * netlink message. 
Should take dplane context object, + * pointer to a buffer and buffer's length as parameters + * and should return -1 on error, 0 on buffer overflow or + * size of the encoded message. + * @ignore_res: Whether the result of this message should be ignored. + * This should be used in some 'update' cases where we + * need to send two messages for one context object. + * + * Return: Status of the message. + */ +extern enum netlink_msg_status netlink_batch_add_msg( + struct nl_batch *bth, struct zebra_dplane_ctx *ctx, + ssize_t (*msg_encoder)(struct zebra_dplane_ctx *, void *, size_t), + bool ignore_res); + +/* + * Vty/cli apis + */ +extern int netlink_config_write_helper(struct vty *vty); + +/* + * Configure size of the batch buffer and sending threshold. If 'unset', reset + * to default value. + */ +extern void netlink_set_batch_buffer_size(uint32_t size, uint32_t threshold, + bool set); + +extern struct nlsock *kernel_netlink_nlsock_lookup(int sock); +#endif /* HAVE_NETLINK */ + +#ifdef __cplusplus +} +#endif + +#endif /* _ZEBRA_KERNEL_NETLINK_H */ diff --git a/zebra/kernel_socket.c b/zebra/kernel_socket.c new file mode 100644 index 0000000..d897f4a --- /dev/null +++ b/zebra/kernel_socket.c @@ -0,0 +1,1644 @@ +// SPDX-License-Identifier: GPL-2.0-or-later +/* Kernel communication using routing socket. 
+ * Copyright (C) 1999 Kunihiro Ishiguro + */ + +#include + +#ifndef HAVE_NETLINK + +#include +#ifdef __OpenBSD__ +#include +#endif + +#include "if.h" +#include "prefix.h" +#include "sockunion.h" +#include "connected.h" +#include "memory.h" +#include "ioctl.h" +#include "log.h" +#include "table.h" +#include "rib.h" +#include "privs.h" +#include "vrf.h" +#include "lib_errors.h" + +#include "zebra/rt.h" +#include "zebra/interface.h" +#include "zebra/zebra_router.h" +#include "zebra/debug.h" +#include "zebra/kernel_socket.h" +#include "zebra/rib.h" +#include "zebra/zebra_errors.h" +#include "zebra/zebra_ptm.h" + +extern struct zebra_privs_t zserv_privs; + +/* + * Historically, the BSD routing socket has aligned data following a + * struct sockaddr to sizeof(long), which was 4 bytes on some + * platforms, and 8 bytes on others. NetBSD 6 changed the routing + * socket to align to sizeof(uint64_t), which is 8 bytes. OS X + * appears to align to sizeof(int), which is 4 bytes. + * + * Alignment of zero-sized sockaddrs is nonsensical, but historically + * BSD defines RT_ROUNDUP(0) to be the alignment interval (rather than + * 0). We follow this practice without questioning it, but it is a + * bug if frr calls ROUNDUP with 0. + */ +#ifdef __APPLE__ +#define ROUNDUP_TYPE int +#else +#define ROUNDUP_TYPE long +#endif + +/* + * Because of these varying conventions, the only sane approach is for + * the header to define some flavor of ROUNDUP macro. + */ + +/* OS X (Xcode as of 2014-12) is known not to define RT_ROUNDUP */ +#if defined(RT_ROUNDUP) +#define ROUNDUP(a) RT_ROUNDUP(a) +#endif /* defined(RT_ROUNDUP) */ + +/* + * If ROUNDUP has not yet been defined in terms of platform-provided + * defines, attempt to cope with heuristics. + */ +#if !defined(ROUNDUP) + +/* + * If you're porting to a platform that changed RT_ROUNDUP but doesn't + * have it in its headers, this will break rather obviously and you'll + * have to fix it here. + */ +#define ROUNDUP(a) \ + ((a) > 0 ? 
(1 + (((a)-1) | (sizeof(ROUNDUP_TYPE) - 1))) \ + : sizeof(ROUNDUP_TYPE)) + +#endif /* !defined(ROUNDUP) */ + + +#if defined(SA_SIZE) +/* SAROUNDUP is the only thing we need, and SA_SIZE provides that */ +#define SAROUNDUP(a) SA_SIZE(a) +#else /* !SA_SIZE */ +/* + * Given a pointer (sockaddr or void *), return the number of bytes + * taken up by the sockaddr and any padding needed for alignment. + */ +#if defined(HAVE_STRUCT_SOCKADDR_SA_LEN) +#define SAROUNDUP(X) ROUNDUP(((struct sockaddr *)(X))->sa_len) +#else +/* + * One would hope all fixed-size structure definitions are aligned, + * but round them up nonetheless. + */ +#define SAROUNDUP(X) \ + (((struct sockaddr *)(X))->sa_family == AF_INET \ + ? ROUNDUP(sizeof(struct sockaddr_in)) \ + : (((struct sockaddr *)(X))->sa_family == AF_INET6 \ + ? ROUNDUP(sizeof(struct sockaddr_in6)) \ + : (((struct sockaddr *)(X))->sa_family == AF_LINK \ + ? ROUNDUP(sizeof(struct sockaddr_dl)) \ + : sizeof(struct sockaddr)))) +#endif /* HAVE_STRUCT_SOCKADDR_SA_LEN */ + +#endif /* !SA_SIZE */ + +/* Routing socket message types. 
*/ +const struct message rtm_type_str[] = {{RTM_ADD, "RTM_ADD"}, + {RTM_DELETE, "RTM_DELETE"}, + {RTM_CHANGE, "RTM_CHANGE"}, + {RTM_GET, "RTM_GET"}, + {RTM_LOSING, "RTM_LOSING"}, + {RTM_REDIRECT, "RTM_REDIRECT"}, + {RTM_MISS, "RTM_MISS"}, +#ifdef RTM_LOCK + {RTM_LOCK, "RTM_LOCK"}, +#endif /* RTM_LOCK */ +#ifdef RTM_OLDADD + {RTM_OLDADD, "RTM_OLDADD"}, +#endif /* RTM_OLDADD */ +#ifdef RTM_OLDDEL + {RTM_OLDDEL, "RTM_OLDDEL"}, +#endif /* RTM_OLDDEL */ +#ifdef RTM_RESOLVE + {RTM_RESOLVE, "RTM_RESOLVE"}, +#endif /* RTM_RESOLVE */ + {RTM_NEWADDR, "RTM_NEWADDR"}, + {RTM_DELADDR, "RTM_DELADDR"}, + {RTM_IFINFO, "RTM_IFINFO"}, +#ifdef RTM_OIFINFO + {RTM_OIFINFO, "RTM_OIFINFO"}, +#endif /* RTM_OIFINFO */ +#ifdef RTM_NEWMADDR + {RTM_NEWMADDR, "RTM_NEWMADDR"}, +#endif /* RTM_NEWMADDR */ +#ifdef RTM_DELMADDR + {RTM_DELMADDR, "RTM_DELMADDR"}, +#endif /* RTM_DELMADDR */ +#ifdef RTM_IFANNOUNCE + {RTM_IFANNOUNCE, "RTM_IFANNOUNCE"}, +#endif /* RTM_IFANNOUNCE */ +#ifdef RTM_IEEE80211 + {RTM_IEEE80211, "RTM_IEEE80211"}, +#endif /* RTM_IEEE80211 */ + {0}}; + +static const struct message rtm_flag_str[] = {{RTF_UP, "UP"}, + {RTF_GATEWAY, "GATEWAY"}, + {RTF_HOST, "HOST"}, + {RTF_REJECT, "REJECT"}, + {RTF_DYNAMIC, "DYNAMIC"}, + {RTF_MODIFIED, "MODIFIED"}, + {RTF_DONE, "DONE"}, +#ifdef RTF_MASK + {RTF_MASK, "MASK"}, +#endif /* RTF_MASK */ +#ifdef RTF_CLONING + {RTF_CLONING, "CLONING"}, +#endif /* RTF_CLONING */ +#ifdef RTF_XRESOLVE + {RTF_XRESOLVE, "XRESOLVE"}, +#endif /* RTF_XRESOLVE */ +#ifdef RTF_LLINFO + {RTF_LLINFO, "LLINFO"}, +#endif /* RTF_LLINFO */ + {RTF_STATIC, "STATIC"}, + {RTF_BLACKHOLE, "BLACKHOLE"}, +#ifdef RTF_PRIVATE + {RTF_PRIVATE, "PRIVATE"}, +#endif /* RTF_PRIVATE */ + {RTF_PROTO1, "PROTO1"}, + {RTF_PROTO2, "PROTO2"}, +#ifdef RTF_PRCLONING + {RTF_PRCLONING, "PRCLONING"}, +#endif /* RTF_PRCLONING */ +#ifdef RTF_WASCLONED + {RTF_WASCLONED, "WASCLONED"}, +#endif /* RTF_WASCLONED */ +#ifdef RTF_PROTO3 + {RTF_PROTO3, "PROTO3"}, +#endif /* RTF_PROTO3 */ +#ifdef RTF_PINNED + {RTF_PINNED, "PINNED"}, 
+#endif /* RTF_PINNED */ +#ifdef RTF_LOCAL + {RTF_LOCAL, "LOCAL"}, +#endif /* RTF_LOCAL */ +#ifdef RTF_BROADCAST + {RTF_BROADCAST, "BROADCAST"}, +#endif /* RTF_BROADCAST */ +#ifdef RTF_MULTICAST + {RTF_MULTICAST, "MULTICAST"}, +#endif /* RTF_MULTICAST */ +#ifdef RTF_MULTIRT + {RTF_MULTIRT, "MULTIRT"}, +#endif /* RTF_MULTIRT */ +#ifdef RTF_SETSRC + {RTF_SETSRC, "SETSRC"}, +#endif /* RTF_SETSRC */ + {0}}; + +/* Kernel routing update socket. */ +int routing_sock = -1; + +/* Kernel dataplane routing update socket, used in the dataplane pthread + * context. + */ +int dplane_routing_sock = -1; + +/* Yes I'm checking ugly routing socket behavior. */ +/* #define DEBUG */ + +size_t _rta_get(caddr_t sap, void *destp, size_t destlen, bool checkaf); +size_t rta_get(caddr_t sap, void *dest, size_t destlen); +size_t rta_getattr(caddr_t sap, void *destp, size_t destlen); +size_t rta_getsdlname(caddr_t sap, void *dest, short *destlen); +const char *rtatostr(unsigned int flags, char *buf, size_t buflen); + +/* Supported address family check. */ +static inline int af_check(int family) +{ + if (family == AF_INET) + return 1; + if (family == AF_INET6) + return 1; + return 0; +} + +size_t _rta_get(caddr_t sap, void *destp, size_t destlen, bool checkaf) +{ + struct sockaddr *sa = (struct sockaddr *)sap; + struct sockaddr_dl *sdl; + uint8_t *dest = destp; + size_t tlen, copylen; + +#ifdef HAVE_STRUCT_SOCKADDR_SA_LEN + copylen = sa->sa_len; + tlen = (copylen == 0) ? sizeof(ROUNDUP_TYPE) : ROUNDUP(copylen); +#else /* !HAVE_STRUCT_SOCKADDR_SA_LEN */ + copylen = tlen = SAROUNDUP(sap); +#endif /* !HAVE_STRUCT_SOCKADDR_SA_LEN */ + + if (copylen > 0 && dest != NULL) { + if (checkaf && af_check(sa->sa_family) == 0) + return tlen; + /* + * Handle sockaddr_dl corner case: + * RTA_NETMASK might be AF_LINK, but it doesn't anything + * relevant (e.g. zeroed out fields). Check for this + * case and avoid warning log message. 
+ */ + if (sa->sa_family == AF_LINK) { + sdl = (struct sockaddr_dl *)sa; + if (sdl->sdl_index == 0 || sdl->sdl_nlen == 0) + copylen = destlen; + } + + if (copylen > destlen) { + zlog_warn( + "%s: destination buffer too small (%zu vs %zu)", + __func__, copylen, destlen); + memcpy(dest, sap, destlen); + } else + memcpy(dest, sap, copylen); + } + + return tlen; +} + +size_t rta_get(caddr_t sap, void *destp, size_t destlen) +{ + return _rta_get(sap, destp, destlen, true); +} + +size_t rta_getattr(caddr_t sap, void *destp, size_t destlen) +{ + return _rta_get(sap, destp, destlen, false); +} + +size_t rta_getsdlname(caddr_t sap, void *destp, short *destlen) +{ + struct sockaddr_dl *sdl = (struct sockaddr_dl *)sap; + uint8_t *dest = destp; + size_t tlen, copylen; + + copylen = sdl->sdl_nlen; +#ifdef HAVE_STRUCT_SOCKADDR_SA_LEN + struct sockaddr *sa = (struct sockaddr *)sap; + + tlen = (sa->sa_len == 0) ? sizeof(ROUNDUP_TYPE) : ROUNDUP(sa->sa_len); +#else /* !HAVE_STRUCT_SOCKADDR_SA_LEN */ + tlen = SAROUNDUP(sap); +#endif /* !HAVE_STRUCT_SOCKADDR_SA_LEN */ + + if (copylen > 0 && dest != NULL && sdl->sdl_family == AF_LINK) { + if (copylen > IFNAMSIZ) { + zlog_warn( + "%s: destination buffer too small (%zu vs %d)", + __func__, copylen, IFNAMSIZ); + memcpy(dest, sdl->sdl_data, IFNAMSIZ); + dest[IFNAMSIZ] = 0; + *destlen = IFNAMSIZ; + } else { + memcpy(dest, sdl->sdl_data, copylen); + dest[copylen] = 0; + *destlen = copylen; + } + } else + *destlen = 0; + + return tlen; +} + +const char *rtatostr(unsigned int flags, char *buf, size_t buflen) +{ + const char *flagstr, *bufstart; + int bit, wlen; + char ustr[32]; + + /* Hold the pointer to the buffer beginning. 
*/ + bufstart = buf; + + for (bit = 1; bit; bit <<= 1) { + if ((flags & bit) == 0) + continue; + + switch (bit) { + case RTA_DST: + flagstr = "DST"; + break; + case RTA_GATEWAY: + flagstr = "GATEWAY"; + break; + case RTA_NETMASK: + flagstr = "NETMASK"; + break; +#ifdef RTA_GENMASK + case RTA_GENMASK: + flagstr = "GENMASK"; + break; +#endif /* RTA_GENMASK */ + case RTA_IFP: + flagstr = "IFP"; + break; + case RTA_IFA: + flagstr = "IFA"; + break; +#ifdef RTA_AUTHOR + case RTA_AUTHOR: + flagstr = "AUTHOR"; + break; +#endif /* RTA_AUTHOR */ + case RTA_BRD: + flagstr = "BRD"; + break; +#ifdef RTA_SRC + case RTA_SRC: + flagstr = "SRC"; + break; +#endif /* RTA_SRC */ +#ifdef RTA_SRCMASK + case RTA_SRCMASK: + flagstr = "SRCMASK"; + break; +#endif /* RTA_SRCMASK */ +#ifdef RTA_LABEL + case RTA_LABEL: + flagstr = "LABEL"; + break; +#endif /* RTA_LABEL */ + + default: + snprintf(ustr, sizeof(ustr), "0x%x", bit); + flagstr = ustr; + break; + } + + wlen = snprintf(buf, buflen, "%s,", flagstr); + buf += wlen; + buflen -= wlen; + } + + /* Check for empty buffer. */ + if (bufstart != buf) + buf--; + + /* Remove the last comma. */ + *buf = 0; + + return bufstart; +} + +/* Dump routing table flag for debug purpose. 
*/ +static void rtm_flag_dump(int flag) +{ + const struct message *mes; + static char buf[BUFSIZ]; + + buf[0] = '\0'; + for (mes = rtm_flag_str; mes->key != 0; mes++) { + if (mes->key & flag) { + strlcat(buf, mes->str, BUFSIZ); + strlcat(buf, " ", BUFSIZ); + } + } + zlog_debug("Kernel: %s", buf); +} + +#ifdef RTM_IFANNOUNCE +/* Interface adding function */ +static int ifan_read(struct if_announcemsghdr *ifan) +{ + struct interface *ifp; + + ifp = if_lookup_by_index(ifan->ifan_index, VRF_DEFAULT); + + if (ifp) + assert((ifp->ifindex == ifan->ifan_index) + || (ifp->ifindex == IFINDEX_INTERNAL)); + + if ((ifp == NULL) || ((ifp->ifindex == IFINDEX_INTERNAL) + && (ifan->ifan_what == IFAN_ARRIVAL))) { + if (IS_ZEBRA_DEBUG_KERNEL) + zlog_debug( + "%s: creating interface for ifindex %d, name %s", + __func__, ifan->ifan_index, ifan->ifan_name); + + /* Create Interface */ + ifp = if_get_by_name(ifan->ifan_name, VRF_DEFAULT, + VRF_DEFAULT_NAME); + if_set_index(ifp, ifan->ifan_index); + + if_get_metric(ifp); + if_add_update(ifp); + } else if (ifp != NULL && ifan->ifan_what == IFAN_DEPARTURE) + if_delete_update(&ifp); + + if (ifp) { + if_get_flags(ifp); + if_get_mtu(ifp); + if_get_metric(ifp); + } + if (IS_ZEBRA_DEBUG_KERNEL) + zlog_debug("%s: interface %s index %d", __func__, + ifan->ifan_name, ifan->ifan_index); + + return 0; +} +#endif /* RTM_IFANNOUNCE */ + +#ifdef HAVE_BSD_IFI_LINK_STATE +/* BSD link detect translation */ +static void bsd_linkdetect_translate(struct if_msghdr *ifm) +{ + if ((ifm->ifm_data.ifi_link_state >= LINK_STATE_UP) + || (ifm->ifm_data.ifi_link_state == LINK_STATE_UNKNOWN)) + SET_FLAG(ifm->ifm_flags, IFF_RUNNING); + else + UNSET_FLAG(ifm->ifm_flags, IFF_RUNNING); +} +#endif /* HAVE_BSD_IFI_LINK_STATE */ + +static enum zebra_link_type sdl_to_zebra_link_type(unsigned int sdlt) +{ + switch (sdlt) { + case IFT_ETHER: + return ZEBRA_LLT_ETHER; + case IFT_X25: + return ZEBRA_LLT_X25; + case IFT_FDDI: + return ZEBRA_LLT_FDDI; + case IFT_PPP: + return 
ZEBRA_LLT_PPP; + case IFT_LOOP: + return ZEBRA_LLT_LOOPBACK; + case IFT_SLIP: + return ZEBRA_LLT_SLIP; + case IFT_ARCNET: + return ZEBRA_LLT_ARCNET; + case IFT_ATM: + return ZEBRA_LLT_ATM; + case IFT_LOCALTALK: + return ZEBRA_LLT_LOCALTLK; + case IFT_HIPPI: + return ZEBRA_LLT_HIPPI; +#ifdef IFT_IEEE1394 + case IFT_IEEE1394: + return ZEBRA_LLT_IEEE1394; +#endif + + default: + return ZEBRA_LLT_UNKNOWN; + } +} + +/* + * Handle struct if_msghdr obtained from reading routing socket or + * sysctl (from interface_list). There may or may not be sockaddrs + * present after the header. + */ +int ifm_read(struct if_msghdr *ifm) +{ + struct interface *ifp = NULL; + struct sockaddr_dl *sdl = NULL; + char ifname[IFNAMSIZ]; + short ifnlen = 0; + int maskbit; + caddr_t cp; + char fbuf[64]; + + /* terminate ifname at head (for strnlen) and tail (for safety) */ + ifname[IFNAMSIZ - 1] = '\0'; + + /* paranoia: sanity check structure */ + if (ifm->ifm_msglen < sizeof(struct if_msghdr)) { + flog_err(EC_ZEBRA_NETLINK_LENGTH_ERROR, + "%s: ifm->ifm_msglen %d too short", __func__, + ifm->ifm_msglen); + return -1; + } + + /* + * Check for a sockaddr_dl following the message. First, point to + * where a socakddr might be if one follows the message. + */ + cp = (void *)(ifm + 1); + + /* Look up for RTA_IFP and skip others. */ + for (maskbit = 1; maskbit; maskbit <<= 1) { + if ((maskbit & ifm->ifm_addrs) == 0) + continue; + if (maskbit != RTA_IFP) { + cp += rta_get(cp, NULL, 0); + continue; + } + + /* Save the pointer to the structure. */ + sdl = (struct sockaddr_dl *)cp; + cp += rta_getsdlname(cp, ifname, &ifnlen); + } + + if (IS_ZEBRA_DEBUG_KERNEL) + zlog_debug("%s: sdl ifname %s addrs {%s}", __func__, + (ifnlen ? ifname : "(nil)"), + rtatostr(ifm->ifm_addrs, fbuf, sizeof(fbuf))); + + /* + * Look up on ifindex first, because ifindices are the primary handle + * for + * interfaces across the user/kernel boundary, for most systems. 
(Some + * messages, such as up/down status changes on NetBSD, do not include a + * sockaddr_dl). + */ + if ((ifp = if_lookup_by_index(ifm->ifm_index, VRF_DEFAULT)) != NULL) { + /* we have an ifp, verify that the name matches as some systems, + * eg Solaris, have a 1:many association of ifindex:ifname + * if they dont match, we dont have the correct ifp and should + * set it back to NULL to let next check do lookup by name + */ + if (ifnlen && (strncmp(ifp->name, ifname, IFNAMSIZ) != 0)) { + if (IS_ZEBRA_DEBUG_KERNEL) + zlog_debug( + "%s: ifp name %s doesn't match sdl name %s", + __func__, ifp->name, ifname); + ifp = NULL; + } + } + + /* + * If we dont have an ifp, try looking up by name. Particularly as some + * systems (Solaris) have a 1:many mapping of ifindex:ifname - the + * ifname + * is therefore our unique handle to that interface. + * + * Interfaces specified in the configuration file for which the ifindex + * has not been determined will have ifindex == IFINDEX_INTERNAL, and + * such + * interfaces are found by this search, and then their ifindex values + * can + * be filled in. + */ + if ((ifp == NULL) && ifnlen) + ifp = if_lookup_by_name(ifname, VRF_DEFAULT); + + /* + * If ifp still does not exist or has an invalid index + * (IFINDEX_INTERNAL), + * create or fill in an interface. + */ + if ((ifp == NULL) || (ifp->ifindex == IFINDEX_INTERNAL)) { + /* + * To create or fill in an interface, a sockaddr_dl (via + * RTA_IFP) is required. + */ + if (!ifnlen) { + zlog_debug("Interface index %d (new) missing ifname", + ifm->ifm_index); + return -1; + } + +#ifndef RTM_IFANNOUNCE + /* Down->Down interface should be ignored here. + * See further comment below. + */ + if (!CHECK_FLAG(ifm->ifm_flags, IFF_UP)) + return 0; +#endif /* !RTM_IFANNOUNCE */ + + if (ifp == NULL) { + /* Interface that zebra was not previously aware of, so + * create. 
*/ + ifp = if_get_by_name(ifname, VRF_DEFAULT, + VRF_DEFAULT_NAME); + if (IS_ZEBRA_DEBUG_KERNEL) + zlog_debug("%s: creating ifp for ifindex %d", + __func__, ifm->ifm_index); + } + + if (IS_ZEBRA_DEBUG_KERNEL) + zlog_debug( + "%s: updated/created ifp, ifname %s, ifindex %d", + __func__, ifp->name, ifp->ifindex); + /* + * Fill in newly created interface structure, or larval + * structure with ifindex IFINDEX_INTERNAL. + */ + if_set_index(ifp, ifm->ifm_index); + +#ifdef HAVE_BSD_IFI_LINK_STATE /* translate BSD kernel msg for link-state */ + bsd_linkdetect_translate(ifm); +#endif /* HAVE_BSD_IFI_LINK_STATE */ + + if_flags_update(ifp, ifm->ifm_flags); +#if defined(__bsdi__) + if_kvm_get_mtu(ifp); +#else + if_get_mtu(ifp); +#endif /* __bsdi__ */ + if_get_metric(ifp); + + /* + * XXX sockaddr_dl contents can be larger than the structure + * definition. There are 2 big families here: + * - BSD has sdl_len + sdl_data[16] + overruns sdl_data + * we MUST use sdl_len here or we'll truncate data. + * - Solaris has no sdl_len, but sdl_data[244] + * presumably, it's not going to run past that, so sizeof() + * is fine here. + * a nonzero ifnlen from rta_getsdlname() means sdl is valid + */ + ifp->ll_type = ZEBRA_LLT_UNKNOWN; + ifp->hw_addr_len = 0; + if (ifnlen) { +#ifdef HAVE_STRUCT_SOCKADDR_DL_SDL_LEN + memcpy(&((struct zebra_if *)ifp->info)->sdl, sdl, + sdl->sdl_len); +#else + memcpy(&((struct zebra_if *)ifp->info)->sdl, sdl, + sizeof(struct sockaddr_dl)); +#endif /* HAVE_STRUCT_SOCKADDR_DL_SDL_LEN */ + + ifp->ll_type = sdl_to_zebra_link_type(sdl->sdl_type); + if (sdl->sdl_alen <= sizeof(ifp->hw_addr)) { + memcpy(ifp->hw_addr, LLADDR(sdl), + sdl->sdl_alen); + ifp->hw_addr_len = sdl->sdl_alen; + } + } + + if_add_update(ifp); + } else + /* + * Interface structure exists. Adjust stored flags from + * notification. If interface has up->down or down->up + * transition, call state change routines (to adjust routes, + * notify routing daemons, etc.). 
(Other flag changes are stored + * but apparently do not trigger action.) + */ + { + if (ifp->ifindex != ifm->ifm_index) { + zlog_debug( + "%s: index mismatch, ifname %s, ifp index %d, ifm index %d", + __func__, ifp->name, ifp->ifindex, + ifm->ifm_index); + return -1; + } + +#ifdef HAVE_BSD_IFI_LINK_STATE /* translate BSD kernel msg for link-state */ + bsd_linkdetect_translate(ifm); +#endif /* HAVE_BSD_IFI_LINK_STATE */ + + /* update flags and handle operative->inoperative transition, if + * any */ + if_flags_update(ifp, ifm->ifm_flags); + +#ifndef RTM_IFANNOUNCE + if (!if_is_up(ifp)) { + /* No RTM_IFANNOUNCE on this platform, so we can never + * distinguish between ~IFF_UP and delete. We must + * presume + * it has been deleted. + * Eg, Solaris will not notify us of unplumb. + * + * XXX: Fixme - this should be runtime detected + * So that a binary compiled on a system with IFANNOUNCE + * will still behave correctly if run on a platform + * without + */ + if_delete_update(&ifp); + } +#endif /* RTM_IFANNOUNCE */ + if (ifp && if_is_up(ifp)) { +#if defined(__bsdi__) + if_kvm_get_mtu(ifp); +#else + if_get_mtu(ifp); +#endif /* __bsdi__ */ + if_get_metric(ifp); + } + } + + if (ifp) { +#ifdef HAVE_NET_RT_IFLIST + ifp->stats = ifm->ifm_data; +#endif /* HAVE_NET_RT_IFLIST */ + ifp->speed = ifm->ifm_data.ifi_baudrate / 1000000; + + if (IS_ZEBRA_DEBUG_KERNEL) + zlog_debug("%s: interface %s index %d", __func__, + ifp->name, ifp->ifindex); + } + + return 0; +} + +/* Address read from struct ifa_msghdr. 
*/ +static void ifam_read_mesg(struct ifa_msghdr *ifm, union sockunion *addr, + union sockunion *mask, union sockunion *brd, + char *ifname, short *ifnlen) +{ + caddr_t pnt, end; + union sockunion dst; + union sockunion gateway; + int maskbit; + char fbuf[64]; + + pnt = (caddr_t)(ifm + 1); + end = ((caddr_t)ifm) + ifm->ifam_msglen; + + /* Be sure structure is cleared */ + memset(mask, 0, sizeof(union sockunion)); + memset(addr, 0, sizeof(union sockunion)); + memset(brd, 0, sizeof(union sockunion)); + memset(&dst, 0, sizeof(union sockunion)); + memset(&gateway, 0, sizeof(union sockunion)); + + /* We fetch each socket variable into sockunion. */ + for (maskbit = 1; maskbit; maskbit <<= 1) { + if ((maskbit & ifm->ifam_addrs) == 0) + continue; + + switch (maskbit) { + case RTA_DST: + pnt += rta_get(pnt, &dst, sizeof(dst)); + break; + case RTA_GATEWAY: + pnt += rta_get(pnt, &gateway, sizeof(gateway)); + break; + case RTA_NETMASK: + pnt += rta_getattr(pnt, mask, sizeof(*mask)); + break; + case RTA_IFP: + pnt += rta_getsdlname(pnt, ifname, ifnlen); + break; + case RTA_IFA: + pnt += rta_get(pnt, addr, sizeof(*addr)); + break; + case RTA_BRD: + pnt += rta_get(pnt, brd, sizeof(*brd)); + break; + + default: + pnt += rta_get(pnt, NULL, 0); + break; + } + + if (pnt > end) { + zlog_warn("%s: overflow detected (pnt:%p end:%p)", + __func__, pnt, end); + break; + } + } + + if (IS_ZEBRA_DEBUG_KERNEL) { + switch (sockunion_family(addr)) { + case AF_INET: + case AF_INET6: { + int masklen = + (sockunion_family(addr) == AF_INET) + ? ip_masklen(mask->sin.sin_addr) + : ip6_masklen(mask->sin6.sin6_addr); + zlog_debug( + "%s: ifindex %d, ifname %s, ifam_addrs {%s}, ifam_flags 0x%x, addr %pSU/%d broad %pSU dst %pSU gateway %pSU", + __func__, ifm->ifam_index, + (ifnlen ? 
ifname : "(nil)"), + rtatostr(ifm->ifam_addrs, fbuf, sizeof(fbuf)), + ifm->ifam_flags, addr, masklen, brd, &dst, + &gateway); + } break; + default: + zlog_debug("%s: ifindex %d, ifname %s, ifam_addrs {%s}", + __func__, ifm->ifam_index, + (ifnlen ? ifname : "(nil)"), + rtatostr(ifm->ifam_addrs, fbuf, + sizeof(fbuf))); + break; + } + } + + /* Assert read up end point matches to end point */ + pnt = (caddr_t)ROUNDUP((size_t)pnt); + if (pnt != (caddr_t)ROUNDUP((size_t)end)) + zlog_debug("ifam_read() doesn't read all socket data"); +} + +/* Interface's address information get. */ +int ifam_read(struct ifa_msghdr *ifam) +{ + struct interface *ifp = NULL; + union sockunion addr, mask, brd; + bool dest_same = false; + char ifname[INTERFACE_NAMSIZ]; + short ifnlen = 0; + bool isalias = false; + uint32_t flags = 0; + + ifname[0] = ifname[INTERFACE_NAMSIZ - 1] = '\0'; + + /* Allocate and read address information. */ + ifam_read_mesg(ifam, &addr, &mask, &brd, ifname, &ifnlen); + + if ((ifp = if_lookup_by_index(ifam->ifam_index, VRF_DEFAULT)) == NULL) { + flog_warn(EC_ZEBRA_UNKNOWN_INTERFACE, + "%s: no interface for ifname %s, index %d", __func__, + ifname, ifam->ifam_index); + return -1; + } + + if (ifnlen && strncmp(ifp->name, ifname, INTERFACE_NAMSIZ)) + isalias = true; + + /* + * Mark the alias prefixes as secondary + */ + if (isalias) + SET_FLAG(flags, ZEBRA_IFA_SECONDARY); + + /* N.B. The info in ifa_msghdr does not tell us whether the RTA_BRD + field contains a broadcast address or a peer address, so we are + forced to + rely upon the interface type. */ + if (if_is_pointopoint(ifp)) + SET_FLAG(flags, ZEBRA_IFA_PEER); + else { + if (memcmp(&addr, &brd, sizeof(addr)) == 0) + dest_same = true; + } + +#if 0 + /* it might seem cute to grab the interface metric here, however + * we're processing an address update message, and so some systems + * (e.g. FBSD) dont bother to fill in ifam_metric. Disabled, but left + * in deliberately, as comment. 
+ */ + ifp->metric = ifam->ifam_metric; +#endif + + /* Add connected address. */ + switch (sockunion_family(&addr)) { + case AF_INET: + if (ifam->ifam_type == RTM_NEWADDR) + connected_add_ipv4(ifp, flags, &addr.sin.sin_addr, + ip_masklen(mask.sin.sin_addr), + dest_same ? NULL : &brd.sin.sin_addr, + (isalias ? ifname : NULL), + METRIC_MAX); + else + connected_delete_ipv4(ifp, flags, &addr.sin.sin_addr, + ip_masklen(mask.sin.sin_addr), + dest_same ? NULL + : &brd.sin.sin_addr); + break; + case AF_INET6: + /* Unset interface index from link-local address when IPv6 stack + is KAME. */ + if (IN6_IS_ADDR_LINKLOCAL(&addr.sin6.sin6_addr)) { + SET_IN6_LINKLOCAL_IFINDEX(addr.sin6.sin6_addr, 0); + } + + if (ifam->ifam_type == RTM_NEWADDR) + connected_add_ipv6(ifp, flags, &addr.sin6.sin6_addr, + NULL, + ip6_masklen(mask.sin6.sin6_addr), + (isalias ? ifname : NULL), + METRIC_MAX); + else + connected_delete_ipv6(ifp, &addr.sin6.sin6_addr, NULL, + ip6_masklen(mask.sin6.sin6_addr)); + break; + default: + /* Unsupported family silently ignore... */ + break; + } + + /* Check interface flag for implicit up of the interface. */ + if_refresh(ifp); + + return 0; +} + +/* Interface function for reading kernel routing table information. */ +static int rtm_read_mesg(struct rt_msghdr *rtm, union sockunion *dest, + union sockunion *mask, union sockunion *gate, + char *ifname, short *ifnlen) +{ + caddr_t pnt, end; + int maskbit; + + /* Pnt points out socket data start point. */ + pnt = (caddr_t)(rtm + 1); + end = ((caddr_t)rtm) + rtm->rtm_msglen; + + /* rt_msghdr version check. */ + if (rtm->rtm_version != RTM_VERSION) + flog_warn(EC_ZEBRA_RTM_VERSION_MISMATCH, + "Routing message version different %d should be %d.This may cause problem", + rtm->rtm_version, RTM_VERSION); + + /* Be sure structure is cleared */ + memset(dest, 0, sizeof(union sockunion)); + memset(gate, 0, sizeof(union sockunion)); + memset(mask, 0, sizeof(union sockunion)); + + /* We fetch each socket variable into sockunion. 
*/ + /* We fetch each socket variable into sockunion. */ + for (maskbit = 1; maskbit; maskbit <<= 1) { + if ((maskbit & rtm->rtm_addrs) == 0) + continue; + + switch (maskbit) { + case RTA_DST: + pnt += rta_get(pnt, dest, sizeof(*dest)); + break; + case RTA_GATEWAY: + pnt += rta_get(pnt, gate, sizeof(*gate)); + break; + case RTA_NETMASK: + pnt += rta_getattr(pnt, mask, sizeof(*mask)); + break; + case RTA_IFP: + pnt += rta_getsdlname(pnt, ifname, ifnlen); + break; + + default: + pnt += rta_get(pnt, NULL, 0); + break; + } + + if (pnt > end) { + zlog_warn("%s: overflow detected (pnt:%p end:%p)", + __func__, pnt, end); + break; + } + } + + /* If there is netmask information set it's family same as + destination family*/ + if (rtm->rtm_addrs & RTA_NETMASK) + mask->sa.sa_family = dest->sa.sa_family; + + /* Assert read up to the end of pointer. */ + if (pnt != end) + zlog_debug("rtm_read() doesn't read all socket data."); + + return rtm->rtm_flags; +} + +void rtm_read(struct rt_msghdr *rtm) +{ + int flags; + uint32_t zebra_flags; + union sockunion dest, mask, gate; + char ifname[INTERFACE_NAMSIZ + 1]; + short ifnlen = 0; + struct nexthop nh; + struct prefix p; + ifindex_t ifindex = 0; + afi_t afi; + char fbuf[64]; + int32_t proto = ZEBRA_ROUTE_KERNEL; + uint8_t distance = 0; + + zebra_flags = 0; + + /* Read destination and netmask and gateway from rtm message + structure. 
*/ + flags = rtm_read_mesg(rtm, &dest, &mask, &gate, ifname, &ifnlen); + if (!(flags & RTF_DONE)) + return; + if (IS_ZEBRA_DEBUG_KERNEL) + zlog_debug("%s: got rtm of type %d (%s) addrs {%s}", __func__, + rtm->rtm_type, + lookup_msg(rtm_type_str, rtm->rtm_type, NULL), + rtatostr(rtm->rtm_addrs, fbuf, sizeof(fbuf))); + +#ifdef RTF_CLONED /*bsdi, netbsd 1.6*/ + if (flags & RTF_CLONED) + return; +#endif +#ifdef RTF_WASCLONED /*freebsd*/ + if (flags & RTF_WASCLONED) + return; +#endif + + if ((rtm->rtm_type == RTM_ADD || rtm->rtm_type == RTM_CHANGE) + && !(flags & RTF_UP)) + return; + + /* This is connected route. */ + if (!(flags & RTF_GATEWAY)) + return; + + if (flags & RTF_PROTO1) { + SET_FLAG(zebra_flags, ZEBRA_FLAG_SELFROUTE); + proto = ZEBRA_ROUTE_STATIC; + distance = 255; + } + + memset(&nh, 0, sizeof(nh)); + + nh.vrf_id = VRF_DEFAULT; + /* This is a reject or blackhole route */ + if (flags & RTF_REJECT) { + nh.type = NEXTHOP_TYPE_BLACKHOLE; + nh.bh_type = BLACKHOLE_REJECT; + } else if (flags & RTF_BLACKHOLE) { + nh.type = NEXTHOP_TYPE_BLACKHOLE; + nh.bh_type = BLACKHOLE_NULL; + } + + /* + * Ignore our own messages. + */ + if (rtm->rtm_type != RTM_GET && rtm->rtm_pid == pid) + return; + + if (dest.sa.sa_family == AF_INET) { + afi = AFI_IP; + p.family = AF_INET; + p.u.prefix4 = dest.sin.sin_addr; + if (flags & RTF_HOST) + p.prefixlen = IPV4_MAX_BITLEN; + else + p.prefixlen = ip_masklen(mask.sin.sin_addr); + + if (!nh.type) { + nh.type = NEXTHOP_TYPE_IPV4; + nh.gate.ipv4 = gate.sin.sin_addr; + } + } else if (dest.sa.sa_family == AF_INET6) { + afi = AFI_IP6; + p.family = AF_INET6; + p.u.prefix6 = dest.sin6.sin6_addr; + if (flags & RTF_HOST) + p.prefixlen = IPV6_MAX_BITLEN; + else + p.prefixlen = ip6_masklen(mask.sin6.sin6_addr); + +#ifdef KAME + if (IN6_IS_ADDR_LINKLOCAL(&gate.sin6.sin6_addr)) { + ifindex = IN6_LINKLOCAL_IFINDEX(gate.sin6.sin6_addr); + SET_IN6_LINKLOCAL_IFINDEX(gate.sin6.sin6_addr, 0); + } +#endif /* KAME */ + + if (!nh.type) { + nh.type = ifindex ? 
NEXTHOP_TYPE_IPV6_IFINDEX + : NEXTHOP_TYPE_IPV6; + nh.gate.ipv6 = gate.sin6.sin6_addr; + nh.ifindex = ifindex; + } + } else + return; + + if (rtm->rtm_type == RTM_GET || rtm->rtm_type == RTM_ADD + || rtm->rtm_type == RTM_CHANGE) + rib_add(afi, SAFI_UNICAST, VRF_DEFAULT, proto, 0, zebra_flags, + &p, NULL, &nh, 0, rt_table_main_id, 0, 0, distance, 0, + false); + else + rib_delete(afi, SAFI_UNICAST, VRF_DEFAULT, proto, 0, + zebra_flags, &p, NULL, &nh, 0, rt_table_main_id, 0, + distance, true); +} + +/* Interface function for the kernel routing table updates. Support + * for RTM_CHANGE will be needed. + * Exported only for rt_socket.c + */ +int rtm_write(int message, union sockunion *dest, union sockunion *mask, + union sockunion *gate, union sockunion *mpls, unsigned int index, + enum blackhole_type bh_type, int metric) +{ + int ret; + caddr_t pnt; + struct interface *ifp; + + /* Sequencial number of routing message. */ + static int msg_seq = 0; + + /* Struct of rt_msghdr and buffer for storing socket's data. */ + struct { + struct rt_msghdr rtm; + char buf[512]; + } msg; + + if (dplane_routing_sock < 0) + return ZEBRA_ERR_EPERM; + + /* Clear and set rt_msghdr values */ + memset(&msg, 0, sizeof(msg)); + msg.rtm.rtm_version = RTM_VERSION; + msg.rtm.rtm_type = message; + msg.rtm.rtm_seq = msg_seq++; + msg.rtm.rtm_addrs = RTA_DST; + msg.rtm.rtm_addrs |= RTA_GATEWAY; + msg.rtm.rtm_flags = RTF_UP; +#ifdef __OpenBSD__ + msg.rtm.rtm_flags |= RTF_MPATH; + msg.rtm.rtm_fmask = RTF_MPLS; +#endif + msg.rtm.rtm_index = index; + + if (metric != 0) { + msg.rtm.rtm_rmx.rmx_hopcount = metric; + msg.rtm.rtm_inits |= RTV_HOPCOUNT; + } + + ifp = if_lookup_by_index(index, VRF_DEFAULT); + + if (gate && (message == RTM_ADD || message == RTM_CHANGE)) + msg.rtm.rtm_flags |= RTF_GATEWAY; + +/* When RTF_CLONING is unavailable on BSD, should we set some + * other flag instead? 
+ */ +#ifdef RTF_CLONING + if (!gate && (message == RTM_ADD || message == RTM_CHANGE) && ifp + && (ifp->flags & IFF_POINTOPOINT) == 0) + msg.rtm.rtm_flags |= RTF_CLONING; +#endif /* RTF_CLONING */ + + /* If no protocol specific gateway is specified, use link + address for gateway. */ + if (!gate) { + if (!ifp) { + char dest_buf[INET_ADDRSTRLEN] = "NULL", + mask_buf[INET_ADDRSTRLEN] = "255.255.255.255"; + if (dest) + inet_ntop(AF_INET, &dest->sin.sin_addr, + dest_buf, INET_ADDRSTRLEN); + if (mask) + inet_ntop(AF_INET, &mask->sin.sin_addr, + mask_buf, INET_ADDRSTRLEN); + flog_warn( + EC_ZEBRA_RTM_NO_GATEWAY, + "%s: %s/%s: gate == NULL and no gateway found for ifindex %d", + __func__, dest_buf, mask_buf, index); + return -1; + } + gate = (union sockunion *)&((struct zebra_if *)ifp->info)->sdl; + } + + if (mask) + msg.rtm.rtm_addrs |= RTA_NETMASK; + else if (message == RTM_ADD || message == RTM_CHANGE) + msg.rtm.rtm_flags |= RTF_HOST; + +#ifdef __OpenBSD__ + if (mpls) { + msg.rtm.rtm_addrs |= RTA_SRC; + msg.rtm.rtm_flags |= RTF_MPLS; + + if (mpls->smpls.smpls_label + != htonl(MPLS_LABEL_IMPLICIT_NULL << MPLS_LABEL_OFFSET)) + msg.rtm.rtm_mpls = MPLS_OP_PUSH; + } +#endif + + /* Tagging route with flags */ + msg.rtm.rtm_flags |= (RTF_PROTO1); + + switch (bh_type) { + case BLACKHOLE_UNSPEC: + break; + case BLACKHOLE_REJECT: + msg.rtm.rtm_flags |= RTF_REJECT; + break; + case BLACKHOLE_NULL: + case BLACKHOLE_ADMINPROHIB: + msg.rtm.rtm_flags |= RTF_BLACKHOLE; + break; + } + + +#define SOCKADDRSET(X, R) \ + if (msg.rtm.rtm_addrs & (R)) { \ + int len = SAROUNDUP(X); \ + memcpy(pnt, (caddr_t)(X), len); \ + pnt += len; \ + } + + pnt = (caddr_t)msg.buf; + + /* Write each socket data into rtm message buffer */ + SOCKADDRSET(dest, RTA_DST); + SOCKADDRSET(gate, RTA_GATEWAY); + SOCKADDRSET(mask, RTA_NETMASK); +#ifdef __OpenBSD__ + SOCKADDRSET(mpls, RTA_SRC); +#endif + + msg.rtm.rtm_msglen = pnt - (caddr_t)&msg; + + ret = write(dplane_routing_sock, &msg, msg.rtm.rtm_msglen); + + if 
(ret != msg.rtm.rtm_msglen) { + if (errno == EEXIST) + return ZEBRA_ERR_RTEXIST; + if (errno == ENETUNREACH) + return ZEBRA_ERR_RTUNREACH; + if (errno == ESRCH) + return ZEBRA_ERR_RTNOEXIST; + + flog_err_sys(EC_LIB_SOCKET, "%s: write : %s (%d)", __func__, + safe_strerror(errno), errno); + return ZEBRA_ERR_KERNEL; + } + return ZEBRA_ERR_NOERROR; +} + + +#include "frrevent.h" +#include "zebra/zserv.h" + +/* For debug purpose. */ +static void rtmsg_debug(struct rt_msghdr *rtm) +{ + char fbuf[64]; + + zlog_debug("Kernel: Len: %d Type: %s", rtm->rtm_msglen, + lookup_msg(rtm_type_str, rtm->rtm_type, NULL)); + rtm_flag_dump(rtm->rtm_flags); + zlog_debug("Kernel: message seq %d", rtm->rtm_seq); + zlog_debug("Kernel: pid %lld, rtm_addrs {%s}", (long long)rtm->rtm_pid, + rtatostr(rtm->rtm_addrs, fbuf, sizeof(fbuf))); +} + +/* This is pretty gross, better suggestions welcome -- mhandler */ +#ifndef RTAX_MAX +#ifdef RTA_NUMBITS +#define RTAX_MAX RTA_NUMBITS +#else +#define RTAX_MAX 8 +#endif /* RTA_NUMBITS */ +#endif /* RTAX_MAX */ + +/* Kernel routing table and interface updates via routing socket. */ +static void kernel_read(struct event *thread) +{ + int sock; + int nbytes; + struct rt_msghdr *rtm; + + /* + * This must be big enough for any message the kernel might send. + * Rather than determining how many sockaddrs of what size might be + * in each particular message, just use RTAX_MAX of sockaddr_storage + * for each. Note that the sockaddrs must be after each message + * definition, or rather after whichever happens to be the largest, + * since the buffer needs to be big enough for a message and the + * sockaddrs together. + */ + union { + /* Routing information. */ + struct { + struct rt_msghdr rtm; + struct sockaddr_storage addr[RTAX_MAX]; + } r; + + /* Interface information. */ + struct { + struct if_msghdr ifm; + struct sockaddr_storage addr[RTAX_MAX]; + } im; + + /* Interface address information. 
*/ + struct { + struct ifa_msghdr ifa; + struct sockaddr_storage addr[RTAX_MAX]; + } ia; + +#ifdef RTM_IFANNOUNCE + /* Interface arrival/departure */ + struct { + struct if_announcemsghdr ifan; + struct sockaddr_storage addr[RTAX_MAX]; + } ian; +#endif /* RTM_IFANNOUNCE */ + + } buf; + + /* Fetch routing socket. */ + sock = EVENT_FD(thread); + + nbytes = read(sock, &buf, sizeof(buf)); + + if (nbytes < 0) { + if (errno == ENOBUFS) { +#ifdef __FreeBSD__ + /* + * ENOBUFS indicates a temporary resource + * shortage and is not harmful for consistency of + * reading the routing socket. Ignore it. + */ + event_add_read(zrouter.master, kernel_read, NULL, sock, + NULL); + return; +#else + flog_err(EC_ZEBRA_RECVMSG_OVERRUN, + "routing socket overrun: %s", + safe_strerror(errno)); + /* + * In this case we are screwed. + * There is no good way to + * recover zebra at this point. + */ + exit(-1); +#endif + } + if (errno != EAGAIN && errno != EWOULDBLOCK) + flog_err_sys(EC_LIB_SOCKET, "routing socket error: %s", + safe_strerror(errno)); + return; + } + + if (nbytes == 0) + return; + + event_add_read(zrouter.master, kernel_read, NULL, sock, NULL); + + if (IS_ZEBRA_DEBUG_KERNEL) + rtmsg_debug(&buf.r.rtm); + + rtm = &buf.r.rtm; + + /* + * Ensure that we didn't drop any data, so that processing routines + * can assume they have the whole message. 
+ */ + if (rtm->rtm_msglen != nbytes) { + zlog_debug("%s: rtm->rtm_msglen %d, nbytes %d, type %d", + __func__, rtm->rtm_msglen, nbytes, rtm->rtm_type); + return; + } + + switch (rtm->rtm_type) { + case RTM_ADD: + case RTM_DELETE: + case RTM_CHANGE: + rtm_read(rtm); + break; + case RTM_IFINFO: + ifm_read(&buf.im.ifm); + break; + case RTM_NEWADDR: + case RTM_DELADDR: + ifam_read(&buf.ia.ifa); + break; +#ifdef RTM_IFANNOUNCE + case RTM_IFANNOUNCE: + ifan_read(&buf.ian.ifan); + break; +#endif /* RTM_IFANNOUNCE */ + default: + if (IS_ZEBRA_DEBUG_KERNEL) + zlog_debug( + "Unprocessed RTM_type: %s(%d)", + lookup_msg(rtm_type_str, rtm->rtm_type, NULL), + rtm->rtm_type); + break; + } +} + +/* Make routing socket. */ +static void routing_socket(struct zebra_ns *zns) +{ + uint32_t default_rcvbuf; + socklen_t optlen; + + frr_with_privs(&zserv_privs) { + routing_sock = ns_socket(AF_ROUTE, SOCK_RAW, 0, zns->ns_id); + + dplane_routing_sock = + ns_socket(AF_ROUTE, SOCK_RAW, 0, zns->ns_id); + } + + if (routing_sock < 0) { + flog_err_sys(EC_LIB_SOCKET, "Can't init kernel routing socket"); + return; + } + + if (dplane_routing_sock < 0) { + flog_err_sys(EC_LIB_SOCKET, + "Can't init kernel dataplane routing socket"); + return; + } + +#ifdef SO_RERROR + /* Allow reporting of route(4) buffer overflow errors */ + int n = 1; + + if (setsockopt(routing_sock, SOL_SOCKET, SO_RERROR, &n, sizeof(n)) < 0) + flog_err_sys(EC_LIB_SOCKET, + "Can't set SO_RERROR on routing socket"); +#endif + + /* XXX: Socket should be NONBLOCK, however as we currently + * discard failed writes, this will lead to inconsistencies. + * For now, socket must be blocking. 
+ */ + /*if (fcntl (routing_sock, F_SETFL, O_NONBLOCK) < 0) + zlog_warn ("Can't set O_NONBLOCK to routing socket");*/ + + /* + * Attempt to set a more useful receive buffer size + */ + optlen = sizeof(default_rcvbuf); + if (getsockopt(routing_sock, SOL_SOCKET, SO_RCVBUF, &default_rcvbuf, + &optlen) == -1) + flog_err_sys(EC_LIB_SOCKET, + "routing_sock sockopt SOL_SOCKET SO_RCVBUF"); + else { + for (; rcvbufsize > default_rcvbuf && + setsockopt(routing_sock, SOL_SOCKET, SO_RCVBUF, + &rcvbufsize, sizeof(rcvbufsize)) == -1 && + errno == ENOBUFS; + rcvbufsize /= 2) + ; + } + + /* kernel_read needs rewrite. */ + event_add_read(zrouter.master, kernel_read, NULL, routing_sock, NULL); +} + +void interface_list_second(struct zebra_ns *zns) +{ +} + +void interface_list_tunneldump(struct zebra_ns *zns) +{ +} + +/* Exported interface function. This function simply calls + routing_socket (). */ +void kernel_init(struct zebra_ns *zns) +{ + routing_socket(zns); +} + +void kernel_terminate(struct zebra_ns *zns, bool complete) +{ + return; +} + +/* + * Global init for platform-/OS-specific things + */ +void kernel_router_init(void) +{ +} + +/* + * Global deinit for platform-/OS-specific things + */ +void kernel_router_terminate(void) +{ +} + +/* + * Called by the dplane pthread to read incoming OS messages and dispatch them. + */ +int kernel_dplane_read(struct zebra_dplane_info *info) +{ + return 0; +} + +void kernel_update_multi(struct dplane_ctx_list_head *ctx_list) +{ + struct zebra_dplane_ctx *ctx; + struct dplane_ctx_list_head handled_list; + enum zebra_dplane_result res = ZEBRA_DPLANE_REQUEST_SUCCESS; + + dplane_ctx_q_init(&handled_list); + + while (true) { + ctx = dplane_ctx_dequeue(ctx_list); + if (ctx == NULL) + break; + + /* + * A previous provider plugin may have asked to skip the + * kernel update. 
+ */ + if (dplane_ctx_is_skip_kernel(ctx)) { + res = ZEBRA_DPLANE_REQUEST_SUCCESS; + goto skip_one; + } + + switch (dplane_ctx_get_op(ctx)) { + + case DPLANE_OP_ROUTE_INSTALL: + case DPLANE_OP_ROUTE_UPDATE: + case DPLANE_OP_ROUTE_DELETE: + res = kernel_route_update(ctx); + break; + + case DPLANE_OP_NH_INSTALL: + case DPLANE_OP_NH_UPDATE: + case DPLANE_OP_NH_DELETE: + res = kernel_nexthop_update(ctx); + break; + + case DPLANE_OP_LSP_INSTALL: + case DPLANE_OP_LSP_UPDATE: + case DPLANE_OP_LSP_DELETE: + res = kernel_lsp_update(ctx); + break; + + case DPLANE_OP_PW_INSTALL: + case DPLANE_OP_PW_UNINSTALL: + res = kernel_pw_update(ctx); + break; + + case DPLANE_OP_ADDR_INSTALL: + case DPLANE_OP_ADDR_UNINSTALL: + res = kernel_address_update_ctx(ctx); + break; + + case DPLANE_OP_MAC_INSTALL: + case DPLANE_OP_MAC_DELETE: + res = kernel_mac_update_ctx(ctx); + break; + + case DPLANE_OP_NEIGH_INSTALL: + case DPLANE_OP_NEIGH_UPDATE: + case DPLANE_OP_NEIGH_DELETE: + case DPLANE_OP_VTEP_ADD: + case DPLANE_OP_VTEP_DELETE: + case DPLANE_OP_NEIGH_DISCOVER: + res = kernel_neigh_update_ctx(ctx); + break; + + case DPLANE_OP_RULE_ADD: + case DPLANE_OP_RULE_DELETE: + case DPLANE_OP_RULE_UPDATE: + res = kernel_pbr_rule_update(ctx); + break; + + case DPLANE_OP_INTF_INSTALL: + case DPLANE_OP_INTF_UPDATE: + case DPLANE_OP_INTF_DELETE: + res = kernel_intf_update(ctx); + break; + + case DPLANE_OP_TC_QDISC_INSTALL: + case DPLANE_OP_TC_QDISC_UNINSTALL: + case DPLANE_OP_TC_CLASS_ADD: + case DPLANE_OP_TC_CLASS_DELETE: + case DPLANE_OP_TC_CLASS_UPDATE: + case DPLANE_OP_TC_FILTER_ADD: + case DPLANE_OP_TC_FILTER_DELETE: + case DPLANE_OP_TC_FILTER_UPDATE: + res = kernel_tc_update(ctx); + break; + + /* Ignore 'notifications' - no-op */ + case DPLANE_OP_SYS_ROUTE_ADD: + case DPLANE_OP_SYS_ROUTE_DELETE: + case DPLANE_OP_ROUTE_NOTIFY: + case DPLANE_OP_LSP_NOTIFY: + res = ZEBRA_DPLANE_REQUEST_SUCCESS; + break; + + case DPLANE_OP_INTF_NETCONFIG: + res = kernel_intf_netconf_update(ctx); + break; + + case 
DPLANE_OP_NONE: + case DPLANE_OP_BR_PORT_UPDATE: + case DPLANE_OP_IPTABLE_ADD: + case DPLANE_OP_IPTABLE_DELETE: + case DPLANE_OP_IPSET_ADD: + case DPLANE_OP_IPSET_DELETE: + case DPLANE_OP_IPSET_ENTRY_ADD: + case DPLANE_OP_IPSET_ENTRY_DELETE: + case DPLANE_OP_NEIGH_IP_INSTALL: + case DPLANE_OP_NEIGH_IP_DELETE: + case DPLANE_OP_NEIGH_TABLE_UPDATE: + case DPLANE_OP_GRE_SET: + case DPLANE_OP_INTF_ADDR_ADD: + case DPLANE_OP_INTF_ADDR_DEL: + case DPLANE_OP_STARTUP_STAGE: + zlog_err("Unhandled dplane data for %s", + dplane_op2str(dplane_ctx_get_op(ctx))); + res = ZEBRA_DPLANE_REQUEST_FAILURE; + } + + skip_one: + dplane_ctx_set_status(ctx, res); + + dplane_ctx_enqueue_tail(&handled_list, ctx); + } + + dplane_ctx_q_init(ctx_list); + dplane_ctx_list_append(ctx_list, &handled_list); +} + +#endif /* !HAVE_NETLINK */ diff --git a/zebra/kernel_socket.h b/zebra/kernel_socket.h new file mode 100644 index 0000000..0d29abd --- /dev/null +++ b/zebra/kernel_socket.h @@ -0,0 +1,34 @@ +// SPDX-License-Identifier: GPL-2.0-or-later +/* + * Exported kernel_socket functions, exported only for convenience of + * sysctl methods. + */ + +#ifndef __ZEBRA_KERNEL_SOCKET_H +#define __ZEBRA_KERNEL_SOCKET_H + +#ifdef __cplusplus +extern "C" { +#endif + +/* Error codes of zebra. 
*/ +#define ZEBRA_ERR_NOERROR 0 +#define ZEBRA_ERR_RTEXIST -1 +#define ZEBRA_ERR_RTUNREACH -2 +#define ZEBRA_ERR_EPERM -3 +#define ZEBRA_ERR_RTNOEXIST -4 +#define ZEBRA_ERR_KERNEL -5 + +extern void rtm_read(struct rt_msghdr *); +extern int ifam_read(struct ifa_msghdr *); +extern int ifm_read(struct if_msghdr *); +extern int rtm_write(int, union sockunion *, union sockunion *, + union sockunion *, union sockunion *, unsigned int, + enum blackhole_type, int); +extern const struct message rtm_type_str[]; + +#ifdef __cplusplus +} +#endif + +#endif /* __ZEBRA_KERNEL_SOCKET_H */ diff --git a/zebra/label_manager.c b/zebra/label_manager.c new file mode 100644 index 0000000..fa7dbb0 --- /dev/null +++ b/zebra/label_manager.c @@ -0,0 +1,499 @@ +// SPDX-License-Identifier: GPL-2.0-or-later +/* + * Label Manager for FRR + * + * Copyright (C) 2017 by Bingen Eguzkitza, + * Volta Networks Inc. + * + * This file is part of FRRouting (FRR) + */ + +#include +#include +#include +#include + +#include "lib/log.h" +#include "lib/memory.h" +#include "lib/mpls.h" +#include "lib/network.h" +#include "lib/stream.h" +#include "lib/zclient.h" +#include "lib/libfrr.h" + +//#include "zebra/zserv.h" +#include "zebra/zebra_router.h" +#include "zebra/label_manager.h" +#include "zebra/zebra_errors.h" +#include "zebra/zapi_msg.h" +#include "zebra/debug.h" + +#include "zebra/label_manager_clippy.c" + +#define CONNECTION_DELAY 5 + +struct label_manager lbl_mgr; + +DEFINE_MGROUP(LBL_MGR, "Label Manager"); +DEFINE_MTYPE_STATIC(LBL_MGR, LM_CHUNK, "Label Manager Chunk"); + +/* define hooks for the basic API, so that it can be specialized or served + * externally + */ + +DEFINE_HOOK(lm_client_connect, (struct zserv *client, vrf_id_t vrf_id), + (client, vrf_id)); +DEFINE_HOOK(lm_client_disconnect, (struct zserv *client), (client)); +DEFINE_HOOK(lm_get_chunk, + (struct label_manager_chunk * *lmc, struct zserv *client, + uint8_t keep, uint32_t size, uint32_t base, vrf_id_t vrf_id), + (lmc, client, keep, size, 
base, vrf_id)); +DEFINE_HOOK(lm_release_chunk, + (struct zserv *client, uint32_t start, uint32_t end), + (client, start, end)); +DEFINE_HOOK(lm_cbs_inited, (), ()); + +/* define wrappers to be called in zapi_msg.c (as hooks must be called in + * source file where they were defined) + */ +void lm_client_connect_call(struct zserv *client, vrf_id_t vrf_id) +{ + hook_call(lm_client_connect, client, vrf_id); +} +void lm_get_chunk_call(struct label_manager_chunk **lmc, struct zserv *client, + uint8_t keep, uint32_t size, uint32_t base, + vrf_id_t vrf_id) +{ + hook_call(lm_get_chunk, lmc, client, keep, size, base, vrf_id); +} +void lm_release_chunk_call(struct zserv *client, uint32_t start, uint32_t end) +{ + hook_call(lm_release_chunk, client, start, end); +} + +/* forward declarations of the static functions to be used for some hooks */ +static int label_manager_connect(struct zserv *client, vrf_id_t vrf_id); +static int label_manager_disconnect(struct zserv *client); +static int label_manager_get_chunk(struct label_manager_chunk **lmc, + struct zserv *client, uint8_t keep, + uint32_t size, uint32_t base, + vrf_id_t vrf_id); +static int label_manager_release_label_chunk(struct zserv *client, + uint32_t start, uint32_t end); + +void delete_label_chunk(void *val) +{ + XFREE(MTYPE_LM_CHUNK, val); +} + +/** + * Release label chunks from a client. + * + * Called on client disconnection or reconnection. It only releases chunks + * with empty keep value. 
+ * + * @param client Client zapi session; its proto, instance and session_id + * identify the owner whose chunks are released + * @return Number of chunks released + */ +int release_daemon_label_chunks(struct zserv *client) +{ + struct listnode *node, *nnode; + struct label_manager_chunk *lmc; + int count = 0; + int ret; + + if (IS_ZEBRA_DEBUG_PACKET) + zlog_debug("%s: Releasing chunks for client proto %s, instance %d, session %u", + __func__, zebra_route_string(client->proto), + client->instance, client->session_id); + + for (ALL_LIST_ELEMENTS(lbl_mgr.lc_list, node, nnode, lmc)) { + if (lmc->proto == client->proto && + lmc->instance == client->instance && + lmc->session_id == client->session_id && lmc->keep == 0) { + ret = release_label_chunk(lmc->proto, lmc->instance, + lmc->session_id, + lmc->start, lmc->end); + if (ret == 0) + count++; + } + } + + if (IS_ZEBRA_DEBUG_PACKET) + zlog_debug("%s: Released %d label chunks", __func__, count); + + return count; +} + +int lm_client_disconnect_cb(struct zserv *client) +{ + hook_call(lm_client_disconnect, client); + return 0; +} + +void lm_hooks_register(void) +{ + hook_register(lm_client_connect, label_manager_connect); + hook_register(lm_client_disconnect, label_manager_disconnect); + hook_register(lm_get_chunk, label_manager_get_chunk); + hook_register(lm_release_chunk, label_manager_release_label_chunk); +} +void lm_hooks_unregister(void) +{ + hook_unregister(lm_client_connect, label_manager_connect); + hook_unregister(lm_client_disconnect, label_manager_disconnect); + hook_unregister(lm_get_chunk, label_manager_get_chunk); + hook_unregister(lm_release_chunk, label_manager_release_label_chunk); +} + +DEFPY(show_label_table, show_label_table_cmd, "show debugging label-table", + SHOW_STR + DEBUG_STR + "Display allocated label chunks\n") +{ + struct label_manager_chunk *lmc; + struct listnode *node; + + for (ALL_LIST_ELEMENTS_RO(lbl_mgr.lc_list, node, lmc)) { + vty_out(vty, "Proto %s: [%u/%u]\n", + 
zebra_route_string(lmc->proto), lmc->start, lmc->end); + } + + return CMD_SUCCESS; +} + +/** + * Init label manager (or proxy to an external one) + */ +void label_manager_init(void) +{ + lbl_mgr.lc_list = list_new(); + lbl_mgr.lc_list->del = delete_label_chunk; + hook_register(zserv_client_close, lm_client_disconnect_cb); + + /* register default hooks for the label manager actions */ + lm_hooks_register(); + + /* notify any external module that we are done */ + hook_call(lm_cbs_inited); + + install_element(VIEW_NODE, &show_label_table_cmd); +} + +/* alloc and fill a label chunk */ +struct label_manager_chunk * +create_label_chunk(uint8_t proto, unsigned short instance, uint32_t session_id, + uint8_t keep, uint32_t start, uint32_t end) +{ + /* alloc chunk, fill it and return it */ + struct label_manager_chunk *lmc = + XCALLOC(MTYPE_LM_CHUNK, sizeof(struct label_manager_chunk)); + + lmc->start = start; + lmc->end = end; + lmc->proto = proto; + lmc->instance = instance; + lmc->session_id = session_id; + lmc->keep = keep; + + return lmc; +} + +/* attempt to get the specific label chunk [base, base + size - 1]; NULL if the range is invalid or hits a chunk in use */ +static struct label_manager_chunk * +assign_specific_label_chunk(uint8_t proto, unsigned short instance, + uint32_t session_id, uint8_t keep, uint32_t size, + uint32_t base) +{ + struct label_manager_chunk *lmc; + struct listnode *node, *next = NULL; + struct listnode *first_node = NULL; + struct listnode *last_node = NULL; + struct listnode *insert_node = NULL; + + /* precompute last label from base and size (unsigned, may wrap; checked below) */ + uint32_t end = base + size - 1; + + /* sanities: base and end inside the unreserved range; end < base means size was 0 or the sum wrapped */ + if ((base < MPLS_LABEL_UNRESERVED_MIN) || (end < base) + || (end > MPLS_LABEL_UNRESERVED_MAX)) { + zlog_err("Invalid LM request arguments: base: %u, size: %u", + base, size); + return NULL; + } + + /* Scan the existing chunks to see if the requested range of labels + * falls inside any of such chunks */ + for (ALL_LIST_ELEMENTS_RO(lbl_mgr.lc_list, node, lmc)) { + + /* skip chunks for labels < base */ + if (base > lmc->end) + continue; + + 
/* requested range is not covered by any existing, free chunk. + * Therefore, need to insert a chunk */ + if ((end < lmc->start) && !first_node) { + insert_node = node; + break; + } + + if (!first_node) + first_node = node; + + /* if chunk is used, cannot honor request */ + if (lmc->proto != NO_PROTO) + return NULL; + + if (end <= lmc->end) { + last_node = node; + break; + } + } + + /* insert chunk between existing chunks */ + if (insert_node) { + lmc = create_label_chunk(proto, instance, session_id, keep, + base, end); + listnode_add_before(lbl_mgr.lc_list, insert_node, lmc); + return lmc; + } + + if (first_node) { + /* get node past the last one, if there */ + if (last_node) + last_node = listnextnode(last_node); + + /* delete node coming after the above chunk whose labels are + * included in the previous one */ + for (node = first_node; node && (node != last_node); + node = next) { + struct label_manager_chunk *death; + + next = listnextnode(node); + death = listgetdata(node); + list_delete_node(lbl_mgr.lc_list, node); + delete_label_chunk(death); + } + + lmc = create_label_chunk(proto, instance, session_id, keep, + base, end); + if (last_node) + listnode_add_before(lbl_mgr.lc_list, last_node, lmc); + else + listnode_add(lbl_mgr.lc_list, lmc); + + return lmc; + } else { + /* create a new chunk past all the existing ones and link at + * tail */ + lmc = create_label_chunk(proto, instance, session_id, keep, + base, end); + listnode_add(lbl_mgr.lc_list, lmc); + return lmc; + } +} + +/** + * Core function, assigns label chunks + * + * It first searches through the list to check if there's one available + * (previously released). 
Otherwise it creates and assigns a new one + * + * @param proto Daemon protocol of client, to identify the owner + * @param instance Instance, to identify the owner (session_id is recorded on the chunk as well) + * @param keep If set, avoid garbage collection + * @param size Size of the label chunk + * @param base Desired starting label of the chunk; if MPLS_LABEL_BASE_ANY it does not apply + * @return Pointer to the assigned label chunk, or NULL if the request could not be satisfied + */ +struct label_manager_chunk * +assign_label_chunk(uint8_t proto, unsigned short instance, uint32_t session_id, + uint8_t keep, uint32_t size, uint32_t base) +{ + struct label_manager_chunk *lmc; + struct listnode *node; + uint32_t prev_end = MPLS_LABEL_UNRESERVED_MIN; + + /* handle chunks request with a specific base label */ + if (base != MPLS_LABEL_BASE_ANY) + return assign_specific_label_chunk(proto, instance, session_id, + keep, size, base); + + /* appease scan-build, who gets confused by the use of macros */ + assert(lbl_mgr.lc_list); + + /* first check if there's one available */ + for (ALL_LIST_ELEMENTS_RO(lbl_mgr.lc_list, node, lmc)) { + if (lmc->proto == NO_PROTO + && lmc->end - lmc->start + 1 == size) { + lmc->proto = proto; + lmc->instance = instance; + lmc->session_id = session_id; + lmc->keep = keep; + return lmc; + } + /* check if we have a "hole" behind us that we can squeeze into + */ + if ((lmc->start > prev_end) && (lmc->start - prev_end > size)) { + lmc = create_label_chunk(proto, instance, session_id, + keep, prev_end + 1, + prev_end + size); + listnode_add_before(lbl_mgr.lc_list, node, lmc); + return lmc; + } + prev_end = lmc->end; + } + /* otherwise create a new one */ + uint32_t start_free; + + if (list_isempty(lbl_mgr.lc_list)) + start_free = MPLS_LABEL_UNRESERVED_MIN; + else + start_free = ((struct label_manager_chunk *)listgetdata( + listtail(lbl_mgr.lc_list))) + ->end + + 1; + + if (start_free > MPLS_LABEL_UNRESERVED_MAX - size + 1) { + flog_err(EC_ZEBRA_LM_EXHAUSTED_LABELS, + "Reached max labels. 
Start: %u, size: %u", start_free, + size); + return NULL; + } + + /* create chunk and link at tail */ + lmc = create_label_chunk(proto, instance, session_id, keep, start_free, + start_free + size - 1); + listnode_add(lbl_mgr.lc_list, lmc); + return lmc; +} + +/** + * Release a single label chunk on behalf of a client. + * + * Default lm_release_chunk hook handler: forwards to release_label_chunk() + * with the client's proto, instance and session_id as the owner identity. + * + * @param client Client zapi session + * @param start First label of the chunk + * @param end Last label of the chunk + * @return 0 on success, -1 otherwise + */ +static int label_manager_release_label_chunk(struct zserv *client, + uint32_t start, uint32_t end) +{ + return release_label_chunk(client->proto, client->instance, + client->session_id, start, end); +} + +/** + * Core function: unlink and free the chunk matching [start, end] and its owner + * + * @param proto Daemon protocol of client, to identify the owner + * @param instance Instance, to identify the owner + * @param session_id Zclient session ID, to identify the zclient session + * @param start First label of the chunk + * @param end Last label of the chunk + * @return 0 on success, -1 otherwise + */ +int release_label_chunk(uint8_t proto, unsigned short instance, + uint32_t session_id, uint32_t start, uint32_t end) +{ + struct listnode *node; + struct label_manager_chunk *lmc; + int ret = -1; + + /* trace the request when packet debugging is enabled */ + if (IS_ZEBRA_DEBUG_PACKET) + zlog_debug("Releasing label chunk: %u - %u", start, end); + /* find the chunk with this exact range that belongs to this owner */ + for (ALL_LIST_ELEMENTS_RO(lbl_mgr.lc_list, node, lmc)) { + if (lmc->start != start) + continue; + if (lmc->end != end) + continue; + if (lmc->proto != proto || lmc->instance != instance || + lmc->session_id != session_id) { + flog_err(EC_ZEBRA_LM_DAEMON_MISMATCH, + "%s: Daemon mismatch!!", __func__); + continue; + } + ret = 0; + break; + } + if (lmc) { + list_delete_node(lbl_mgr.lc_list, node); + delete_label_chunk(lmc); + } + + if (ret != 0) + flog_err(EC_ZEBRA_LM_UNRELEASED_CHUNK, + "%s: 
Label chunk not released!!", __func__); + + return ret; +} + +/* default functions to be called on hooks */ +static int label_manager_connect(struct zserv *client, vrf_id_t vrf_id) +{ + /* + * Release previous labels of same protocol and instance. + * This is done in case it restarted from an unexpected shutdown. + */ + release_daemon_label_chunks(client); + return zsend_label_manager_connect_response(client, vrf_id, 0); +} +static int label_manager_disconnect(struct zserv *client) +{ + release_daemon_label_chunks(client); + return 0; +} +static int label_manager_get_chunk(struct label_manager_chunk **lmc, + struct zserv *client, uint8_t keep, + uint32_t size, uint32_t base, + vrf_id_t vrf_id) +{ + *lmc = assign_label_chunk(client->proto, client->instance, + client->session_id, keep, size, base); + /* Respond to a get_chunk request */ + if (!*lmc) { + if (base == MPLS_LABEL_BASE_ANY) + flog_err(EC_ZEBRA_LM_CANNOT_ASSIGN_CHUNK, + "Unable to assign Label Chunk size %u to %s instance %u", + size, zebra_route_string(client->proto), + client->instance); + else + flog_err(EC_ZEBRA_LM_CANNOT_ASSIGN_CHUNK, + "Unable to assign Label Chunk %u - %u to %s instance %u", + base, base + size - 1, + zebra_route_string(client->proto), + client->instance); + } else if (IS_ZEBRA_DEBUG_PACKET) + zlog_debug("Assigned Label Chunk %u - %u to %s instance %u", + (*lmc)->start, (*lmc)->end, + zebra_route_string(client->proto), client->instance); + + return zsend_assign_label_chunk_response(client, vrf_id, *lmc); +} + +/* Respond to a connect request */ +int lm_client_connect_response(uint8_t proto, uint16_t instance, + uint32_t session_id, vrf_id_t vrf_id, + uint8_t result) +{ + struct zserv *client = zserv_find_client_session(proto, instance, + session_id); + if (!client) { + zlog_err("%s: could not find client for daemon %s instance %u session %u", + __func__, zebra_route_string(proto), instance, + session_id); + return 1; + } + return zsend_label_manager_connect_response(client, vrf_id, 
result); +} + +void label_manager_close(void) +{ + list_delete(&lbl_mgr.lc_list); +} diff --git a/zebra/label_manager.h b/zebra/label_manager.h new file mode 100644 index 0000000..74f40fa --- /dev/null +++ b/zebra/label_manager.h @@ -0,0 +1,114 @@ +// SPDX-License-Identifier: GPL-2.0-or-later +/* + * Label Manager header + * + * Copyright (C) 2017 by Bingen Eguzkitza, + * Volta Networks Inc. + * + * This file is part of FRRouting (FRR) + */ + +#ifndef _LABEL_MANAGER_H +#define _LABEL_MANAGER_H + +#include + +#include "lib/linklist.h" +#include "frrevent.h" +#include "lib/hook.h" + +#include "zebra/zserv.h" + +#ifdef __cplusplus +extern "C" { +#endif + +#define NO_PROTO 0 + +/* + * Label chunk struct + * Client daemon which the chunk belongs to can be identified by a tuple of: + * proto (daemon protocol) + instance + zapi session_id + * If the client then passes a non-empty value to keep field when it requests + * for chunks, the chunks won't be garbage collected and the client will be + * responsible for releasing them. + * Otherwise, if the keep field is not set (value 0) for the chunk, it will be + * automatically released when the client disconnects or when it reconnects + * (in case it died unexpectedly, we can know it's the same because it will have + * the same proto+instance+session values) + */ +struct label_manager_chunk { + uint8_t proto; + unsigned short instance; + uint32_t session_id; + uint8_t keep; + uint32_t start; /* First label of the chunk */ + uint32_t end; /* Last label of the chunk */ +}; + +/* declare hooks for the basic API, so that it can be specialized or served + * externally. 
Also declare a hook when those functions have been registered, + * so that any external module wanting to replace those can react + */ + +DECLARE_HOOK(lm_client_connect, (struct zserv *client, vrf_id_t vrf_id), + (client, vrf_id)); +DECLARE_HOOK(lm_client_disconnect, (struct zserv *client), (client)); +DECLARE_HOOK(lm_get_chunk, + (struct label_manager_chunk * *lmc, struct zserv *client, + uint8_t keep, uint32_t size, uint32_t base, vrf_id_t vrf_id), + (lmc, client, keep, size, base, vrf_id)); +DECLARE_HOOK(lm_release_chunk, + (struct zserv *client, uint32_t start, uint32_t end), + (client, start, end)); +DECLARE_HOOK(lm_cbs_inited, (), ()); + + +/* declare wrappers to be called in zapi_msg.c (as hooks must be called in + * source file where they were defined) + */ +void lm_client_connect_call(struct zserv *client, vrf_id_t vrf_id); +void lm_get_chunk_call(struct label_manager_chunk **lmc, struct zserv *client, + uint8_t keep, uint32_t size, uint32_t base, + vrf_id_t vrf_id); +void lm_release_chunk_call(struct zserv *client, uint32_t start, + uint32_t end); + +/* API for an external LM to return responses for requests */ +int lm_client_connect_response(uint8_t proto, uint16_t instance, + uint32_t session_id, vrf_id_t vrf_id, + uint8_t result); + +/* convenience function to allocate an lmc to be consumed by the above API */ +struct label_manager_chunk * +create_label_chunk(uint8_t proto, unsigned short instance, uint32_t session_id, + uint8_t keep, uint32_t start, uint32_t end); +void delete_label_chunk(void *val); + +/* register/unregister callbacks for hooks */ +void lm_hooks_register(void); +void lm_hooks_unregister(void); + +/* + * Main label manager struct + * Holds a linked list of label chunks. 
+ */ +struct label_manager { + struct list *lc_list; +}; + +void label_manager_init(void); +struct label_manager_chunk * +assign_label_chunk(uint8_t proto, unsigned short instance, uint32_t session_id, + uint8_t keep, uint32_t size, uint32_t base); +int release_label_chunk(uint8_t proto, unsigned short instance, + uint32_t session_id, uint32_t start, uint32_t end); +int lm_client_disconnect_cb(struct zserv *client); +int release_daemon_label_chunks(struct zserv *client); +void label_manager_close(void); + +#ifdef __cplusplus +} +#endif + +#endif /* _LABEL_MANAGER_H */ diff --git a/zebra/main.c b/zebra/main.c new file mode 100644 index 0000000..1e833ce --- /dev/null +++ b/zebra/main.c @@ -0,0 +1,490 @@ +// SPDX-License-Identifier: GPL-2.0-or-later +/* zebra daemon main routine. + * Copyright (C) 1997, 98 Kunihiro Ishiguro + */ + +#include + +#include +#include "getopt.h" +#include "command.h" +#include "frrevent.h" +#include "filter.h" +#include "memory.h" +#include "prefix.h" +#include "log.h" +#include "plist.h" +#include "privs.h" +#include "sigevent.h" +#include "vrf.h" +#include "libfrr.h" +#include "affinitymap.h" +#include "routemap.h" +#include "routing_nb.h" + +#include "zebra/zebra_router.h" +#include "zebra/zebra_errors.h" +#include "zebra/rib.h" +#include "zebra/zserv.h" +#include "zebra/debug.h" +#include "zebra/router-id.h" +#include "zebra/irdp.h" +#include "zebra/rtadv.h" +#include "zebra/zebra_ptm.h" +#include "zebra/zebra_ns.h" +#include "zebra/redistribute.h" +#include "zebra/zebra_mpls.h" +#include "zebra/label_manager.h" +#include "zebra/zebra_netns_notify.h" +#include "zebra/zebra_rnh.h" +#include "zebra/zebra_pbr.h" +#include "zebra/zebra_vxlan.h" +#include "zebra/zebra_routemap.h" +#include "zebra/zebra_nb.h" +#include "zebra/zebra_opaque.h" +#include "zebra/zebra_srte.h" +#include "zebra/zebra_srv6.h" +#include "zebra/zebra_srv6_vty.h" + +#define ZEBRA_PTM_SUPPORT + +/* process id. 
*/ +pid_t pid; + +/* Pacify zclient.o in libfrr, which expects this variable. */ +struct event_loop *master; + +/* Route retain mode flag. */ +int retain_mode = 0; + +int graceful_restart; + +bool v6_rr_semantics = false; + +/* Receive buffer size for kernel control sockets */ +#define RCVBUFSIZE_MIN 4194304 +#ifdef HAVE_NETLINK +uint32_t rcvbufsize = RCVBUFSIZE_MIN; +#else +uint32_t rcvbufsize = 128 * 1024; +#endif + +uint32_t rt_table_main_id = RT_TABLE_MAIN; + +#define OPTION_V6_RR_SEMANTICS 2000 +#define OPTION_ASIC_OFFLOAD 2001 +#define OPTION_V6_WITH_V4_NEXTHOP 2002 + +/* Command line options. */ +const struct option longopts[] = { + { "batch", no_argument, NULL, 'b' }, + { "allow_delete", no_argument, NULL, 'a' }, + { "socket", required_argument, NULL, 'z' }, + { "ecmp", required_argument, NULL, 'e' }, + { "retain", no_argument, NULL, 'r' }, + { "graceful_restart", required_argument, NULL, 'K' }, + { "asic-offload", optional_argument, NULL, OPTION_ASIC_OFFLOAD }, + { "v6-with-v4-nexthops", no_argument, NULL, OPTION_V6_WITH_V4_NEXTHOP }, +#ifdef HAVE_NETLINK + { "vrfwnetns", no_argument, NULL, 'n' }, + { "nl-bufsize", required_argument, NULL, 's' }, + { "v6-rr-semantics", no_argument, NULL, OPTION_V6_RR_SEMANTICS }, +#endif /* HAVE_NETLINK */ + {"routing-table", optional_argument, NULL, 'R'}, + { 0 } +}; + +zebra_capabilities_t _caps_p[] = {ZCAP_NET_ADMIN, ZCAP_SYS_ADMIN, + ZCAP_NET_RAW, +#ifdef HAVE_DPDK + ZCAP_IPC_LOCK, ZCAP_READ_SEARCH, + ZCAP_SYS_RAWIO +#endif +}; + +/* zebra privileges to run with */ +struct zebra_privs_t zserv_privs = { +#if defined(FRR_USER) && defined(FRR_GROUP) + .user = FRR_USER, + .group = FRR_GROUP, +#endif +#ifdef VTY_GROUP + .vty_group = VTY_GROUP, +#endif + .caps_p = _caps_p, + .cap_num_p = array_size(_caps_p), + .cap_num_i = 0}; + +/* SIGHUP handler. */ +static void sighup(void) +{ + zlog_info("SIGHUP received"); + + /* Reload of config file. */ + ; +} + +/* SIGINT handler. 
*/ +static void sigint(void) +{ + struct vrf *vrf; + struct zebra_vrf *zvrf; + struct listnode *ln, *nn; + struct zserv *client; + static bool sigint_done; + + if (sigint_done) + return; + + sigint_done = true; + + zlog_notice("Terminating on signal"); + + atomic_store_explicit(&zrouter.in_shutdown, true, + memory_order_relaxed); + + /* send RA lifetime of 0 before stopping. rfc4861/6.2.5 */ + rtadv_stop_ra_all(); + + frr_early_fini(); + + /* Stop the opaque module pthread */ + zebra_opaque_stop(); + + zebra_dplane_pre_finish(); + + /* Clean up GR related info. */ + zebra_gr_stale_client_cleanup(zrouter.stale_client_list); + list_delete_all_node(zrouter.stale_client_list); + + /* Clean up zapi clients and server module */ + for (ALL_LIST_ELEMENTS(zrouter.client_list, ln, nn, client)) + zserv_close_client(client); + + zserv_close(); + list_delete_all_node(zrouter.client_list); + + /* Once all the zclients are cleaned up, clean up the opaque module */ + zebra_opaque_finish(); + + zebra_ptm_finish(); + + if (retain_mode) { + zebra_nhg_mark_keep(); + RB_FOREACH (vrf, vrf_name_head, &vrfs_by_name) { + zvrf = vrf->info; + if (zvrf) + SET_FLAG(zvrf->flags, ZEBRA_VRF_RETAIN); + } + } + + if (zrouter.lsp_process_q) + work_queue_free_and_null(&zrouter.lsp_process_q); + + access_list_reset(); + prefix_list_reset(); + /* + * zebra_routemap_finish will + * 1 set rmap upd timer to 0 so that rmap update wont be scheduled again + * 2 Put off the rmap update thread + * 3 route_map_finish + */ + zebra_routemap_finish(); + + rib_update_finish(); + + list_delete(&zrouter.client_list); + + /* Indicate that all new dplane work has been enqueued. When that + * work is complete, the dataplane will enqueue an event + * with the 'finalize' function. + */ + zebra_dplane_finish(); +} + +/* + * Final shutdown step for the zebra main thread. This is run after all + * async update processing has completed. 
+ */ +void zebra_finalize(struct event *dummy) +{ + zlog_info("Zebra final shutdown"); + + vrf_terminate(); + + /* + * Stop dplane thread and finish any cleanup + * This is before the zebra_ns_early_shutdown call + * because sockets that the dplane depends on are closed + * in those functions + */ + zebra_dplane_shutdown(); + + ns_walk_func(zebra_ns_early_shutdown, NULL, NULL); + zebra_ns_notify_close(); + + /* Final shutdown of ns resources */ + ns_walk_func(zebra_ns_final_shutdown, NULL, NULL); + + zebra_router_terminate(); + + ns_terminate(); + frr_fini(); + exit(0); +} + +/* SIGUSR1 handler. */ +static void sigusr1(void) +{ + zlog_rotate(); +} + +struct frr_signal_t zebra_signals[] = { + { + .signal = SIGHUP, + .handler = &sighup, + }, + { + .signal = SIGUSR1, + .handler = &sigusr1, + }, + { + .signal = SIGINT, + .handler = &sigint, + }, + { + .signal = SIGTERM, + .handler = &sigint, + }, +}; + +/* clang-format off */ +static const struct frr_yang_module_info *const zebra_yang_modules[] = { + &frr_filter_info, + &frr_interface_info, + &frr_route_map_info, + &frr_zebra_info, + &frr_vrf_info, + &frr_routing_info, + &frr_affinity_map_info, + &frr_zebra_route_map_info, +}; +/* clang-format on */ + +FRR_DAEMON_INFO( + zebra, ZEBRA, .vty_port = ZEBRA_VTY_PORT, .flags = FRR_NO_ZCLIENT, + + .proghelp = + "Daemon which manages kernel routing table management and\nredistribution between different routing protocols.", + + .signals = zebra_signals, .n_signals = array_size(zebra_signals), + + .privs = &zserv_privs, + + .yang_modules = zebra_yang_modules, + .n_yang_modules = array_size(zebra_yang_modules), +); + +/* Main startup routine. 
*/ +int main(int argc, char **argv) +{ + // int batch_mode = 0; + char *zserv_path = NULL; + struct sockaddr_storage dummy; + socklen_t dummylen; + bool asic_offload = false; + bool v6_with_v4_nexthop = false; + bool notify_on_ack = true; + + graceful_restart = 0; + vrf_configure_backend(VRF_BACKEND_VRF_LITE); + + frr_preinit(&zebra_di, argc, argv); + + frr_opt_add("baz:e:rK:s:R:" +#ifdef HAVE_NETLINK + "n" +#endif + , + longopts, + " -b, --batch Runs in batch mode\n" + " -a, --allow_delete Allow other processes to delete zebra routes\n" + " -z, --socket Set path of zebra socket\n" + " -e, --ecmp Specify ECMP to use.\n" + " -r, --retain When program terminates, retain added route by zebra.\n" + " -K, --graceful_restart Graceful restart at the kernel level, timer in seconds for expiration\n" + " --asic-offload FRR is interacting with an asic underneath the linux kernel\n" + " --v6-with-v4-nexthops Underlying dataplane supports v6 routes with v4 nexthops\n" +#ifdef HAVE_NETLINK + " -s, --nl-bufsize Set netlink receive buffer size\n" + " -n, --vrfwnetns Use NetNS as VRF backend\n" + " --v6-rr-semantics Use v6 RR semantics\n" +#else + " -s, Set kernel socket receive buffer size\n" +#endif /* HAVE_NETLINK */ + " -R, --routing-table Set kernel routing table\n" + ); + + while (1) { + int opt = frr_getopt(argc, argv, NULL); + + if (opt == EOF) + break; + + switch (opt) { + case 0: + break; + case 'b': + // batch_mode = 1; + break; + case 'a': + zrouter.allow_delete = true; + break; + case 'e': { + unsigned long int parsed_multipath = + strtoul(optarg, NULL, 10); + if (parsed_multipath == 0 + || parsed_multipath > MULTIPATH_NUM + || parsed_multipath > UINT32_MAX) { + flog_err( + EC_ZEBRA_BAD_MULTIPATH_NUM, + "Multipath Number specified must be less than %u and greater than 0", + MULTIPATH_NUM); + return 1; + } + zrouter.multipath_num = parsed_multipath; + break; + } + case 'z': + zserv_path = optarg; + if (!frr_zclient_addr(&dummy, &dummylen, optarg)) { + fprintf(stderr, 
+ "Invalid zserv socket path: %s\n", + optarg); + exit(1); + } + break; + case 'r': + retain_mode = 1; + break; + case 'K': + graceful_restart = atoi(optarg); + break; + case 's': + rcvbufsize = atoi(optarg); + if (rcvbufsize < RCVBUFSIZE_MIN) + fprintf(stderr, + "Rcvbufsize is smaller than recommended value: %d\n", + RCVBUFSIZE_MIN); + break; + case 'R': + rt_table_main_id = atoi(optarg); + break; +#ifdef HAVE_NETLINK + case 'n': + vrf_configure_backend(VRF_BACKEND_NETNS); + break; + case OPTION_V6_RR_SEMANTICS: + v6_rr_semantics = true; + break; + case OPTION_ASIC_OFFLOAD: + if (!strcmp(optarg, "notify_on_offload")) + notify_on_ack = false; + if (!strcmp(optarg, "notify_on_ack")) + notify_on_ack = true; + asic_offload = true; + break; + case OPTION_V6_WITH_V4_NEXTHOP: + v6_with_v4_nexthop = true; + break; +#endif /* HAVE_NETLINK */ + default: + frr_help_exit(1); + } + } + + zrouter.master = frr_init(); + + /* Zebra related initialize. */ + zebra_router_init(asic_offload, notify_on_ack, v6_with_v4_nexthop); + zserv_init(); + rib_init(); + zebra_if_init(); + zebra_debug_init(); + + /* + * Initialize NS( and implicitly the VRF module), and make kernel + * routing socket. */ + zebra_ns_init(); + router_id_cmd_init(); + zebra_vty_init(); + access_list_init(); + prefix_list_init(); + rtadv_cmd_init(); +/* PTM socket */ +#ifdef ZEBRA_PTM_SUPPORT + zebra_ptm_init(); +#endif + + zebra_mpls_init(); + zebra_mpls_vty_init(); + zebra_pw_vty_init(); + zebra_pbr_init(); + zebra_opaque_init(); + zebra_srte_init(); + zebra_srv6_init(); + zebra_srv6_vty_init(); + + /* For debug purpose. */ + /* SET_FLAG (zebra_debug_event, ZEBRA_DEBUG_EVENT); */ + + /* Process the configuration file. Among other configuration + * directives we can meet those installing static routes. Such + * requests will not be executed immediately, but queued in + * zebra->ribq structure until we enter the main execution loop. 
+ * The notifications from kernel will show originating PID equal + * to that after daemon() completes (if ever called). + */ + frr_config_fork(); + + /* After we have successfully acquired the pidfile, we can be sure + * about being the only copy of zebra process, which is submitting + * changes to the FIB. + * Clean up zebra-originated routes. The requests will be sent to OS + * immediately, so originating PID in notifications from kernel + * will be equal to the current getpid(). To know about such routes, + * we have to have route_read() called before. + */ + zrouter.startup_time = monotime(NULL); + event_add_timer(zrouter.master, rib_sweep_route, NULL, graceful_restart, + &zrouter.sweeper); + + /* Needed for BSD routing socket. */ + pid = getpid(); + + /* Start dataplane system */ + zebra_dplane_start(); + + /* Start the ted module, before zserv */ + zebra_opaque_start(); + + /* Start Zebra API server */ + zserv_start(zserv_path); + + /* Init label manager */ + label_manager_init(); + + /* RNH init */ + zebra_rnh_init(); + + /* Config handler Init */ + zebra_evpn_init(); + + /* Error init */ + zebra_error_init(); + + frr_run(zrouter.master); + + /* Not reached... */ + return 0; +} diff --git a/zebra/netconf_netlink.c b/zebra/netconf_netlink.c new file mode 100644 index 0000000..7352dfb --- /dev/null +++ b/zebra/netconf_netlink.c @@ -0,0 +1,227 @@ +// SPDX-License-Identifier: GPL-2.0-or-later +/* + * netconf_netlink.c - netconf interaction with the kernel using + * netlink + * Copyright (C) 2021 Nvidia, Inc. 
+ * Donald Sharp + */ +#include + +#ifdef HAVE_NETLINK /* Netlink OSes only */ + +#include + +#include "linux/netconf.h" + +#include "lib/lib_errors.h" +#include "zebra/zebra_ns.h" +#include "zebra/zebra_dplane.h" +#include "zebra/kernel_netlink.h" +#include "zebra/netconf_netlink.h" +#include "zebra/debug.h" + +static struct rtattr *netconf_rta(struct netconfmsg *ncm) +{ + return (struct rtattr *)((char *)ncm + + NLMSG_ALIGN(sizeof(struct netconfmsg))); +} + +/* + * Handle netconf update about a single interface: create dplane + * context, and enqueue for processing in the main zebra pthread. + */ +static int +netlink_netconf_dplane_update(ns_id_t ns_id, afi_t afi, ifindex_t ifindex, + enum dplane_netconf_status_e mpls_on, + enum dplane_netconf_status_e mcast_on, + enum dplane_netconf_status_e linkdown_on) +{ + struct zebra_dplane_ctx *ctx; + + ctx = dplane_ctx_alloc(); + dplane_ctx_set_op(ctx, DPLANE_OP_INTF_NETCONFIG); + dplane_ctx_set_ns_id(ctx, ns_id); + dplane_ctx_set_afi(ctx, afi); + dplane_ctx_set_ifindex(ctx, ifindex); + + dplane_ctx_set_netconf_mpls(ctx, mpls_on); + dplane_ctx_set_netconf_mcast(ctx, mcast_on); + dplane_ctx_set_netconf_linkdown(ctx, linkdown_on); + + /* Enqueue ctx for main pthread to process */ + dplane_provider_enqueue_to_zebra(ctx); + + return 0; +} + +/* + * Parse and process an incoming netlink netconf update. 
+ */ +int netlink_netconf_change(struct nlmsghdr *h, ns_id_t ns_id, int startup) +{ + struct netconfmsg *ncm; + struct rtattr *tb[NETCONFA_MAX + 1] = {}; + int len; + ifindex_t ifindex; + uint32_t ival; + afi_t afi; + enum dplane_netconf_status_e mpls_on = DPLANE_NETCONF_STATUS_UNKNOWN; + enum dplane_netconf_status_e mcast_on = DPLANE_NETCONF_STATUS_UNKNOWN; + enum dplane_netconf_status_e linkdown_on = + DPLANE_NETCONF_STATUS_UNKNOWN; + + if (h->nlmsg_type != RTM_NEWNETCONF && h->nlmsg_type != RTM_DELNETCONF) + return 0; + + len = h->nlmsg_len - NLMSG_LENGTH(sizeof(struct netconfmsg)); + if (len < 0) { + zlog_err("%s: Message received from netlink is of a broken size: %d, min %zu", + __func__, h->nlmsg_len, + (size_t)NLMSG_LENGTH(sizeof(struct netconfmsg))); + return -1; + } + + ncm = NLMSG_DATA(h); + + /* + * FRR does not have an internal representation of afi_t for + * the MPLS Address Family that the kernel has. So let's + * just call it v4. This is ok because the kernel appears + * to do a good job of not sending data that is mixed/matched + * across families + */ +#ifdef AF_MPLS + if (ncm->ncm_family == AF_MPLS) + afi = AFI_IP; + else +#endif /* AF_MPLS */ + afi = family2afi(ncm->ncm_family); + + netlink_parse_rtattr(tb, NETCONFA_MAX, netconf_rta(ncm), len); + + if (!tb[NETCONFA_IFINDEX]) { + zlog_err("NETCONF message received from netlink without an ifindex"); + return 0; + } + + ifindex = *(ifindex_t *)RTA_DATA(tb[NETCONFA_IFINDEX]); + + if (tb[NETCONFA_INPUT]) { + ival = *(uint32_t *)RTA_DATA(tb[NETCONFA_INPUT]); + if (ival != 0) + mpls_on = DPLANE_NETCONF_STATUS_ENABLED; + else + mpls_on = DPLANE_NETCONF_STATUS_DISABLED; + } + + if (tb[NETCONFA_MC_FORWARDING]) { + ival = *(uint32_t *)RTA_DATA(tb[NETCONFA_MC_FORWARDING]); + if (ival != 0) + mcast_on = DPLANE_NETCONF_STATUS_ENABLED; + else + mcast_on = DPLANE_NETCONF_STATUS_DISABLED; + } + + if (tb[NETCONFA_IGNORE_ROUTES_WITH_LINKDOWN]) { + ival = *(uint32_t *)RTA_DATA( + 
tb[NETCONFA_IGNORE_ROUTES_WITH_LINKDOWN]); + if (ival != 0) + linkdown_on = DPLANE_NETCONF_STATUS_ENABLED; + else + linkdown_on = DPLANE_NETCONF_STATUS_DISABLED; + } + + if (IS_ZEBRA_DEBUG_KERNEL) + zlog_debug( + "%s: interface %u is mpls on: %d multicast on: %d linkdown: %d", + __func__, ifindex, mpls_on, mcast_on, linkdown_on); + + /* Create a dplane context and pass it along for processing */ + netlink_netconf_dplane_update(ns_id, afi, ifindex, mpls_on, mcast_on, + linkdown_on); + + return 0; +} + +/* + * Request info from the host OS. This only sends the request; any replies + * are processed asynchronously. + */ +int netlink_request_netconf(int sockfd) +{ + struct nlsock *nls; + struct { + struct nlmsghdr n; + struct netconfmsg ncm; + char buf[1024]; + } req = {}; + + nls = kernel_netlink_nlsock_lookup(sockfd); + + if (IS_ZEBRA_DEBUG_KERNEL) + zlog_debug("%s: nlsock %s", __func__, nls ? nls->name : "NULL"); + + if (nls == NULL) + return -1; + + req.n.nlmsg_len = NLMSG_LENGTH(sizeof(struct netconfmsg)); + req.n.nlmsg_flags = NLM_F_DUMP | NLM_F_REQUEST; + req.n.nlmsg_type = RTM_GETNETCONF; + req.ncm.ncm_family = AF_UNSPEC; + + return netlink_request(nls, &req); +} + +extern struct zebra_privs_t zserv_privs; +/* + * Currently netconf has no ability to set from netlink. + * So we've received a request to do this work in the data plane. 
+ * as such we need to set the value via the /proc system + */ +enum netlink_msg_status netlink_put_intf_netconfig(struct nl_batch *bth, + struct zebra_dplane_ctx *ctx) +{ + const char *ifname = dplane_ctx_get_ifname(ctx); + enum dplane_netconf_status_e mpls_on = dplane_ctx_get_netconf_mpls(ctx); + char set[64]; + char mpls_proc[PATH_MAX]; + int fd, ret = FRR_NETLINK_ERROR; + + snprintf(mpls_proc, sizeof(mpls_proc), + "/proc/sys/net/mpls/conf/%s/input", ifname); + + if (mpls_on == DPLANE_NETCONF_STATUS_ENABLED) + snprintf(set, sizeof(set), "1\n"); + else if (mpls_on == DPLANE_NETCONF_STATUS_DISABLED) + snprintf(set, sizeof(set), "0\n"); + else { + flog_err_sys( + EC_LIB_DEVELOPMENT, + "%s: Expected interface %s to be set to ENABLED or DISABLED was %d", + __func__, ifname, mpls_on); + return ret; + } + + frr_with_privs (&zserv_privs) { + fd = open(mpls_proc, O_WRONLY); + if (fd < 0) { + flog_err_sys( + EC_LIB_SOCKET, + "%s: Unable to open %s for writing: %s(%d)", + __func__, mpls_proc, safe_strerror(errno), + errno); + return ret; + } + if (write(fd, set, 2) == 2) + ret = FRR_NETLINK_SUCCESS; + else + flog_err_sys(EC_LIB_SOCKET, + "%s: Unsuccessful write to %s: %s(%d)", + __func__, mpls_proc, safe_strerror(errno), + errno); + close(fd); + } + return ret; +} + +#endif /* HAVE_NETLINK */ diff --git a/zebra/netconf_netlink.h b/zebra/netconf_netlink.h new file mode 100644 index 0000000..3abc72e --- /dev/null +++ b/zebra/netconf_netlink.h @@ -0,0 +1,36 @@ +// SPDX-License-Identifier: GPL-2.0-or-later +/* + * netconf_netlink.h - netconf interaction with the kernel using + * netlink + * Copyright (C) 2021 Nvidia, Inc. + * Donald Sharp + */ +#ifndef __NETCONF_NETLINK_H__ +#define __NETCONF_NETLINK_H__ + +#ifdef HAVE_NETLINK /* Netlink-only module */ + +#include "zebra/zebra_ns.h" + +#ifdef __cplusplus +extern "C" { +#endif + +/* Parse and handle a NETCONF message. 
*/ +extern int netlink_netconf_change(struct nlmsghdr *h, ns_id_t ns_id, + int startup); +/* Request info from the host OS. */ +int netlink_request_netconf(int sockfd); + +struct nl_batch; + +extern enum netlink_msg_status +netlink_put_intf_netconfig(struct nl_batch *bth, struct zebra_dplane_ctx *ctx); + +#ifdef __cplusplus +} +#endif + +#endif /* HAVE_NETLINK */ + +#endif /* NETCONF_NETLINK_H */ diff --git a/zebra/redistribute.c b/zebra/redistribute.c new file mode 100644 index 0000000..7559e31 --- /dev/null +++ b/zebra/redistribute.c @@ -0,0 +1,922 @@ +// SPDX-License-Identifier: GPL-2.0-or-later +/* Redistribution Handler + * Copyright (C) 1998 Kunihiro Ishiguro + */ + +#include + +#include "vector.h" +#include "vty.h" +#include "command.h" +#include "prefix.h" +#include "table.h" +#include "stream.h" +#include "zclient.h" +#include "linklist.h" +#include "log.h" +#include "vrf.h" +#include "srcdest_table.h" + +#include "zebra/rib.h" +#include "zebra/zebra_router.h" +#include "zebra/zebra_ns.h" +#include "zebra/zebra_vrf.h" +#include "zebra/zebra_routemap.h" +#include "zebra/redistribute.h" +#include "zebra/debug.h" +#include "zebra/router-id.h" +#include "zebra/zapi_msg.h" +#include "zebra/zebra_vxlan.h" +#include "zebra/zebra_errors.h" +#include "zebra/zebra_neigh.h" + +#define ZEBRA_PTM_SUPPORT + +/* array holding redistribute info about table redistribution */ +/* bit AFI is set if that AFI is redistributing routes from this table */ +static int zebra_import_table_used[AFI_MAX][ZEBRA_KERNEL_TABLE_MAX]; +static uint32_t zebra_import_table_distance[AFI_MAX][ZEBRA_KERNEL_TABLE_MAX]; + +int is_zebra_import_table_enabled(afi_t afi, vrf_id_t vrf_id, uint32_t table_id) +{ + /* + * Make sure that what we are called with actualy makes sense + */ + if (afi == AFI_MAX) + return 0; + + if (is_zebra_valid_kernel_table(table_id) && + table_id < ZEBRA_KERNEL_TABLE_MAX) + return zebra_import_table_used[afi][table_id]; + return 0; +} + +static void 
/*
 * Walk the unicast table of 'vrf_id' for 'afi' and send a redistribute-add
 * to 'client' for every selected route entry that matches the requested
 * 'type'/'instance' (or any entry when type is ZEBRA_ROUTE_ALL) and whose
 * prefix passes zebra_check_addr().
 */
static void zebra_redistribute(struct zserv *client, int type,
			       unsigned short instance, vrf_id_t vrf_id,
			       int afi)
{
	struct route_table *rib_table =
		zebra_vrf_table(afi, SAFI_UNICAST, vrf_id);
	struct route_node *node;
	struct route_entry *entry;

	if (rib_table == NULL)
		return;

	for (node = route_top(rib_table); node;
	     node = srcdest_route_next(node)) {
		RNODE_FOREACH_RE (node, entry) {
			bool wanted;

			if (IS_ZEBRA_DEBUG_RIB)
				zlog_debug(
					"%s: client %s %pRN(%u:%u) checking: selected=%d, type=%s, instance=%u, distance=%d, metric=%d zebra_check_addr=%d",
					__func__,
					zebra_route_string(client->proto), node,
					vrf_id, entry->instance,
					!!CHECK_FLAG(entry->flags,
						     ZEBRA_FLAG_SELECTED),
					zebra_route_string(entry->type),
					entry->instance, entry->distance,
					entry->metric,
					zebra_check_addr(&node->p));

			/* Either everything is wanted, or type+instance must match */
			wanted = type == ZEBRA_ROUTE_ALL
				 || (entry->type == type
				     && entry->instance == instance);

			if (CHECK_FLAG(entry->flags, ZEBRA_FLAG_SELECTED)
			    && wanted && zebra_check_addr(&node->p))
				zsend_redistribute_route(
					ZEBRA_REDISTRIBUTE_ROUTE_ADD, client,
					node, entry);
		}
	}
}
+ */ +static bool zebra_redistribute_check(const struct route_node *rn, + const struct route_entry *re, + struct zserv *client) +{ + struct zebra_vrf *zvrf; + afi_t afi; + + /* Process only if there is valid re */ + if (!re) + return false; + + afi = family2afi(rn->p.family); + zvrf = zebra_vrf_lookup_by_id(re->vrf_id); + if (re->vrf_id == VRF_DEFAULT && zvrf->table_id != re->table) + return false; + + /* If default route and redistributed */ + if (is_default_prefix(&rn->p) && + vrf_bitmap_check(&client->redist_default[afi], re->vrf_id)) + return true; + + /* If redistribute in enabled for zebra route all */ + if (vrf_bitmap_check(&client->redist[afi][ZEBRA_ROUTE_ALL], re->vrf_id)) + return true; + + /* + * If multi-instance then check for route + * redistribution for given instance. + */ + if (re->instance) { + if (redist_check_instance(&client->mi_redist[afi][re->type], + re->instance)) + return true; + else + return false; + } + + /* If redistribution is enabled for give route type. */ + if (vrf_bitmap_check(&client->redist[afi][re->type], re->vrf_id)) + return true; + + return false; +} + +/* Either advertise a route for redistribution to registered clients or */ +/* withdraw redistribution if add cannot be done for client */ +void redistribute_update(const struct route_node *rn, + const struct route_entry *re, + const struct route_entry *prev_re) +{ + struct listnode *node, *nnode; + struct zserv *client; + + if (IS_ZEBRA_DEBUG_RIB) + zlog_debug( + "(%u:%u):%pRN(%u): Redist update re %p (%s), old %p (%s)", + re->vrf_id, re->table, rn, re->instance, re, + zebra_route_string(re->type), prev_re, + prev_re ? 
zebra_route_string(prev_re->type) : "None"); + + if (!zebra_check_addr(&rn->p)) { + if (IS_ZEBRA_DEBUG_RIB) + zlog_debug("Redist update filter prefix %pRN", rn); + return; + } + + + for (ALL_LIST_ELEMENTS(zrouter.client_list, node, nnode, client)) { + if (zebra_redistribute_check(rn, re, client)) { + if (IS_ZEBRA_DEBUG_RIB) { + zlog_debug( + "%s: client %s %pRN(%u:%u), type=%d, distance=%d, metric=%d", + __func__, + zebra_route_string(client->proto), rn, + re->vrf_id, re->table, re->type, + re->distance, re->metric); + } + zsend_redistribute_route(ZEBRA_REDISTRIBUTE_ROUTE_ADD, + client, rn, re); + } else if (zebra_redistribute_check(rn, prev_re, client)) + zsend_redistribute_route(ZEBRA_REDISTRIBUTE_ROUTE_DEL, + client, rn, prev_re); + } +} + +/* + * During a route delete, where 'new_re' is NULL, redist a delete to all + * clients registered for the type of 'old_re'. + * During a route update, redist a delete to any clients who will not see + * an update when the new route is installed. There are cases when a client + * may have seen a redist for 'old_re', but will not see + * the redist for 'new_re'. + */ +void redistribute_delete(const struct route_node *rn, + const struct route_entry *old_re, + const struct route_entry *new_re) +{ + struct listnode *node, *nnode; + struct zserv *client; + vrf_id_t vrfid; + + if (old_re) + vrfid = old_re->vrf_id; + else if (new_re) + vrfid = new_re->vrf_id; + else + return; + + if (IS_ZEBRA_DEBUG_RIB) { + uint8_t old_inst, new_inst; + uint32_t table = 0; + + old_inst = new_inst = 0; + + if (old_re) { + old_inst = old_re->instance; + table = old_re->table; + } + if (new_re) { + new_inst = new_re->instance; + table = new_re->table; + } + + zlog_debug("(%u:%u):%pRN: Redist del: re %p (%u:%s), new re %p (%u:%s)", + vrfid, table, rn, old_re, old_inst, + old_re ? zebra_route_string(old_re->type) : "None", + new_re, new_inst, + new_re ? zebra_route_string(new_re->type) : "None"); + } + + /* Skip invalid (e.g. 
linklocal) prefix */ + if (!zebra_check_addr(&rn->p)) { + if (IS_ZEBRA_DEBUG_RIB) { + zlog_debug( + "%u:%pRN: Redist del old: skipping invalid prefix", + vrfid, rn); + } + return; + } + + for (ALL_LIST_ELEMENTS(zrouter.client_list, node, nnode, client)) { + /* Do not send unsolicited messages to synchronous clients. */ + if (client->synchronous) + continue; + /* + * Skip this client if it will receive an update for the + * 'new' re + */ + if (zebra_redistribute_check(rn, new_re, client)) + continue; + + /* Send a delete for the 'old' re to any subscribed client. */ + if (zebra_redistribute_check(rn, old_re, client)) + zsend_redistribute_route(ZEBRA_REDISTRIBUTE_ROUTE_DEL, + client, rn, old_re); + } +} + + +void zebra_redistribute_add(ZAPI_HANDLER_ARGS) +{ + afi_t afi = 0; + int type = 0; + unsigned short instance; + + STREAM_GETC(msg, afi); + STREAM_GETC(msg, type); + STREAM_GETW(msg, instance); + + if (IS_ZEBRA_DEBUG_EVENT) + zlog_debug( + "%s: client proto %s afi=%d, wants %s, vrf %s(%u), instance=%d", + __func__, zebra_route_string(client->proto), afi, + zebra_route_string(type), VRF_LOGNAME(zvrf->vrf), + zvrf_id(zvrf), instance); + + if (afi == 0 || afi >= AFI_MAX) { + flog_warn(EC_ZEBRA_REDISTRIBUTE_UNKNOWN_AF, + "%s: Specified afi %d does not exist", __func__, afi); + return; + } + + if (type == 0 || type >= ZEBRA_ROUTE_MAX) { + zlog_debug("%s: Specified Route Type %d does not exist", + __func__, type); + return; + } + + if (instance) { + if (!redist_check_instance(&client->mi_redist[afi][type], + instance)) { + redist_add_instance(&client->mi_redist[afi][type], + instance); + zebra_redistribute(client, type, instance, + zvrf_id(zvrf), afi); + } + } else { + if (!vrf_bitmap_check(&client->redist[afi][type], + zvrf_id(zvrf))) { + if (IS_ZEBRA_DEBUG_EVENT) + zlog_debug( + "%s: setting vrf %s(%u) redist bitmap", + __func__, VRF_LOGNAME(zvrf->vrf), + zvrf_id(zvrf)); + vrf_bitmap_set(&client->redist[afi][type], + zvrf_id(zvrf)); + zebra_redistribute(client, type, 
0, zvrf_id(zvrf), afi); + } + } + +stream_failure: + return; +} + +void zebra_redistribute_delete(ZAPI_HANDLER_ARGS) +{ + afi_t afi = 0; + int type = 0; + unsigned short instance; + + STREAM_GETC(msg, afi); + STREAM_GETC(msg, type); + STREAM_GETW(msg, instance); + + if (IS_ZEBRA_DEBUG_EVENT) + zlog_debug( + "%s: client proto %s afi=%d, no longer wants %s, vrf %s(%u), instance=%d", + __func__, zebra_route_string(client->proto), afi, + zebra_route_string(type), VRF_LOGNAME(zvrf->vrf), + zvrf_id(zvrf), instance); + + + if (afi == 0 || afi >= AFI_MAX) { + flog_warn(EC_ZEBRA_REDISTRIBUTE_UNKNOWN_AF, + "%s: Specified afi %d does not exist", __func__, afi); + return; + } + + if (type == 0 || type >= ZEBRA_ROUTE_MAX) { + zlog_debug("%s: Specified Route Type %d does not exist", + __func__, type); + return; + } + + /* + * NOTE: no need to withdraw the previously advertised routes. The + * clients + * themselves should keep track of the received routes from zebra and + * withdraw them when necessary. + */ + if (instance) + redist_del_instance(&client->mi_redist[afi][type], instance); + else + vrf_bitmap_unset(&client->redist[afi][type], zvrf_id(zvrf)); + +stream_failure: + return; +} + +void zebra_redistribute_default_add(ZAPI_HANDLER_ARGS) +{ + afi_t afi = 0; + + STREAM_GETC(msg, afi); + + if (afi == 0 || afi >= AFI_MAX) { + flog_warn(EC_ZEBRA_REDISTRIBUTE_UNKNOWN_AF, + "%s: Specified afi %u does not exist", __func__, afi); + return; + } + + vrf_bitmap_set(&client->redist_default[afi], zvrf_id(zvrf)); + zebra_redistribute_default(client, zvrf_id(zvrf)); + +stream_failure: + return; +} + +void zebra_redistribute_default_delete(ZAPI_HANDLER_ARGS) +{ + afi_t afi = 0; + + STREAM_GETC(msg, afi); + + if (afi == 0 || afi >= AFI_MAX) { + flog_warn(EC_ZEBRA_REDISTRIBUTE_UNKNOWN_AF, + "%s: Specified afi %u does not exist", __func__, afi); + return; + } + + vrf_bitmap_unset(&client->redist_default[afi], zvrf_id(zvrf)); + +stream_failure: + return; +} + +/* Interface up information. 
*/ +void zebra_interface_up_update(struct interface *ifp) +{ + struct listnode *node, *nnode; + struct zserv *client; + + if (IS_ZEBRA_DEBUG_EVENT) + zlog_debug("MESSAGE: ZEBRA_INTERFACE_UP %s vrf %s(%u)", + ifp->name, ifp->vrf->name, ifp->vrf->vrf_id); + + if (ifp->ptm_status || !ifp->ptm_enable) { + for (ALL_LIST_ELEMENTS(zrouter.client_list, node, nnode, + client)) { + /* Do not send unsolicited messages to synchronous + * clients. + */ + if (client->synchronous) + continue; + + zsend_interface_update(ZEBRA_INTERFACE_UP, + client, ifp); + zsend_interface_link_params(client, ifp); + } + } +} + +/* Interface down information. */ +void zebra_interface_down_update(struct interface *ifp) +{ + struct listnode *node, *nnode; + struct zserv *client; + + if (IS_ZEBRA_DEBUG_EVENT) + zlog_debug("MESSAGE: ZEBRA_INTERFACE_DOWN %s vrf %s(%u)", + ifp->name, ifp->vrf->name, ifp->vrf->vrf_id); + + for (ALL_LIST_ELEMENTS(zrouter.client_list, node, nnode, client)) { + /* Do not send unsolicited messages to synchronous clients. */ + if (client->synchronous) + continue; + + zsend_interface_update(ZEBRA_INTERFACE_DOWN, client, ifp); + } + + zebra_neigh_del_all(ifp); +} + +/* Interface information update. */ +void zebra_interface_add_update(struct interface *ifp) +{ + struct listnode *node, *nnode; + struct zserv *client; + + if (IS_ZEBRA_DEBUG_EVENT) + zlog_debug("MESSAGE: ZEBRA_INTERFACE_ADD %s vrf %s(%u)", + ifp->name, ifp->vrf->name, ifp->vrf->vrf_id); + + for (ALL_LIST_ELEMENTS(zrouter.client_list, node, nnode, client)) { + /* Do not send unsolicited messages to synchronous clients. 
*/ + if (client->synchronous) + continue; + + client->ifadd_cnt++; + zsend_interface_add(client, ifp); + zsend_interface_link_params(client, ifp); + } +} + +void zebra_interface_delete_update(struct interface *ifp) +{ + struct listnode *node, *nnode; + struct zserv *client; + + if (IS_ZEBRA_DEBUG_EVENT) + zlog_debug("MESSAGE: ZEBRA_INTERFACE_DELETE %s vrf %s(%u)", + ifp->name, ifp->vrf->name, ifp->vrf->vrf_id); + + for (ALL_LIST_ELEMENTS(zrouter.client_list, node, nnode, client)) { + /* Do not send unsolicited messages to synchronous clients. */ + if (client->synchronous) + continue; + + client->ifdel_cnt++; + zsend_interface_delete(client, ifp); + } +} + +/* Interface address addition. */ +void zebra_interface_address_add_update(struct interface *ifp, + struct connected *ifc) +{ + struct listnode *node, *nnode; + struct zserv *client; + + if (IS_ZEBRA_DEBUG_EVENT) + zlog_debug( + "MESSAGE: ZEBRA_INTERFACE_ADDRESS_ADD %pFX on %s vrf %s(%u)", + ifc->address, ifp->name, ifp->vrf->name, + ifp->vrf->vrf_id); + + if (!CHECK_FLAG(ifc->conf, ZEBRA_IFC_REAL)) + flog_warn( + EC_ZEBRA_ADVERTISING_UNUSABLE_ADDR, + "advertising address to clients that is not yet usable."); + + zebra_vxlan_add_del_gw_macip(ifp, ifc->address, 1); + + router_id_add_address(ifc); + + for (ALL_LIST_ELEMENTS(zrouter.client_list, node, nnode, client)) { + /* Do not send unsolicited messages to synchronous clients. */ + if (client->synchronous) + continue; + + if (CHECK_FLAG(ifc->conf, ZEBRA_IFC_REAL)) { + client->connected_rt_add_cnt++; + zsend_interface_address(ZEBRA_INTERFACE_ADDRESS_ADD, + client, ifp, ifc); + } + } + /* interface associated NHGs may have been deleted, + * re-sync zebra -> dplane NHGs + */ + zebra_interface_nhg_reinstall(ifp); +} + +/* Interface address deletion. 
*/ +void zebra_interface_address_delete_update(struct interface *ifp, + struct connected *ifc) +{ + struct listnode *node, *nnode; + struct zserv *client; + + if (IS_ZEBRA_DEBUG_EVENT) + zlog_debug( + "MESSAGE: ZEBRA_INTERFACE_ADDRESS_DELETE %pFX on %s vrf %s(%u)", + ifc->address, ifp->name, ifp->vrf->name, + ifp->vrf->vrf_id); + + zebra_vxlan_add_del_gw_macip(ifp, ifc->address, 0); + + router_id_del_address(ifc); + + for (ALL_LIST_ELEMENTS(zrouter.client_list, node, nnode, client)) { + /* Do not send unsolicited messages to synchronous clients. */ + if (client->synchronous) + continue; + + if (CHECK_FLAG(ifc->conf, ZEBRA_IFC_REAL)) { + client->connected_rt_del_cnt++; + zsend_interface_address(ZEBRA_INTERFACE_ADDRESS_DELETE, + client, ifp, ifc); + } + } +} + +/* Interface VRF change. May need to delete from clients not interested in + * the new VRF. Note that this function is invoked *prior* to the VRF change. + */ +void zebra_interface_vrf_update_del(struct interface *ifp, vrf_id_t new_vrf_id) +{ + struct listnode *node, *nnode; + struct zserv *client; + + if (IS_ZEBRA_DEBUG_EVENT) + zlog_debug("MESSAGE: ZEBRA_INTERFACE_DELETE %s VRF Id %u -> %u", + ifp->name, ifp->vrf->vrf_id, new_vrf_id); + + for (ALL_LIST_ELEMENTS(zrouter.client_list, node, nnode, client)) { + /* Do not send unsolicited messages to synchronous clients. */ + if (client->synchronous) + continue; + + /* Need to delete if the client is not interested in the new + * VRF. */ + zsend_interface_update(ZEBRA_INTERFACE_DOWN, client, ifp); + client->ifdel_cnt++; + zsend_interface_delete(client, ifp); + } +} + +/* Interface VRF change. This function is invoked *post* VRF change and sends an + * add to clients who are interested in the new VRF but not in the old VRF. 
+ */ +void zebra_interface_vrf_update_add(struct interface *ifp, vrf_id_t old_vrf_id) +{ + struct listnode *node, *nnode; + struct zserv *client; + + if (IS_ZEBRA_DEBUG_EVENT) + zlog_debug("MESSAGE: ZEBRA_INTERFACE_ADD %s VRF Id %u -> %u", + ifp->name, old_vrf_id, ifp->vrf->vrf_id); + + for (ALL_LIST_ELEMENTS(zrouter.client_list, node, nnode, client)) { + /* Do not send unsolicited messages to synchronous clients. */ + if (client->synchronous) + continue; + + /* Need to add if the client is interested in the new VRF. */ + client->ifadd_cnt++; + zsend_interface_add(client, ifp); + zsend_interface_addresses(client, ifp); + } +} + +int zebra_add_import_table_entry(struct zebra_vrf *zvrf, struct route_node *rn, + struct route_entry *re, const char *rmap_name) +{ + struct route_entry *newre; + struct route_entry *same; + struct prefix p; + struct nexthop_group *ng; + route_map_result_t ret = RMAP_PERMITMATCH; + afi_t afi; + + afi = family2afi(rn->p.family); + if (rmap_name) + ret = zebra_import_table_route_map_check(afi, re, &rn->p, + re->nhe->nhg.nexthop, + rmap_name); + + if (ret != RMAP_PERMITMATCH) { + UNSET_FLAG(re->flags, ZEBRA_FLAG_SELECTED); + zebra_del_import_table_entry(zvrf, rn, re); + return 0; + } + + prefix_copy(&p, &rn->p); + + RNODE_FOREACH_RE (rn, same) { + if (CHECK_FLAG(same->status, ROUTE_ENTRY_REMOVED)) + continue; + + if (same->type == re->type && same->instance == re->instance + && same->table == re->table + && same->type != ZEBRA_ROUTE_CONNECT) + break; + } + + if (same) { + UNSET_FLAG(same->flags, ZEBRA_FLAG_SELECTED); + zebra_del_import_table_entry(zvrf, rn, same); + } + + UNSET_FLAG(re->flags, ZEBRA_FLAG_RR_USE_DISTANCE); + + newre = zebra_rib_route_entry_new( + 0, ZEBRA_ROUTE_TABLE, re->table, re->flags, re->nhe_id, + zvrf->table_id, re->metric, re->mtu, + zebra_import_table_distance[afi][re->table], re->tag); + + ng = nexthop_group_new(); + copy_nexthops(&ng->nexthop, re->nhe->nhg.nexthop, NULL); + + rib_add_multipath(afi, SAFI_UNICAST, &p, 
NULL, newre, ng, false); + nexthop_group_delete(&ng); + + return 0; +} + +int zebra_del_import_table_entry(struct zebra_vrf *zvrf, struct route_node *rn, + struct route_entry *re) +{ + struct prefix p; + afi_t afi; + + afi = family2afi(rn->p.family); + prefix_copy(&p, &rn->p); + + rib_delete(afi, SAFI_UNICAST, zvrf->vrf->vrf_id, ZEBRA_ROUTE_TABLE, + re->table, re->flags, &p, NULL, re->nhe->nhg.nexthop, + re->nhe_id, zvrf->table_id, re->metric, re->distance, + false); + + return 0; +} + +/* Assuming no one calls this with the main routing table */ +int zebra_import_table(afi_t afi, vrf_id_t vrf_id, uint32_t table_id, + uint32_t distance, const char *rmap_name, int add) +{ + struct route_table *table; + struct route_entry *re; + struct route_node *rn; + struct zebra_vrf *zvrf = zebra_vrf_lookup_by_id(vrf_id); + + if (!is_zebra_valid_kernel_table(table_id) + || (table_id == rt_table_main_id)) + return -1; + + if (afi >= AFI_MAX) + return -1; + + table = zebra_vrf_get_table_with_table_id(afi, SAFI_UNICAST, vrf_id, + table_id); + if (table == NULL) { + return 0; + } else if (IS_ZEBRA_DEBUG_RIB) { + zlog_debug("%s routes from table %d", + add ? 
"Importing" : "Unimporting", table_id); + } + + if (add) { + if (rmap_name) + zebra_add_import_table_route_map(afi, rmap_name, + table_id); + else { + rmap_name = + zebra_get_import_table_route_map(afi, table_id); + if (rmap_name) { + zebra_del_import_table_route_map(afi, table_id); + rmap_name = NULL; + } + } + + zebra_import_table_used[afi][table_id] = 1; + zebra_import_table_distance[afi][table_id] = distance; + } else { + zebra_import_table_used[afi][table_id] = 0; + zebra_import_table_distance[afi][table_id] = + ZEBRA_TABLE_DISTANCE_DEFAULT; + + rmap_name = zebra_get_import_table_route_map(afi, table_id); + if (rmap_name) { + zebra_del_import_table_route_map(afi, table_id); + rmap_name = NULL; + } + } + + for (rn = route_top(table); rn; rn = route_next(rn)) { + /* For each entry in the non-default routing table, + * add the entry in the main table + */ + if (!rn->info) + continue; + + RNODE_FOREACH_RE (rn, re) { + if (CHECK_FLAG(re->status, ROUTE_ENTRY_REMOVED)) + continue; + break; + } + + if (!re) + continue; + + if (((afi == AFI_IP) && (rn->p.family == AF_INET)) + || ((afi == AFI_IP6) && (rn->p.family == AF_INET6))) { + if (add) + zebra_add_import_table_entry(zvrf, rn, re, + rmap_name); + else + zebra_del_import_table_entry(zvrf, rn, re); + } + } + return 0; +} + +int zebra_import_table_config(struct vty *vty, vrf_id_t vrf_id) +{ + int i; + afi_t afi; + int write = 0; + char afi_str[AFI_MAX][10] = {"", "ip", "ipv6", "ethernet"}; + const char *rmap_name; + + for (afi = AFI_IP; afi < AFI_MAX; afi++) { + for (i = 1; i < ZEBRA_KERNEL_TABLE_MAX; i++) { + if (!is_zebra_import_table_enabled(afi, vrf_id, i)) + continue; + + if (zebra_import_table_distance[afi][i] + != ZEBRA_TABLE_DISTANCE_DEFAULT) { + vty_out(vty, "%s import-table %d distance %d", + afi_str[afi], i, + zebra_import_table_distance[afi][i]); + } else { + vty_out(vty, "%s import-table %d", afi_str[afi], + i); + } + + rmap_name = zebra_get_import_table_route_map(afi, i); + if (rmap_name) + vty_out(vty, 
" route-map %s", rmap_name); + + vty_out(vty, "\n"); + write = 1; + } + } + + return write; +} + +static void zebra_import_table_rm_update_vrf_afi(struct zebra_vrf *zvrf, + afi_t afi, int table_id, + const char *rmap) +{ + struct route_table *table; + struct route_entry *re; + struct route_node *rn; + const char *rmap_name; + + rmap_name = zebra_get_import_table_route_map(afi, table_id); + if ((!rmap_name) || (strcmp(rmap_name, rmap) != 0)) + return; + + table = zebra_vrf_get_table_with_table_id(afi, SAFI_UNICAST, + zvrf->vrf->vrf_id, table_id); + if (!table) { + if (IS_ZEBRA_DEBUG_RIB_DETAILED) + zlog_debug("%s: Table id=%d not found", __func__, + table_id); + return; + } + + for (rn = route_top(table); rn; rn = route_next(rn)) { + /* + * For each entry in the non-default routing table, + * add the entry in the main table + */ + if (!rn->info) + continue; + + RNODE_FOREACH_RE (rn, re) { + if (CHECK_FLAG(re->status, ROUTE_ENTRY_REMOVED)) + continue; + break; + } + + if (!re) + continue; + + if (((afi == AFI_IP) && (rn->p.family == AF_INET)) + || ((afi == AFI_IP6) && (rn->p.family == AF_INET6))) + zebra_add_import_table_entry(zvrf, rn, re, rmap_name); + } + + return; +} + +static void zebra_import_table_rm_update_vrf(struct zebra_vrf *zvrf, + const char *rmap) +{ + afi_t afi; + int i; + + for (afi = AFI_IP; afi < AFI_MAX; afi++) { + for (i = 1; i < ZEBRA_KERNEL_TABLE_MAX; i++) { + if (!is_zebra_import_table_enabled( + afi, zvrf->vrf->vrf_id, i)) + continue; + + zebra_import_table_rm_update_vrf_afi(zvrf, afi, i, + rmap); + } + } +} + +void zebra_import_table_rm_update(const char *rmap) +{ + struct vrf *vrf; + struct zebra_vrf *zvrf; + + RB_FOREACH (vrf, vrf_name_head, &vrfs_by_name) { + zvrf = vrf->info; + + if (!zvrf) + continue; + + zebra_import_table_rm_update_vrf(zvrf, rmap); + } +} + +/* Interface parameters update */ +void zebra_interface_parameters_update(struct interface *ifp) +{ + struct listnode *node, *nnode; + struct zserv *client; + + if 
(IS_ZEBRA_DEBUG_EVENT) + zlog_debug("MESSAGE: ZEBRA_INTERFACE_LINK_PARAMS %s vrf %s(%u)", + ifp->name, ifp->vrf->name, ifp->vrf->vrf_id); + + for (ALL_LIST_ELEMENTS(zrouter.client_list, node, nnode, client)) { + /* Do not send unsolicited messages to synchronous clients. */ + if (client->synchronous) + continue; + + zsend_interface_link_params(client, ifp); + } +} diff --git a/zebra/redistribute.h b/zebra/redistribute.h new file mode 100644 index 0000000..4347454 --- /dev/null +++ b/zebra/redistribute.h @@ -0,0 +1,81 @@ +// SPDX-License-Identifier: GPL-2.0-or-later +/* + * Redistribution Handler + * Copyright (C) 1999 Kunihiro Ishiguro + */ + +#ifndef _ZEBRA_REDISTRIBUTE_H +#define _ZEBRA_REDISTRIBUTE_H + +#include "table.h" +#include "vty.h" +#include "vrf.h" + +#include "zebra/zserv.h" +#include "zebra/rib.h" + +#ifdef __cplusplus +extern "C" { +#endif + +/* ZAPI command handlers */ +extern void zebra_redistribute_add(ZAPI_HANDLER_ARGS); +extern void zebra_redistribute_delete(ZAPI_HANDLER_ARGS); +extern void zebra_redistribute_default_add(ZAPI_HANDLER_ARGS); +extern void zebra_redistribute_default_delete(ZAPI_HANDLER_ARGS); +/* ----------------- */ + +extern void redistribute_update(const struct route_node *rn, + const struct route_entry *re, + const struct route_entry *prev_re); +/* + * During a route delete, where 'new_re' is NULL, redist a delete to all + * clients registered for the type of 'old_re'. + * During a route update, redist a delete to any clients who will not see + * an update when the new route is installed. There are cases when a client + * may have seen a redist for 'old_re', but will not see + * the redist for 'new_re'. 
+ */ +void redistribute_delete(const struct route_node *rn, + const struct route_entry *old_re, + const struct route_entry *new_re); + +extern void zebra_interface_up_update(struct interface *ifp); +extern void zebra_interface_down_update(struct interface *ifp); + +extern void zebra_interface_add_update(struct interface *ifp); +extern void zebra_interface_delete_update(struct interface *ifp); + +extern void zebra_interface_address_add_update(struct interface *ifp, + struct connected *c); +extern void zebra_interface_address_delete_update(struct interface *ifp, + struct connected *c); +extern void zebra_interface_parameters_update(struct interface *ifp); +extern void zebra_interface_vrf_update_del(struct interface *ifp, + vrf_id_t new_vrf_id); +extern void zebra_interface_vrf_update_add(struct interface *ifp, + vrf_id_t old_vrf_id); + +extern int zebra_import_table(afi_t afi, vrf_id_t vrf_id, + uint32_t table_id, uint32_t distance, + const char *rmap_name, int add); + +extern int zebra_add_import_table_entry(struct zebra_vrf *zvrf, + struct route_node *rn, + struct route_entry *re, + const char *rmap_name); +extern int zebra_del_import_table_entry(struct zebra_vrf *zvrf, + struct route_node *rn, + struct route_entry *re); +extern int is_zebra_import_table_enabled(afi_t, vrf_id_t vrf_id, + uint32_t table_id); + +extern int zebra_import_table_config(struct vty *, vrf_id_t vrf_id); + +extern void zebra_import_table_rm_update(const char *rmap); + +#ifdef __cplusplus +} +#endif + +#endif /* _ZEBRA_REDISTRIBUTE_H */ diff --git a/zebra/rib.h b/zebra/rib.h new file mode 100644 index 0000000..e70b5c1 --- /dev/null +++ b/zebra/rib.h @@ -0,0 +1,641 @@ +// SPDX-License-Identifier: GPL-2.0-or-later +/* + * Routing Information Base header + * Copyright (C) 1997 Kunihiro Ishiguro + */ + +#ifndef _ZEBRA_RIB_H +#define _ZEBRA_RIB_H + +#include "zebra.h" +#include "memory.h" +#include "hook.h" +#include "typesafe.h" +#include "linklist.h" +#include "prefix.h" +#include "table.h" 
+#include "queue.h" +#include "nexthop.h" +#include "nexthop_group.h" +#include "vrf.h" +#include "if.h" +#include "mpls.h" +#include "srcdest_table.h" +#include "zebra/zebra_nhg.h" + +#ifdef __cplusplus +extern "C" { +#endif + +DECLARE_MGROUP(ZEBRA); + +DECLARE_MTYPE(RE); + +PREDECL_LIST(rnh_list); + +/* Nexthop structure. */ +struct rnh { + uint8_t flags; + +#define ZEBRA_NHT_CONNECTED 0x1 +#define ZEBRA_NHT_DELETED 0x2 +#define ZEBRA_NHT_RESOLVE_VIA_DEFAULT 0x4 + + /* VRF identifier. */ + vrf_id_t vrf_id; + + afi_t afi; + safi_t safi; + + uint32_t seqno; + + struct route_entry *state; + struct prefix resolved_route; + struct list *client_list; + + /* pseudowires dependent on this nh */ + struct list *zebra_pseudowire_list; + + struct route_node *node; + + /* + * if this has been filtered for the client + */ + int filtered[ZEBRA_ROUTE_MAX]; + + struct rnh_list_item rnh_list_item; +}; + +#define DISTANCE_INFINITY 255 +#define ZEBRA_KERNEL_TABLE_MAX 252 /* support for no more than this rt tables */ + +PREDECL_LIST(re_list); + +struct re_opaque { + uint16_t length; + uint8_t data[]; +}; + +struct route_entry { + /* Link list. */ + struct re_list_item next; + + /* Nexthop group, shared/refcounted, based on the nexthop(s) + * provided by the owner of the route + */ + struct nhg_hash_entry *nhe; + + /* Nexthop group from FIB (optional), reflecting what is actually + * installed in the FIB if that differs. The 'backup' group is used + * when backup nexthops are present in the route's nhg. + */ + struct nexthop_group fib_ng; + struct nexthop_group fib_backup_ng; + + /* Nexthop group hash entry IDs. The "installed" id is the id + * used in linux/netlink, if available. + */ + uint32_t nhe_id; + uint32_t nhe_installed_id; + + /* Tag */ + route_tag_t tag; + + /* Uptime. */ + time_t uptime; + + /* Type of this route. */ + int type; + + /* VRF identifier. 
*/ + vrf_id_t vrf_id; + + /* Which routing table */ + uint32_t table; + + /* Metric */ + uint32_t metric; + + /* MTU */ + uint32_t mtu; + uint32_t nexthop_mtu; + + /* Flags of this route. + * This flag's definition is in lib/zebra.h ZEBRA_FLAG_* and is exposed + * to clients via Zserv + */ + uint32_t flags; + + /* RIB internal status */ + uint32_t status; +#define ROUTE_ENTRY_REMOVED 0x1 +/* The Route Entry has changed */ +#define ROUTE_ENTRY_CHANGED 0x2 +/* The Label has changed on the Route entry */ +#define ROUTE_ENTRY_LABELS_CHANGED 0x4 +/* Route is queued for Installation into the Data Plane */ +#define ROUTE_ENTRY_QUEUED 0x8 +/* Route is installed into the Data Plane */ +#define ROUTE_ENTRY_INSTALLED 0x10 +/* Route has Failed installation into the Data Plane in some manner */ +#define ROUTE_ENTRY_FAILED 0x20 +/* Route has a 'fib' set of nexthops, probably because the installed set + * differs from the rib/normal set of nexthops. + */ +#define ROUTE_ENTRY_USE_FIB_NHG 0x40 +/* + * Marks route entries that are going to the dplane for a Route Replace, + * to note the fact that this is happening. This will + * be useful when zebra is determining if a route can be + * used for nexthops + */ +#define ROUTE_ENTRY_ROUTE_REPLACING 0x80 + + /* Sequence value incremented for each dataplane operation */ + uint32_t dplane_sequence; + + /* Source protocol instance */ + uint16_t instance; + + /* Distance. */ + uint8_t distance; + + struct re_opaque *opaque; +}; + +#define RIB_SYSTEM_ROUTE(R) RSYSTEM_ROUTE((R)->type) + +#define RIB_KERNEL_ROUTE(R) RKERNEL_ROUTE((R)->type) + +/* Define route types that are equivalent to "connected". 
*/ +#define RIB_CONNECTED_ROUTE(R) \ + ((R)->type == ZEBRA_ROUTE_CONNECT || (R)->type == ZEBRA_ROUTE_NHRP) + +/* meta-queue structure: + * sub-queue 0: nexthop group objects + * sub-queue 1: EVPN/VxLAN objects + * sub-queue 2: Early Route Processing + * sub-queue 3: Early Label Processing + * sub-queue 4: connected + * sub-queue 5: kernel + * sub-queue 6: static + * sub-queue 7: RIP, RIPng, OSPF, OSPF6, IS-IS, EIGRP, NHRP + * sub-queue 8: iBGP, eBGP + * sub-queue 9: any other origin (if any) typically those that + * don't generate routes + * + * NOTE(review): MQ_SIZE below is 11 but only sub-queues 0-9 are listed + * here; confirm what sub-queue 10 is used for and document it. + */ +#define MQ_SIZE 11 + +/* For checking that an object has already been queued in some sub-queue */ +#define MQ_BIT_MASK ((1 << MQ_SIZE) - 1) + +struct meta_queue { + struct list *subq[MQ_SIZE]; + uint32_t size; /* sum of lengths of all subqueues */ +}; + +/* + * Structure that represents a single destination (prefix). + */ +typedef struct rib_dest_t_ { + + /* + * Back pointer to the route node for this destination. This helps + * us get to the prefix that this structure is for. + */ + struct route_node *rnode; + + /* + * Doubly-linked list of routes for this prefix. + */ + struct re_list_head routes; + + struct route_entry *selected_fib; + + /* + * Flags, see below. + */ + uint32_t flags; + + /* + * The list of nht prefixes that have ended up + * depending on this route node. + * After route processing is returned from + * the data plane we will run evaluate_rnh + * on these prefixes. + */ + struct rnh_list_head nht; + + /* + * Linkage to put dest on the FPM processing queue. + */ + TAILQ_ENTRY(rib_dest_t_) fpm_q_entries; + +} rib_dest_t; + +DECLARE_LIST(rnh_list, struct rnh, rnh_list_item); +DECLARE_LIST(re_list, struct route_entry, next); + +#define RIB_ROUTE_QUEUED(x) (1 << (x)) +// If MQ_SIZE is modified this value needs to be updated. +// NOTE(review): 0x3F only covers bits 0-5, while RIB_ROUTE_QUEUED(x) can set +// bits up to ZEBRA_MAX_QINDEX (MQ_SIZE - 1 = 10); confirm whether this value +// should track MQ_BIT_MASK instead. +#define RIB_ROUTE_ANY_QUEUED 0x3F + +/* + * The maximum qindex that can be used. 
+ */ +#define ZEBRA_MAX_QINDEX (MQ_SIZE - 1) + +/* + * This flag indicates that a given prefix has been 'advertised' to + * the FPM to be installed in the forwarding plane. + */ +#define RIB_DEST_SENT_TO_FPM (1 << (ZEBRA_MAX_QINDEX + 1)) + +/* + * This flag is set when we need to send an update to the FPM about a + * dest. + */ +#define RIB_DEST_UPDATE_FPM (1 << (ZEBRA_MAX_QINDEX + 2)) + +#define RIB_DEST_UPDATE_LSPS (1 << (ZEBRA_MAX_QINDEX + 3)) + +/* + * Macro to iterate over each route for a destination (prefix). + */ +#define RE_DEST_FOREACH_ROUTE(dest, re) \ + for ((re) = (dest) ? re_list_first(&((dest)->routes)) : NULL; (re); \ + (re) = re_list_next(&((dest)->routes), (re))) + +/* + * Same as above, but allows the current node to be unlinked. + */ +#define RE_DEST_FOREACH_ROUTE_SAFE(dest, re, next) \ + for ((re) = (dest) ? re_list_first(&((dest)->routes)) : NULL; \ + (re) && ((next) = re_list_next(&((dest)->routes), (re)), 1); \ + (re) = (next)) + +#define RE_DEST_FIRST_ROUTE(dest, re) \ + ((re) = (dest) ? re_list_first(&((dest)->routes)) : NULL) + +#define RE_DEST_NEXT_ROUTE(dest, re) \ + ((re) = (dest) ? re_list_next(&((dest)->routes), (re)) : NULL) + +#define RNODE_FOREACH_RE(rn, re) \ + RE_DEST_FOREACH_ROUTE (rib_dest_from_rnode(rn), re) + +#define RNODE_FOREACH_RE_SAFE(rn, re, next) \ + RE_DEST_FOREACH_ROUTE_SAFE (rib_dest_from_rnode(rn), re, next) + +#define RNODE_FIRST_RE(rn, re) RE_DEST_FIRST_ROUTE(rib_dest_from_rnode(rn), re) + +#define RNODE_NEXT_RE(rn, re) RE_DEST_NEXT_ROUTE(rib_dest_from_rnode(rn), re) + +/* + * rib_table_info_t + * + * Structure that is hung off of a route_table that holds information about + * the table. + */ +struct rib_table_info { + + /* + * Back pointer to zebra_vrf. 
+ */ + struct zebra_vrf *zvrf; + afi_t afi; + safi_t safi; + uint32_t table_id; +}; + +enum rib_tables_iter_state { + RIB_TABLES_ITER_S_INIT, + RIB_TABLES_ITER_S_ITERATING, + RIB_TABLES_ITER_S_DONE +}; + +/* + * Structure that holds state for iterating over all tables in the + * Routing Information Base. + */ +typedef struct rib_tables_iter_t_ { + vrf_id_t vrf_id; + int afi_safi_ix; + + enum rib_tables_iter_state state; +} rib_tables_iter_t; + +/* Events/reasons triggering a RIB update. */ +enum rib_update_event { + RIB_UPDATE_KERNEL, + RIB_UPDATE_RMAP_CHANGE, + RIB_UPDATE_OTHER, + RIB_UPDATE_MAX +}; +void rib_update_finish(void); + +int route_entry_update_nhe(struct route_entry *re, + struct nhg_hash_entry *new_nhghe); + +/* An NHG replace has happened; we have to update route_entry pointers to the new one */ +int rib_handle_nhg_replace(struct nhg_hash_entry *old_entry, + struct nhg_hash_entry *new_entry); + +#define route_entry_dump(prefix, src, re) _route_entry_dump(__func__, prefix, src, re) +extern void _route_entry_dump(const char *func, union prefixconstptr pp, + union prefixconstptr src_pp, + const struct route_entry *re); + +/* + * NOTE(review): 'instance' is uint8_t here, while struct route_entry stores + * a uint16_t instance and rib_add() takes an unsigned short; confirm that + * the narrower type is intended. + */ +struct route_entry * +zebra_rib_route_entry_new(vrf_id_t vrf_id, int type, uint8_t instance, + uint32_t flags, uint32_t nhe_id, uint32_t table_id, + uint32_t metric, uint32_t mtu, uint8_t distance, + route_tag_t tag); + +#define ZEBRA_RIB_LOOKUP_ERROR -1 +#define ZEBRA_RIB_FOUND_EXACT 0 +#define ZEBRA_RIB_FOUND_NOGATE 1 +#define ZEBRA_RIB_FOUND_CONNECTED 2 +#define ZEBRA_RIB_NOTFOUND 3 + +extern int is_zebra_valid_kernel_table(uint32_t table_id); +extern int is_zebra_main_routing_table(uint32_t table_id); +extern int zebra_check_addr(const struct prefix *p); + +extern void rib_delnode(struct route_node *rn, struct route_entry *re); +extern void rib_install_kernel(struct route_node *rn, struct route_entry *re, + struct route_entry *old); +extern void rib_uninstall_kernel(struct route_node *rn, struct route_entry *re); + +/* NOTE: + * All 
rib_add function will not just add prefix into RIB, but + * also implicitly withdraw equal prefix of same type. */ +extern int rib_add(afi_t afi, safi_t safi, vrf_id_t vrf_id, int type, + unsigned short instance, uint32_t flags, struct prefix *p, + struct prefix_ipv6 *src_p, const struct nexthop *nh, + uint32_t nhe_id, uint32_t table_id, uint32_t metric, + uint32_t mtu, uint8_t distance, route_tag_t tag, + bool startup); +/* + * Multipath route apis. + */ +extern int rib_add_multipath(afi_t afi, safi_t safi, struct prefix *p, + struct prefix_ipv6 *src_p, struct route_entry *re, + struct nexthop_group *ng, bool startup); +/* + * -1 -> some sort of error + * 0 -> an add + * 1 -> an update + */ +extern int rib_add_multipath_nhe(afi_t afi, safi_t safi, struct prefix *p, + struct prefix_ipv6 *src_p, + struct route_entry *re, + struct nhg_hash_entry *nhe, bool startup); + +extern void rib_delete(afi_t afi, safi_t safi, vrf_id_t vrf_id, int type, + unsigned short instance, uint32_t flags, + struct prefix *p, struct prefix_ipv6 *src_p, + const struct nexthop *nh, uint32_t nhe_id, + uint32_t table_id, uint32_t metric, uint8_t distance, + bool fromkernel); + +extern struct route_entry *rib_match(afi_t afi, safi_t safi, vrf_id_t vrf_id, + const union g_addr *addr, + struct route_node **rn_out); +extern struct route_entry *rib_match_multicast(afi_t afi, vrf_id_t vrf_id, + union g_addr *gaddr, + struct route_node **rn_out); + +extern struct route_entry *rib_lookup_ipv4(struct prefix_ipv4 *p, + vrf_id_t vrf_id); + +extern void rib_update(enum rib_update_event event); +extern void rib_update_table(struct route_table *table, + enum rib_update_event event, int rtype); +extern void rib_sweep_route(struct event *t); +extern void rib_sweep_table(struct route_table *table); +extern void rib_close_table(struct route_table *table); +extern void rib_init(void); +extern unsigned long rib_score_proto(uint8_t proto, unsigned short instance); +extern unsigned long 
rib_score_proto_table(uint8_t proto, + unsigned short instance, + struct route_table *table); + +extern int rib_queue_add(struct route_node *rn); + +struct nhg_ctx; /* Forward declaration */ + +/* Enqueue incoming nhg from OS for processing */ +extern int rib_queue_nhg_ctx_add(struct nhg_ctx *ctx); + +/* Enqueue incoming nhg from proto daemon for processing */ +extern int rib_queue_nhe_add(struct nhg_hash_entry *nhe); + +/* Enqueue evpn route for processing */ +int zebra_rib_queue_evpn_route_add(vrf_id_t vrf_id, const struct ethaddr *rmac, + const struct ipaddr *vtep_ip, + const struct prefix *host_prefix); +int zebra_rib_queue_evpn_route_del(vrf_id_t vrf_id, + const struct ipaddr *vtep_ip, + const struct prefix *host_prefix); +/* Enqueue EVPN remote ES for processing */ +int zebra_rib_queue_evpn_rem_es_add(const esi_t *esi, + const struct in_addr *vtep_ip, + bool esr_rxed, uint8_t df_alg, + uint16_t df_pref); +int zebra_rib_queue_evpn_rem_es_del(const esi_t *esi, + const struct in_addr *vtep_ip); +/* Enqueue EVPN remote macip update for processing */ +int zebra_rib_queue_evpn_rem_macip_del(vni_t vni, const struct ethaddr *macaddr, + const struct ipaddr *ip, + struct in_addr vtep_ip); +int zebra_rib_queue_evpn_rem_macip_add(vni_t vni, const struct ethaddr *macaddr, + const struct ipaddr *ipaddr, + uint8_t flags, uint32_t seq, + struct in_addr vtep_ip, + const esi_t *esi); +/* Enqueue VXLAN remote vtep update for processing */ +int zebra_rib_queue_evpn_rem_vtep_add(vrf_id_t vrf_id, vni_t vni, + struct in_addr vtep_ip, + int flood_control); +int zebra_rib_queue_evpn_rem_vtep_del(vrf_id_t vrf_id, vni_t vni, + struct in_addr vtep_ip); + +extern void meta_queue_free(struct meta_queue *mq, struct zebra_vrf *zvrf); +extern int zebra_rib_labeled_unicast(struct route_entry *re); +extern struct route_table *rib_table_ipv6; + +extern void rib_unlink(struct route_node *rn, struct route_entry *re); +extern int rib_gc_dest(struct route_node *rn); +extern struct route_table 
*rib_tables_iter_next(rib_tables_iter_t *iter); + +extern uint8_t route_distance(int type); + +extern void zebra_rib_evaluate_rn_nexthops(struct route_node *rn, uint32_t seq, + bool rt_delete); + +/* + * rib_find_rn_from_ctx + * + * Returns a lock increased route_node for the appropriate + * table and prefix specified by the context. Developer + * should unlock the node when done. + */ +extern struct route_node * +rib_find_rn_from_ctx(const struct zebra_dplane_ctx *ctx); + +/* + * Inline functions. + */ + +/* + * rib_table_info + */ +static inline struct rib_table_info *rib_table_info(struct route_table *table) +{ + return (struct rib_table_info *)route_table_get_info(table); +} + +/* + * rib_dest_from_rnode + */ +static inline rib_dest_t *rib_dest_from_rnode(struct route_node *rn) +{ + return (rib_dest_t *)rn->info; +} + +/* + * rnode_to_ribs + * + * Returns a pointer to the list of routes corresponding to the given + * route_node. + */ +static inline struct route_entry *rnode_to_ribs(struct route_node *rn) +{ + rib_dest_t *dest; + + dest = rib_dest_from_rnode(rn); + if (!dest) + return NULL; + + return re_list_first(&dest->routes); +} + +/* + * rib_dest_prefix + */ +static inline struct prefix *rib_dest_prefix(rib_dest_t *dest) +{ + return &dest->rnode->p; +} + +/* + * rib_dest_af + * + * Returns the address family that the destination is for. 
+ */ +static inline uint8_t rib_dest_af(rib_dest_t *dest) +{ + return dest->rnode->p.family; +} + +/* + * rib_dest_table + */ +static inline struct route_table *rib_dest_table(rib_dest_t *dest) +{ + return srcdest_rnode_table(dest->rnode); +} + +/* + * rib_dest_vrf + */ +static inline struct zebra_vrf *rib_dest_vrf(rib_dest_t *dest) +{ + return rib_table_info(rib_dest_table(dest))->zvrf; +} + +/* + * Create the rib_dest_t and attach it to the specified node + */ +extern rib_dest_t *zebra_rib_create_dest(struct route_node *rn); + +/* + * rib_tables_iter_init + */ +static inline void rib_tables_iter_init(rib_tables_iter_t *iter) + +{ + memset(iter, 0, sizeof(*iter)); + iter->state = RIB_TABLES_ITER_S_INIT; +} + +/* + * rib_tables_iter_started + * + * Returns true if this iterator has started iterating over the set of + * tables. + */ +static inline int rib_tables_iter_started(rib_tables_iter_t *iter) +{ + return iter->state != RIB_TABLES_ITER_S_INIT; +} + +/* + * rib_tables_iter_cleanup + */ +static inline void rib_tables_iter_cleanup(rib_tables_iter_t *iter) +{ + iter->state = RIB_TABLES_ITER_S_DONE; +} + +DECLARE_HOOK(rib_update, (struct route_node * rn, const char *reason), + (rn, reason)); +DECLARE_HOOK(rib_shutdown, (struct route_node * rn), (rn)); + +/* + * Access installed/fib nexthops, which may be a subset of the + * rib nexthops. + */ +static inline struct nexthop_group *rib_get_fib_nhg(struct route_entry *re) +{ + /* If the fib set is a subset of the active rib set, + * use the dedicated fib list. + */ + if (CHECK_FLAG(re->status, ROUTE_ENTRY_USE_FIB_NHG)) + return &(re->fib_ng); + else + return &(re->nhe->nhg); +} + +/* + * Access backup nexthop-group that represents the installed backup nexthops; + * any installed backup will be on the fib list. 
+ */ +static inline struct nexthop_group *rib_get_fib_backup_nhg( + struct route_entry *re) +{ + return &(re->fib_backup_ng); +} + +extern void zebra_gr_process_client(afi_t afi, vrf_id_t vrf_id, uint8_t proto, + uint8_t instance); + +extern int rib_add_gr_run(afi_t afi, vrf_id_t vrf_id, uint8_t proto, + uint8_t instance); + +extern void zebra_vty_init(void); + +extern pid_t pid; + +extern bool v6_rr_semantics; + +extern uint32_t rt_table_main_id; + +/* Name of hook calls */ +#define ZEBRA_ON_RIB_PROCESS_HOOK_CALL "on_rib_process_dplane_results" + +#ifdef __cplusplus +} +#endif + +#endif /*_ZEBRA_RIB_H */ diff --git a/zebra/router-id.c b/zebra/router-id.c new file mode 100644 index 0000000..ef87d92 --- /dev/null +++ b/zebra/router-id.c @@ -0,0 +1,607 @@ +// SPDX-License-Identifier: GPL-2.0-or-later +/* + * Router ID for zebra daemon. + * + * Copyright (C) 2004 James R. Leu + * + * This file is part of Quagga routing suite. + */ + +#include + +#include "if.h" +#include "vty.h" +#include "sockunion.h" +#include "prefix.h" +#include "stream.h" +#include "command.h" +#include "memory.h" +#include "ioctl.h" +#include "connected.h" +#include "network.h" +#include "log.h" +#include "table.h" +#include "rib.h" +#include "vrf.h" + +#include "zebra/zebra_router.h" +#include "zebra/zapi_msg.h" +#include "zebra/zebra_vrf.h" +#include "zebra/router-id.h" +#include "zebra/redistribute.h" + +static struct connected *router_id_find_node(struct list *l, + struct connected *ifc) +{ + struct listnode *node; + struct connected *c; + + for (ALL_LIST_ELEMENTS_RO(l, node, c)) + if (prefix_same(ifc->address, c->address)) + return c; + + return NULL; +} + +static int router_id_bad_address(struct connected *ifc) +{ + /* non-redistributable addresses shouldn't be used for RIDs either */ + if (!zebra_check_addr(ifc->address)) + return 1; + + return 0; +} + +static bool router_id_v6_is_any(struct prefix *p) +{ + return memcmp(&p->u.prefix6, &in6addr_any, sizeof(struct in6_addr)) + == 0; +} + 
+/* + * Compute the router-id of 'zvrf' for address family 'afi' and store it + * in 'p'. + * + * For AFI_IP and AFI_IP6, 'p' is first set to the any-address with a + * full-length prefix, then overwritten by the first available of: + * the user-assigned router-id, the tail entry of the loopback sorted + * list, the tail entry of the all-addresses sorted list. + * + * Returns 0 for AFI_IP/AFI_IP6, and -1 (without touching 'p') for any + * other afi. + */ +int router_id_get(afi_t afi, struct prefix *p, struct zebra_vrf *zvrf) +{ + struct listnode *node; + struct connected *c; + struct in6_addr *addr = NULL; + + switch (afi) { + case AFI_IP: + p->u.prefix4.s_addr = INADDR_ANY; + p->family = AF_INET; + p->prefixlen = IPV4_MAX_BITLEN; + if (zvrf->rid_user_assigned.u.prefix4.s_addr != INADDR_ANY) + p->u.prefix4.s_addr = + zvrf->rid_user_assigned.u.prefix4.s_addr; + else if (!list_isempty(zvrf->rid_lo_sorted_list)) { + node = listtail(zvrf->rid_lo_sorted_list); + c = listgetdata(node); + p->u.prefix4.s_addr = c->address->u.prefix4.s_addr; + } else if (!list_isempty(zvrf->rid_all_sorted_list)) { + node = listtail(zvrf->rid_all_sorted_list); + c = listgetdata(node); + p->u.prefix4.s_addr = c->address->u.prefix4.s_addr; + } + return 0; + case AFI_IP6: + p->u.prefix6 = in6addr_any; + p->family = AF_INET6; + p->prefixlen = IPV6_MAX_BITLEN; + if (!router_id_v6_is_any(&zvrf->rid6_user_assigned)) + addr = &zvrf->rid6_user_assigned.u.prefix6; + else if (!list_isempty(zvrf->rid6_lo_sorted_list)) { + node = listtail(zvrf->rid6_lo_sorted_list); + c = listgetdata(node); + addr = &c->address->u.prefix6; + } else if (!list_isempty(zvrf->rid6_all_sorted_list)) { + node = listtail(zvrf->rid6_all_sorted_list); + c = listgetdata(node); + addr = &c->address->u.prefix6; + } + if (addr) + memcpy(&p->u.prefix6, addr, sizeof(struct in6_addr)); + return 0; + case AFI_UNSPEC: + case AFI_L2VPN: + case AFI_MAX: + return -1; + } + + assert(!"Reached end of function we should never hit"); +} + +static int router_id_set(afi_t afi, struct prefix *p, struct zebra_vrf *zvrf) +{ + struct prefix after, before; + struct listnode *node; + struct zserv *client; + + router_id_get(afi, &before, zvrf); + + switch (afi) { + case AFI_IP: + zvrf->rid_user_assigned.u.prefix4.s_addr = p->u.prefix4.s_addr; + break; + case AFI_IP6: + zvrf->rid6_user_assigned.u.prefix6 = p->u.prefix6; + break; + case AFI_UNSPEC: + case AFI_L2VPN: + case AFI_MAX: + return -1; + } + + 
router_id_get(afi, &after, zvrf); + + /* + * If we've been told that the router-id is exactly the same + * do we need to really do anything here? + */ + if (prefix_same(&before, &after)) + return 0; + + for (ALL_LIST_ELEMENTS_RO(zrouter.client_list, node, client)) + zsend_router_id_update(client, afi, &after, zvrf->vrf->vrf_id); + + return 0; +} + +void router_id_add_address(struct connected *ifc) +{ + struct list *l = NULL; + struct listnode *node; + struct prefix before; + struct prefix after; + struct zserv *client; + struct zebra_vrf *zvrf = ifc->ifp->vrf->info; + afi_t afi; + struct list *rid_lo; + struct list *rid_all; + + if (router_id_bad_address(ifc)) + return; + + switch (ifc->address->family) { + case AF_INET: + afi = AFI_IP; + rid_lo = zvrf->rid_lo_sorted_list; + rid_all = zvrf->rid_all_sorted_list; + break; + case AF_INET6: + afi = AFI_IP6; + rid_lo = zvrf->rid6_lo_sorted_list; + rid_all = zvrf->rid6_all_sorted_list; + break; + default: + return; + } + + router_id_get(afi, &before, zvrf); + + l = if_is_loopback(ifc->ifp) ? 
rid_lo : rid_all; + + if (!router_id_find_node(l, ifc)) + listnode_add_sort(l, ifc); + + router_id_get(afi, &after, zvrf); + + if (prefix_same(&before, &after)) + return; + + for (ALL_LIST_ELEMENTS_RO(zrouter.client_list, node, client)) + zsend_router_id_update(client, afi, &after, zvrf_id(zvrf)); +} + +/* + * Remove the address of 'ifc' from the VRF's loopback or all-addresses + * router-id candidate list (chosen by if_is_loopback()). If the router-id + * returned by router_id_get() differs before and after the removal, send + * the new value to every client in zrouter.client_list. + * Addresses rejected by router_id_bad_address() and families other than + * AF_INET/AF_INET6 are ignored. + */ +void router_id_del_address(struct connected *ifc) +{ + struct connected *c; + struct list *l; + struct prefix after; + struct prefix before; + struct listnode *node; + struct zserv *client; + struct zebra_vrf *zvrf = ifc->ifp->vrf->info; + afi_t afi; + struct list *rid_lo; + struct list *rid_all; + + if (router_id_bad_address(ifc)) + return; + + switch (ifc->address->family) { + case AF_INET: + afi = AFI_IP; + rid_lo = zvrf->rid_lo_sorted_list; + rid_all = zvrf->rid_all_sorted_list; + break; + case AF_INET6: + afi = AFI_IP6; + rid_lo = zvrf->rid6_lo_sorted_list; + rid_all = zvrf->rid6_all_sorted_list; + break; + default: + return; + } + + router_id_get(afi, &before, zvrf); + + if (if_is_loopback(ifc->ifp)) + l = rid_lo; + else + l = rid_all; + + if ((c = router_id_find_node(l, ifc))) + listnode_delete(l, c); + + router_id_get(afi, &after, zvrf); + + if (prefix_same(&before, &after)) + return; + + for (ALL_LIST_ELEMENTS_RO(zrouter.client_list, node, client)) + zsend_router_id_update(client, afi, &after, zvrf_id(zvrf)); +} + +/* + * Emit the user-assigned IPv4 and IPv6 router-ids of 'zvrf' to 'vty'. + * Each statement is prefixed with one space when 'zvrf' is not the + * default VRF. Nothing is written for a family whose user-assigned + * router-id is the any-address. + */ +void router_id_write(struct vty *vty, struct zebra_vrf *zvrf) +{ + char space[2]; + + memset(space, 0, sizeof(space)); + + if (zvrf_id(zvrf) != VRF_DEFAULT) + snprintf(space, sizeof(space), "%s", " "); + + if (zvrf->rid_user_assigned.u.prefix4.s_addr != INADDR_ANY) { + vty_out(vty, "%sip router-id %pI4\n", space, + &zvrf->rid_user_assigned.u.prefix4); + } + if (!router_id_v6_is_any(&zvrf->rid6_user_assigned)) { + /* Print the IPv6 prefix that was just tested, not the IPv4 one. */ + vty_out(vty, "%sipv6 router-id %pI6\n", space, + &zvrf->rid6_user_assigned.u.prefix6); + } +} + +DEFUN (ip_router_id, + ip_router_id_cmd, + "ip router-id A.B.C.D vrf NAME", + IP_STR + "Manually set the router-id\n" + "IP address to use 
for router-id\n" + VRF_CMD_HELP_STR) +{ + int idx = 0; + struct prefix rid; + vrf_id_t vrf_id; + struct zebra_vrf *zvrf; + + argv_find(argv, argc, "A.B.C.D", &idx); + + if (!inet_pton(AF_INET, argv[idx]->arg, &rid.u.prefix4)) + return CMD_WARNING_CONFIG_FAILED; + + rid.prefixlen = IPV4_MAX_BITLEN; + rid.family = AF_INET; + + argv_find(argv, argc, "NAME", &idx); + VRF_GET_ID(vrf_id, argv[idx]->arg, false); + + zvrf = zebra_vrf_lookup_by_id(vrf_id); + router_id_set(AFI_IP, &rid, zvrf); + + return CMD_SUCCESS; +} + +ALIAS (ip_router_id, + router_id_cmd, + "router-id A.B.C.D vrf NAME", + "Manually set the router-id\n" + "IP address to use for router-id\n" + VRF_CMD_HELP_STR); + +DEFUN (ipv6_router_id, + ipv6_router_id_cmd, + "ipv6 router-id X:X::X:X vrf NAME", + IPV6_STR + "Manually set the router-id\n" + "IPv6 address to use for router-id\n" + VRF_CMD_HELP_STR) +{ + int idx = 0; + struct prefix rid; + vrf_id_t vrf_id; + struct zebra_vrf *zvrf; + + argv_find(argv, argc, "X:X::X:X", &idx); + + if (!inet_pton(AF_INET6, argv[idx]->arg, &rid.u.prefix6)) + return CMD_WARNING_CONFIG_FAILED; + + rid.prefixlen = IPV6_MAX_BITLEN; + rid.family = AF_INET6; + + argv_find(argv, argc, "NAME", &idx); + VRF_GET_ID(vrf_id, argv[idx]->arg, false); + + zvrf = zebra_vrf_lookup_by_id(vrf_id); + router_id_set(AFI_IP6, &rid, zvrf); + + return CMD_SUCCESS; +} + + +DEFUN (ip_router_id_in_vrf, + ip_router_id_in_vrf_cmd, + "ip router-id A.B.C.D", + IP_STR + "Manually set the router-id\n" + "IP address to use for router-id\n") +{ + ZEBRA_DECLVAR_CONTEXT_VRF(vrf, zvrf); + int idx = 0; + struct prefix rid; + + argv_find(argv, argc, "A.B.C.D", &idx); + + if (!inet_pton(AF_INET, argv[idx]->arg, &rid.u.prefix4)) + return CMD_WARNING_CONFIG_FAILED; + + rid.prefixlen = IPV4_MAX_BITLEN; + rid.family = AF_INET; + + router_id_set(AFI_IP, &rid, zvrf); + + return CMD_SUCCESS; +} + +ALIAS (ip_router_id_in_vrf, + router_id_in_vrf_cmd, + "router-id A.B.C.D", + "Manually set the router-id\n" + "IP address to use 
for router-id\n"); + +DEFUN (ipv6_router_id_in_vrf, + ipv6_router_id_in_vrf_cmd, + "ipv6 router-id X:X::X:X", + IP6_STR + "Manually set the IPv6 router-id\n" + "IPV6 address to use for router-id\n") +{ + ZEBRA_DECLVAR_CONTEXT_VRF(vrf, zvrf); + int idx = 0; + struct prefix rid; + + argv_find(argv, argc, "X:X::X:X", &idx); + + if (!inet_pton(AF_INET6, argv[idx]->arg, &rid.u.prefix6)) + return CMD_WARNING_CONFIG_FAILED; + + rid.prefixlen = IPV6_MAX_BITLEN; + rid.family = AF_INET6; + + router_id_set(AFI_IP6, &rid, zvrf); + + return CMD_SUCCESS; +} + +DEFUN (no_ip_router_id, + no_ip_router_id_cmd, + "no ip router-id [A.B.C.D vrf NAME]", + NO_STR + IP_STR + "Remove the manually configured router-id\n" + "IP address to use for router-id\n" + VRF_CMD_HELP_STR) +{ + int idx = 0; + struct prefix rid; + vrf_id_t vrf_id = VRF_DEFAULT; + struct zebra_vrf *zvrf; + + rid.u.prefix4.s_addr = 0; + rid.prefixlen = 0; + rid.family = AF_INET; + + if (argv_find(argv, argc, "NAME", &idx)) + VRF_GET_ID(vrf_id, argv[idx]->arg, false); + + zvrf = zebra_vrf_lookup_by_id(vrf_id); + router_id_set(AFI_IP, &rid, zvrf); + + return CMD_SUCCESS; +} + +ALIAS (no_ip_router_id, + no_router_id_cmd, + "no router-id [A.B.C.D vrf NAME]", + NO_STR + "Remove the manually configured router-id\n" + "IP address to use for router-id\n" + VRF_CMD_HELP_STR); + +/* + * Clear the user-assigned IPv6 router-id of the named VRF (default VRF + * when none is given) by setting it to the all-zero address. + */ +DEFUN (no_ipv6_router_id, + no_ipv6_router_id_cmd, + "no ipv6 router-id [X:X::X:X vrf NAME]", + NO_STR + IPV6_STR + "Remove the manually configured IPv6 router-id\n" + "IPv6 address to use for router-id\n" + VRF_CMD_HELP_STR) +{ + int idx = 0; + struct prefix rid; + vrf_id_t vrf_id = VRF_DEFAULT; + struct zebra_vrf *zvrf; + + memset(&rid, 0, sizeof(rid)); + /* Tag the zeroed prefix as IPv6, matching the AFI_IP6 set below. */ + rid.family = AF_INET6; + + if (argv_find(argv, argc, "NAME", &idx)) + VRF_GET_ID(vrf_id, argv[idx]->arg, false); + + zvrf = zebra_vrf_lookup_by_id(vrf_id); + router_id_set(AFI_IP6, &rid, zvrf); + + return CMD_SUCCESS; +} + +DEFUN (no_ip_router_id_in_vrf, + no_ip_router_id_in_vrf_cmd, + "no ip router-id 
[A.B.C.D]", + NO_STR + IP_STR + "Remove the manually configured router-id\n" + "IP address to use for router-id\n") +{ + ZEBRA_DECLVAR_CONTEXT_VRF(vrf, zvrf); + + struct prefix rid; + + rid.u.prefix4.s_addr = 0; + rid.prefixlen = 0; + rid.family = AF_INET; + + router_id_set(AFI_IP, &rid, zvrf); + + return CMD_SUCCESS; +} + +ALIAS (no_ip_router_id_in_vrf, + no_router_id_in_vrf_cmd, + "no router-id [A.B.C.D]", + NO_STR + "Remove the manually configured router-id\n" + "IP address to use for router-id\n"); + +/* + * Clear the user-assigned IPv6 router-id of the VRF taken from the CLI + * context by setting it to the all-zero address. + */ +DEFUN (no_ipv6_router_id_in_vrf, + no_ipv6_router_id_in_vrf_cmd, + "no ipv6 router-id [X:X::X:X]", + NO_STR + IP6_STR + "Remove the manually configured IPv6 router-id\n" + "IPv6 address to use for router-id\n") +{ + ZEBRA_DECLVAR_CONTEXT_VRF(vrf, zvrf); + + struct prefix rid; + + memset(&rid, 0, sizeof(rid)); + /* Tag the zeroed prefix as IPv6, matching the AFI_IP6 set below. */ + rid.family = AF_INET6; + + router_id_set(AFI_IP6, &rid, zvrf); + + return CMD_SUCCESS; +} + +/* + * Show the user-assigned router-id (IPv4 unless "ipv6" is given) of the + * selected VRF. Prints nothing when the VRF lookup fails or when no + * router-id has been assigned by the user. + */ +DEFUN (show_ip_router_id, + show_ip_router_id_cmd, + "show [ip|ipv6] router-id [vrf NAME]", + SHOW_STR + IP_STR + IPV6_STR + "Show the configured router-id\n" + VRF_CMD_HELP_STR) +{ + int idx = 0; + vrf_id_t vrf_id = VRF_DEFAULT; + struct zebra_vrf *zvrf; + const char *vrf_name = "default"; + char addr_name[INET6_ADDRSTRLEN]; + int is_ipv6 = 0; + + is_ipv6 = argv_find(argv, argc, "ipv6", &idx); + + if (argv_find(argv, argc, "NAME", &idx)) { + VRF_GET_ID(vrf_id, argv[idx]->arg, false); + vrf_name = argv[idx]->arg; + } + + zvrf = zebra_vrf_lookup_by_id(vrf_id); + + if (zvrf != NULL) { + if (is_ipv6) { + if (router_id_v6_is_any(&zvrf->rid6_user_assigned)) + return CMD_SUCCESS; + inet_ntop(AF_INET6, &zvrf->rid6_user_assigned.u.prefix6, + addr_name, sizeof(addr_name)); + } else { + if (zvrf->rid_user_assigned.u.prefix4.s_addr + == INADDR_ANY) + return CMD_SUCCESS; + inet_ntop(AF_INET, &zvrf->rid_user_assigned.u.prefix4, + addr_name, sizeof(addr_name)); + } + + vty_out(vty, "zebra:\n"); + vty_out(vty, " router-id %s vrf %s\n", addr_name, vrf_name); + } + + return 
CMD_SUCCESS; +} + +static int router_id_cmp(void *a, void *b) +{ + const struct connected *ifa = (const struct connected *)a; + const struct connected *ifb = (const struct connected *)b; + + return IPV4_ADDR_CMP(&ifa->address->u.prefix4.s_addr, + &ifb->address->u.prefix4.s_addr); +} + +static int router_id_v6_cmp(void *a, void *b) +{ + const struct connected *ifa = (const struct connected *)a; + const struct connected *ifb = (const struct connected *)b; + + return IPV6_ADDR_CMP(&ifa->address->u.prefix6, + &ifb->address->u.prefix6); +} + +void router_id_cmd_init(void) +{ + install_element(CONFIG_NODE, &ip_router_id_cmd); + install_element(CONFIG_NODE, &router_id_cmd); + install_element(CONFIG_NODE, &ipv6_router_id_cmd); + install_element(CONFIG_NODE, &no_ip_router_id_cmd); + install_element(CONFIG_NODE, &no_router_id_cmd); + install_element(CONFIG_NODE, &ip_router_id_in_vrf_cmd); + install_element(VRF_NODE, &ip_router_id_in_vrf_cmd); + install_element(CONFIG_NODE, &router_id_in_vrf_cmd); + install_element(VRF_NODE, &router_id_in_vrf_cmd); + install_element(CONFIG_NODE, &ipv6_router_id_in_vrf_cmd); + install_element(VRF_NODE, &ipv6_router_id_in_vrf_cmd); + install_element(CONFIG_NODE, &no_ipv6_router_id_cmd); + install_element(CONFIG_NODE, &no_ip_router_id_in_vrf_cmd); + install_element(VRF_NODE, &no_ip_router_id_in_vrf_cmd); + install_element(CONFIG_NODE, &no_router_id_in_vrf_cmd); + install_element(VRF_NODE, &no_router_id_in_vrf_cmd); + install_element(CONFIG_NODE, &no_ipv6_router_id_in_vrf_cmd); + install_element(VRF_NODE, &no_ipv6_router_id_in_vrf_cmd); + install_element(VIEW_NODE, &show_ip_router_id_cmd); +} + +void router_id_init(struct zebra_vrf *zvrf) +{ + zvrf->rid_all_sorted_list = &zvrf->_rid_all_sorted_list; + zvrf->rid_lo_sorted_list = &zvrf->_rid_lo_sorted_list; + zvrf->rid6_all_sorted_list = &zvrf->_rid6_all_sorted_list; + zvrf->rid6_lo_sorted_list = &zvrf->_rid6_lo_sorted_list; + + memset(zvrf->rid_all_sorted_list, 0, + 
sizeof(zvrf->_rid_all_sorted_list)); + memset(zvrf->rid_lo_sorted_list, 0, sizeof(zvrf->_rid_lo_sorted_list)); + memset(&zvrf->rid_user_assigned, 0, sizeof(zvrf->rid_user_assigned)); + memset(zvrf->rid6_all_sorted_list, 0, + sizeof(zvrf->_rid6_all_sorted_list)); + memset(zvrf->rid6_lo_sorted_list, 0, + sizeof(zvrf->_rid6_lo_sorted_list)); + memset(&zvrf->rid6_user_assigned, 0, sizeof(zvrf->rid6_user_assigned)); + + zvrf->rid_all_sorted_list->cmp = router_id_cmp; + zvrf->rid_lo_sorted_list->cmp = router_id_cmp; + zvrf->rid6_all_sorted_list->cmp = router_id_v6_cmp; + zvrf->rid6_lo_sorted_list->cmp = router_id_v6_cmp; + + zvrf->rid_user_assigned.family = AF_INET; + zvrf->rid_user_assigned.prefixlen = IPV4_MAX_BITLEN; + zvrf->rid6_user_assigned.family = AF_INET6; + zvrf->rid6_user_assigned.prefixlen = IPV6_MAX_BITLEN; +} diff --git a/zebra/router-id.h b/zebra/router-id.h new file mode 100644 index 0000000..45860d8 --- /dev/null +++ b/zebra/router-id.h @@ -0,0 +1,35 @@ +// SPDX-License-Identifier: GPL-2.0-or-later +/* + * Router ID for zebra daemon. + * + * Copyright (C) 2004 James R. Leu + * + * This file is part of Quagga routing suite. + */ + +#ifndef _ROUTER_ID_H_ +#define _ROUTER_ID_H_ + +#include + +#include "memory.h" +#include "prefix.h" +#include "zclient.h" +#include "if.h" + +#ifdef __cplusplus +extern "C" { +#endif + +extern void router_id_add_address(struct connected *c); +extern void router_id_del_address(struct connected *c); +extern void router_id_init(struct zebra_vrf *zvrf); +extern void router_id_cmd_init(void); +extern void router_id_write(struct vty *vty, struct zebra_vrf *zvrf); +extern int router_id_get(afi_t afi, struct prefix *p, struct zebra_vrf *zvrf); + +#ifdef __cplusplus +} +#endif + +#endif diff --git a/zebra/rt.h b/zebra/rt.h new file mode 100644 index 0000000..af170a2 --- /dev/null +++ b/zebra/rt.h @@ -0,0 +1,125 @@ +// SPDX-License-Identifier: GPL-2.0-or-later +/* + * kernel routing table update prototype. 
+ * Copyright (C) 1998 Kunihiro Ishiguro + */ + +#ifndef _ZEBRA_RT_H +#define _ZEBRA_RT_H + +#include "prefix.h" +#include "if.h" +#include "vlan.h" +#include "vxlan.h" +#include "zebra/rib.h" +#include "zebra/zebra_ns.h" +#include "zebra/zebra_mpls.h" +#include "zebra/zebra_dplane.h" + +#ifdef __cplusplus +extern "C" { +#endif + +#define ROUTE_INSTALLATION_METRIC 20 + +#define RKERNEL_ROUTE(type) ((type) == ZEBRA_ROUTE_KERNEL) + +#define RSYSTEM_ROUTE(type) \ + ((RKERNEL_ROUTE(type)) || (type) == ZEBRA_ROUTE_CONNECT) + +#ifndef HAVE_NETLINK +/* + * Update or delete a route, nexthop, LSP, pseudowire, or vxlan MAC from the + * kernel, using info from a dataplane context. + */ +extern enum zebra_dplane_result kernel_route_update( + struct zebra_dplane_ctx *ctx); + +extern enum zebra_dplane_result +kernel_nexthop_update(struct zebra_dplane_ctx *ctx); + +extern enum zebra_dplane_result kernel_lsp_update( + struct zebra_dplane_ctx *ctx); + +enum zebra_dplane_result kernel_pw_update(struct zebra_dplane_ctx *ctx); + +enum zebra_dplane_result kernel_address_update_ctx( + struct zebra_dplane_ctx *ctx); + +enum zebra_dplane_result kernel_mac_update_ctx(struct zebra_dplane_ctx *ctx); + +enum zebra_dplane_result kernel_neigh_update_ctx(struct zebra_dplane_ctx *ctx); + +extern enum zebra_dplane_result +kernel_pbr_rule_update(struct zebra_dplane_ctx *ctx); + +extern enum zebra_dplane_result +kernel_intf_update(struct zebra_dplane_ctx *ctx); + +extern enum zebra_dplane_result +kernel_intf_netconf_update(struct zebra_dplane_ctx *ctx); +extern enum zebra_dplane_result kernel_tc_update(struct zebra_dplane_ctx *ctx); + +#endif /* !HAVE_NETLINK */ + +extern int kernel_neigh_update(int cmd, int ifindex, void *addr, char *lla, + int llalen, ns_id_t ns_id, uint8_t family, + bool permanent); +extern int kernel_neigh_register(vrf_id_t vrf_id, struct zserv *client, + bool reg); +extern int kernel_interface_set_master(struct interface *master, + struct interface *slave); + +extern int 
mpls_kernel_init(void); + +/* Global init and deinit for platform-/OS-specific things */ +void kernel_router_init(void); +void kernel_router_terminate(void); + +extern uint32_t kernel_get_speed(struct interface *ifp, int *error); +extern int kernel_get_ipmr_sg_stats(struct zebra_vrf *zvrf, void *mroute); + +/* + * Southbound Initialization routines to get initial starting + * state. + */ +extern void interface_list(struct zebra_ns *zns); +extern void interface_list_tunneldump(struct zebra_ns *zns); +extern void interface_list_second(struct zebra_ns *zns); +extern void kernel_init(struct zebra_ns *zns); +extern void kernel_terminate(struct zebra_ns *zns, bool complete); +extern void macfdb_read(struct zebra_ns *zns); +extern void macfdb_read_for_bridge(struct zebra_ns *zns, struct interface *ifp, + struct interface *br_if, vlanid_t vid); +extern void macfdb_read_mcast_entry_for_vni(struct zebra_ns *zns, + struct interface *ifp, vni_t vni); +extern void macfdb_read_specific_mac(struct zebra_ns *zns, + struct interface *br_if, + const struct ethaddr *mac, vlanid_t vid); +extern void neigh_read(struct zebra_ns *zns); +extern void neigh_read_for_vlan(struct zebra_ns *zns, struct interface *ifp); +extern void neigh_read_specific_ip(const struct ipaddr *ip, + struct interface *vlan_if); +extern void route_read(struct zebra_ns *zns); +extern int kernel_upd_mac_nh(uint32_t nh_id, struct in_addr vtep_ip); +extern int kernel_del_mac_nh(uint32_t nh_id); +extern int kernel_upd_mac_nhg(uint32_t nhg_id, uint32_t nh_cnt, + struct nh_grp *nh_ids); +extern int kernel_del_mac_nhg(uint32_t nhg_id); + +/* + * Message batching interface. + */ +extern void kernel_update_multi(struct dplane_ctx_list_head *ctx_list); + +/* + * Called by the dplane pthread to read incoming OS messages and dispatch them. 
+ */ +int kernel_dplane_read(struct zebra_dplane_info *info); +extern void vlan_read(struct zebra_ns *zns); + +#ifdef __cplusplus +} +#endif + +#endif /* _ZEBRA_RT_H */ diff --git a/zebra/rt_netlink.c b/zebra/rt_netlink.c new file mode 100644 index 0000000..b8362bb --- /dev/null +++ b/zebra/rt_netlink.c @@ -0,0 +1,5133 @@ +// SPDX-License-Identifier: GPL-2.0-or-later +/* Kernel routing table updates using netlink over GNU/Linux system. + * Copyright (C) 1997, 98, 99 Kunihiro Ishiguro + */ + +#include + +#ifdef HAVE_NETLINK + +/* The following definition is to workaround an issue in the Linux kernel + * header files with redefinition of 'struct in6_addr' in both + * netinet/in.h and linux/in6.h. + * Reference - https://sourceware.org/ml/libc-alpha/2013-01/msg00599.html + */ +#define _LINUX_IN6_H + +#include +#include +#include +#include +#include +#include +#include +#include + +/* Hack for GNU libc version 2. */ +#ifndef MSG_TRUNC +#define MSG_TRUNC 0x20 +#endif /* MSG_TRUNC */ + +#include "linklist.h" +#include "if.h" +#include "log.h" +#include "prefix.h" +#include "plist.h" +#include "plist_int.h" +#include "connected.h" +#include "table.h" +#include "memory.h" +#include "rib.h" +#include "frrevent.h" +#include "privs.h" +#include "nexthop.h" +#include "vrf.h" +#include "vty.h" +#include "mpls.h" +#include "vxlan.h" +#include "printfrr.h" + +#include "zebra/zapi_msg.h" +#include "zebra/zebra_ns.h" +#include "zebra/zebra_vrf.h" +#include "zebra/rt.h" +#include "zebra/redistribute.h" +#include "zebra/interface.h" +#include "zebra/debug.h" +#include "zebra/rtadv.h" +#include "zebra/zebra_ptm.h" +#include "zebra/zebra_mpls.h" +#include "zebra/kernel_netlink.h" +#include "zebra/rt_netlink.h" +#include "zebra/zebra_nhg.h" +#include "zebra/zebra_mroute.h" +#include "zebra/zebra_vxlan.h" +#include "zebra/zebra_errors.h" +#include "zebra/zebra_evpn_mh.h" +#include "zebra/zebra_trace.h" +#include "zebra/zebra_neigh.h" +#include "lib/srv6.h" + +#ifndef AF_MPLS +#define 
AF_MPLS 28 +#endif + +/* Re-defining as I am unable to include which has the + * UAPI for MAC sync. */ +#ifndef _UAPI_LINUX_IF_BRIDGE_H +#define BR_SPH_LIST_SIZE 10 +#endif + +DEFINE_MTYPE_STATIC(LIB, NH_SRV6, "Nexthop srv6"); + +static vlanid_t filter_vlan = 0; + +/* We capture whether the current kernel supports nexthop ids; by + * default, we'll use them if possible. There's also a configuration + * available to _disable_ use of kernel nexthops. + */ +static bool supports_nh; + +struct gw_family_t { + uint16_t filler; + uint16_t family; + union g_addr gate; +}; + +static const char ipv4_ll_buf[16] = "169.254.0.1"; +static struct in_addr ipv4_ll; + +/* Is this a ipv4 over ipv6 route? */ +static bool is_route_v4_over_v6(unsigned char rtm_family, + enum nexthop_types_t nexthop_type) +{ + if (rtm_family == AF_INET + && (nexthop_type == NEXTHOP_TYPE_IPV6 + || nexthop_type == NEXTHOP_TYPE_IPV6_IFINDEX)) + return true; + + return false; +} + +/* Helper to control use of kernel-level nexthop ids */ +static bool kernel_nexthops_supported(void) +{ + return (supports_nh && !vrf_is_backend_netns() + && zebra_nhg_kernel_nexthops_enabled()); +} + +/* + * Some people may only want to use NHGs created by protos and not + * implicitly created by Zebra. This check accounts for that. + */ +static bool proto_nexthops_only(void) +{ + return zebra_nhg_proto_nexthops_only(); +} + +/* Is this a proto created NHG? 
*/ +static bool is_proto_nhg(uint32_t id, int type) +{ + /* If type is available, use it as the source of truth */ + if (type) { + if (type != ZEBRA_ROUTE_NHG) + return true; + return false; + } + + if (id >= ZEBRA_NHG_PROTO_LOWER) + return true; + + return false; +} + +/* Is vni mcast group */ +static bool is_mac_vni_mcast_group(struct ethaddr *mac, vni_t vni, + struct in_addr grp_addr) +{ + if (!vni) + return false; + + if (!is_zero_mac(mac)) + return false; + + if (!IN_MULTICAST(ntohl(grp_addr.s_addr))) + return false; + + return true; +} + +/* + * The ipv4_ll data structure is used for all 5549 + * additions to the kernel. Let's figure out the + * correct value one time instead for every + * install/remove of a 5549 type route + */ +void rt_netlink_init(void) +{ + inet_pton(AF_INET, ipv4_ll_buf, &ipv4_ll); +} + +/* + * Mapping from dataplane neighbor flags to netlink flags + */ +static uint8_t neigh_flags_to_netlink(uint8_t dplane_flags) +{ + uint8_t flags = 0; + + if (dplane_flags & DPLANE_NTF_EXT_LEARNED) + flags |= NTF_EXT_LEARNED; + if (dplane_flags & DPLANE_NTF_ROUTER) + flags |= NTF_ROUTER; + if (dplane_flags & DPLANE_NTF_USE) + flags |= NTF_USE; + + return flags; +} + +/* + * Mapping from dataplane neighbor state to netlink state + */ +static uint16_t neigh_state_to_netlink(uint16_t dplane_state) +{ + uint16_t state = 0; + + if (dplane_state & DPLANE_NUD_REACHABLE) + state |= NUD_REACHABLE; + if (dplane_state & DPLANE_NUD_STALE) + state |= NUD_STALE; + if (dplane_state & DPLANE_NUD_NOARP) + state |= NUD_NOARP; + if (dplane_state & DPLANE_NUD_PROBE) + state |= NUD_PROBE; + if (dplane_state & DPLANE_NUD_INCOMPLETE) + state |= NUD_INCOMPLETE; + if (dplane_state & DPLANE_NUD_PERMANENT) + state |= NUD_PERMANENT; + if (dplane_state & DPLANE_NUD_FAILED) + state |= NUD_FAILED; + + return state; +} + + +static inline bool is_selfroute(int proto) +{ + if ((proto == RTPROT_BGP) || (proto == RTPROT_OSPF) + || (proto == RTPROT_ZSTATIC) || (proto == RTPROT_ZEBRA) + || 
(proto == RTPROT_ISIS) || (proto == RTPROT_RIPNG) + || (proto == RTPROT_NHRP) || (proto == RTPROT_EIGRP) + || (proto == RTPROT_LDP) || (proto == RTPROT_BABEL) + || (proto == RTPROT_RIP) || (proto == RTPROT_SHARP) + || (proto == RTPROT_PBR) || (proto == RTPROT_OPENFABRIC) + || (proto == RTPROT_SRTE)) { + return true; + } + + return false; +} + +int zebra2proto(int proto) +{ + switch (proto) { + case ZEBRA_ROUTE_BABEL: + proto = RTPROT_BABEL; + break; + case ZEBRA_ROUTE_BGP: + proto = RTPROT_BGP; + break; + case ZEBRA_ROUTE_OSPF: + case ZEBRA_ROUTE_OSPF6: + proto = RTPROT_OSPF; + break; + case ZEBRA_ROUTE_STATIC: + proto = RTPROT_ZSTATIC; + break; + case ZEBRA_ROUTE_ISIS: + proto = RTPROT_ISIS; + break; + case ZEBRA_ROUTE_RIP: + proto = RTPROT_RIP; + break; + case ZEBRA_ROUTE_RIPNG: + proto = RTPROT_RIPNG; + break; + case ZEBRA_ROUTE_NHRP: + proto = RTPROT_NHRP; + break; + case ZEBRA_ROUTE_EIGRP: + proto = RTPROT_EIGRP; + break; + case ZEBRA_ROUTE_LDP: + proto = RTPROT_LDP; + break; + case ZEBRA_ROUTE_SHARP: + proto = RTPROT_SHARP; + break; + case ZEBRA_ROUTE_PBR: + proto = RTPROT_PBR; + break; + case ZEBRA_ROUTE_OPENFABRIC: + proto = RTPROT_OPENFABRIC; + break; + case ZEBRA_ROUTE_SRTE: + proto = RTPROT_SRTE; + break; + case ZEBRA_ROUTE_TABLE: + case ZEBRA_ROUTE_NHG: + proto = RTPROT_ZEBRA; + break; + case ZEBRA_ROUTE_CONNECT: + case ZEBRA_ROUTE_KERNEL: + proto = RTPROT_KERNEL; + break; + default: + /* + * When a user adds a new protocol this will show up + * to let them know to do something about it. 
This + * is intentionally a warn because we should see + * this as part of development of a new protocol + */ + zlog_debug( + "%s: Please add this protocol(%d) to proper rt_netlink.c handling", + __func__, proto); + proto = RTPROT_ZEBRA; + break; + } + + return proto; +} + +static inline int proto2zebra(int proto, int family, bool is_nexthop) +{ + switch (proto) { + case RTPROT_BABEL: + proto = ZEBRA_ROUTE_BABEL; + break; + case RTPROT_BGP: + proto = ZEBRA_ROUTE_BGP; + break; + case RTPROT_OSPF: + proto = (family == AF_INET) ? ZEBRA_ROUTE_OSPF + : ZEBRA_ROUTE_OSPF6; + break; + case RTPROT_ISIS: + proto = ZEBRA_ROUTE_ISIS; + break; + case RTPROT_RIP: + proto = ZEBRA_ROUTE_RIP; + break; + case RTPROT_RIPNG: + proto = ZEBRA_ROUTE_RIPNG; + break; + case RTPROT_NHRP: + proto = ZEBRA_ROUTE_NHRP; + break; + case RTPROT_EIGRP: + proto = ZEBRA_ROUTE_EIGRP; + break; + case RTPROT_LDP: + proto = ZEBRA_ROUTE_LDP; + break; + case RTPROT_STATIC: + case RTPROT_ZSTATIC: + proto = ZEBRA_ROUTE_STATIC; + break; + case RTPROT_SHARP: + proto = ZEBRA_ROUTE_SHARP; + break; + case RTPROT_PBR: + proto = ZEBRA_ROUTE_PBR; + break; + case RTPROT_OPENFABRIC: + proto = ZEBRA_ROUTE_OPENFABRIC; + break; + case RTPROT_SRTE: + proto = ZEBRA_ROUTE_SRTE; + break; + case RTPROT_UNSPEC: + case RTPROT_REDIRECT: + case RTPROT_KERNEL: + case RTPROT_BOOT: + case RTPROT_GATED: + case RTPROT_RA: + case RTPROT_MRT: + case RTPROT_BIRD: + case RTPROT_DNROUTED: + case RTPROT_XORP: + case RTPROT_NTK: + case RTPROT_MROUTED: + case RTPROT_KEEPALIVED: + case RTPROT_OPENR: + proto = ZEBRA_ROUTE_KERNEL; + break; + case RTPROT_ZEBRA: + if (is_nexthop) { + proto = ZEBRA_ROUTE_NHG; + break; + } + /* Intentional fall thru */ + default: + /* + * When a user adds a new protocol this will show up + * to let them know to do something about it. 
This + * is intentionally a warn because we should see + * this as part of development of a new protocol + */ + zlog_debug( + "%s: Please add this protocol(%d) to proper rt_netlink.c handling", + __func__, proto); + proto = ZEBRA_ROUTE_KERNEL; + break; + } + return proto; +} + +/** + * @parse_encap_mpls() - Parses encapsulated mpls attributes + * @tb: Pointer to rtattr to look for nested items in. + * @labels: Pointer to store labels in; the callers in this file pass an + * array of MPLS_MAX_LABELS entries. + * + * Decoding stops at the first entry with the bottom-of-stack bit set, at + * the end of the MPLS_IPTUNNEL_DST payload, or after MPLS_MAX_LABELS + * entries, whichever comes first. + * + * Return: Number of mpls labels found, 0 if the encap carries no + * MPLS_IPTUNNEL_DST attribute. + */ +static int parse_encap_mpls(struct rtattr *tb, mpls_label_t *labels) +{ + struct rtattr *tb_encap[MPLS_IPTUNNEL_MAX + 1] = {0}; + mpls_lse_t *lses = NULL; + int num_labels = 0; + size_t max_labels; + uint32_t ttl = 0; + uint32_t bos = 0; + uint32_t exp = 0; + mpls_label_t label = 0; + + netlink_parse_rtattr_nested(tb_encap, MPLS_IPTUNNEL_MAX, tb); + + /* No label stack in this encap: nothing to decode */ + if (!tb_encap[MPLS_IPTUNNEL_DST]) + return 0; + + lses = (mpls_lse_t *)RTA_DATA(tb_encap[MPLS_IPTUNNEL_DST]); + + /* Never read past the attribute payload or MPLS_MAX_LABELS entries */ + max_labels = RTA_PAYLOAD(tb_encap[MPLS_IPTUNNEL_DST]) + / sizeof(mpls_lse_t); + if (max_labels > MPLS_MAX_LABELS) + max_labels = MPLS_MAX_LABELS; + + while (!bos && (size_t)num_labels < max_labels) { + mpls_lse_decode(lses[num_labels], &label, &ttl, &exp, &bos); + labels[num_labels++] = label; + } + + return num_labels; +} + +/** + * @parse_encap_seg6local_flavors() - Parses encapsulated SRv6 flavors + * attributes + * @tb: Pointer to rtattr to look for nested items in. + * @flv: Pointer to store SRv6 flavors info in. 
+ * + * Return: 0 on success, non-zero on error + */ +static int parse_encap_seg6local_flavors(struct rtattr *tb, + struct seg6local_flavors_info *flv) +{ + struct rtattr *tb_encap[SEG6_LOCAL_FLV_MAX + 1] = {}; + + netlink_parse_rtattr_nested(tb_encap, SEG6_LOCAL_FLV_MAX, tb); + + if (tb_encap[SEG6_LOCAL_FLV_OPERATION]) + flv->flv_ops = *(uint32_t *)RTA_DATA( + tb_encap[SEG6_LOCAL_FLV_OPERATION]); + + if (tb_encap[SEG6_LOCAL_FLV_LCBLOCK_BITS]) + flv->lcblock_len = *(uint8_t *)RTA_DATA( + tb_encap[SEG6_LOCAL_FLV_LCBLOCK_BITS]); + + if (tb_encap[SEG6_LOCAL_FLV_LCNODE_FN_BITS]) + flv->lcnode_func_len = *(uint8_t *)RTA_DATA( + tb_encap[SEG6_LOCAL_FLV_LCNODE_FN_BITS]); + + return 0; +} + +static enum seg6local_action_t +parse_encap_seg6local(struct rtattr *tb, + struct seg6local_context *ctx) +{ + struct rtattr *tb_encap[SEG6_LOCAL_MAX + 1] = {}; + enum seg6local_action_t act = ZEBRA_SEG6_LOCAL_ACTION_UNSPEC; + + netlink_parse_rtattr_nested(tb_encap, SEG6_LOCAL_MAX, tb); + + if (tb_encap[SEG6_LOCAL_ACTION]) + act = *(uint32_t *)RTA_DATA(tb_encap[SEG6_LOCAL_ACTION]); + + if (tb_encap[SEG6_LOCAL_NH4]) + ctx->nh4 = *(struct in_addr *)RTA_DATA( + tb_encap[SEG6_LOCAL_NH4]); + + if (tb_encap[SEG6_LOCAL_NH6]) + ctx->nh6 = *(struct in6_addr *)RTA_DATA( + tb_encap[SEG6_LOCAL_NH6]); + + if (tb_encap[SEG6_LOCAL_TABLE]) + ctx->table = *(uint32_t *)RTA_DATA(tb_encap[SEG6_LOCAL_TABLE]); + + if (tb_encap[SEG6_LOCAL_VRFTABLE]) + ctx->table = + *(uint32_t *)RTA_DATA(tb_encap[SEG6_LOCAL_VRFTABLE]); + + if (tb_encap[SEG6_LOCAL_FLAVORS]) { + parse_encap_seg6local_flavors(tb_encap[SEG6_LOCAL_FLAVORS], + &ctx->flv); + } + + return act; +} + +static int parse_encap_seg6(struct rtattr *tb, struct in6_addr *segs) +{ + struct rtattr *tb_encap[SEG6_IPTUNNEL_MAX + 1] = {}; + struct seg6_iptunnel_encap *ipt = NULL; + int i; + + netlink_parse_rtattr_nested(tb_encap, SEG6_IPTUNNEL_MAX, tb); + + if (tb_encap[SEG6_IPTUNNEL_SRH]) { + ipt = (struct seg6_iptunnel_encap *) + 
RTA_DATA(tb_encap[SEG6_IPTUNNEL_SRH]); + + /* + * first_segment is taken from the netlink message, while + * the callers in this file pass a segs[] array holding + * SRV6_MAX_SIDS entries: refuse anything larger instead + * of copying past the end of that array. + */ + if (ipt->srh[0].first_segment >= SRV6_MAX_SIDS) { + zlog_warn("%s: SRH carries %d segments, more than supported, ignoring", + __func__, ipt->srh[0].first_segment + 1); + return 0; + } + + /* Copy segments [0 .. first_segment] in the order they appear in the SRH */ + for (i = ipt->srh[0].first_segment; i >= 0; i--) + memcpy(&segs[i], &ipt->srh[0].segments[i], + sizeof(struct in6_addr)); + + return ipt->srh[0].first_segment + 1; + } + + return 0; +} + + +static struct nexthop +parse_nexthop_unicast(ns_id_t ns_id, struct rtmsg *rtm, struct rtattr **tb, + enum blackhole_type bh_type, int index, void *prefsrc, + void *gate, afi_t afi, vrf_id_t vrf_id) +{ + struct interface *ifp = NULL; + struct nexthop nh = {0}; + mpls_label_t labels[MPLS_MAX_LABELS] = {0}; + int num_labels = 0; + enum seg6local_action_t seg6l_act = ZEBRA_SEG6_LOCAL_ACTION_UNSPEC; + struct seg6local_context seg6l_ctx = {}; + struct in6_addr segs[SRV6_MAX_SIDS] = {}; + int num_segs = 0; + + vrf_id_t nh_vrf_id = vrf_id; + size_t sz = (afi == AFI_IP) ? 4 : 16; + + if (bh_type == BLACKHOLE_UNSPEC) { + if (index && !gate) + nh.type = NEXTHOP_TYPE_IFINDEX; + else if (index && gate) + nh.type = (afi == AFI_IP) ? NEXTHOP_TYPE_IPV4_IFINDEX + : NEXTHOP_TYPE_IPV6_IFINDEX; + else if (!index && gate) + nh.type = (afi == AFI_IP) ? 
NEXTHOP_TYPE_IPV4 + : NEXTHOP_TYPE_IPV6; + else { + nh.type = NEXTHOP_TYPE_BLACKHOLE; + nh.bh_type = bh_type; + } + } else { + nh.type = NEXTHOP_TYPE_BLACKHOLE; + nh.bh_type = bh_type; + } + nh.ifindex = index; + if (prefsrc) + memcpy(&nh.src, prefsrc, sz); + if (gate) + memcpy(&nh.gate, gate, sz); + + if (index) { + ifp = if_lookup_by_index_per_ns(zebra_ns_lookup(ns_id), index); + if (ifp) + nh_vrf_id = ifp->vrf->vrf_id; + } + nh.vrf_id = nh_vrf_id; + + if (tb[RTA_ENCAP] && tb[RTA_ENCAP_TYPE] + && *(uint16_t *)RTA_DATA(tb[RTA_ENCAP_TYPE]) + == LWTUNNEL_ENCAP_MPLS) { + num_labels = parse_encap_mpls(tb[RTA_ENCAP], labels); + } + if (tb[RTA_ENCAP] && tb[RTA_ENCAP_TYPE] + && *(uint16_t *)RTA_DATA(tb[RTA_ENCAP_TYPE]) + == LWTUNNEL_ENCAP_SEG6_LOCAL) { + seg6l_act = parse_encap_seg6local(tb[RTA_ENCAP], &seg6l_ctx); + } + if (tb[RTA_ENCAP] && tb[RTA_ENCAP_TYPE] + && *(uint16_t *)RTA_DATA(tb[RTA_ENCAP_TYPE]) + == LWTUNNEL_ENCAP_SEG6) { + num_segs = parse_encap_seg6(tb[RTA_ENCAP], segs); + } + + if (rtm->rtm_flags & RTNH_F_ONLINK) + SET_FLAG(nh.flags, NEXTHOP_FLAG_ONLINK); + + if (rtm->rtm_flags & RTNH_F_LINKDOWN) + SET_FLAG(nh.flags, NEXTHOP_FLAG_LINKDOWN); + + if (num_labels) + nexthop_add_labels(&nh, ZEBRA_LSP_STATIC, num_labels, labels); + + /* Resolve default values for SRv6 flavors */ + if (seg6l_ctx.flv.flv_ops != ZEBRA_SEG6_LOCAL_FLV_OP_UNSPEC) { + if (seg6l_ctx.flv.lcblock_len == 0) + seg6l_ctx.flv.lcblock_len = + ZEBRA_DEFAULT_SEG6_LOCAL_FLV_LCBLOCK_LEN; + if (seg6l_ctx.flv.lcnode_func_len == 0) + seg6l_ctx.flv.lcnode_func_len = + ZEBRA_DEFAULT_SEG6_LOCAL_FLV_LCNODE_FN_LEN; + } + + if (seg6l_act != ZEBRA_SEG6_LOCAL_ACTION_UNSPEC) + nexthop_add_srv6_seg6local(&nh, seg6l_act, &seg6l_ctx); + + if (num_segs) + nexthop_add_srv6_seg6(&nh, segs, num_segs); + + return nh; +} + +static uint8_t parse_multipath_nexthops_unicast(ns_id_t ns_id, + struct nexthop_group *ng, + struct rtmsg *rtm, + struct rtnexthop *rtnh, + struct rtattr **tb, + void *prefsrc, vrf_id_t vrf_id) +{ 
+ void *gate = NULL; + struct interface *ifp = NULL; + int index = 0; + /* MPLS labels */ + mpls_label_t labels[MPLS_MAX_LABELS] = {0}; + int num_labels = 0; + enum seg6local_action_t seg6l_act = ZEBRA_SEG6_LOCAL_ACTION_UNSPEC; + struct seg6local_context seg6l_ctx = {}; + struct in6_addr segs[SRV6_MAX_SIDS] = {}; + int num_segs = 0; + struct rtattr *rtnh_tb[RTA_MAX + 1] = {}; + + int len = RTA_PAYLOAD(tb[RTA_MULTIPATH]); + vrf_id_t nh_vrf_id = vrf_id; + + /* Walk the rtnexthop entries packed in the RTA_MULTIPATH payload */ + for (;;) { + struct nexthop *nh = NULL; + + if (len < (int)sizeof(*rtnh) || rtnh->rtnh_len > len) + break; + + /* + * Gateway and encap state belong to a single rtnexthop + * entry: clear them so an entry without these attributes + * does not inherit the values of the previous one. + */ + gate = NULL; + num_labels = 0; + num_segs = 0; + seg6l_act = ZEBRA_SEG6_LOCAL_ACTION_UNSPEC; + memset(&seg6l_ctx, 0, sizeof(seg6l_ctx)); + + index = rtnh->rtnh_ifindex; + if (index) { + /* + * Yes we are looking this up + * for every nexthop and just + * using the last one looked + * up right now + */ + ifp = if_lookup_by_index_per_ns(zebra_ns_lookup(ns_id), + index); + if (ifp) + nh_vrf_id = ifp->vrf->vrf_id; + else { + flog_warn( + EC_ZEBRA_UNKNOWN_INTERFACE, + "%s: Unknown interface %u specified, defaulting to VRF_DEFAULT", + __func__, index); + nh_vrf_id = VRF_DEFAULT; + } + } else + nh_vrf_id = vrf_id; + + /* Attributes are only present when the entry is longer than its header */ + if (rtnh->rtnh_len > sizeof(*rtnh)) { + netlink_parse_rtattr(rtnh_tb, RTA_MAX, RTNH_DATA(rtnh), + rtnh->rtnh_len - sizeof(*rtnh)); + if (rtnh_tb[RTA_GATEWAY]) + gate = RTA_DATA(rtnh_tb[RTA_GATEWAY]); + if (rtnh_tb[RTA_ENCAP] && rtnh_tb[RTA_ENCAP_TYPE] + && *(uint16_t *)RTA_DATA(rtnh_tb[RTA_ENCAP_TYPE]) + == LWTUNNEL_ENCAP_MPLS) { + num_labels = parse_encap_mpls( + rtnh_tb[RTA_ENCAP], labels); + } + if (rtnh_tb[RTA_ENCAP] && rtnh_tb[RTA_ENCAP_TYPE] + && *(uint16_t *)RTA_DATA(rtnh_tb[RTA_ENCAP_TYPE]) + == LWTUNNEL_ENCAP_SEG6_LOCAL) { + seg6l_act = parse_encap_seg6local( + rtnh_tb[RTA_ENCAP], &seg6l_ctx); + } + if (rtnh_tb[RTA_ENCAP] && rtnh_tb[RTA_ENCAP_TYPE] + && *(uint16_t *)RTA_DATA(rtnh_tb[RTA_ENCAP_TYPE]) + == LWTUNNEL_ENCAP_SEG6) { + num_segs = parse_encap_seg6(rtnh_tb[RTA_ENCAP], + segs); + } + } + + if (gate && rtm->rtm_family == AF_INET) { + if (index) + nh = nexthop_from_ipv4_ifindex( + gate, prefsrc, index, nh_vrf_id); + else + 
nh = nexthop_from_ipv4(gate, prefsrc, + nh_vrf_id); + } else if (gate && rtm->rtm_family == AF_INET6) { + if (index) + nh = nexthop_from_ipv6_ifindex( + gate, index, nh_vrf_id); + else + nh = nexthop_from_ipv6(gate, nh_vrf_id); + } else + nh = nexthop_from_ifindex(index, nh_vrf_id); + + if (nh) { + nh->weight = rtnh->rtnh_hops + 1; + + if (num_labels) + nexthop_add_labels(nh, ZEBRA_LSP_STATIC, + num_labels, labels); + + /* Resolve default values for SRv6 flavors */ + if (seg6l_ctx.flv.flv_ops != + ZEBRA_SEG6_LOCAL_FLV_OP_UNSPEC) { + if (seg6l_ctx.flv.lcblock_len == 0) + seg6l_ctx.flv.lcblock_len = + ZEBRA_DEFAULT_SEG6_LOCAL_FLV_LCBLOCK_LEN; + if (seg6l_ctx.flv.lcnode_func_len == 0) + seg6l_ctx.flv.lcnode_func_len = + ZEBRA_DEFAULT_SEG6_LOCAL_FLV_LCNODE_FN_LEN; + } + + if (seg6l_act != ZEBRA_SEG6_LOCAL_ACTION_UNSPEC) + nexthop_add_srv6_seg6local(nh, seg6l_act, + &seg6l_ctx); + + if (num_segs) + nexthop_add_srv6_seg6(nh, segs, num_segs); + + if (rtnh->rtnh_flags & RTNH_F_ONLINK) + SET_FLAG(nh->flags, NEXTHOP_FLAG_ONLINK); + + /* Add to temporary list */ + nexthop_group_add_sorted(ng, nh); + } + + if (rtnh->rtnh_len == 0) + break; + + len -= NLMSG_ALIGN(rtnh->rtnh_len); + rtnh = RTNH_NEXT(rtnh); + } + + uint8_t nhop_num = nexthop_group_nexthop_num(ng); + + return nhop_num; +} + +/* Looking up routing table by netlink interface. 
*/ +int netlink_route_change_read_unicast_internal(struct nlmsghdr *h, + ns_id_t ns_id, int startup, + struct zebra_dplane_ctx *ctx) +{ + int len; + struct rtmsg *rtm; + struct rtattr *tb[RTA_MAX + 1]; + uint32_t flags = 0; + struct prefix p; + struct prefix_ipv6 src_p = {}; + vrf_id_t vrf_id; + bool selfroute; + + char anyaddr[16] = {0}; + + int proto = ZEBRA_ROUTE_KERNEL; + int index = 0; + int table; + int metric = 0; + uint32_t mtu = 0; + uint8_t distance = 0; + route_tag_t tag = 0; + uint32_t nhe_id = 0; + + void *dest = NULL; + void *gate = NULL; + void *prefsrc = NULL; /* IPv4 preferred source host address */ + void *src = NULL; /* IPv6 srcdest source prefix */ + enum blackhole_type bh_type = BLACKHOLE_UNSPEC; + + frrtrace(3, frr_zebra, netlink_route_change_read_unicast, h, ns_id, + startup); + + rtm = NLMSG_DATA(h); + + if (startup && h->nlmsg_type != RTM_NEWROUTE) + return 0; + switch (rtm->rtm_type) { + case RTN_UNICAST: + break; + case RTN_BLACKHOLE: + bh_type = BLACKHOLE_NULL; + break; + case RTN_UNREACHABLE: + bh_type = BLACKHOLE_REJECT; + break; + case RTN_PROHIBIT: + bh_type = BLACKHOLE_ADMINPROHIB; + break; + default: + if (IS_ZEBRA_DEBUG_KERNEL) + zlog_debug("Route rtm_type: %s(%d) intentionally ignoring", + nl_rttype_to_str(rtm->rtm_type), + rtm->rtm_type); + return 0; + } + + len = h->nlmsg_len - NLMSG_LENGTH(sizeof(struct rtmsg)); + if (len < 0) { + zlog_err( + "%s: Message received from netlink is of a broken size %d %zu", + __func__, h->nlmsg_len, + (size_t)NLMSG_LENGTH(sizeof(struct rtmsg))); + return -1; + } + + netlink_parse_rtattr(tb, RTA_MAX, RTM_RTA(rtm), len); + + if (rtm->rtm_flags & RTM_F_CLONED) + return 0; + if (rtm->rtm_protocol == RTPROT_REDIRECT) + return 0; + if (rtm->rtm_protocol == RTPROT_KERNEL) + return 0; + + selfroute = is_selfroute(rtm->rtm_protocol); + + if (!startup && selfroute && h->nlmsg_type == RTM_NEWROUTE && + !zrouter.asic_offloaded && !ctx) { + if (IS_ZEBRA_DEBUG_KERNEL) + zlog_debug("Route type: %d Received 
that we think we have originated, ignoring", + rtm->rtm_protocol); + return 0; + } + + /* We don't care about change notifications for the MPLS table. */ + /* TODO: Revisit this. */ + if (rtm->rtm_family == AF_MPLS) + return 0; + + /* Table corresponding to route. */ + if (tb[RTA_TABLE]) + table = *(int *)RTA_DATA(tb[RTA_TABLE]); + else + table = rtm->rtm_table; + + /* Map to VRF */ + vrf_id = zebra_vrf_lookup_by_table(table, ns_id); + if (vrf_id == VRF_DEFAULT) { + if (!is_zebra_valid_kernel_table(table) + && !is_zebra_main_routing_table(table)) + return 0; + } + + if (rtm->rtm_flags & RTM_F_TRAP) + flags |= ZEBRA_FLAG_TRAPPED; + if (rtm->rtm_flags & RTM_F_OFFLOAD) + flags |= ZEBRA_FLAG_OFFLOADED; + if (rtm->rtm_flags & RTM_F_OFFLOAD_FAILED) + flags |= ZEBRA_FLAG_OFFLOAD_FAILED; + + if (h->nlmsg_flags & NLM_F_APPEND) + flags |= ZEBRA_FLAG_OUTOFSYNC; + + /* Route which inserted by Zebra. */ + if (selfroute) { + flags |= ZEBRA_FLAG_SELFROUTE; + proto = proto2zebra(rtm->rtm_protocol, rtm->rtm_family, false); + } + if (tb[RTA_OIF]) + index = *(int *)RTA_DATA(tb[RTA_OIF]); + + if (tb[RTA_DST]) + dest = RTA_DATA(tb[RTA_DST]); + else + dest = anyaddr; + + if (tb[RTA_SRC]) + src = RTA_DATA(tb[RTA_SRC]); + else + src = anyaddr; + + if (tb[RTA_PREFSRC]) + prefsrc = RTA_DATA(tb[RTA_PREFSRC]); + + if (tb[RTA_GATEWAY]) + gate = RTA_DATA(tb[RTA_GATEWAY]); + + if (tb[RTA_NH_ID]) + nhe_id = *(uint32_t *)RTA_DATA(tb[RTA_NH_ID]); + + if (tb[RTA_PRIORITY]) + metric = *(int *)RTA_DATA(tb[RTA_PRIORITY]); + +#if defined(SUPPORT_REALMS) + if (tb[RTA_FLOW]) + tag = *(uint32_t *)RTA_DATA(tb[RTA_FLOW]); +#endif + + if (tb[RTA_METRICS]) { + struct rtattr *mxrta[RTAX_MAX + 1]; + + netlink_parse_rtattr(mxrta, RTAX_MAX, RTA_DATA(tb[RTA_METRICS]), + RTA_PAYLOAD(tb[RTA_METRICS])); + + if (mxrta[RTAX_MTU]) + mtu = *(uint32_t *)RTA_DATA(mxrta[RTAX_MTU]); + } + + if (rtm->rtm_family == AF_INET) { + p.family = AF_INET; + if (rtm->rtm_dst_len > IPV4_MAX_BITLEN) { + zlog_err( + "Invalid destination 
prefix length: %u received from kernel route change", + rtm->rtm_dst_len); + return -1; + } + memcpy(&p.u.prefix4, dest, 4); + p.prefixlen = rtm->rtm_dst_len; + + if (rtm->rtm_src_len != 0) { + flog_warn( + EC_ZEBRA_UNSUPPORTED_V4_SRCDEST, + "unsupported IPv4 sourcedest route (dest %pFX vrf %u)", + &p, vrf_id); + return 0; + } + + /* Force debug below to not display anything for source */ + src_p.prefixlen = 0; + } else if (rtm->rtm_family == AF_INET6) { + p.family = AF_INET6; + if (rtm->rtm_dst_len > IPV6_MAX_BITLEN) { + zlog_err( + "Invalid destination prefix length: %u received from kernel route change", + rtm->rtm_dst_len); + return -1; + } + memcpy(&p.u.prefix6, dest, 16); + p.prefixlen = rtm->rtm_dst_len; + + src_p.family = AF_INET6; + if (rtm->rtm_src_len > IPV6_MAX_BITLEN) { + zlog_err( + "Invalid source prefix length: %u received from kernel route change", + rtm->rtm_src_len); + return -1; + } + memcpy(&src_p.prefix, src, 16); + src_p.prefixlen = rtm->rtm_src_len; + } else { + /* We only handle the AFs we handle... */ + if (IS_ZEBRA_DEBUG_KERNEL) + zlog_debug("%s: unknown address-family %u", __func__, + rtm->rtm_family); + return 0; + } + + /* + * For ZEBRA_ROUTE_KERNEL types: + * + * The metric/priority of the route received from the kernel + * is a 32 bit number. We are going to interpret the high + * order byte as the Admin Distance and the low order 3 bytes + * as the metric. + * + * This will allow us to do two things: + * 1) Allow the creation of kernel routes that can be + * overridden by zebra. + * 2) Allow the old behavior for 'most' kernel route types + * if a user enters 'ip route ...' v4 routes get a metric + * of 0 and v6 routes get a metric of 1024. Both of these + * values will end up with a admin distance of 0, which + * will cause them to win for the purposes of zebra. 
+ */ + if (proto == ZEBRA_ROUTE_KERNEL) { + distance = (metric >> 24) & 0xFF; + metric = (metric & 0x00FFFFFF); + } + + if (IS_ZEBRA_DEBUG_KERNEL) { + char buf2[PREFIX_STRLEN]; + + zlog_debug( + "%s %pFX%s%s vrf %s(%u) table_id: %u metric: %d Admin Distance: %d", + nl_msg_type_to_str(h->nlmsg_type), &p, + src_p.prefixlen ? " from " : "", + src_p.prefixlen ? prefix2str(&src_p, buf2, sizeof(buf2)) + : "", + vrf_id_to_name(vrf_id), vrf_id, table, metric, + distance); + } + + afi_t afi = AFI_IP; + if (rtm->rtm_family == AF_INET6) + afi = AFI_IP6; + + if (h->nlmsg_type == RTM_NEWROUTE) { + struct route_entry *re; + struct nexthop_group *ng = NULL; + + re = zebra_rib_route_entry_new(vrf_id, proto, 0, flags, nhe_id, + table, metric, mtu, distance, + tag); + if (!nhe_id) + ng = nexthop_group_new(); + + if (!tb[RTA_MULTIPATH]) { + struct nexthop *nexthop, nh; + + if (!nhe_id) { + nh = parse_nexthop_unicast( + ns_id, rtm, tb, bh_type, index, prefsrc, + gate, afi, vrf_id); + + nexthop = nexthop_new(); + *nexthop = nh; + nexthop_group_add_sorted(ng, nexthop); + } + } else { + /* This is a multipath route */ + struct rtnexthop *rtnh = + (struct rtnexthop *)RTA_DATA(tb[RTA_MULTIPATH]); + + if (!nhe_id) { + uint8_t nhop_num; + + /* Use temporary list of nexthops; parse + * message payload's nexthops. 
+ */ + nhop_num = + parse_multipath_nexthops_unicast( + ns_id, ng, rtm, rtnh, tb, + prefsrc, vrf_id); + + zserv_nexthop_num_warn( + __func__, (const struct prefix *)&p, + nhop_num); + + if (nhop_num == 0) { + nexthop_group_delete(&ng); + ng = NULL; + } + } + } + if (nhe_id || ng) { + dplane_rib_add_multipath(afi, SAFI_UNICAST, &p, &src_p, + re, ng, startup, ctx); + if (ng) + nexthop_group_delete(&ng); + } else { + /* + * I really don't see how this is possible + * but since we are testing for it let's + * let the end user know why the route + * that was just received was swallowed + * up and forgotten + */ + zlog_err( + "%s: %pFX multipath RTM_NEWROUTE has a invalid nexthop group from the kernel", + __func__, &p); + XFREE(MTYPE_RE, re); + } + } else { + if (ctx) { + zlog_err( + "%s: %pFX RTM_DELROUTE received but received a context as well", + __func__, &p); + return 0; + } + + if (nhe_id) { + rib_delete(afi, SAFI_UNICAST, vrf_id, proto, 0, flags, + &p, &src_p, NULL, nhe_id, table, metric, + distance, true); + } else { + if (!tb[RTA_MULTIPATH]) { + struct nexthop nh; + + nh = parse_nexthop_unicast( + ns_id, rtm, tb, bh_type, index, prefsrc, + gate, afi, vrf_id); + rib_delete(afi, SAFI_UNICAST, vrf_id, proto, 0, + flags, &p, &src_p, &nh, 0, table, + metric, distance, true); + } else { + /* XXX: need to compare the entire list of + * nexthops here for NLM_F_APPEND stupidity */ + rib_delete(afi, SAFI_UNICAST, vrf_id, proto, 0, + flags, &p, &src_p, NULL, 0, table, + metric, distance, true); + } + } + } + + return 1; +} + +/* + * Parse a unicast route message that arrives without a dataplane context: + * forwards to netlink_route_change_read_unicast_internal() with ctx == NULL + * and returns its result. + */ +static int netlink_route_change_read_unicast(struct nlmsghdr *h, ns_id_t ns_id, + int startup) +{ + return netlink_route_change_read_unicast_internal(h, ns_id, startup, + NULL); +} + +/* + * Destination for the fields parsed by netlink_route_change_read_multicast(). + * That function asserts this pointer is non-NULL, so it must be set before + * a multicast route reply is parsed. + */ +static struct mcast_route_data *mroute = NULL; + +/* + * Parse a multicast route message into *mroute. + * + * Stores the source and group addresses and the RTA_EXPIRES value (in + * lastused). The incoming interface and the outgoing interface list are + * only collected for the debug log. The startup argument is not used. + * + * Returns 0 in all cases, including an unexpected rtm_family. + */ +static int netlink_route_change_read_multicast(struct nlmsghdr *h, + ns_id_t ns_id, int startup) +{ + int len; + struct rtmsg *rtm; + struct rtattr *tb[RTA_MAX + 1]; + struct mcast_route_data *m; + int iif = 0; + int
count; + int oif[256]; + int oif_count = 0; + char oif_list[256] = "\0"; + vrf_id_t vrf; + int table; + + assert(mroute); + m = mroute; + + rtm = NLMSG_DATA(h); + + len = h->nlmsg_len - NLMSG_LENGTH(sizeof(struct rtmsg)); + + netlink_parse_rtattr(tb, RTA_MAX, RTM_RTA(rtm), len); + + if (tb[RTA_TABLE]) + table = *(int *)RTA_DATA(tb[RTA_TABLE]); + else + table = rtm->rtm_table; + + vrf = zebra_vrf_lookup_by_table(table, ns_id); + + if (tb[RTA_IIF]) + iif = *(int *)RTA_DATA(tb[RTA_IIF]); + + if (tb[RTA_SRC]) { + if (rtm->rtm_family == RTNL_FAMILY_IPMR) + m->src.ipaddr_v4 = + *(struct in_addr *)RTA_DATA(tb[RTA_SRC]); + else + m->src.ipaddr_v6 = + *(struct in6_addr *)RTA_DATA(tb[RTA_SRC]); + } + + if (tb[RTA_DST]) { + if (rtm->rtm_family == RTNL_FAMILY_IPMR) + m->grp.ipaddr_v4 = + *(struct in_addr *)RTA_DATA(tb[RTA_DST]); + else + m->grp.ipaddr_v6 = + *(struct in6_addr *)RTA_DATA(tb[RTA_DST]); + } + + if (tb[RTA_EXPIRES]) + m->lastused = *(unsigned long long *)RTA_DATA(tb[RTA_EXPIRES]); + + if (tb[RTA_MULTIPATH]) { + struct rtnexthop *rtnh = + (struct rtnexthop *)RTA_DATA(tb[RTA_MULTIPATH]); + + len = RTA_PAYLOAD(tb[RTA_MULTIPATH]); + for (;;) { + if (len < (int)sizeof(*rtnh) || rtnh->rtnh_len > len) + break; + + /* + * oif[] is a fixed-size stack array that only feeds + * the debug output below, so stop collecting once it + * is full instead of writing past its end. + */ + if (oif_count >= (int)(sizeof(oif) / sizeof(oif[0]))) + break; + + oif[oif_count] = rtnh->rtnh_ifindex; + oif_count++; + + if (rtnh->rtnh_len == 0) + break; + + len -= NLMSG_ALIGN(rtnh->rtnh_len); + rtnh = RTNH_NEXT(rtnh); + } + } + + if (rtm->rtm_family == RTNL_FAMILY_IPMR) { + SET_IPADDR_V4(&m->src); + SET_IPADDR_V4(&m->grp); + } else if (rtm->rtm_family == RTNL_FAMILY_IP6MR) { + SET_IPADDR_V6(&m->src); + SET_IPADDR_V6(&m->grp); + } else { + zlog_warn("%s: Invalid rtm_family received", __func__); + return 0; + } + + if (IS_ZEBRA_DEBUG_KERNEL) { + struct interface *ifp = NULL; + struct zebra_vrf *zvrf = NULL; + + for (count = 0; count < oif_count; count++) { + ifp = if_lookup_by_index(oif[count], vrf); + char temp[256]; + + snprintf(temp, sizeof(temp), "%s(%d) ", + ifp ?
ifp->name : "Unknown", oif[count]); + strlcat(oif_list, temp, sizeof(oif_list)); + } + zvrf = zebra_vrf_lookup_by_id(vrf); + ifp = if_lookup_by_index(iif, vrf); + zlog_debug( + "MCAST VRF: %s(%d) %s (%pIA,%pIA) IIF: %s(%d) OIF: %s jiffies: %lld", + zvrf_name(zvrf), vrf, nl_msg_type_to_str(h->nlmsg_type), + &m->src, &m->grp, ifp ? ifp->name : "Unknown", iif, + oif_list, m->lastused); + } + return 0; +} + +/* + * Entry point for route messages read from the kernel. + * + * Anything other than RTM_NEWROUTE/RTM_DELROUTE is logged and ignored. + * AF_INET and AF_INET6 messages are handed to + * netlink_route_change_read_unicast(); IPMR/IP6MR notifications, unknown + * families and RTN_MULTICAST routes are ignored. + * + * Returns -1 when the message is shorter than a struct rtmsg header, + * otherwise 0 (the result of the unicast parser is not propagated). + */ +int netlink_route_change(struct nlmsghdr *h, ns_id_t ns_id, int startup) +{ + int len; + struct rtmsg *rtm; + + rtm = NLMSG_DATA(h); + + if (!(h->nlmsg_type == RTM_NEWROUTE || h->nlmsg_type == RTM_DELROUTE)) { + /* If this is not route add/delete message print warning. */ + zlog_debug("Kernel message: %s NS %u", + nl_msg_type_to_str(h->nlmsg_type), ns_id); + return 0; + } + + switch (rtm->rtm_family) { + case AF_INET: + case AF_INET6: + break; + + case RTNL_FAMILY_IPMR: + case RTNL_FAMILY_IP6MR: + /* notifications on IPMR are irrelevant to zebra, we only care + * about responses to RTM_GETROUTE requests we sent. + */ + return 0; + + default: + flog_warn( + EC_ZEBRA_UNKNOWN_FAMILY, + "Invalid address family: %u received from kernel route change: %s", + rtm->rtm_family, nl_msg_type_to_str(h->nlmsg_type)); + return 0; + } + + /* Log the message header fields. */ + if (IS_ZEBRA_DEBUG_KERNEL) + zlog_debug("%s %s %s proto %s NS %u", + nl_msg_type_to_str(h->nlmsg_type), + nl_family_to_str(rtm->rtm_family), + nl_rttype_to_str(rtm->rtm_type), + nl_rtproto_to_str(rtm->rtm_protocol), ns_id); + + + len = h->nlmsg_len - NLMSG_LENGTH(sizeof(struct rtmsg)); + if (len < 0) { + zlog_err( + "%s: Message received from netlink is of a broken size: %d %zu", + __func__, h->nlmsg_len, + (size_t)NLMSG_LENGTH(sizeof(struct rtmsg))); + return -1; + } + + /* these are "magic" kernel-managed *unicast* routes used for + * outputting locally generated multicast traffic (which uses unicast + * handling on Linux because ~reasons~).
+ */ + if (rtm->rtm_type == RTN_MULTICAST) + return 0; + + netlink_route_change_read_unicast(h, ns_id, startup); + return 0; +} + +/* + * Request specific route information from the kernel. + * + * Sends a request of the given type with NLM_F_ROOT | NLM_F_MATCH for one + * address family on the command socket of zns and returns the result of + * netlink_request(). + */ +static int netlink_request_route(struct zebra_ns *zns, int family, int type) +{ + struct { + struct nlmsghdr n; + struct rtmsg rtm; + } req; + + /* Form the request, specifying filter (rtattr) if needed. */ + memset(&req, 0, sizeof(req)); + req.n.nlmsg_type = type; + req.n.nlmsg_flags = NLM_F_ROOT | NLM_F_MATCH | NLM_F_REQUEST; + req.n.nlmsg_len = NLMSG_LENGTH(sizeof(struct rtmsg)); + req.rtm.rtm_family = family; + + return netlink_request(&zns->netlink_cmd, &req); +} + +/* + * Routing table read function using netlink interface. Only called at + * bootstrap time. + * + * Requests the IPv4 and then the IPv6 routing table and parses each reply + * with netlink_route_change_read_unicast(). Returns the first negative + * result, or 0 when both tables were read. + */ +int netlink_route_read(struct zebra_ns *zns) +{ + int ret; + struct zebra_dplane_info dp_info; + + zebra_dplane_info_from_zns(&dp_info, zns, true /*is_cmd*/); + + /* Get IPv4 routing table. */ + ret = netlink_request_route(zns, AF_INET, RTM_GETROUTE); + if (ret < 0) + return ret; + ret = netlink_parse_info(netlink_route_change_read_unicast, + &zns->netlink_cmd, &dp_info, 0, true); + if (ret < 0) + return ret; + + /* Get IPv6 routing table. */ + ret = netlink_request_route(zns, AF_INET6, RTM_GETROUTE); + if (ret < 0) + return ret; + ret = netlink_parse_info(netlink_route_change_read_unicast, + &zns->netlink_cmd, &dp_info, 0, true); + if (ret < 0) + return ret; + + return 0; +} + +/* + * The function returns true if the gateway info could be added + * to the message, otherwise false is returned.
+ */ +static bool _netlink_route_add_gateway_info(uint8_t route_family, + uint8_t gw_family, + struct nlmsghdr *nlmsg, + size_t req_size, int bytelen, + const struct nexthop *nexthop) +{ + if (route_family == AF_MPLS) { + struct gw_family_t gw_fam; + /* + * RTA_VIA is emitted starting at gw_fam.family with + * bytelen + 2 bytes: presumably a 2-byte family followed + * by the gateway address - TODO confirm against the + * definition of struct gw_family_t. + */ + gw_fam.family = gw_family; + if (gw_family == AF_INET) + memcpy(&gw_fam.gate.ipv4, &nexthop->gate.ipv4, bytelen); + else + memcpy(&gw_fam.gate.ipv6, &nexthop->gate.ipv6, bytelen); + if (!nl_attr_put(nlmsg, req_size, RTA_VIA, &gw_fam.family, + bytelen + 2)) + return false; + } else { /* RTA_GATEWAY, skipped when the recursive parent gateway is an IPv4-mapped IPv6 address */ + if (!(nexthop->rparent + && IS_MAPPED_IPV6(&nexthop->rparent->gate.ipv6))) { + if (gw_family == AF_INET) { + if (!nl_attr_put(nlmsg, req_size, RTA_GATEWAY, + &nexthop->gate.ipv4, bytelen)) + return false; + } else { + if (!nl_attr_put(nlmsg, req_size, RTA_GATEWAY, + &nexthop->gate.ipv6, bytelen)) + return false; + } + } + } + + return true; +} +/* + * Encode the labels of nh_label into out_lse and return how many entries + * were written (0 when nh_label is NULL). + * + * Implicit-null labels are skipped unless nh_label_type is ZEBRA_LSP_EVPN. + * EVPN labels are converted with label2vni(); all others go through + * mpls_lse_encode() with the remaining arguments set to zero. When kernel + * debugging is enabled, label_buf receives a printable form of the stack + * (first label, then each following label prefixed with a slash). + * + * NOTE(review): no capacity for out_lse is passed in, so the caller has to + * provide room for nh_label->num_labels entries. + */ +static int build_label_stack(struct mpls_label_stack *nh_label, + enum lsp_types_t nh_label_type, + mpls_lse_t *out_lse, char *label_buf, + size_t label_buf_size) +{ + char label_buf1[20]; + int num_labels = 0; + + for (int i = 0; nh_label && i < nh_label->num_labels; i++) { + if (nh_label_type != ZEBRA_LSP_EVPN && + nh_label->label[i] == MPLS_LABEL_IMPLICIT_NULL) + continue; + + if (IS_ZEBRA_DEBUG_KERNEL) { + if (!num_labels) + snprintf(label_buf, label_buf_size, "label %u", + nh_label->label[i]); + else { + snprintf(label_buf1, sizeof(label_buf1), "/%u", + nh_label->label[i]); + strlcat(label_buf, label_buf1, label_buf_size); + } + } + + if (nh_label_type == ZEBRA_LSP_EVPN) + out_lse[num_labels] = label2vni(&nh_label->label[i]); + else + out_lse[num_labels] = + mpls_lse_encode(nh_label->label[i], 0, 0, 0); + num_labels++; + } + + return num_labels; +} +/* + * Emit the attributes that go inside the RTA_ENCAP nest of an EVPN-labelled + * nexthop: LWTUNNEL_IP_ID taken from out_lse[0], then LWTUNNEL_IP_DST taken + * from the nexthop gateway. + * + * Only NEXTHOP_TYPE_IPV4_IFINDEX and NEXTHOP_TYPE_IPV6_IFINDEX are accepted; + * an IPv4-mapped IPv6 gateway is written in its 4-byte IPv4 form. label_buf + * is only used in the debug message for a rejected nexthop type. + * + * Returns false when an attribute could not be added or the nexthop type is + * not one of the two accepted types. + */ +static bool _netlink_nexthop_encode_dvni_label(const struct nexthop *nexthop, + struct nlmsghdr *nlmsg, + mpls_lse_t *out_lse, + size_t buflen, char *label_buf) +{ + struct in_addr ipv4; + + if (!nl_attr_put64(nlmsg, buflen, LWTUNNEL_IP_ID, +
htonll((uint64_t)out_lse[0]))) + return false; + + if (nexthop->type == NEXTHOP_TYPE_IPV4_IFINDEX) { + if (!nl_attr_put(nlmsg, buflen, LWTUNNEL_IP_DST, + &nexthop->gate.ipv4, 4)) + return false; + + } else if (nexthop->type == NEXTHOP_TYPE_IPV6_IFINDEX) { + if (IS_MAPPED_IPV6(&nexthop->gate.ipv6)) { + ipv4_mapped_ipv6_to_ipv4(&nexthop->gate.ipv6, &ipv4); + if (!nl_attr_put(nlmsg, buflen, LWTUNNEL_IP_DST, &ipv4, + 4)) + return false; + + } else { + if (!nl_attr_put(nlmsg, buflen, LWTUNNEL_IP_DST, + &nexthop->gate.ipv6, 16)) + return false; + } + } else { + if (IS_ZEBRA_DEBUG_KERNEL) + zlog_debug( + "%s: nexthop %pNHv %s must NEXTHOP_TYPE_IPV*_IFINDEX to be vxlan encapped", + __func__, nexthop, label_buf); + + return false; + } + + return true; +} +/* + * Add the label encapsulation of a nexthop to the message. + * + * With the ZEBRA_LSP_EVPN label type the labels are sent as + * RTA_ENCAP_TYPE = LWTUNNEL_ENCAP_IP plus an RTA_ENCAP nest filled in by + * _netlink_nexthop_encode_dvni_label(). Otherwise the label stack is sent + * as RTA_NEWDST for AF_MPLS routes, or as RTA_ENCAP_TYPE = + * LWTUNNEL_ENCAP_MPLS plus an RTA_ENCAP nest holding MPLS_IPTUNNEL_DST for + * every other family. Nothing is added when build_label_stack() yields no + * labels. + * + * label_buf is always reset to an empty string first. Returns false as + * soon as one attribute could not be added. + */ +static bool _netlink_route_encode_label_info(const struct nexthop *nexthop, + struct nlmsghdr *nlmsg, + size_t buflen, struct rtmsg *rtmsg, + char *label_buf, + size_t label_buf_size) +{ + mpls_lse_t out_lse[MPLS_MAX_LABELS]; + int num_labels; + struct rtattr *nest; + struct mpls_label_stack *nh_label; + enum lsp_types_t nh_label_type; + + nh_label = nexthop->nh_label; + nh_label_type = nexthop->nh_label_type; + + /* + * label_buf is *only* currently used within debugging. + * As such when we assign it we are guarding it inside + * a debug test.
If you want to change this make sure + * you fix this assumption + */ + label_buf[0] = '\0'; + + num_labels = build_label_stack(nh_label, nh_label_type, out_lse, + label_buf, label_buf_size); + + if (num_labels && nh_label_type == ZEBRA_LSP_EVPN) { + if (!nl_attr_put16(nlmsg, buflen, RTA_ENCAP_TYPE, + LWTUNNEL_ENCAP_IP)) + return false; + + nest = nl_attr_nest(nlmsg, buflen, RTA_ENCAP); + if (!nest) + return false; + + if (_netlink_nexthop_encode_dvni_label(nexthop, nlmsg, out_lse, + buflen, + label_buf) == false) + return false; + + nl_attr_nest_end(nlmsg, nest); + + } else if (num_labels) { + /* Set the bottom-of-stack (BoS) bit on the last label */ + out_lse[num_labels - 1] |= htonl(1 << MPLS_LS_S_SHIFT); + + if (rtmsg->rtm_family == AF_MPLS) { + if (!nl_attr_put(nlmsg, buflen, RTA_NEWDST, &out_lse, + num_labels * sizeof(mpls_lse_t))) + return false; + } else { + if (!nl_attr_put16(nlmsg, buflen, RTA_ENCAP_TYPE, + LWTUNNEL_ENCAP_MPLS)) + return false; + + nest = nl_attr_nest(nlmsg, buflen, RTA_ENCAP); + if (!nest) + return false; + + if (!nl_attr_put(nlmsg, buflen, MPLS_IPTUNNEL_DST, + &out_lse, + num_labels * sizeof(mpls_lse_t))) + return false; + nl_attr_nest_end(nlmsg, nest); + } + } + + return true; +} +/* + * Add RTA_PREFSRC for a nexthop. + * + * rmap_src is used when it is set, otherwise src; when neither is set, or + * family is neither AF_INET nor AF_INET6, nothing is added. bytelen is the + * attribute payload length. Returns false only when the attribute could + * not be added. + */ +static bool _netlink_route_encode_nexthop_src(const struct nexthop *nexthop, + int family, + struct nlmsghdr *nlmsg, + size_t buflen, int bytelen) +{ + if (family == AF_INET) { + if (nexthop->rmap_src.ipv4.s_addr != INADDR_ANY) { + if (!nl_attr_put(nlmsg, buflen, RTA_PREFSRC, + &nexthop->rmap_src.ipv4, bytelen)) + return false; + } else if (nexthop->src.ipv4.s_addr != INADDR_ANY) { + if (!nl_attr_put(nlmsg, buflen, RTA_PREFSRC, + &nexthop->src.ipv4, bytelen)) + return false; + } + } else if (family == AF_INET6) { + if (!IN6_IS_ADDR_UNSPECIFIED(&nexthop->rmap_src.ipv6)) { + if (!nl_attr_put(nlmsg, buflen, RTA_PREFSRC, + &nexthop->rmap_src.ipv6, bytelen)) + return false; + } else if (!IN6_IS_ADDR_UNSPECIFIED(&nexthop->src.ipv6)) { + if (!nl_attr_put(nlmsg, buflen, RTA_PREFSRC, +
&nexthop->src.ipv6, bytelen)) + return false; + } + } + + return true; +} +/* + * Build a struct seg6_iptunnel_encap in mode SEG6_IPTUN_MODE_ENCAP, followed + * by a segment routing header carrying segs->num_segs segments, into buffer. + * The whole buffer (buflen bytes) is zeroed first. + * + * Returns the number of bytes used, or -1 when num_segs exceeds + * SRV6_MAX_SEGS or the buffer is too small. + * + * NOTE(review): segments_left and first_segment are num_segs - 1, so this + * expects at least one segment - confirm every caller guarantees that. + */ +static ssize_t fill_seg6ipt_encap(char *buffer, size_t buflen, + struct seg6_seg_stack *segs) +{ + struct seg6_iptunnel_encap *ipt; + struct ipv6_sr_hdr *srh; + size_t srhlen; + int i; + + if (segs->num_segs > SRV6_MAX_SEGS) { + /* Exceeding maximum supported SIDs */ + return -1; + } + + srhlen = SRH_BASE_HEADER_LENGTH + SRH_SEGMENT_LENGTH * segs->num_segs; + + if (buflen < (sizeof(struct seg6_iptunnel_encap) + srhlen)) + return -1; + + memset(buffer, 0, buflen); + + ipt = (struct seg6_iptunnel_encap *)buffer; + ipt->mode = SEG6_IPTUN_MODE_ENCAP; + + srh = (struct ipv6_sr_hdr *)&ipt->srh; + srh->hdrlen = (srhlen >> 3) - 1; /* srhlen in 8-byte units, minus one */ + srh->type = 4; + srh->segments_left = segs->num_segs - 1; + srh->first_segment = segs->num_segs - 1; + + for (i = 0; i < segs->num_segs; i++) { + memcpy(&srh->segments[i], &segs->seg[i], + sizeof(struct in6_addr)); + } + + return sizeof(struct seg6_iptunnel_encap) + srhlen; +} +/* + * Add the SEG6_LOCAL_FLAVORS nest for a seg6local nexthop. + * + * Returns false when the nexthop has no nh_srv6 or an attribute could not + * be added. Returns true without adding anything when flv_ops is + * ZEBRA_SEG6_LOCAL_FLV_OP_UNSPEC. Otherwise the nest carries + * SEG6_LOCAL_FLV_OPERATION and, only when non-zero, the + * SEG6_LOCAL_FLV_LCBLOCK_BITS and SEG6_LOCAL_FLV_LCNODE_FN_BITS lengths. + */ +static bool +_netlink_nexthop_encode_seg6local_flavor(const struct nexthop *nexthop, + struct nlmsghdr *nlmsg, size_t buflen) +{ + struct rtattr *nest; + struct seg6local_flavors_info *flv; + + assert(nexthop); + + if (!nexthop->nh_srv6) + return false; + + flv = &nexthop->nh_srv6->seg6local_ctx.flv; + + if (flv->flv_ops == ZEBRA_SEG6_LOCAL_FLV_OP_UNSPEC) + return true; + + nest = nl_attr_nest(nlmsg, buflen, SEG6_LOCAL_FLAVORS); + if (!nest) + return false; + + if (!nl_attr_put32(nlmsg, buflen, SEG6_LOCAL_FLV_OPERATION, + flv->flv_ops)) + return false; + + if (flv->lcblock_len) + if (!nl_attr_put8(nlmsg, buflen, SEG6_LOCAL_FLV_LCBLOCK_BITS, + flv->lcblock_len)) + return false; + + if (flv->lcnode_func_len) + if (!nl_attr_put8(nlmsg, buflen, SEG6_LOCAL_FLV_LCNODE_FN_BITS, + flv->lcnode_func_len)) + return false; + + nl_attr_nest_end(nlmsg, nest); + + return true; +} + +/* This function takes a nexthop as argument and adds + * the appropriate netlink attributes to an existing + * netlink
message. + * + * @param routedesc: Human readable description of route type + * (direct/recursive, single-/multipath) + * @param bytelen: Length of addresses in bytes. + * @param nexthop: Nexthop information + * @param nlmsg: nlmsghdr structure to fill in. + * @param req_size: The size allocated for the message. + * + * The function returns true if the nexthop could be added + * to the message, otherwise false is returned. + */ +static bool _netlink_route_build_singlepath(const struct prefix *p, + const char *routedesc, int bytelen, + const struct nexthop *nexthop, + struct nlmsghdr *nlmsg, + struct rtmsg *rtmsg, + size_t req_size, int cmd) +{ + + char label_buf[256]; + struct vrf *vrf; + char addrstr[INET6_ADDRSTRLEN]; + + assert(nexthop); + /* vrf is only used in the debug messages below */ + vrf = vrf_lookup_by_id(nexthop->vrf_id); + + if (!_netlink_route_encode_label_info(nexthop, nlmsg, req_size, rtmsg, + label_buf, sizeof(label_buf))) + return false; + /* + * SRv6: a seg6local action other than UNSPEC is sent as a + * LWTUNNEL_ENCAP_SEG6_LOCAL encap whose nest holds SEG6_LOCAL_ACTION + * plus the per-action argument (nh6, nh4, table or vrftable). + */ + if (nexthop->nh_srv6) { + if (nexthop->nh_srv6->seg6local_action != + ZEBRA_SEG6_LOCAL_ACTION_UNSPEC) { + struct rtattr *nest; + const struct seg6local_context *ctx; + + ctx = &nexthop->nh_srv6->seg6local_ctx; + if (!nl_attr_put16(nlmsg, req_size, RTA_ENCAP_TYPE, + LWTUNNEL_ENCAP_SEG6_LOCAL)) + return false; + + nest = nl_attr_nest(nlmsg, req_size, RTA_ENCAP); + if (!nest) + return false; + + switch (nexthop->nh_srv6->seg6local_action) { + case ZEBRA_SEG6_LOCAL_ACTION_END: + if (!nl_attr_put32(nlmsg, req_size, + SEG6_LOCAL_ACTION, + SEG6_LOCAL_ACTION_END)) + return false; + break; + case ZEBRA_SEG6_LOCAL_ACTION_END_X: + if (!nl_attr_put32(nlmsg, req_size, + SEG6_LOCAL_ACTION, + SEG6_LOCAL_ACTION_END_X)) + return false; + if (!nl_attr_put(nlmsg, req_size, + SEG6_LOCAL_NH6, &ctx->nh6, + sizeof(struct in6_addr))) + return false; + break; + case ZEBRA_SEG6_LOCAL_ACTION_END_T: + if (!nl_attr_put32(nlmsg, req_size, + SEG6_LOCAL_ACTION, + SEG6_LOCAL_ACTION_END_T)) + return false; + if (!nl_attr_put32(nlmsg, req_size, + SEG6_LOCAL_TABLE, + ctx->table)) + return
false; + break; + case ZEBRA_SEG6_LOCAL_ACTION_END_DX4: + if (!nl_attr_put32(nlmsg, req_size, + SEG6_LOCAL_ACTION, + SEG6_LOCAL_ACTION_END_DX4)) + return false; + if (!nl_attr_put(nlmsg, req_size, + SEG6_LOCAL_NH4, &ctx->nh4, + sizeof(struct in_addr))) + return false; + break; + case ZEBRA_SEG6_LOCAL_ACTION_END_DT6: + if (!nl_attr_put32(nlmsg, req_size, + SEG6_LOCAL_ACTION, + SEG6_LOCAL_ACTION_END_DT6)) + return false; + if (!nl_attr_put32(nlmsg, req_size, + SEG6_LOCAL_TABLE, + ctx->table)) + return false; + break; + case ZEBRA_SEG6_LOCAL_ACTION_END_DT4: + if (!nl_attr_put32(nlmsg, req_size, + SEG6_LOCAL_ACTION, + SEG6_LOCAL_ACTION_END_DT4)) + return false; + if (!nl_attr_put32(nlmsg, req_size, + SEG6_LOCAL_VRFTABLE, + ctx->table)) + return false; + break; + case ZEBRA_SEG6_LOCAL_ACTION_END_DT46: + if (!nl_attr_put32(nlmsg, req_size, + SEG6_LOCAL_ACTION, + SEG6_LOCAL_ACTION_END_DT46)) + return false; + if (!nl_attr_put32(nlmsg, req_size, + SEG6_LOCAL_VRFTABLE, + ctx->table)) + return false; + break; + case ZEBRA_SEG6_LOCAL_ACTION_END_DX2: + case ZEBRA_SEG6_LOCAL_ACTION_END_DX6: + case ZEBRA_SEG6_LOCAL_ACTION_END_B6: + case ZEBRA_SEG6_LOCAL_ACTION_END_B6_ENCAP: + case ZEBRA_SEG6_LOCAL_ACTION_END_BM: + case ZEBRA_SEG6_LOCAL_ACTION_END_S: + case ZEBRA_SEG6_LOCAL_ACTION_END_AS: + case ZEBRA_SEG6_LOCAL_ACTION_END_AM: + case ZEBRA_SEG6_LOCAL_ACTION_END_BPF: + case ZEBRA_SEG6_LOCAL_ACTION_UNSPEC: /* every action listed in this group is rejected */ + zlog_err("%s: unsupport seg6local behaviour action=%u", + __func__, + nexthop->nh_srv6->seg6local_action); + return false; + } + /* Flavors are added inside the same RTA_ENCAP nest as the action */ + if (!_netlink_nexthop_encode_seg6local_flavor( + nexthop, nlmsg, req_size)) + return false; + + nl_attr_nest_end(nlmsg, nest); + } + /* + * A segment list that is present, non-empty and for which + * sid_zero() is false is sent as a LWTUNNEL_ENCAP_SEG6 encap + * built by fill_seg6ipt_encap(). + */ + if (nexthop->nh_srv6->seg6_segs && + nexthop->nh_srv6->seg6_segs->num_segs && + !sid_zero(nexthop->nh_srv6->seg6_segs)) { + char tun_buf[4096]; + ssize_t tun_len; + struct rtattr *nest; + + if (!nl_attr_put16(nlmsg, req_size, RTA_ENCAP_TYPE, + LWTUNNEL_ENCAP_SEG6)) + return false; + nest = nl_attr_nest(nlmsg,
req_size, RTA_ENCAP); + if (!nest) + return false; + tun_len = + fill_seg6ipt_encap(tun_buf, sizeof(tun_buf), + nexthop->nh_srv6->seg6_segs); + if (tun_len < 0) + return false; + if (!nl_attr_put(nlmsg, req_size, SEG6_IPTUNNEL_SRH, + tun_buf, tun_len)) + return false; + nl_attr_nest_end(nlmsg, nest); + } + } + + if (CHECK_FLAG(nexthop->flags, NEXTHOP_FLAG_ONLINK)) + rtmsg->rtm_flags |= RTNH_F_ONLINK; + /* + * IPv4 route over an IPv6 nexthop: send the ipv4_ll address as the + * gateway with RTNH_F_ONLINK and the nexthop ifindex as RTA_OIF, + * then return without running the per-type handling below. + */ + if (is_route_v4_over_v6(rtmsg->rtm_family, nexthop->type)) { + rtmsg->rtm_flags |= RTNH_F_ONLINK; + if (!nl_attr_put(nlmsg, req_size, RTA_GATEWAY, &ipv4_ll, 4)) + return false; + if (!nl_attr_put32(nlmsg, req_size, RTA_OIF, nexthop->ifindex)) + return false; + + if (cmd == RTM_NEWROUTE) { + if (!_netlink_route_encode_nexthop_src( + nexthop, AF_INET, nlmsg, req_size, bytelen)) + return false; + } + + if (IS_ZEBRA_DEBUG_KERNEL) + zlog_debug("%s: 5549 (%s): %pFX nexthop via %s %s if %u vrf %s(%u)", + __func__, routedesc, p, ipv4_ll_buf, + label_buf, nexthop->ifindex, + VRF_LOGNAME(vrf), nexthop->vrf_id); + return true; + } + + if (nexthop->type == NEXTHOP_TYPE_IPV4 + || nexthop->type == NEXTHOP_TYPE_IPV4_IFINDEX) { + /* Send deletes to the kernel without specifying the next-hop */ + if (cmd != RTM_DELROUTE) { + if (!_netlink_route_add_gateway_info( + rtmsg->rtm_family, AF_INET, nlmsg, req_size, + bytelen, nexthop)) + return false; + } + + if (cmd == RTM_NEWROUTE) { + if (!_netlink_route_encode_nexthop_src( + nexthop, AF_INET, nlmsg, req_size, bytelen)) + return false; + } + + if (IS_ZEBRA_DEBUG_KERNEL) { + inet_ntop(AF_INET, &nexthop->gate.ipv4, addrstr, + sizeof(addrstr)); + zlog_debug("%s: (%s): %pFX nexthop via %s %s if %u vrf %s(%u)", + __func__, routedesc, p, addrstr, label_buf, + nexthop->ifindex, VRF_LOGNAME(vrf), + nexthop->vrf_id); + } + } + + if (nexthop->type == NEXTHOP_TYPE_IPV6 + || nexthop->type == NEXTHOP_TYPE_IPV6_IFINDEX) { + if (!_netlink_route_add_gateway_info(rtmsg->rtm_family, + AF_INET6, nlmsg, req_size, + bytelen, nexthop)) + return false; + +
if (cmd == RTM_NEWROUTE) { + if (!_netlink_route_encode_nexthop_src( + nexthop, AF_INET6, nlmsg, req_size, + bytelen)) + return false; + } + + if (IS_ZEBRA_DEBUG_KERNEL) { + inet_ntop(AF_INET6, &nexthop->gate.ipv6, addrstr, + sizeof(addrstr)); + zlog_debug("%s: (%s): %pFX nexthop via %s %s if %u vrf %s(%u)", + __func__, routedesc, p, addrstr, label_buf, + nexthop->ifindex, VRF_LOGNAME(vrf), + nexthop->vrf_id); + } + } + + /* + * We have the ifindex so we should always send it + * This is especially useful if we are doing route + * leaking. + */ + if (nexthop->type != NEXTHOP_TYPE_BLACKHOLE) { + if (!nl_attr_put32(nlmsg, req_size, RTA_OIF, nexthop->ifindex)) + return false; + } + + if (nexthop->type == NEXTHOP_TYPE_IFINDEX) { + if (cmd == RTM_NEWROUTE) { + if (!_netlink_route_encode_nexthop_src( + nexthop, AF_INET, nlmsg, req_size, bytelen)) + return false; + } + + if (IS_ZEBRA_DEBUG_KERNEL) + zlog_debug("%s: (%s): %pFX nexthop via if %u vrf %s(%u)", + __func__, routedesc, p, nexthop->ifindex, + VRF_LOGNAME(vrf), nexthop->vrf_id); + } + + return true; +} + +/* This function appends the tag value as an RTA_FLOW attribute + * to the given netlink msg, but only if the value is in the + * range 1..255; any other tag value adds nothing. + * Used only if SUPPORT_REALMS enabled. + * + * @param n: nlmsghdr structure to fill in. + * @param maxlen: The size allocated for the message. + * @param tag: The route tag. + * + * The function returns false if the flow attribute was needed + * but could not be added to the message, otherwise true is + * returned (including when no attribute is needed). + */ +static inline bool _netlink_set_tag(struct nlmsghdr *n, unsigned int maxlen, + route_tag_t tag) +{ + if (tag > 0 && tag <= 255) { + if (!nl_attr_put32(n, maxlen, RTA_FLOW, tag)) + return false; + } + return true; +} + +/* This function takes a nexthop as argument and + * appends to the given netlink msg. If the nexthop + * defines a preferred source, the src parameter + * will be modified to point to that src, otherwise + * it will be kept unmodified.
+ * + * @param routedesc: Human readable description of route type + * (direct/recursive, single-/multipath) + * @param bytelen: Length of addresses in bytes. + * @param nexthop: Nexthop information + * @param nlmsg: nlmsghdr structure to fill in. + * @param req_size: The size allocated for the message. + * @param src: pointer pointing to a location where + * the prefsrc should be stored. + * + * The function returns true if the nexthop could be added + * to the message, otherwise false is returned. + */ +static bool _netlink_route_build_multipath( + const struct prefix *p, const char *routedesc, int bytelen, + const struct nexthop *nexthop, struct nlmsghdr *nlmsg, size_t req_size, + struct rtmsg *rtmsg, const union g_addr **src, route_tag_t tag) +{ + char label_buf[256]; + struct vrf *vrf; + struct rtnexthop *rtnh; + /* + * rtnh is the rtnexthop entry this nexthop is written into; both + * success paths below finish it with nl_attr_rtnh_end(). + */ + rtnh = nl_attr_rtnh(nlmsg, req_size); + if (rtnh == NULL) + return false; + + assert(nexthop); + + vrf = vrf_lookup_by_id(nexthop->vrf_id); + + if (!_netlink_route_encode_label_info(nexthop, nlmsg, req_size, rtmsg, + label_buf, sizeof(label_buf))) + return false; + + if (CHECK_FLAG(nexthop->flags, NEXTHOP_FLAG_ONLINK)) + rtnh->rtnh_flags |= RTNH_F_ONLINK; + /* + * IPv4 route over an IPv6 nexthop: the ipv4_ll address is the + * gateway and the entry is marked RTNH_F_ONLINK. + * NOTE(review): this early-return path does not call + * _netlink_set_tag(), unlike the common path at the end of the + * function - confirm that is intended. + */ + if (is_route_v4_over_v6(rtmsg->rtm_family, nexthop->type)) { + rtnh->rtnh_flags |= RTNH_F_ONLINK; + if (!nl_attr_put(nlmsg, req_size, RTA_GATEWAY, &ipv4_ll, 4)) + return false; + rtnh->rtnh_ifindex = nexthop->ifindex; + if (nexthop->weight) + rtnh->rtnh_hops = nexthop->weight - 1; + + if (nexthop->rmap_src.ipv4.s_addr != INADDR_ANY) + *src = &nexthop->rmap_src; + else if (nexthop->src.ipv4.s_addr != INADDR_ANY) + *src = &nexthop->src; + + if (IS_ZEBRA_DEBUG_KERNEL) + zlog_debug( + "%s: 5549 (%s): %pFX nexthop via %s %s if %u vrf %s(%u)", + __func__, routedesc, p, ipv4_ll_buf, label_buf, + nexthop->ifindex, VRF_LOGNAME(vrf), + nexthop->vrf_id); + nl_attr_rtnh_end(nlmsg, rtnh); + return true; + } + + if (nexthop->type == NEXTHOP_TYPE_IPV4 + || nexthop->type == NEXTHOP_TYPE_IPV4_IFINDEX) { + if
(!_netlink_route_add_gateway_info(rtmsg->rtm_family, AF_INET, + nlmsg, req_size, bytelen, + nexthop)) + return false; + /* rmap_src takes precedence over src as the reported source */ + if (nexthop->rmap_src.ipv4.s_addr != INADDR_ANY) + *src = &nexthop->rmap_src; + else if (nexthop->src.ipv4.s_addr != INADDR_ANY) + *src = &nexthop->src; + + if (IS_ZEBRA_DEBUG_KERNEL) + zlog_debug("%s: (%s): %pFX nexthop via %pI4 %s if %u vrf %s(%u)", + __func__, routedesc, p, &nexthop->gate.ipv4, + label_buf, nexthop->ifindex, + VRF_LOGNAME(vrf), nexthop->vrf_id); + } + if (nexthop->type == NEXTHOP_TYPE_IPV6 + || nexthop->type == NEXTHOP_TYPE_IPV6_IFINDEX) { + if (!_netlink_route_add_gateway_info(rtmsg->rtm_family, + AF_INET6, nlmsg, req_size, + bytelen, nexthop)) + return false; + + if (!IN6_IS_ADDR_UNSPECIFIED(&nexthop->rmap_src.ipv6)) + *src = &nexthop->rmap_src; + else if (!IN6_IS_ADDR_UNSPECIFIED(&nexthop->src.ipv6)) + *src = &nexthop->src; + + if (IS_ZEBRA_DEBUG_KERNEL) + zlog_debug("%s: (%s): %pFX nexthop via %pI6 %s if %u vrf %s(%u)", + __func__, routedesc, p, &nexthop->gate.ipv6, + label_buf, nexthop->ifindex, + VRF_LOGNAME(vrf), nexthop->vrf_id); + } + + /* + * We have figured out the ifindex so we should always send it + * This is especially useful if we are doing route + * leaking.
+ */ + if (nexthop->type != NEXTHOP_TYPE_BLACKHOLE) + rtnh->rtnh_ifindex = nexthop->ifindex; + + /* Interface-only nexthop: no gateway attribute, only the source selection (IPv4 fields are tested) */ + if (nexthop->type == NEXTHOP_TYPE_IFINDEX) { + if (nexthop->rmap_src.ipv4.s_addr != INADDR_ANY) + *src = &nexthop->rmap_src; + else if (nexthop->src.ipv4.s_addr != INADDR_ANY) + *src = &nexthop->src; + + if (IS_ZEBRA_DEBUG_KERNEL) + zlog_debug("%s: (%s): %pFX nexthop via if %u vrf %s(%u)", + __func__, routedesc, p, nexthop->ifindex, + VRF_LOGNAME(vrf), nexthop->vrf_id); + } + + if (nexthop->weight) + rtnh->rtnh_hops = nexthop->weight - 1; + + if (!_netlink_set_tag(nlmsg, req_size, tag)) + return false; + + nl_attr_rtnh_end(nlmsg, rtnh); + return true; +} +/* + * MPLS wrapper around _netlink_route_build_singlepath(): derives the + * address length from NHLFE_FAMILY(nhlfe) (4 for AF_INET, otherwise 16) + * and passes the nexthop of the NHLFE through. + */ +static inline bool +_netlink_mpls_build_singlepath(const struct prefix *p, const char *routedesc, + const struct zebra_nhlfe *nhlfe, + struct nlmsghdr *nlmsg, struct rtmsg *rtmsg, + size_t req_size, int cmd) +{ + int bytelen; + uint8_t family; + + family = NHLFE_FAMILY(nhlfe); + bytelen = (family == AF_INET ? 4 : 16); + return _netlink_route_build_singlepath(p, routedesc, bytelen, + nhlfe->nexthop, nlmsg, rtmsg, + req_size, cmd); +} + +/* + * MPLS wrapper around _netlink_route_build_multipath(): derives the + * address length from NHLFE_FAMILY(nhlfe) (4 for AF_INET, otherwise 16) + * and always passes a tag of 0. + */ +static inline bool +_netlink_mpls_build_multipath(const struct prefix *p, const char *routedesc, + const struct zebra_nhlfe *nhlfe, + struct nlmsghdr *nlmsg, size_t req_size, + struct rtmsg *rtmsg, const union g_addr **src) +{ + int bytelen; + uint8_t family; + + family = NHLFE_FAMILY(nhlfe); + bytelen = (family == AF_INET ?
4 : 16); + return _netlink_route_build_multipath(p, routedesc, bytelen, + nhlfe->nexthop, nlmsg, req_size, + rtmsg, src, 0); +} +/* + * Debug-log one MPLS route operation (route description, message type and + * label) when kernel debugging is enabled. The logged function name is a + * fixed string, not the name of this helper. + */ +static void _netlink_mpls_debug(int cmd, uint32_t label, const char *routedesc) +{ + if (IS_ZEBRA_DEBUG_KERNEL) + zlog_debug("netlink_mpls_multipath_msg_encode() (%s): %s %u/20", + routedesc, nl_msg_type_to_str(cmd), label); +} +/* + * Build and send a neighbor request (cmd is RTM_NEWNEIGH or RTM_DELNEIGH) + * for addr on ifindex. + * + * For RTM_NEWNEIGH the state is NUD_PERMANENT when permanent is set and + * NUD_REACHABLE otherwise; any other cmd uses NUD_FAILED. The request + * carries NDA_PROTOCOL, NDA_DST and, when lla is non-NULL, NDA_LLADDR. + * Returns the result of netlink_talk(). + * + * NOTE(review): the nl_attr_put() results are not checked, and zns is + * used without a NULL check - confirm zebra_ns_lookup() cannot fail here. + */ +static int netlink_neigh_update(int cmd, int ifindex, void *addr, char *lla, + int llalen, ns_id_t ns_id, uint8_t family, + bool permanent, uint8_t protocol) +{ + struct { + struct nlmsghdr n; + struct ndmsg ndm; + char buf[256]; + } req; + + struct zebra_ns *zns = zebra_ns_lookup(ns_id); + + memset(&req, 0, sizeof(req)); + + req.n.nlmsg_len = NLMSG_LENGTH(sizeof(struct ndmsg)); + req.n.nlmsg_flags = NLM_F_CREATE | NLM_F_REQUEST; + req.n.nlmsg_type = cmd; // RTM_NEWNEIGH or RTM_DELNEIGH + req.n.nlmsg_pid = zns->netlink_cmd.snl.nl_pid; + + req.ndm.ndm_family = family; + req.ndm.ndm_ifindex = ifindex; + req.ndm.ndm_type = RTN_UNICAST; + if (cmd == RTM_NEWNEIGH) { + if (!permanent) + req.ndm.ndm_state = NUD_REACHABLE; + else + req.ndm.ndm_state = NUD_PERMANENT; + } else + req.ndm.ndm_state = NUD_FAILED; + + nl_attr_put(&req.n, sizeof(req), NDA_PROTOCOL, &protocol, + sizeof(protocol)); + req.ndm.ndm_type = RTN_UNICAST; + nl_attr_put(&req.n, sizeof(req), NDA_DST, addr, + family2addrsize(family)); + if (lla) + nl_attr_put(&req.n, sizeof(req), NDA_LLADDR, lla, llalen); + + if (IS_ZEBRA_DEBUG_KERNEL) { + char ip_str[INET6_ADDRSTRLEN + 8]; + struct interface *ifp = if_lookup_by_index_per_ns( + zebra_ns_lookup(ns_id), ifindex); + if (ifp) { + if (family == AF_INET6) + snprintfrr(ip_str, sizeof(ip_str), "ipv6 %pI6", + (struct in6_addr *)addr); + else + snprintfrr(ip_str, sizeof(ip_str), "ipv4 %pI4", + (in_addr_t *)addr); + zlog_debug( + "%s: %s ifname %s ifindex %u addr %s mac %pEA vrf %s(%u)", + __func__, nl_msg_type_to_str(cmd), ifp->name, + ifindex, ip_str, (struct ethaddr *)lla, + vrf_id_to_name(ifp->vrf->vrf_id), +
ifp->vrf->vrf_id); + } + } + return netlink_talk(netlink_talk_filter, &req.n, &zns->netlink_cmd, zns, + false); +} +/* + * Copy the source address of a nexthop into src for the given family. + * + * rmap_src is used when it is set, otherwise the nexthop src. Returns true + * when an address was copied and false when neither is set or family is + * neither AF_INET nor AF_INET6; src is left untouched in that case. + */ +static bool nexthop_set_src(const struct nexthop *nexthop, int family, + union g_addr *src) +{ + if (family == AF_INET) { + if (nexthop->rmap_src.ipv4.s_addr != INADDR_ANY) { + src->ipv4 = nexthop->rmap_src.ipv4; + return true; + } else if (nexthop->src.ipv4.s_addr != INADDR_ANY) { + src->ipv4 = nexthop->src.ipv4; + return true; + } + } else if (family == AF_INET6) { + if (!IN6_IS_ADDR_UNSPECIFIED(&nexthop->rmap_src.ipv6)) { + src->ipv6 = nexthop->rmap_src.ipv6; + return true; + } else if (!IN6_IS_ADDR_UNSPECIFIED(&nexthop->src.ipv6)) { + src->ipv6 = nexthop->src.ipv6; + return true; + } + } + + return false; +} + +/* + * Add the encapsulation attributes of a nexthop to the message. + * + * Only NET_VXLAN is handled: RTA_ENCAP_TYPE is written, followed by an + * RTA_ENCAP nest holding the VNI. Any other encap type adds nothing. + * + * The function returns true if the attribute could be added + * to the message (or none was needed), otherwise false is returned. + * The return type is declared int but only true or false is returned. + */ +static int netlink_route_nexthop_encap(struct nlmsghdr *n, size_t nlen, + struct nexthop *nh) +{ + struct rtattr *nest; + + switch (nh->nh_encap_type) { + case NET_VXLAN: + if (!nl_attr_put16(n, nlen, RTA_ENCAP_TYPE, nh->nh_encap_type)) + return false; + + nest = nl_attr_nest(n, nlen, RTA_ENCAP); + if (!nest) + return false; + + if (!nl_attr_put32(n, nlen, 0 /* VXLAN_VNI */, + nh->nh_encap.vni)) + return false; + nl_attr_nest_end(n, nest); + break; + } + + return true; +} + +/* + * Routing table change via netlink interface, using a dataplane context object + * + * Returns -1 on failure, 0 when the msg doesn't fit entirely in the buffer + * otherwise the number of bytes written to buf.
+ */ +ssize_t netlink_route_multipath_msg_encode(int cmd, struct zebra_dplane_ctx *ctx, + uint8_t *data, size_t datalen, + bool fpm, bool force_nhg, + bool force_rr) +{ + int bytelen; + struct nexthop *nexthop = NULL; + unsigned int nexthop_num; + const char *routedesc; + bool setsrc = false; + union g_addr src; + const struct prefix *p, *src_p; + uint32_t table_id; + struct nlsock *nl; + route_tag_t tag = 0; + + struct { + struct nlmsghdr n; + struct rtmsg r; + char buf[]; + } *req = (void *)data; + + p = dplane_ctx_get_dest(ctx); + src_p = dplane_ctx_get_src(ctx); + + if (datalen < sizeof(*req)) + return 0; + + nl = kernel_netlink_nlsock_lookup(dplane_ctx_get_ns_sock(ctx)); + + memset(req, 0, sizeof(*req)); + + bytelen = (p->family == AF_INET ? 4 : 16); + + req->n.nlmsg_len = NLMSG_LENGTH(sizeof(struct rtmsg)); + req->n.nlmsg_flags = NLM_F_CREATE | NLM_F_REQUEST; + + if (((cmd == RTM_NEWROUTE) && + ((p->family == AF_INET) || v6_rr_semantics)) || + force_rr) + req->n.nlmsg_flags |= NLM_F_REPLACE; + + req->n.nlmsg_type = cmd; + + req->n.nlmsg_pid = nl->snl.nl_pid; + + req->r.rtm_family = p->family; + req->r.rtm_dst_len = p->prefixlen; + req->r.rtm_src_len = src_p ? src_p->prefixlen : 0; + req->r.rtm_scope = RT_SCOPE_UNIVERSE; + + if (cmd == RTM_DELROUTE) + req->r.rtm_protocol = zebra2proto(dplane_ctx_get_old_type(ctx)); + else + req->r.rtm_protocol = zebra2proto(dplane_ctx_get_type(ctx)); + + /* + * blackhole routes are not RTN_UNICAST, they are + * RTN_ BLACKHOLE|UNREACHABLE|PROHIBIT + * so setting this value as a RTN_UNICAST would + * cause the route lookup of just the prefix + * to fail. So no need to specify this for + * the RTM_DELROUTE case + */ + if (cmd != RTM_DELROUTE) + req->r.rtm_type = RTN_UNICAST; + + if (!nl_attr_put(&req->n, datalen, RTA_DST, &p->u.prefix, bytelen)) + return 0; + if (src_p) { + if (!nl_attr_put(&req->n, datalen, RTA_SRC, &src_p->u.prefix, + bytelen)) + return 0; + } + + /* Metric. 
*/ + /* Hardcode the metric for all routes coming from zebra. Metric isn't + * used + * either by the kernel or by zebra. Its purely for calculating best + * path(s) + * by the routing protocol and for communicating with protocol peers. + */ + if (!nl_attr_put32(&req->n, datalen, RTA_PRIORITY, + ROUTE_INSTALLATION_METRIC)) + return 0; + +#if defined(SUPPORT_REALMS) + if (cmd == RTM_DELROUTE) + tag = dplane_ctx_get_old_tag(ctx); + else + tag = dplane_ctx_get_tag(ctx); +#endif + + /* Table corresponding to this route. */ + table_id = dplane_ctx_get_table(ctx); + if (table_id < 256) + req->r.rtm_table = table_id; + else { + req->r.rtm_table = RT_TABLE_UNSPEC; + if (!nl_attr_put32(&req->n, datalen, RTA_TABLE, table_id)) + return 0; + } + + if (IS_ZEBRA_DEBUG_KERNEL) + zlog_debug( + "%s: %s %pFX vrf %u(%u)", __func__, + nl_msg_type_to_str(cmd), p, dplane_ctx_get_vrf(ctx), + table_id); + + /* + * If we are not updating the route and we have received + * a route delete, then all we need to fill in is the + * prefix information to tell the kernel to schwack + * it. + */ + if (cmd == RTM_DELROUTE) { + if (!_netlink_set_tag(&req->n, datalen, tag)) + return 0; + return NLMSG_ALIGN(req->n.nlmsg_len); + } + + if (dplane_ctx_get_mtu(ctx) || dplane_ctx_get_nh_mtu(ctx)) { + struct rtattr *nest; + uint32_t mtu = dplane_ctx_get_mtu(ctx); + uint32_t nexthop_mtu = dplane_ctx_get_nh_mtu(ctx); + + if (!mtu || (nexthop_mtu && nexthop_mtu < mtu)) + mtu = nexthop_mtu; + + nest = nl_attr_nest(&req->n, datalen, RTA_METRICS); + if (nest == NULL) + return 0; + + if (!nl_attr_put(&req->n, datalen, RTAX_MTU, &mtu, sizeof(mtu))) + return 0; + nl_attr_nest_end(&req->n, nest); + } + + /* + * Always install blackhole routes without using nexthops, because of + * the following kernel problems: + * 1. Kernel nexthops don't suport unreachable/prohibit route types. + * 2. Blackhole kernel nexthops are deleted when loopback is down. 
+ */ + nexthop = dplane_ctx_get_ng(ctx)->nexthop; + if (nexthop) { + if (CHECK_FLAG(nexthop->flags, NEXTHOP_FLAG_RECURSIVE)) + nexthop = nexthop->resolved; + + if (nexthop->type == NEXTHOP_TYPE_BLACKHOLE) { + switch (nexthop->bh_type) { + case BLACKHOLE_ADMINPROHIB: + req->r.rtm_type = RTN_PROHIBIT; + break; + case BLACKHOLE_REJECT: + req->r.rtm_type = RTN_UNREACHABLE; + break; + case BLACKHOLE_UNSPEC: + case BLACKHOLE_NULL: + req->r.rtm_type = RTN_BLACKHOLE; + break; + } + return NLMSG_ALIGN(req->n.nlmsg_len); + } + } + + if ((!fpm && kernel_nexthops_supported() + && (!proto_nexthops_only() + || is_proto_nhg(dplane_ctx_get_nhe_id(ctx), 0))) + || (fpm && force_nhg)) { + /* Kernel supports nexthop objects */ + if (IS_ZEBRA_DEBUG_KERNEL) + zlog_debug("%s: %pFX nhg_id is %u", __func__, p, + dplane_ctx_get_nhe_id(ctx)); + + if (!nl_attr_put32(&req->n, datalen, RTA_NH_ID, + dplane_ctx_get_nhe_id(ctx))) + return 0; + + /* Have to determine src still */ + for (ALL_NEXTHOPS_PTR(dplane_ctx_get_ng(ctx), nexthop)) { + if (setsrc) + break; + + setsrc = nexthop_set_src(nexthop, p->family, &src); + } + + if (setsrc) { + if (p->family == AF_INET) { + if (!nl_attr_put(&req->n, datalen, RTA_PREFSRC, + &src.ipv4, bytelen)) + return 0; + } else if (p->family == AF_INET6) { + if (!nl_attr_put(&req->n, datalen, RTA_PREFSRC, + &src.ipv6, bytelen)) + return 0; + } + } + + return NLMSG_ALIGN(req->n.nlmsg_len); + } + + /* Count overall nexthops so we can decide whether to use singlepath + * or multipath case. + */ + nexthop_num = 0; + for (ALL_NEXTHOPS_PTR(dplane_ctx_get_ng(ctx), nexthop)) { + if (CHECK_FLAG(nexthop->flags, NEXTHOP_FLAG_RECURSIVE)) + continue; + if (!NEXTHOP_IS_ACTIVE(nexthop->flags)) + continue; + + nexthop_num++; + } + + /* Singlepath case. 
*/ + if (nexthop_num == 1) { + nexthop_num = 0; + for (ALL_NEXTHOPS_PTR(dplane_ctx_get_ng(ctx), nexthop)) { + if (CHECK_FLAG(nexthop->flags, + NEXTHOP_FLAG_RECURSIVE)) { + + if (setsrc) + continue; + + setsrc = nexthop_set_src(nexthop, p->family, + &src); + continue; + } + + if (NEXTHOP_IS_ACTIVE(nexthop->flags)) { + routedesc = nexthop->rparent + ? "recursive, single-path" + : "single-path"; + + if (!_netlink_set_tag(&req->n, datalen, tag)) + return 0; + + if (!_netlink_route_build_singlepath( + p, routedesc, bytelen, nexthop, + &req->n, &req->r, datalen, cmd)) + return 0; + + /* + * Add encapsulation information when + * installing via FPM. + */ + if (fpm) { + if (!netlink_route_nexthop_encap(&req->n, + datalen, + nexthop)) + return 0; + } + + nexthop_num++; + break; + } + } + + if (setsrc) { + if (p->family == AF_INET) { + if (!nl_attr_put(&req->n, datalen, RTA_PREFSRC, + &src.ipv4, bytelen)) + return 0; + } else if (p->family == AF_INET6) { + if (!nl_attr_put(&req->n, datalen, RTA_PREFSRC, + &src.ipv6, bytelen)) + return 0; + } + } + } else { /* Multipath case */ + struct rtattr *nest; + const union g_addr *src1 = NULL; + + nest = nl_attr_nest(&req->n, datalen, RTA_MULTIPATH); + if (nest == NULL) + return 0; + + nexthop_num = 0; + for (ALL_NEXTHOPS_PTR(dplane_ctx_get_ng(ctx), nexthop)) { + if (CHECK_FLAG(nexthop->flags, + NEXTHOP_FLAG_RECURSIVE)) { + /* This only works for IPv4 now */ + if (setsrc) + continue; + + setsrc = nexthop_set_src(nexthop, p->family, + &src); + continue; + } + + if (NEXTHOP_IS_ACTIVE(nexthop->flags)) { + routedesc = nexthop->rparent + ? "recursive, multipath" + : "multipath"; + nexthop_num++; + + if (!_netlink_route_build_multipath( + p, routedesc, bytelen, nexthop, + &req->n, datalen, &req->r, &src1, + tag)) + return 0; + + /* + * Add encapsulation information when installing via + * FPM. 
+ */ + if (fpm) { + if (!netlink_route_nexthop_encap( + &req->n, datalen, nexthop)) + return 0; + } + + if (!setsrc && src1) { + if (p->family == AF_INET) + src.ipv4 = src1->ipv4; + else if (p->family == AF_INET6) + src.ipv6 = src1->ipv6; + + setsrc = 1; + } + } + } + + nl_attr_nest_end(&req->n, nest); + + if (setsrc) { + if (p->family == AF_INET) { + if (!nl_attr_put(&req->n, datalen, RTA_PREFSRC, + &src.ipv4, bytelen)) + return 0; + } else if (p->family == AF_INET6) { + if (!nl_attr_put(&req->n, datalen, RTA_PREFSRC, + &src.ipv6, bytelen)) + return 0; + } + if (IS_ZEBRA_DEBUG_KERNEL) + zlog_debug("Setting source"); + } + } + + /* If there is no useful nexthop then return. */ + if (nexthop_num == 0) { + if (IS_ZEBRA_DEBUG_KERNEL) + zlog_debug("%s: No useful nexthop.", __func__); + } + + return NLMSG_ALIGN(req->n.nlmsg_len); +} + +int kernel_get_ipmr_sg_stats(struct zebra_vrf *zvrf, void *in) +{ + uint32_t actual_table; + int suc = 0; + struct mcast_route_data *mr = (struct mcast_route_data *)in; + struct { + struct nlmsghdr n; + struct rtmsg rtm; + char buf[256]; + } req; + + mroute = mr; + struct zebra_ns *zns; + + zns = zvrf->zns; + memset(&req, 0, sizeof(req)); + + req.n.nlmsg_len = NLMSG_LENGTH(sizeof(struct rtmsg)); + req.n.nlmsg_flags = NLM_F_REQUEST; + req.n.nlmsg_pid = zns->netlink_cmd.snl.nl_pid; + + req.n.nlmsg_type = RTM_GETROUTE; + + if (mroute->family == AF_INET) { + req.rtm.rtm_family = RTNL_FAMILY_IPMR; + req.rtm.rtm_dst_len = IPV4_MAX_BITLEN; + req.rtm.rtm_src_len = IPV4_MAX_BITLEN; + + nl_attr_put(&req.n, sizeof(req), RTA_SRC, + &mroute->src.ipaddr_v4, + sizeof(mroute->src.ipaddr_v4)); + nl_attr_put(&req.n, sizeof(req), RTA_DST, + &mroute->grp.ipaddr_v4, + sizeof(mroute->grp.ipaddr_v4)); + } else { + req.rtm.rtm_family = RTNL_FAMILY_IP6MR; + req.rtm.rtm_dst_len = IPV6_MAX_BITLEN; + req.rtm.rtm_src_len = IPV6_MAX_BITLEN; + + nl_attr_put(&req.n, sizeof(req), RTA_SRC, + &mroute->src.ipaddr_v6, + sizeof(mroute->src.ipaddr_v6)); + nl_attr_put(&req.n, 
sizeof(req), RTA_DST, + &mroute->grp.ipaddr_v6, + sizeof(mroute->grp.ipaddr_v6)); + } + + /* + * What? + * + * So during the namespace cleanup we started storing + * the zvrf table_id for the default table as RT_TABLE_MAIN + * which is what the normal routing table for ip routing is. + * This change caused this to break our lookups of sg data + * because prior to this change the zvrf->table_id was 0 + * and when the pim multicast kernel code saw a 0, + * it was auto-translated to RT_TABLE_DEFAULT. But since + * we are now passing in RT_TABLE_MAIN there is no auto-translation + * and the kernel goes screw you and the delicious cookies you + * are trying to give me. So now we have this little hack. + */ + if (mroute->family == AF_INET) + actual_table = (zvrf->table_id == rt_table_main_id) + ? RT_TABLE_DEFAULT + : zvrf->table_id; + else + actual_table = zvrf->table_id; + + nl_attr_put32(&req.n, sizeof(req), RTA_TABLE, actual_table); + + suc = netlink_talk(netlink_route_change_read_multicast, &req.n, + &zns->netlink_cmd, zns, false); + + mroute = NULL; + return suc; +} + +/* Char length to debug ID with */ +#define ID_LENGTH 10 + +static bool _netlink_nexthop_build_group(struct nlmsghdr *n, size_t req_size, + uint32_t id, + const struct nh_grp *z_grp, + const uint8_t count, bool resilient, + const struct nhg_resilience *nhgr) +{ + struct nexthop_grp grp[count]; + /* Need space for max group size, "/", and null term */ + char buf[(MULTIPATH_NUM * (ID_LENGTH + 1)) + 1]; + char buf1[ID_LENGTH + 2]; + + buf[0] = '\0'; + + memset(grp, 0, sizeof(grp)); + + if (count) { + for (int i = 0; i < count; i++) { + grp[i].id = z_grp[i].id; + grp[i].weight = z_grp[i].weight - 1; + + if (IS_ZEBRA_DEBUG_KERNEL) { + if (i == 0) + snprintf(buf, sizeof(buf1), "group %u", + grp[i].id); + else { + snprintf(buf1, sizeof(buf1), "/%u", + grp[i].id); + strlcat(buf, buf1, sizeof(buf)); + } + } + } + if (!nl_attr_put(n, req_size, NHA_GROUP, grp, + count * sizeof(*grp))) + return false; + + if 
(resilient) { + struct rtattr *nest; + + nest = nl_attr_nest(n, req_size, NHA_RES_GROUP); + + nl_attr_put16(n, req_size, NHA_RES_GROUP_BUCKETS, + nhgr->buckets); + nl_attr_put32(n, req_size, NHA_RES_GROUP_IDLE_TIMER, + nhgr->idle_timer * 1000); + nl_attr_put32(n, req_size, + NHA_RES_GROUP_UNBALANCED_TIMER, + nhgr->unbalanced_timer * 1000); + nl_attr_nest_end(n, nest); + + nl_attr_put16(n, req_size, NHA_GROUP_TYPE, + NEXTHOP_GRP_TYPE_RES); + } + } + + if (IS_ZEBRA_DEBUG_KERNEL) + zlog_debug("%s: ID (%u): %s", __func__, id, buf); + + return true; +} + +/** + * Next hop packet encoding helper function. + * + * \param[in] cmd netlink command. + * \param[in] ctx dataplane context (information snapshot). + * \param[out] buf buffer to hold the packet. + * \param[in] buflen amount of buffer bytes. + * + * \returns -1 on failure, 0 when the msg doesn't fit entirely in the buffer + * otherwise the number of bytes written to buf. + */ +ssize_t netlink_nexthop_msg_encode(uint16_t cmd, + const struct zebra_dplane_ctx *ctx, + void *buf, size_t buflen, bool fpm) +{ + struct { + struct nlmsghdr n; + struct nhmsg nhm; + char buf[]; + } *req = buf; + + mpls_lse_t out_lse[MPLS_MAX_LABELS]; + char label_buf[256]; + int num_labels = 0; + uint32_t id = dplane_ctx_get_nhe_id(ctx); + int type = dplane_ctx_get_nhe_type(ctx); + struct rtattr *nest; + uint16_t encap; + struct nlsock *nl = + kernel_netlink_nlsock_lookup(dplane_ctx_get_ns_sock(ctx)); + + if (!id) { + flog_err( + EC_ZEBRA_NHG_FIB_UPDATE, + "Failed trying to update a nexthop group in the kernel that does not have an ID"); + return -1; + } + + /* + * Nothing to do if the kernel doesn't support nexthop objects or + * we dont want to install this type of NHG, but FPM may possible to + * handle this. 
+ */ + if (!fpm && !kernel_nexthops_supported()) { + if (IS_ZEBRA_DEBUG_KERNEL || IS_ZEBRA_DEBUG_NHG) + zlog_debug( + "%s: nhg_id %u (%s): kernel nexthops not supported, ignoring", + __func__, id, zebra_route_string(type)); + return 0; + } + + if (proto_nexthops_only() && !is_proto_nhg(id, type)) { + if (IS_ZEBRA_DEBUG_KERNEL || IS_ZEBRA_DEBUG_NHG) + zlog_debug( + "%s: nhg_id %u (%s): proto-based nexthops only, ignoring", + __func__, id, zebra_route_string(type)); + return 0; + } + + label_buf[0] = '\0'; + + if (buflen < sizeof(*req)) + return 0; + + memset(req, 0, sizeof(*req)); + + req->n.nlmsg_len = NLMSG_LENGTH(sizeof(struct nhmsg)); + req->n.nlmsg_flags = NLM_F_CREATE | NLM_F_REQUEST; + + if (cmd == RTM_NEWNEXTHOP) + req->n.nlmsg_flags |= NLM_F_REPLACE; + + req->n.nlmsg_type = cmd; + req->n.nlmsg_pid = nl->snl.nl_pid; + + req->nhm.nh_family = AF_UNSPEC; + /* TODO: Scope? */ + + if (!nl_attr_put32(&req->n, buflen, NHA_ID, id)) + return 0; + + if (cmd == RTM_NEWNEXTHOP) { + /* + * We distinguish between a "group", which is a collection + * of ids, and a singleton nexthop with an id. The + * group is installed as an id that just refers to a list of + * other ids. 
+ */ + if (dplane_ctx_get_nhe_nh_grp_count(ctx)) { + const struct nexthop_group *nhg; + const struct nhg_resilience *nhgr; + + nhg = dplane_ctx_get_nhe_ng(ctx); + nhgr = &nhg->nhgr; + if (!_netlink_nexthop_build_group( + &req->n, buflen, id, + dplane_ctx_get_nhe_nh_grp(ctx), + dplane_ctx_get_nhe_nh_grp_count(ctx), + !!nhgr->buckets, nhgr)) + return 0; + } else { + const struct nexthop *nh = + dplane_ctx_get_nhe_ng(ctx)->nexthop; + afi_t afi = dplane_ctx_get_nhe_afi(ctx); + + if (afi == AFI_IP) + req->nhm.nh_family = AF_INET; + else if (afi == AFI_IP6) + req->nhm.nh_family = AF_INET6; + + switch (nh->type) { + case NEXTHOP_TYPE_IPV4: + case NEXTHOP_TYPE_IPV4_IFINDEX: + if (!nl_attr_put(&req->n, buflen, NHA_GATEWAY, + &nh->gate.ipv4, + IPV4_MAX_BYTELEN)) + return 0; + break; + case NEXTHOP_TYPE_IPV6: + case NEXTHOP_TYPE_IPV6_IFINDEX: + if (!nl_attr_put(&req->n, buflen, NHA_GATEWAY, + &nh->gate.ipv6, + IPV6_MAX_BYTELEN)) + return 0; + break; + case NEXTHOP_TYPE_BLACKHOLE: + if (!nl_attr_put(&req->n, buflen, NHA_BLACKHOLE, + NULL, 0)) + return 0; + /* Blackhole shouldn't have anymore attributes + */ + goto nexthop_done; + case NEXTHOP_TYPE_IFINDEX: + /* Don't need anymore info for this */ + break; + } + + if (!nh->ifindex) { + flog_err( + EC_ZEBRA_NHG_FIB_UPDATE, + "Context received for kernel nexthop update without an interface"); + return -1; + } + + if (!nl_attr_put32(&req->n, buflen, NHA_OIF, + nh->ifindex)) + return 0; + + if (CHECK_FLAG(nh->flags, NEXTHOP_FLAG_ONLINK)) + req->nhm.nh_flags |= RTNH_F_ONLINK; + + num_labels = build_label_stack( + nh->nh_label, nh->nh_label_type, out_lse, + label_buf, sizeof(label_buf)); + + if (num_labels && nh->nh_label_type == ZEBRA_LSP_EVPN) { + if (!nl_attr_put16(&req->n, buflen, + NHA_ENCAP_TYPE, + LWTUNNEL_ENCAP_IP)) + return 0; + + nest = nl_attr_nest(&req->n, buflen, NHA_ENCAP); + if (!nest) + return 0; + + if (_netlink_nexthop_encode_dvni_label( + nh, &req->n, out_lse, buflen, + label_buf) == false) + return 0; + + 
nl_attr_nest_end(&req->n, nest); + + } else if (num_labels) { + /* Set the BoS bit */ + out_lse[num_labels - 1] |= + htonl(1 << MPLS_LS_S_SHIFT); + + /* + * TODO: MPLS unsupported for now in kernel. + */ + if (req->nhm.nh_family == AF_MPLS) + goto nexthop_done; + + encap = LWTUNNEL_ENCAP_MPLS; + if (!nl_attr_put16(&req->n, buflen, + NHA_ENCAP_TYPE, encap)) + return 0; + nest = nl_attr_nest(&req->n, buflen, NHA_ENCAP); + if (!nest) + return 0; + if (!nl_attr_put( + &req->n, buflen, MPLS_IPTUNNEL_DST, + &out_lse, + num_labels * sizeof(mpls_lse_t))) + return 0; + + nl_attr_nest_end(&req->n, nest); + } + + if (nh->nh_srv6) { + if (nh->nh_srv6->seg6local_action != + ZEBRA_SEG6_LOCAL_ACTION_UNSPEC) { + uint32_t action; + uint16_t encap; + struct rtattr *nest; + const struct seg6local_context *ctx; + + req->nhm.nh_family = AF_INET6; + action = nh->nh_srv6->seg6local_action; + ctx = &nh->nh_srv6->seg6local_ctx; + encap = LWTUNNEL_ENCAP_SEG6_LOCAL; + if (!nl_attr_put(&req->n, buflen, + NHA_ENCAP_TYPE, + &encap, + sizeof(uint16_t))) + return 0; + + nest = nl_attr_nest(&req->n, buflen, + NHA_ENCAP | NLA_F_NESTED); + if (!nest) + return 0; + + switch (action) { + case SEG6_LOCAL_ACTION_END: + if (!nl_attr_put32( + &req->n, buflen, + SEG6_LOCAL_ACTION, + SEG6_LOCAL_ACTION_END)) + return 0; + break; + case SEG6_LOCAL_ACTION_END_X: + if (!nl_attr_put32( + &req->n, buflen, + SEG6_LOCAL_ACTION, + SEG6_LOCAL_ACTION_END_X)) + return 0; + if (!nl_attr_put( + &req->n, buflen, + SEG6_LOCAL_NH6, &ctx->nh6, + sizeof(struct in6_addr))) + return 0; + break; + case SEG6_LOCAL_ACTION_END_T: + if (!nl_attr_put32( + &req->n, buflen, + SEG6_LOCAL_ACTION, + SEG6_LOCAL_ACTION_END_T)) + return 0; + if (!nl_attr_put32( + &req->n, buflen, + SEG6_LOCAL_TABLE, + ctx->table)) + return 0; + break; + case SEG6_LOCAL_ACTION_END_DX4: + if (!nl_attr_put32( + &req->n, buflen, + SEG6_LOCAL_ACTION, + SEG6_LOCAL_ACTION_END_DX4)) + return 0; + if (!nl_attr_put( + &req->n, buflen, + SEG6_LOCAL_NH4, &ctx->nh4, + 
sizeof(struct in_addr))) + return 0; + break; + case SEG6_LOCAL_ACTION_END_DT6: + if (!nl_attr_put32( + &req->n, buflen, + SEG6_LOCAL_ACTION, + SEG6_LOCAL_ACTION_END_DT6)) + return 0; + if (!nl_attr_put32( + &req->n, buflen, + SEG6_LOCAL_TABLE, + ctx->table)) + return 0; + break; + case SEG6_LOCAL_ACTION_END_DT4: + if (!nl_attr_put32( + &req->n, buflen, + SEG6_LOCAL_ACTION, + SEG6_LOCAL_ACTION_END_DT4)) + return 0; + if (!nl_attr_put32( + &req->n, buflen, + SEG6_LOCAL_VRFTABLE, + ctx->table)) + return 0; + break; + case SEG6_LOCAL_ACTION_END_DT46: + if (!nl_attr_put32( + &req->n, buflen, + SEG6_LOCAL_ACTION, + SEG6_LOCAL_ACTION_END_DT46)) + return 0; + if (!nl_attr_put32( + &req->n, buflen, + SEG6_LOCAL_VRFTABLE, + ctx->table)) + return 0; + break; + default: + zlog_err("%s: unsupport seg6local behaviour action=%u", + __func__, action); + return 0; + } + + if (!_netlink_nexthop_encode_seg6local_flavor( + nh, &req->n, buflen)) + return false; + + nl_attr_nest_end(&req->n, nest); + } + + if (nh->nh_srv6->seg6_segs && + nh->nh_srv6->seg6_segs->num_segs && + !sid_zero(nh->nh_srv6->seg6_segs)) { + char tun_buf[4096]; + ssize_t tun_len; + struct rtattr *nest; + + if (!nl_attr_put16(&req->n, buflen, + NHA_ENCAP_TYPE, + LWTUNNEL_ENCAP_SEG6)) + return 0; + nest = nl_attr_nest(&req->n, buflen, + NHA_ENCAP | NLA_F_NESTED); + if (!nest) + return 0; + tun_len = fill_seg6ipt_encap( + tun_buf, sizeof(tun_buf), + nh->nh_srv6->seg6_segs); + if (tun_len < 0) + return 0; + if (!nl_attr_put(&req->n, buflen, + SEG6_IPTUNNEL_SRH, + tun_buf, tun_len)) + return 0; + nl_attr_nest_end(&req->n, nest); + } + } + +nexthop_done: + + if (IS_ZEBRA_DEBUG_KERNEL) + zlog_debug("%s: ID (%u): %pNHv(%d) vrf %s(%u) %s ", + __func__, id, nh, nh->ifindex, + vrf_id_to_name(nh->vrf_id), + nh->vrf_id, label_buf); + } + + req->nhm.nh_protocol = zebra2proto(type); + + } else if (cmd != RTM_DELNEXTHOP) { + flog_err( + EC_ZEBRA_NHG_FIB_UPDATE, + "Nexthop group kernel update command (%d) does not exist", + cmd); 
+ return -1; + } + + if (IS_ZEBRA_DEBUG_KERNEL) + zlog_debug("%s: %s, id=%u", __func__, nl_msg_type_to_str(cmd), + id); + + return NLMSG_ALIGN(req->n.nlmsg_len); +} + +static ssize_t netlink_nexthop_msg_encoder(struct zebra_dplane_ctx *ctx, + void *buf, size_t buflen) +{ + enum dplane_op_e op; + int cmd = 0; + + op = dplane_ctx_get_op(ctx); + if (op == DPLANE_OP_NH_INSTALL || op == DPLANE_OP_NH_UPDATE) + cmd = RTM_NEWNEXTHOP; + else if (op == DPLANE_OP_NH_DELETE) + cmd = RTM_DELNEXTHOP; + else { + flog_err(EC_ZEBRA_NHG_FIB_UPDATE, + "Context received for kernel nexthop update with incorrect OP code (%u)", + op); + return -1; + } + + return netlink_nexthop_msg_encode(cmd, ctx, buf, buflen, false); +} + +enum netlink_msg_status +netlink_put_nexthop_update_msg(struct nl_batch *bth, + struct zebra_dplane_ctx *ctx) +{ + /* Nothing to do if the kernel doesn't support nexthop objects */ + if (!kernel_nexthops_supported()) + return FRR_NETLINK_SUCCESS; + + return netlink_batch_add_msg(bth, ctx, netlink_nexthop_msg_encoder, + false); +} + +static ssize_t netlink_newroute_msg_encoder(struct zebra_dplane_ctx *ctx, + void *buf, size_t buflen) +{ + return netlink_route_multipath_msg_encode(RTM_NEWROUTE, ctx, buf, + buflen, false, false, false); +} + +static ssize_t netlink_delroute_msg_encoder(struct zebra_dplane_ctx *ctx, + void *buf, size_t buflen) +{ + return netlink_route_multipath_msg_encode(RTM_DELROUTE, ctx, buf, + buflen, false, false, false); +} + +enum netlink_msg_status +netlink_put_route_update_msg(struct nl_batch *bth, struct zebra_dplane_ctx *ctx) +{ + int cmd; + const struct prefix *p = dplane_ctx_get_dest(ctx); + + if (dplane_ctx_get_op(ctx) == DPLANE_OP_ROUTE_DELETE) { + cmd = RTM_DELROUTE; + } else if (dplane_ctx_get_op(ctx) == DPLANE_OP_ROUTE_INSTALL) { + cmd = RTM_NEWROUTE; + } else if (dplane_ctx_get_op(ctx) == DPLANE_OP_ROUTE_UPDATE) { + + if (p->family == AF_INET || v6_rr_semantics) { + /* Single 'replace' operation */ + + /* + * With route replace 
semantics in place + * for v4 routes and the new route is a system + * route we do not install anything. + * The problem here is that the new system + * route should cause us to withdraw from + * the kernel the old non-system route + */ + if (RSYSTEM_ROUTE(dplane_ctx_get_type(ctx)) + && !RSYSTEM_ROUTE(dplane_ctx_get_old_type(ctx))) + return netlink_batch_add_msg( + bth, ctx, netlink_delroute_msg_encoder, + true); + } else { + /* + * So v6 route replace semantics are not in + * the kernel at this point as I understand it. + * so let's do a delete then an add. + * In the future once v6 route replace semantics + * are in we can figure out what to do here to + * allow working with old and new kernels. + * + * I'm also intentionally ignoring the failure case + * of the route delete. If that happens yeah we're + * screwed. + */ + if (!RSYSTEM_ROUTE(dplane_ctx_get_old_type(ctx))) + netlink_batch_add_msg( + bth, ctx, netlink_delroute_msg_encoder, + true); + } + + cmd = RTM_NEWROUTE; + } else + return FRR_NETLINK_ERROR; + + if (RSYSTEM_ROUTE(dplane_ctx_get_type(ctx))) + return FRR_NETLINK_SUCCESS; + + return netlink_batch_add_msg(bth, ctx, + cmd == RTM_NEWROUTE + ? 
netlink_newroute_msg_encoder + : netlink_delroute_msg_encoder, + false); +} + +/** + * netlink_nexthop_process_nh() - Parse the gatway/if info from a new nexthop + * + * @tb: Netlink RTA data + * @family: Address family in the nhmsg + * @ifp: Interface connected - this should be NULL, we fill it in + * @ns_id: Namspace id + * + * Return: New nexthop + */ +static struct nexthop netlink_nexthop_process_nh(struct rtattr **tb, + unsigned char family, + struct interface **ifp, + ns_id_t ns_id) +{ + struct nexthop nh = {}; + void *gate = NULL; + enum nexthop_types_t type = 0; + int if_index = 0; + size_t sz = 0; + struct interface *ifp_lookup; + + if_index = *(int *)RTA_DATA(tb[NHA_OIF]); + + + if (tb[NHA_GATEWAY]) { + switch (family) { + case AF_INET: + type = NEXTHOP_TYPE_IPV4_IFINDEX; + sz = 4; + break; + case AF_INET6: + type = NEXTHOP_TYPE_IPV6_IFINDEX; + sz = 16; + break; + default: + flog_warn( + EC_ZEBRA_BAD_NHG_MESSAGE, + "Nexthop gateway with bad address family (%d) received from kernel", + family); + return nh; + } + gate = RTA_DATA(tb[NHA_GATEWAY]); + } else + type = NEXTHOP_TYPE_IFINDEX; + + if (type) + nh.type = type; + + if (gate) + memcpy(&(nh.gate), gate, sz); + + if (if_index) + nh.ifindex = if_index; + + ifp_lookup = + if_lookup_by_index_per_ns(zebra_ns_lookup(ns_id), nh.ifindex); + + if (ifp) + *ifp = ifp_lookup; + if (ifp_lookup) + nh.vrf_id = ifp_lookup->vrf->vrf_id; + else { + flog_warn( + EC_ZEBRA_UNKNOWN_INTERFACE, + "%s: Unknown nexthop interface %u received, defaulting to VRF_DEFAULT", + __func__, nh.ifindex); + + nh.vrf_id = VRF_DEFAULT; + } + + if (tb[NHA_ENCAP] && tb[NHA_ENCAP_TYPE]) { + uint16_t encap_type = *(uint16_t *)RTA_DATA(tb[NHA_ENCAP_TYPE]); + int num_labels = 0; + + mpls_label_t labels[MPLS_MAX_LABELS] = {0}; + + if (encap_type == LWTUNNEL_ENCAP_MPLS) + num_labels = parse_encap_mpls(tb[NHA_ENCAP], labels); + + if (num_labels) + nexthop_add_labels(&nh, ZEBRA_LSP_STATIC, num_labels, + labels); + } + + return nh; +} + +static int 
netlink_nexthop_process_group(struct rtattr **tb, + struct nh_grp *z_grp, int z_grp_size, + struct nhg_resilience *nhgr) +{ + uint8_t count = 0; + /* linux/nexthop.h group struct */ + struct nexthop_grp *n_grp = NULL; + + n_grp = (struct nexthop_grp *)RTA_DATA(tb[NHA_GROUP]); + count = (RTA_PAYLOAD(tb[NHA_GROUP]) / sizeof(*n_grp)); + + if (!count || (count * sizeof(*n_grp)) != RTA_PAYLOAD(tb[NHA_GROUP])) { + flog_warn(EC_ZEBRA_BAD_NHG_MESSAGE, + "Invalid nexthop group received from the kernel"); + return count; + } + + for (int i = 0; ((i < count) && (i < z_grp_size)); i++) { + z_grp[i].id = n_grp[i].id; + z_grp[i].weight = n_grp[i].weight + 1; + } + + memset(nhgr, 0, sizeof(*nhgr)); + if (tb[NHA_RES_GROUP]) { + struct rtattr *tbn[NHA_RES_GROUP_MAX + 1]; + struct rtattr *rta; + struct rtattr *res_group = tb[NHA_RES_GROUP]; + + netlink_parse_rtattr_nested(tbn, NHA_RES_GROUP_MAX, res_group); + + if (tbn[NHA_RES_GROUP_BUCKETS]) { + rta = tbn[NHA_RES_GROUP_BUCKETS]; + nhgr->buckets = *(uint16_t *)RTA_DATA(rta); + } + + if (tbn[NHA_RES_GROUP_IDLE_TIMER]) { + rta = tbn[NHA_RES_GROUP_IDLE_TIMER]; + nhgr->idle_timer = *(uint32_t *)RTA_DATA(rta); + } + + if (tbn[NHA_RES_GROUP_UNBALANCED_TIMER]) { + rta = tbn[NHA_RES_GROUP_UNBALANCED_TIMER]; + nhgr->unbalanced_timer = *(uint32_t *)RTA_DATA(rta); + } + + if (tbn[NHA_RES_GROUP_UNBALANCED_TIME]) { + rta = tbn[NHA_RES_GROUP_UNBALANCED_TIME]; + nhgr->unbalanced_time = *(uint64_t *)RTA_DATA(rta); + } + } + + return count; +} + +/** + * netlink_nexthop_change() - Read in change about nexthops from the kernel + * + * @h: Netlink message header + * @ns_id: Namspace id + * @startup: Are we reading under startup conditions? 
+ * + * Return: Result status + */ +int netlink_nexthop_change(struct nlmsghdr *h, ns_id_t ns_id, int startup) +{ + int len; + /* nexthop group id */ + uint32_t id; + unsigned char family; + int type; + afi_t afi = AFI_UNSPEC; + vrf_id_t vrf_id = VRF_DEFAULT; + struct interface *ifp = NULL; + struct nhmsg *nhm = NULL; + struct nexthop nh = {}; + struct nh_grp grp[MULTIPATH_NUM] = {}; + /* Count of nexthops in group array */ + uint8_t grp_count = 0; + struct rtattr *tb[NHA_MAX + 1] = {}; + + frrtrace(3, frr_zebra, netlink_nexthop_change, h, ns_id, startup); + + nhm = NLMSG_DATA(h); + + if (ns_id) + vrf_id = ns_id; + + if (startup && h->nlmsg_type != RTM_NEWNEXTHOP) + return 0; + + len = h->nlmsg_len - NLMSG_LENGTH(sizeof(struct nhmsg)); + if (len < 0) { + zlog_warn( + "%s: Message received from netlink is of a broken size %d %zu", + __func__, h->nlmsg_len, + (size_t)NLMSG_LENGTH(sizeof(struct nhmsg))); + return -1; + } + + netlink_parse_rtattr_flags(tb, NHA_MAX, RTM_NHA(nhm), len, + NLA_F_NESTED); + + + if (!tb[NHA_ID]) { + flog_warn( + EC_ZEBRA_BAD_NHG_MESSAGE, + "Nexthop group without an ID received from the kernel"); + return -1; + } + + /* We use the ID key'd nhg table for kernel updates */ + id = *((uint32_t *)RTA_DATA(tb[NHA_ID])); + + if (zebra_evpn_mh_is_fdb_nh(id)) { + /* If this is a L2 NH just ignore it */ + if (IS_ZEBRA_DEBUG_KERNEL || IS_ZEBRA_DEBUG_EVPN_MH_NH) { + zlog_debug("Ignore kernel update (%u) for fdb-nh 0x%x", + h->nlmsg_type, id); + } + return 0; + } + + family = nhm->nh_family; + afi = family2afi(family); + + type = proto2zebra(nhm->nh_protocol, 0, true); + + if (IS_ZEBRA_DEBUG_KERNEL) + zlog_debug("%s ID (%u) %s NS %u", + nl_msg_type_to_str(h->nlmsg_type), id, + nl_family_to_str(family), ns_id); + + + if (h->nlmsg_type == RTM_NEWNEXTHOP) { + struct nhg_resilience nhgr = {}; + + if (tb[NHA_GROUP]) { + /** + * If this is a group message its only going to have + * an array of nexthop IDs associated with it + */ + grp_count = 
netlink_nexthop_process_group( + tb, grp, array_size(grp), &nhgr); + } else { + if (tb[NHA_BLACKHOLE]) { + /** + * This nexthop is just for blackhole-ing + * traffic, it should not have an OIF, GATEWAY, + * or ENCAP + */ + nh.type = NEXTHOP_TYPE_BLACKHOLE; + nh.bh_type = BLACKHOLE_UNSPEC; + } else if (tb[NHA_OIF]) + /** + * This is a true new nexthop, so we need + * to parse the gateway and device info + */ + nh = netlink_nexthop_process_nh(tb, family, + &ifp, ns_id); + else { + + flog_warn( + EC_ZEBRA_BAD_NHG_MESSAGE, + "Invalid Nexthop message received from the kernel with ID (%u)", + id); + return -1; + } + SET_FLAG(nh.flags, NEXTHOP_FLAG_ACTIVE); + if (nhm->nh_flags & RTNH_F_ONLINK) + SET_FLAG(nh.flags, NEXTHOP_FLAG_ONLINK); + vrf_id = nh.vrf_id; + } + + if (zebra_nhg_kernel_find(id, &nh, grp, grp_count, vrf_id, afi, + type, startup, &nhgr)) + return -1; + + } else if (h->nlmsg_type == RTM_DELNEXTHOP) + zebra_nhg_kernel_del(id, vrf_id); + + return 0; +} + +/** + * netlink_request_nexthop() - Request nextop information from the kernel + * @zns: Zebra namespace + * @family: AF_* netlink family + * @type: RTM_* route type + * + * Return: Result status + */ +static int netlink_request_nexthop(struct zebra_ns *zns, int family, int type) +{ + struct { + struct nlmsghdr n; + struct nhmsg nhm; + } req; + + /* Form the request, specifying filter (rtattr) if needed. */ + memset(&req, 0, sizeof(req)); + req.n.nlmsg_type = type; + req.n.nlmsg_flags = NLM_F_ROOT | NLM_F_MATCH | NLM_F_REQUEST; + req.n.nlmsg_len = NLMSG_LENGTH(sizeof(struct nhmsg)); + req.nhm.nh_family = family; + + return netlink_request(&zns->netlink_cmd, &req); +} + + +/** + * netlink_nexthop_read() - Nexthop read function using netlink interface + * + * @zns: Zebra name space + * + * Return: Result status + * Only called at bootstrap time. 
+ */ +int netlink_nexthop_read(struct zebra_ns *zns) +{ + int ret; + struct zebra_dplane_info dp_info; + + zebra_dplane_info_from_zns(&dp_info, zns, true /*is_cmd*/); + + /* Get nexthop objects */ + ret = netlink_request_nexthop(zns, AF_UNSPEC, RTM_GETNEXTHOP); + if (ret < 0) + return ret; + ret = netlink_parse_info(netlink_nexthop_change, &zns->netlink_cmd, + &dp_info, 0, true); + + if (!ret) + /* If we succesfully read in nexthop objects, + * this kernel must support them. + */ + supports_nh = true; + if (IS_ZEBRA_DEBUG_KERNEL || IS_ZEBRA_DEBUG_NHG) + zlog_debug("Nexthop objects %ssupported on this kernel", + supports_nh ? "" : "not "); + + zebra_router_set_supports_nhgs(supports_nh); + + return ret; +} + + +int kernel_neigh_update(int add, int ifindex, void *addr, char *lla, int llalen, + ns_id_t ns_id, uint8_t family, bool permanent) +{ + return netlink_neigh_update(add ? RTM_NEWNEIGH : RTM_DELNEIGH, ifindex, + addr, lla, llalen, ns_id, family, permanent, + RTPROT_ZEBRA); +} + +/** + * netlink_neigh_update_msg_encode() - Common helper api for encoding + * evpn neighbor update as netlink messages using dataplane context object. + * Here, a neighbor refers to a bridge forwarding database entry for + * either unicast forwarding or head-end replication or an IP neighbor + * entry. + * @ctx: Dataplane context + * @cmd: Netlink command (RTM_NEWNEIGH or RTM_DELNEIGH) + * @lla: A pointer to neighbor cache link layer address + * @llalen: Length of the pointer to neighbor cache link layer + * address + * @ip: A neighbor cache n/w layer destination address + * In the case of bridge FDB, this represnts the remote + * VTEP IP. 
+ * @replace_obj: Whether NEW request should replace existing object or + * add to the end of the list + * @family: AF_* netlink family + * @type: RTN_* route type + * @flags: NTF_* flags + * @state: NUD_* states + * @nhg_id: Nexthop group id; when non-zero it is sent as NDA_NH_ID + * and @ip is not encoded + * @nfy: Whether to add the nested NDA_FDB_EXT_ATTRS attribute + * @nfy_flags: Value stored in NFEA_ACTIVITY_NOTIFY inside that nest + * @ext: Whether to add the NDA_EXT_FLAGS attribute + * @ext_flags: Value stored in NDA_EXT_FLAGS + * @data: data buffer pointer + * @datalen: total amount of data buffer space + * @protocol: protocol information (sent as NDA_PROTOCOL) + * + * Return: 0 when the msg doesn't fit entirely in the buffer + * otherwise the aligned length of the encoded message. + */ +static ssize_t netlink_neigh_update_msg_encode( + const struct zebra_dplane_ctx *ctx, int cmd, const void *lla, + int llalen, const struct ipaddr *ip, bool replace_obj, uint8_t family, + uint8_t type, uint8_t flags, uint16_t state, uint32_t nhg_id, bool nfy, + uint8_t nfy_flags, bool ext, uint32_t ext_flags, void *data, + size_t datalen, uint8_t protocol) +{ + struct { + struct nlmsghdr n; + struct ndmsg ndm; + char buf[]; + } *req = data; + int ipa_len; + enum dplane_op_e op; + + if (datalen < sizeof(*req)) + return 0; + memset(req, 0, sizeof(*req)); /* clears only the fixed nlmsghdr and ndmsg headers */ + + op = dplane_ctx_get_op(ctx); + + req->n.nlmsg_len = NLMSG_LENGTH(sizeof(struct ndmsg)); + req->n.nlmsg_flags = NLM_F_REQUEST; + if (cmd == RTM_NEWNEIGH) + req->n.nlmsg_flags |= + NLM_F_CREATE + | (replace_obj ? 
NLM_F_REPLACE : NLM_F_APPEND); + req->n.nlmsg_type = cmd; + req->ndm.ndm_family = family; + req->ndm.ndm_type = type; + req->ndm.ndm_state = state; + req->ndm.ndm_flags = flags; + req->ndm.ndm_ifindex = dplane_ctx_get_ifindex(ctx); + + if (!nl_attr_put(&req->n, datalen, NDA_PROTOCOL, &protocol, + sizeof(protocol))) + return 0; + + if (lla) { /* the link layer address is optional */ + if (!nl_attr_put(&req->n, datalen, NDA_LLADDR, lla, llalen)) + return 0; + } + + if (nfy) { /* nested activity-notify attributes */ + struct rtattr *nest; + + nest = nl_attr_nest(&req->n, datalen, + NDA_FDB_EXT_ATTRS | NLA_F_NESTED); + if (!nest) + return 0; + + if (!nl_attr_put(&req->n, datalen, NFEA_ACTIVITY_NOTIFY, + &nfy_flags, sizeof(nfy_flags))) + return 0; + if (!nl_attr_put(&req->n, datalen, NFEA_DONT_REFRESH, NULL, 0)) + return 0; + + nl_attr_nest_end(&req->n, nest); + } + + + if (ext) { + if (!nl_attr_put(&req->n, datalen, NDA_EXT_FLAGS, &ext_flags, + sizeof(ext_flags))) + return 0; + } + + if (nhg_id) { /* destination is a nexthop group id, not an IP */ + if (!nl_attr_put32(&req->n, datalen, NDA_NH_ID, nhg_id)) + return 0; + } else { + ipa_len = + IS_IPADDR_V4(ip) ? 
IPV4_MAX_BYTELEN : IPV6_MAX_BYTELEN; + if (!nl_attr_put(&req->n, datalen, NDA_DST, &ip->ip.addr, + ipa_len)) + return 0; + } + + if (op == DPLANE_OP_MAC_INSTALL || op == DPLANE_OP_MAC_DELETE) { /* MAC ops also carry VLAN, source VNI and bridge master */ + vlanid_t vid = dplane_ctx_mac_get_vlan(ctx); + vni_t vni = dplane_ctx_mac_get_vni(ctx); + + if (vid > 0) { + if (!nl_attr_put16(&req->n, datalen, NDA_VLAN, vid)) + return 0; + } + + if (vni > 0) { + if (!nl_attr_put32(&req->n, datalen, NDA_SRC_VNI, vni)) + return 0; + } + + if (!nl_attr_put32(&req->n, datalen, NDA_MASTER, + dplane_ctx_mac_get_br_ifindex(ctx))) + return 0; + } + + if (op == DPLANE_OP_VTEP_ADD || op == DPLANE_OP_VTEP_DELETE) { /* VTEP ops carry only the source VNI, when set */ + vni_t vni = dplane_ctx_neigh_get_vni(ctx); + + if (vni > 0) { + if (!nl_attr_put32(&req->n, datalen, NDA_SRC_VNI, vni)) + return 0; + } + } + + return NLMSG_ALIGN(req->n.nlmsg_len); +} + +/* + * Add remote VTEP to the flood list for this VxLAN interface (VNI). 
This + * is done by adding an FDB entry with a MAC of 00:00:00:00:00:00. + */ +static ssize_t +netlink_vxlan_flood_update_ctx(const struct zebra_dplane_ctx *ctx, int cmd, + void *buf, size_t buflen) +{ + struct ethaddr dst_mac = {.octet = {0}}; + int proto = RTPROT_ZEBRA; + + if (dplane_ctx_get_type(ctx) != 0) + proto = zebra2proto(dplane_ctx_get_type(ctx)); + + return netlink_neigh_update_msg_encode( + ctx, cmd, (const void *)&dst_mac, ETH_ALEN, + dplane_ctx_neigh_get_ipaddr(ctx), false, PF_BRIDGE, 0, NTF_SELF, + (NUD_NOARP | NUD_PERMANENT), 0 /*nhg*/, false /*nfy*/, + 0 /*nfy_flags*/, false /*ext*/, 0 /*ext_flags*/, buf, buflen, + proto); +} + +/* Fallback when the headers lack NDA_RTA: first attribute after the ndmsg. */ +#ifndef NDA_RTA +#define NDA_RTA(r) \ + ((struct rtattr *)(((char *)(r)) + NLMSG_ALIGN(sizeof(struct ndmsg)))) +#endif + +/* + * Process one AF_BRIDGE neighbor (MAC FDB) message. Nothing is done unless + * EVPN is enabled. The NDA_* attributes are parsed, an ETH_ALEN-sized + * NDA_LLADDR is required, the interface named by ndm_ifindex must be a + * bridge slave with a known bridge master, and filter_vlan (when non-zero) + * must match the entry's VLAN. RTM_NEWNEIGH is handed to the zebra_vxlan + * add/update handlers; any other type is treated as a delete. + * + * Returns 0 when the message is ignored, otherwise the handler's result. + */ +static int netlink_macfdb_change(struct nlmsghdr *h, int len, ns_id_t ns_id) +{ + struct ndmsg *ndm; + struct interface *ifp; + struct zebra_if *zif; + struct rtattr *tb[NDA_MAX + 1]; + struct interface *br_if; + struct ethaddr mac; + vlanid_t vid = 0; + struct in_addr vtep_ip; + int vid_present = 0, dst_present = 0; + char vid_buf[20]; + char dst_buf[30]; + bool sticky; + bool local_inactive = false; + bool dp_static = false; + vni_t vni = 0; + uint32_t nhg_id = 0; + bool vni_mcast_grp = false; + + ndm = NLMSG_DATA(h); + + /* We only process macfdb notifications if EVPN is enabled */ + if (!is_evpn_enabled()) + return 0; + + /* Parse attributes and extract fields of interest. Do basic + * validation of the fields. 
+ */ + netlink_parse_rtattr_flags(tb, NDA_MAX, NDA_RTA(ndm), len, + NLA_F_NESTED); + + if (!tb[NDA_LLADDR]) { + if (IS_ZEBRA_DEBUG_KERNEL) + zlog_debug("%s AF_BRIDGE IF %u - no LLADDR", + nl_msg_type_to_str(h->nlmsg_type), + ndm->ndm_ifindex); + return 0; + } + + if (RTA_PAYLOAD(tb[NDA_LLADDR]) != ETH_ALEN) { + if (IS_ZEBRA_DEBUG_KERNEL) + zlog_debug( + "%s AF_BRIDGE IF %u - LLADDR is not MAC, len %lu", + nl_msg_type_to_str(h->nlmsg_type), ndm->ndm_ifindex, + (unsigned long)RTA_PAYLOAD(tb[NDA_LLADDR])); + return 0; + } + + memcpy(&mac, RTA_DATA(tb[NDA_LLADDR]), ETH_ALEN); + + if (tb[NDA_VLAN]) { + vid_present = 1; + vid = *(uint16_t *)RTA_DATA(tb[NDA_VLAN]); + snprintf(vid_buf, sizeof(vid_buf), " VLAN %u", vid); + } + + if (tb[NDA_DST]) { + /* TODO: Only IPv4 supported now. */ + dst_present = 1; + memcpy(&vtep_ip.s_addr, RTA_DATA(tb[NDA_DST]), + IPV4_MAX_BYTELEN); + snprintfrr(dst_buf, sizeof(dst_buf), " dst %pI4", + &vtep_ip); + } else + memset(&vtep_ip, 0, sizeof(vtep_ip)); + + if (tb[NDA_NH_ID]) + nhg_id = *(uint32_t *)RTA_DATA(tb[NDA_NH_ID]); + + if (ndm->ndm_state & NUD_STALE) + local_inactive = true; + + if (tb[NDA_FDB_EXT_ATTRS]) { + struct rtattr *attr = tb[NDA_FDB_EXT_ATTRS]; + struct rtattr *nfea_tb[NFEA_MAX + 1] = {0}; + + netlink_parse_rtattr_nested(nfea_tb, NFEA_MAX, attr); + if (nfea_tb[NFEA_ACTIVITY_NOTIFY]) { + uint8_t nfy_flags; + + nfy_flags = *(uint8_t *)RTA_DATA( + nfea_tb[NFEA_ACTIVITY_NOTIFY]); + if (nfy_flags & FDB_NOTIFY_BIT) + dp_static = true; + if (nfy_flags & FDB_NOTIFY_INACTIVE_BIT) + local_inactive = true; + } + } + + if (tb[NDA_SRC_VNI]) + vni = *(vni_t *)RTA_DATA(tb[NDA_SRC_VNI]); + + if (IS_ZEBRA_DEBUG_KERNEL) + zlog_debug( + "Rx %s AF_BRIDGE IF %u%s st 0x%x fl 0x%x MAC %pEA%s nhg %d vni %d", + nl_msg_type_to_str(h->nlmsg_type), ndm->ndm_ifindex, + vid_present ? vid_buf : "", ndm->ndm_state, + ndm->ndm_flags, &mac, dst_present ? dst_buf : "", + nhg_id, vni); + + /* The interface should exist. 
*/ + ifp = if_lookup_by_index_per_ns(zebra_ns_lookup(ns_id), + ndm->ndm_ifindex); + if (!ifp || !ifp->info) + return 0; + + /* The interface should be something we're interested in. */ + if (!IS_ZEBRA_IF_BRIDGE_SLAVE(ifp)) + return 0; + + zif = (struct zebra_if *)ifp->info; + if ((br_if = zif->brslave_info.br_if) == NULL) { + if (IS_ZEBRA_DEBUG_KERNEL) + zlog_debug( + "%s AF_BRIDGE IF %s(%u) brIF %u - no bridge master", + nl_msg_type_to_str(h->nlmsg_type), ifp->name, + ndm->ndm_ifindex, + zif->brslave_info.bridge_ifindex); + return 0; + } + + /* For per vni device, vni comes from device itself */ + if (IS_ZEBRA_IF_VXLAN(ifp) && IS_ZEBRA_VXLAN_IF_VNI(zif)) { + struct zebra_vxlan_vni *vnip; + + vnip = zebra_vxlan_if_vni_find(zif, 0); + vni = vnip->vni; + } + + sticky = !!(ndm->ndm_flags & NTF_STICKY); + + if (filter_vlan && vid != filter_vlan) { + if (IS_ZEBRA_DEBUG_KERNEL) + zlog_debug(" Filtered due to filter vlan: %d", + filter_vlan); + return 0; + } + + /* + * Check if this is a mcast group update (svd case) + */ + vni_mcast_grp = is_mac_vni_mcast_group(&mac, vni, vtep_ip); + + /* If add or update, do accordingly if learnt on a "local" interface; if + * the notification is over VxLAN, this has to be related to + * multi-homing, + * so perform an implicit delete of any local entry (if it exists). + */ + if (h->nlmsg_type == RTM_NEWNEIGH) { + /* Drop "permanent" entries. 
*/ + if (!vni_mcast_grp && (ndm->ndm_state & NUD_PERMANENT)) { + if (IS_ZEBRA_DEBUG_KERNEL) + zlog_debug( + " Dropping entry because of NUD_PERMANENT"); + return 0; + } + + if (IS_ZEBRA_IF_VXLAN(ifp)) { + if (!dst_present) + return 0; + + if (vni_mcast_grp) + return zebra_vxlan_if_vni_mcast_group_add_update( + ifp, vni, &vtep_ip); + + return zebra_vxlan_dp_network_mac_add( + ifp, br_if, &mac, vid, vni, nhg_id, sticky, + !!(ndm->ndm_flags & NTF_EXT_LEARNED)); + } + + return zebra_vxlan_local_mac_add_update(ifp, br_if, &mac, vid, + sticky, local_inactive, dp_static); + } + + /* This is a delete notification. + * Ignore the notification with IP dest as it may just signify that the + * MAC has moved from remote to local. The exception is the special + * all-zeros MAC that represents the BUM flooding entry; we may have + * to readd it. Otherwise, + * 1. For a MAC over VxLan, check if it needs to be refreshed(readded) + * 2. For a MAC over "local" interface, delete the mac + * Note: We will get notifications from both bridge driver and VxLAN + * driver. + */ + if (nhg_id) + return 0; + + if (dst_present) { + if (vni_mcast_grp) + return zebra_vxlan_if_vni_mcast_group_del(ifp, vni, + &vtep_ip); + + if (is_zero_mac(&mac) && vni) + return zebra_vxlan_check_readd_vtep(ifp, vni, vtep_ip); + + return 0; + } + + if (IS_ZEBRA_IF_VXLAN(ifp)) + return 0; + + return zebra_vxlan_local_mac_del(ifp, br_if, &mac, vid); +} + +/* + * Parse callback for FDB reads: only RTM_NEWNEIGH messages of family + * AF_BRIDGE are passed on to netlink_macfdb_change(). Returns -1 when the + * message is shorter than an ndmsg header. @startup is not used here. + */ +static int netlink_macfdb_table(struct nlmsghdr *h, ns_id_t ns_id, int startup) +{ + int len; + struct ndmsg *ndm; + + if (h->nlmsg_type != RTM_NEWNEIGH) + return 0; + + /* Length validity. */ + len = h->nlmsg_len - NLMSG_LENGTH(sizeof(struct ndmsg)); + if (len < 0) + return -1; + + /* We are interested only in AF_BRIDGE notifications. 
*/ + ndm = NLMSG_DATA(h); + if (ndm->ndm_family != AF_BRIDGE) + return 0; + + return netlink_macfdb_change(h, len, ns_id); +} + +/* Request for MAC FDB information from the kernel */ +static int netlink_request_macs(struct nlsock *netlink_cmd, int family, + int type, ifindex_t master_ifindex) +{ + struct { + struct nlmsghdr n; + struct ifinfomsg ifm; + char buf[256]; + } req; + + /* Form the request, specifying filter (rtattr) if needed. */ + memset(&req, 0, sizeof(req)); + req.n.nlmsg_type = type; + req.n.nlmsg_flags = NLM_F_ROOT | NLM_F_MATCH | NLM_F_REQUEST; + req.n.nlmsg_len = NLMSG_LENGTH(sizeof(struct ifinfomsg)); + req.ifm.ifi_family = family; + if (master_ifindex) + nl_attr_put32(&req.n, sizeof(req), IFLA_MASTER, master_ifindex); + + return netlink_request(netlink_cmd, &req); +} + +/* + * MAC forwarding database read using netlink interface. This is invoked + * at startup. + */ +int netlink_macfdb_read(struct zebra_ns *zns) +{ + int ret; + struct zebra_dplane_info dp_info; + + zebra_dplane_info_from_zns(&dp_info, zns, true /*is_cmd*/); + + /* Get bridge FDB table. */ + ret = netlink_request_macs(&zns->netlink_cmd, AF_BRIDGE, RTM_GETNEIGH, + 0); + if (ret < 0) + return ret; + /* We are reading entire table. */ + filter_vlan = 0; + ret = netlink_parse_info(netlink_macfdb_table, &zns->netlink_cmd, + &dp_info, 0, true); + + return ret; +} + +/* + * MAC forwarding database read using netlink interface. This is for a + * specific bridge and matching specific access VLAN (if VLAN-aware bridge). + * filter_vlan is set only for the duration of the read and is cleared again + * on every exit path. + */ +int netlink_macfdb_read_for_bridge(struct zebra_ns *zns, struct interface *ifp, + struct interface *br_if, vlanid_t vid) +{ + struct zebra_if *br_zif; + struct zebra_dplane_info dp_info; + int ret = 0; + + zebra_dplane_info_from_zns(&dp_info, zns, true /*is_cmd*/); + + /* Save VLAN we're filtering on, if needed. 
*/ + br_zif = (struct zebra_if *)br_if->info; + if (IS_ZEBRA_IF_BRIDGE_VLAN_AWARE(br_zif)) + filter_vlan = vid; + + /* Get bridge FDB table for specific bridge - we do the VLAN filtering. + */ + ret = netlink_request_macs(&zns->netlink_cmd, AF_BRIDGE, RTM_GETNEIGH, + br_if->ifindex); + if (ret >= 0) + ret = netlink_parse_info(netlink_macfdb_table, + &zns->netlink_cmd, &dp_info, 0, false); + + /* Reset VLAN filter on every path, including a failed request, so a + * stale value cannot filter FDB messages handled later. + */ + filter_vlan = 0; + return ret; +} + + +/* Request for MAC FDB for a specific MAC address in VLAN from the kernel */ +static int netlink_request_specific_mac(struct zebra_ns *zns, int family, + int type, struct interface *ifp, + const struct ethaddr *mac, vlanid_t vid, + vni_t vni, uint8_t flags) +{ + struct { + struct nlmsghdr n; + struct ndmsg ndm; + char buf[256]; + } req; + struct zebra_if *zif; + + memset(&req, 0, sizeof(req)); + req.n.nlmsg_len = NLMSG_LENGTH(sizeof(struct ndmsg)); + req.n.nlmsg_type = type; /* RTM_GETNEIGH */ + req.n.nlmsg_flags = NLM_F_REQUEST; + req.ndm.ndm_family = family; /* AF_BRIDGE */ + req.ndm.ndm_flags = flags; + /* req.ndm.ndm_state = NUD_REACHABLE; */ + + nl_attr_put(&req.n, sizeof(req), NDA_LLADDR, mac, ETH_ALEN); + + zif = (struct zebra_if *)ifp->info; + /* Is this a read on a VXLAN interface? */ + if (IS_ZEBRA_IF_VXLAN(ifp)) { + nl_attr_put32(&req.n, sizeof(req), NDA_VNI, vni); + /* TBD: Why is ifindex not filled in the non-vxlan case? 
*/ + req.ndm.ndm_ifindex = ifp->ifindex; + } else { + if (IS_ZEBRA_IF_BRIDGE_VLAN_AWARE(zif) && vid > 0) + nl_attr_put16(&req.n, sizeof(req), NDA_VLAN, vid); + nl_attr_put32(&req.n, sizeof(req), NDA_MASTER, ifp->ifindex); + } + + if (IS_ZEBRA_DEBUG_KERNEL) + zlog_debug("Tx %s %s IF %s(%u) MAC %pEA vid %u vni %u", + nl_msg_type_to_str(type), + nl_family_to_str(req.ndm.ndm_family), ifp->name, + ifp->ifindex, mac, vid, vni); + + return netlink_request(&zns->netlink_cmd, &req); +} + +/* + * Read the FDB entry for one MAC (and VLAN, if given) on a bridge. + */ +int netlink_macfdb_read_specific_mac(struct zebra_ns *zns, + struct interface *br_if, + const struct ethaddr *mac, vlanid_t vid) +{ + int ret = 0; + struct zebra_dplane_info dp_info; + + zebra_dplane_info_from_zns(&dp_info, zns, true /*is_cmd*/); + + /* Ask for this one MAC on the bridge; no VNI and no NTF_* flags. */ + ret = netlink_request_specific_mac(zns, AF_BRIDGE, RTM_GETNEIGH, br_if, + mac, vid, 0, 0); + if (ret < 0) + return ret; + + ret = netlink_parse_info(netlink_macfdb_table, &zns->netlink_cmd, + &dp_info, 1, false); + + return ret; +} + +/* + * Read the all-zero-MAC FDB entry for @vni on a VxLAN interface. Returns 0 + * without sending anything when IS_ZEBRA_VXLAN_IF_VNI() holds for @ifp. + */ +int netlink_macfdb_read_mcast_for_vni(struct zebra_ns *zns, + struct interface *ifp, vni_t vni) +{ + struct zebra_if *zif; + struct ethaddr mac = {.octet = {0}}; + struct zebra_dplane_info dp_info; + int ret = 0; + + zif = ifp->info; + if (IS_ZEBRA_VXLAN_IF_VNI(zif)) + return 0; + + zebra_dplane_info_from_zns(&dp_info, zns, true /*is_cmd*/); + + /* Get specific FDB entry for BUM handling, if any */ + ret = netlink_request_specific_mac(zns, AF_BRIDGE, RTM_GETNEIGH, ifp, + &mac, 0, vni, NTF_SELF); + if (ret < 0) + return ret; + + ret = netlink_parse_info(netlink_macfdb_table, &zns->netlink_cmd, + &dp_info, 1, false); + + return ret; +} + +/* + * Netlink-specific handler for MAC updates using dataplane context object. 
+ */ +ssize_t netlink_macfdb_update_ctx(struct zebra_dplane_ctx *ctx, void *data, + size_t datalen) +{ + struct ipaddr vtep_ip; + vlanid_t vid; + ssize_t total; + int cmd; + uint8_t flags; + uint16_t state; + uint32_t nhg_id; + uint32_t update_flags; + bool nfy = false; + uint8_t nfy_flags = 0; + int proto = RTPROT_ZEBRA; + + if (dplane_ctx_get_type(ctx) != 0) + proto = zebra2proto(dplane_ctx_get_type(ctx)); + + cmd = dplane_ctx_get_op(ctx) == DPLANE_OP_MAC_INSTALL + ? RTM_NEWNEIGH : RTM_DELNEIGH; + + flags = NTF_MASTER; + state = NUD_REACHABLE; + + update_flags = dplane_ctx_mac_get_update_flags(ctx); + if (update_flags & DPLANE_MAC_REMOTE) { + flags |= NTF_SELF; + if (dplane_ctx_mac_is_sticky(ctx)) { + /* NUD_NOARP prevents the entry from expiring */ + state |= NUD_NOARP; + /* sticky the entry from moving */ + flags |= NTF_STICKY; + } else { + flags |= NTF_EXT_LEARNED; + } + /* if it was static-local previously we need to clear the + * notify flags on replace with remote + */ + if (update_flags & DPLANE_MAC_WAS_STATIC) + nfy = true; + } else { + /* local mac */ + if (update_flags & DPLANE_MAC_SET_STATIC) { + nfy_flags |= FDB_NOTIFY_BIT; + state |= NUD_NOARP; + } + + if (update_flags & DPLANE_MAC_SET_INACTIVE) + nfy_flags |= FDB_NOTIFY_INACTIVE_BIT; + + nfy = true; + } + + nhg_id = dplane_ctx_mac_get_nhg_id(ctx); + vtep_ip.ipaddr_v4 = *(dplane_ctx_mac_get_vtep_ip(ctx)); + SET_IPADDR_V4(&vtep_ip); + + if (IS_ZEBRA_DEBUG_KERNEL) { + char vid_buf[20]; + const struct ethaddr *mac = dplane_ctx_mac_get_addr(ctx); + + vid = dplane_ctx_mac_get_vlan(ctx); + if (vid > 0) + snprintf(vid_buf, sizeof(vid_buf), " VLAN %u", vid); + else + vid_buf[0] = '\0'; + + zlog_debug( + "Tx %s family %s IF %s(%u)%s %sMAC %pEA dst %pIA nhg %u%s%s%s%s%s", + nl_msg_type_to_str(cmd), nl_family_to_str(AF_BRIDGE), + dplane_ctx_get_ifname(ctx), dplane_ctx_get_ifindex(ctx), + vid_buf, dplane_ctx_mac_is_sticky(ctx) ? "sticky " : "", + mac, &vtep_ip, nhg_id, + (update_flags & DPLANE_MAC_REMOTE) ? 
" rem" : "", + (update_flags & DPLANE_MAC_WAS_STATIC) ? " clr_sync" + : "", + (update_flags & DPLANE_MAC_SET_STATIC) ? " static" : "", + (update_flags & DPLANE_MAC_SET_INACTIVE) ? " inactive" + : "", + nfy ? " nfy" : ""); + } + + total = netlink_neigh_update_msg_encode( + ctx, cmd, (const void *)dplane_ctx_mac_get_addr(ctx), ETH_ALEN, + &vtep_ip, true, AF_BRIDGE, 0, flags, state, nhg_id, nfy, + nfy_flags, false /*ext*/, 0 /*ext_flags*/, data, datalen, + proto); + + return total; +} + +/* + * In the event the kernel deletes ipv4 link-local neighbor entries created for + * 5549 support, re-install them. With @handle_failed set, an entry in state + * NUD_FAILED is logged and left alone instead of being re-installed. + */ +static void netlink_handle_5549(struct ndmsg *ndm, struct zebra_if *zif, + struct interface *ifp, struct ipaddr *ip, + bool handle_failed) +{ + if (ndm->ndm_family != AF_INET) + return; + + if (!zif->v6_2_v4_ll_neigh_entry) + return; + + if (ipv4_ll.s_addr != ip->ip._v4_addr.s_addr) + return; + + if (handle_failed && ndm->ndm_state & NUD_FAILED) { + zlog_info("Neighbor Entry for %s has entered a failed state, not reinstalling", + ifp->name); + return; + } + + if_nbr_ipv6ll_to_ipv4ll_neigh_update(ifp, &zif->v6_2_v4_ll_addr6, true); +} + +/* Neighbor states treated as usable, and the subset treated as locally active */ +#define NUD_VALID \ + (NUD_PERMANENT | NUD_NOARP | NUD_REACHABLE | NUD_PROBE | NUD_STALE \ + | NUD_DELAY) +#define NUD_LOCAL_ACTIVE \ + (NUD_PERMANENT | NUD_NOARP | NUD_REACHABLE) + +static int netlink_nbr_entry_state_to_zclient(int nbr_state) +{ + /* an exact match is done between + * - netlink neighbor state values: NUD_XXX (see in linux/neighbour.h) + * - zclient neighbor state values: ZEBRA_NEIGH_STATE_XXX + * (see in lib/zclient.h) + */ + return nbr_state; +} + +/* + * Process one AF_INET/AF_INET6 neighbor message: re-install an RFC 5549 + * entry the kernel deleted or invalidated, call zsend_nhrp_neighbor_notify() + * when the link layer 
address is absent or IPv4-sized, then (except for + * RTM_GETNEIGH) update the zebra neighbor database and the zebra_vxlan + * handlers. Always returns 0. + */ +static int netlink_ipneigh_change(struct nlmsghdr *h, int len, ns_id_t ns_id) +{ + struct ndmsg *ndm; + struct interface *ifp; + struct zebra_if *zif; + struct rtattr *tb[NDA_MAX + 1]; + struct interface *link_if; + struct ethaddr mac; + struct ipaddr ip; + char buf[ETHER_ADDR_STRLEN]; + int mac_present = 0; + bool is_ext; + bool is_router; + bool local_inactive; + 
uint32_t ext_flags = 0; + bool dp_static = false; + int l2_len = 0; + int cmd; + + ndm = NLMSG_DATA(h); + + /* The interface should exist. */ + ifp = if_lookup_by_index_per_ns(zebra_ns_lookup(ns_id), + ndm->ndm_ifindex); + if (!ifp || !ifp->info) + return 0; + + zif = (struct zebra_if *)ifp->info; + + /* Parse attributes and extract fields of interest. */ + netlink_parse_rtattr(tb, NDA_MAX, NDA_RTA(ndm), len); + + if (!tb[NDA_DST]) { + zlog_debug("%s family %s IF %s(%u) vrf %s(%u) - no DST", + nl_msg_type_to_str(h->nlmsg_type), + nl_family_to_str(ndm->ndm_family), ifp->name, + ndm->ndm_ifindex, ifp->vrf->name, ifp->vrf->vrf_id); + return 0; + } + + memset(&ip, 0, sizeof(ip)); + ip.ipa_type = (ndm->ndm_family == AF_INET) ? IPADDR_V4 : IPADDR_V6; + memcpy(&ip.ip.addr, RTA_DATA(tb[NDA_DST]), RTA_PAYLOAD(tb[NDA_DST])); + + /* if kernel deletes our rfc5549 neighbor entry, re-install it */ + if (h->nlmsg_type == RTM_DELNEIGH && (ndm->ndm_state & NUD_PERMANENT)) { + netlink_handle_5549(ndm, zif, ifp, &ip, false); + if (IS_ZEBRA_DEBUG_KERNEL) + zlog_debug( + " Neighbor Entry Received is a 5549 entry, finished"); + return 0; + } + + /* if kernel marks our rfc5549 neighbor entry invalid, re-install it */ + if (h->nlmsg_type == RTM_NEWNEIGH && !(ndm->ndm_state & NUD_VALID)) + netlink_handle_5549(ndm, zif, ifp, &ip, true); + + /* we send link layer information to client: + * - nlmsg_type = RTM_DELNEIGH|NEWNEIGH|GETNEIGH + * - struct ipaddr ( for DEL and GET) + * - struct ethaddr mac; (for NEW) + */ + if (h->nlmsg_type == RTM_NEWNEIGH) + cmd = ZEBRA_NHRP_NEIGH_ADDED; + else if (h->nlmsg_type == RTM_GETNEIGH) + cmd = ZEBRA_NHRP_NEIGH_GET; + else if (h->nlmsg_type == RTM_DELNEIGH) + cmd = ZEBRA_NHRP_NEIGH_REMOVED; + else { + zlog_debug("%s(): unknown nlmsg type %u", __func__, + h->nlmsg_type); + return 0; + } + if (tb[NDA_LLADDR]) { + /* copy LLADDR information */ + l2_len = RTA_PAYLOAD(tb[NDA_LLADDR]); + } + if (l2_len == IPV4_MAX_BYTELEN || l2_len == 0) { + union sockunion 
link_layer_ipv4; + + if (l2_len) { + sockunion_family(&link_layer_ipv4) = AF_INET; + memcpy((void *)sockunion_get_addr(&link_layer_ipv4), + RTA_DATA(tb[NDA_LLADDR]), l2_len); + } else + sockunion_family(&link_layer_ipv4) = AF_UNSPEC; + zsend_nhrp_neighbor_notify( + cmd, ifp, &ip, + netlink_nbr_entry_state_to_zclient(ndm->ndm_state), + &link_layer_ipv4); + } + + if (h->nlmsg_type == RTM_GETNEIGH) + return 0; + + /* The neighbor is present on an SVI. From this, we locate the + * underlying + * bridge because we're only interested in neighbors on a VxLAN bridge. + * The bridge is located based on the nature of the SVI: + * (a) In the case of a VLAN-aware bridge, the SVI is a L3 VLAN + * interface + * and is linked to the bridge + * (b) In the case of a VLAN-unaware bridge, the SVI is the bridge + * interface + * itself + */ + if (IS_ZEBRA_IF_VLAN(ifp)) { + link_if = if_lookup_by_index_per_ns(zebra_ns_lookup(ns_id), + zif->link_ifindex); + if (!link_if) + return 0; + } else if (IS_ZEBRA_IF_BRIDGE(ifp)) + link_if = ifp; + else { + link_if = NULL; + if (IS_ZEBRA_DEBUG_KERNEL) + zlog_debug( + " Neighbor Entry received is not on a VLAN or a BRIDGE, ignoring"); + } + + memset(&mac, 0, sizeof(mac)); + if (h->nlmsg_type == RTM_NEWNEIGH) { + if (tb[NDA_LLADDR]) { + if (RTA_PAYLOAD(tb[NDA_LLADDR]) != ETH_ALEN) { + if (IS_ZEBRA_DEBUG_KERNEL) + zlog_debug( + "%s family %s IF %s(%u) vrf %s(%u) - LLADDR is not MAC, len %lu", + nl_msg_type_to_str( + h->nlmsg_type), + nl_family_to_str( + ndm->ndm_family), + ifp->name, ndm->ndm_ifindex, + ifp->vrf->name, + ifp->vrf->vrf_id, + (unsigned long)RTA_PAYLOAD( + tb[NDA_LLADDR])); + return 0; + } + + mac_present = 1; + memcpy(&mac, RTA_DATA(tb[NDA_LLADDR]), ETH_ALEN); + } + + is_ext = !!(ndm->ndm_flags & NTF_EXT_LEARNED); + is_router = !!(ndm->ndm_flags & NTF_ROUTER); + + if (tb[NDA_EXT_FLAGS]) { + ext_flags = *(uint32_t *)RTA_DATA(tb[NDA_EXT_FLAGS]); + if (ext_flags & NTF_E_MH_PEER_SYNC) + dp_static = true; + } + + if (IS_ZEBRA_DEBUG_KERNEL) 
+ zlog_debug( + "Rx %s family %s IF %s(%u) vrf %s(%u) IP %pIA MAC %s state 0x%x flags 0x%x ext_flags 0x%x", + nl_msg_type_to_str(h->nlmsg_type), + nl_family_to_str(ndm->ndm_family), ifp->name, + ndm->ndm_ifindex, ifp->vrf->name, + ifp->vrf->vrf_id, &ip, + mac_present + ? prefix_mac2str(&mac, buf, sizeof(buf)) + : "", + ndm->ndm_state, ndm->ndm_flags, ext_flags); + + /* If the neighbor state is valid for use, process as an add or + * update + * else process as a delete. Note that the delete handling may + * result + * in re-adding the neighbor if it is a valid "remote" neighbor. + */ + if (ndm->ndm_state & NUD_VALID) { + if (zebra_evpn_mh_do_adv_reachable_neigh_only()) + local_inactive = + !(ndm->ndm_state & NUD_LOCAL_ACTIVE); + else + /* If EVPN-MH is not enabled we treat STALE + * neighbors as locally-active and advertise + * them + */ + local_inactive = false; + + /* Add local neighbors to the l3 interface database */ + if (is_ext) + zebra_neigh_del(ifp, &ip); + else + zebra_neigh_add(ifp, &ip, &mac); + + if (link_if) + zebra_vxlan_handle_kernel_neigh_update( + ifp, link_if, &ip, &mac, ndm->ndm_state, + is_ext, is_router, local_inactive, + dp_static); + return 0; + } + + + zebra_neigh_del(ifp, &ip); + if (link_if) + zebra_vxlan_handle_kernel_neigh_del(ifp, link_if, &ip); + return 0; + } + + if (IS_ZEBRA_DEBUG_KERNEL) + zlog_debug("Rx %s family %s IF %s(%u) vrf %s(%u) IP %pIA", + nl_msg_type_to_str(h->nlmsg_type), + nl_family_to_str(ndm->ndm_family), ifp->name, + ndm->ndm_ifindex, ifp->vrf->name, ifp->vrf->vrf_id, + &ip); + + /* Process the delete - it may result in re-adding the neighbor if it is + * a valid "remote" neighbor. + */ + zebra_neigh_del(ifp, &ip); + if (link_if) + zebra_vxlan_handle_kernel_neigh_del(ifp, link_if, &ip); + + return 0; +} + +/* + * Parse callback for IP neighbor reads: only RTM_NEWNEIGH messages of + * family AF_INET or AF_INET6 are passed on to netlink_neigh_change(). + * Returns -1 when the message is shorter than an ndmsg header. 
@startup is + * not used here. + */ +static int netlink_neigh_table(struct nlmsghdr *h, ns_id_t ns_id, int startup) +{ + int len; + struct ndmsg *ndm; + + if (h->nlmsg_type != RTM_NEWNEIGH) + return 0; + + /* Length validity. 
*/ + len = h->nlmsg_len - NLMSG_LENGTH(sizeof(struct ndmsg)); + if (len < 0) + return -1; + + /* We are interested only in AF_INET or AF_INET6 notifications. */ + ndm = NLMSG_DATA(h); + if (ndm->ndm_family != AF_INET && ndm->ndm_family != AF_INET6) + return 0; + + /* netlink_neigh_change() takes the namespace id as its second + * argument and works out the attribute length itself, so pass ns_id + * here rather than len. + */ + return netlink_neigh_change(h, ns_id); +} + +/* Request for IP neighbor information from the kernel */ +static int netlink_request_neigh(struct nlsock *netlink_cmd, int family, + int type, ifindex_t ifindex) +{ + struct { + struct nlmsghdr n; + struct ndmsg ndm; + char buf[256]; + } req; + + /* Form the request, specifying filter (rtattr) if needed. */ + memset(&req, 0, sizeof(req)); + req.n.nlmsg_type = type; + req.n.nlmsg_flags = NLM_F_ROOT | NLM_F_MATCH | NLM_F_REQUEST; + req.n.nlmsg_len = NLMSG_LENGTH(sizeof(struct ndmsg)); + req.ndm.ndm_family = family; + if (ifindex) + nl_attr_put32(&req.n, sizeof(req), NDA_IFINDEX, ifindex); + + return netlink_request(netlink_cmd, &req); +} + +/* + * IP Neighbor table read using netlink interface. This is invoked + * at startup. + */ +int netlink_neigh_read(struct zebra_ns *zns) +{ + int ret; + struct zebra_dplane_info dp_info; + + zebra_dplane_info_from_zns(&dp_info, zns, true /*is_cmd*/); + + /* Get IP neighbor table. */ + ret = netlink_request_neigh(&zns->netlink_cmd, AF_UNSPEC, RTM_GETNEIGH, + 0); + if (ret < 0) + return ret; + ret = netlink_parse_info(netlink_neigh_table, &zns->netlink_cmd, + &dp_info, 0, true); + + return ret; +} + +/* + * IP Neighbor table read using netlink interface. This is for a specific + * VLAN device. 
+ */ +int netlink_neigh_read_for_vlan(struct zebra_ns *zns, struct interface *vlan_if) +{ + int ret = 0; + struct zebra_dplane_info dp_info; + + zebra_dplane_info_from_zns(&dp_info, zns, true /*is_cmd*/); + + ret = netlink_request_neigh(&zns->netlink_cmd, AF_UNSPEC, RTM_GETNEIGH, + vlan_if->ifindex); /* restrict the request to this VLAN device */ + if (ret < 0) + return ret; + ret = netlink_parse_info(netlink_neigh_table, &zns->netlink_cmd, + &dp_info, 0, false); + + return ret; +} + +/* + * Request the IP neighbor table entry for one specific IP on a VLAN (SVI) + * device, using the netlink interface. The address family of the request + * follows the type of @ip. + */ +static int netlink_request_specific_neigh_in_vlan(struct zebra_ns *zns, + int type, + const struct ipaddr *ip, + ifindex_t ifindex) +{ + struct { + struct nlmsghdr n; + struct ndmsg ndm; + char buf[256]; + } req; + int ipa_len; + + /* Form the request, specifying filter (rtattr) if needed. */ + memset(&req, 0, sizeof(req)); + req.n.nlmsg_len = NLMSG_LENGTH(sizeof(struct ndmsg)); + req.n.nlmsg_flags = NLM_F_REQUEST; + req.n.nlmsg_type = type; /* RTM_GETNEIGH */ + req.ndm.ndm_ifindex = ifindex; + + if (IS_IPADDR_V4(ip)) { + ipa_len = IPV4_MAX_BYTELEN; + req.ndm.ndm_family = AF_INET; + + } else { + ipa_len = IPV6_MAX_BYTELEN; + req.ndm.ndm_family = AF_INET6; + } + + nl_attr_put(&req.n, sizeof(req), NDA_DST, &ip->ip.addr, ipa_len); + + if (IS_ZEBRA_DEBUG_KERNEL) + zlog_debug("%s: Tx %s family %s IF %u IP %pIA flags 0x%x", + __func__, nl_msg_type_to_str(type), + nl_family_to_str(req.ndm.ndm_family), ifindex, ip, + req.n.nlmsg_flags); + + return netlink_request(&zns->netlink_cmd, &req); +} + +int netlink_neigh_read_specific_ip(const struct ipaddr *ip, + struct interface *vlan_if) +{ /* Read the neighbor entry for @ip on @vlan_if through the command + * socket of the namespace reached via the interface's VRF. 
Returns the + * negative request result on failure, otherwise the result of + * netlink_parse_info(). + */ + int ret = 0; + struct zebra_ns *zns; + struct zebra_vrf *zvrf = vlan_if->vrf->info; + struct zebra_dplane_info dp_info; + + zns = zvrf->zns; + + zebra_dplane_info_from_zns(&dp_info, zns, true /*is_cmd*/); + + if (IS_ZEBRA_DEBUG_KERNEL) + zlog_debug("%s: neigh request IF %s(%u) IP %pIA vrf %s(%u)", + __func__, vlan_if->name, 
vlan_if->ifindex, ip, + vlan_if->vrf->name, vlan_if->vrf->vrf_id); + + ret = netlink_request_specific_neigh_in_vlan(zns, RTM_GETNEIGH, ip, + vlan_if->ifindex); + if (ret < 0) + return ret; + + ret = netlink_parse_info(netlink_neigh_table, &zns->netlink_cmd, + &dp_info, 1, false); + + return ret; +} + +int netlink_neigh_change(struct nlmsghdr *h, ns_id_t ns_id) +{ /* Entry point for RTM_NEWNEIGH, RTM_DELNEIGH and RTM_GETNEIGH messages. + * AF_BRIDGE messages go to netlink_macfdb_change(); AF_INET and + * AF_INET6 messages whose ndm_type is RTN_UNICAST go to + * netlink_ipneigh_change(). Other message types and non-unicast + * entries are ignored (0). A message shorter than an ndmsg header + * yields -1. + */ + int len; + struct ndmsg *ndm; + + if (!(h->nlmsg_type == RTM_NEWNEIGH || h->nlmsg_type == RTM_DELNEIGH + || h->nlmsg_type == RTM_GETNEIGH)) + return 0; + + /* Length validity. */ + len = h->nlmsg_len - NLMSG_LENGTH(sizeof(struct ndmsg)); + if (len < 0) { + zlog_err( + "%s: Message received from netlink is of a broken size %d %zu", + __func__, h->nlmsg_len, + (size_t)NLMSG_LENGTH(sizeof(struct ndmsg))); + return -1; + } + + /* Is this a notification for the MAC FDB or IP neighbor table? */ + ndm = NLMSG_DATA(h); + if (ndm->ndm_family == AF_BRIDGE) + return netlink_macfdb_change(h, len, ns_id); + + if (ndm->ndm_type != RTN_UNICAST) + return 0; + + if (ndm->ndm_family == AF_INET || ndm->ndm_family == AF_INET6) + return netlink_ipneigh_change(h, len, ns_id); + else { + flog_warn( + EC_ZEBRA_UNKNOWN_FAMILY, + "Invalid address family: %u received from kernel neighbor change: %s", + ndm->ndm_family, nl_msg_type_to_str(h->nlmsg_type)); + return 0; + } + + return 0; /* not reached: both branches above return */ +} + +/* + * Utility neighbor-update function, using info from dplane context. 
+ */ +static ssize_t netlink_neigh_update_ctx(const struct zebra_dplane_ctx *ctx, + int cmd, void *buf, size_t buflen) +{ + const struct ipaddr *ip; + const struct ethaddr *mac = NULL; + const struct ipaddr *link_ip = NULL; + const void *link_ptr = NULL; + char buf2[ETHER_ADDR_STRLEN]; + + int llalen; + uint8_t flags; + uint16_t state; + uint8_t family; + uint32_t update_flags; + uint32_t ext_flags = 0; + bool ext = false; + int proto = RTPROT_ZEBRA; + + if (dplane_ctx_get_type(ctx) != 0) + proto = zebra2proto(dplane_ctx_get_type(ctx)); + + ip = dplane_ctx_neigh_get_ipaddr(ctx); + + if (dplane_ctx_get_op(ctx) == DPLANE_OP_NEIGH_IP_INSTALL + || dplane_ctx_get_op(ctx) == DPLANE_OP_NEIGH_IP_DELETE) { + /* The link layer address is itself an IP address */ + link_ip = dplane_ctx_neigh_get_link_ip(ctx); + llalen = IPADDRSZ(link_ip); + link_ptr = (const void *)&(link_ip->ip.addr); + ipaddr2str(link_ip, buf2, sizeof(buf2)); + } else { + mac = dplane_ctx_neigh_get_mac(ctx); + llalen = ETH_ALEN; + link_ptr = (const void *)mac; + /* NOTE(review): only the pointer used for logging is cleared + * for an all-zero MAC; link_ptr still refers to it, so + * NDA_LLADDR is still encoded - confirm this is intended. + */ + if (is_zero_mac(mac)) + mac = NULL; + if (mac) + prefix_mac2str(mac, buf2, sizeof(buf2)); + else + snprintf(buf2, sizeof(buf2), "null"); + } + update_flags = dplane_ctx_neigh_get_update_flags(ctx); + flags = neigh_flags_to_netlink(dplane_ctx_neigh_get_flags(ctx)); + state = neigh_state_to_netlink(dplane_ctx_neigh_get_state(ctx)); + + family = IS_IPADDR_V4(ip) ? 
AF_INET : AF_INET6; + + if (update_flags & DPLANE_NEIGH_REMOTE) { + flags |= NTF_EXT_LEARNED; + /* if it was static-local previously we need to clear the + * ext flags on replace with remote + */ + if (update_flags & DPLANE_NEIGH_WAS_STATIC) + ext = true; + } else if (!(update_flags & DPLANE_NEIGH_NO_EXTENSION)) { + ext = true; + /* local neigh */ + if (update_flags & DPLANE_NEIGH_SET_STATIC) + ext_flags |= NTF_E_MH_PEER_SYNC; + } + if (IS_ZEBRA_DEBUG_KERNEL) + zlog_debug( + "Tx %s family %s IF %s(%u) Neigh %pIA %s %s flags 0x%x state 0x%x %sext_flags 0x%x", + nl_msg_type_to_str(cmd), nl_family_to_str(family), + dplane_ctx_get_ifname(ctx), dplane_ctx_get_ifindex(ctx), + ip, link_ip ? "Link" : "MAC", buf2, flags, state, + ext ? "ext " : "", ext_flags); + + return netlink_neigh_update_msg_encode( + ctx, cmd, link_ptr, llalen, ip, true, family, RTN_UNICAST, + flags, state, 0 /*nhg*/, false /*nfy*/, 0 /*nfy_flags*/, ext, + ext_flags, buf, buflen, proto); +} + +/* + * Encode an RTM_SETNEIGHTBL request that sets the per-interface app, mcast + * and ucast probe counts of the neighbor table for the context's address + * family. Returns 0 when the message does not fit in @data, otherwise the + * aligned message length. + */ +static int netlink_neigh_table_update_ctx(const struct zebra_dplane_ctx *ctx, + void *data, size_t datalen) +{ + struct { + struct nlmsghdr n; + struct ndtmsg ndtm; + char buf[]; + } *req = data; + struct rtattr *nest; + const char *tbl_name; + uint8_t family; + ifindex_t idx; + uint32_t val; + + if (datalen < sizeof(*req)) + return 0; + memset(req, 0, sizeof(*req)); + family = dplane_ctx_neightable_get_family(ctx); + idx = dplane_ctx_get_ifindex(ctx); + + req->n.nlmsg_len = NLMSG_LENGTH(sizeof(struct ndtmsg)); + req->n.nlmsg_flags = NLM_F_REQUEST | NLM_F_REPLACE; + req->n.nlmsg_type = RTM_SETNEIGHTBL; + req->ndtm.ndtm_family = family; + + /* A fixed length of 10 only fits "arp_cache" plus its terminator and + * cuts "ndisc_cache" short, so size the attribute from the name + * actually chosen and send it NUL-terminated. + */ + tbl_name = (family == AF_INET) ? 
"arp_cache" : "ndisc_cache"; + if (!nl_attr_put(&req->n, datalen, NDTA_NAME, tbl_name, + strlen(tbl_name) + 1)) + return 0; + nest = nl_attr_nest(&req->n, datalen, NDTA_PARMS); + if (nest == NULL) + return 0; + if (!nl_attr_put(&req->n, datalen, NDTPA_IFINDEX, &idx, sizeof(idx))) + return 0; + val = dplane_ctx_neightable_get_app_probes(ctx); + if (!nl_attr_put(&req->n, datalen, NDTPA_APP_PROBES, &val, sizeof(val))) + return 0; + val = dplane_ctx_neightable_get_mcast_probes(ctx); + if (!nl_attr_put(&req->n, datalen, NDTPA_MCAST_PROBES, &val, + sizeof(val))) + return 0; + val = dplane_ctx_neightable_get_ucast_probes(ctx); + if (!nl_attr_put(&req->n, datalen, NDTPA_UCAST_PROBES, &val, + sizeof(val))) + return 0; + nl_attr_nest_end(&req->n, nest); + + return NLMSG_ALIGN(req->n.nlmsg_len); +} + +/* + * Encoder passed to netlink_batch_add_msg() for neighbor operations: picks + * the encoding helper from the dataplane op. Ops this encoder does not + * handle yield -1. Every op is listed explicitly and there is no default + * label. + */ +static ssize_t netlink_neigh_msg_encoder(struct zebra_dplane_ctx *ctx, + void *buf, size_t buflen) +{ + ssize_t ret = 0; + + switch (dplane_ctx_get_op(ctx)) { + case DPLANE_OP_NEIGH_INSTALL: + case DPLANE_OP_NEIGH_UPDATE: + case DPLANE_OP_NEIGH_DISCOVER: + case DPLANE_OP_NEIGH_IP_INSTALL: + ret = netlink_neigh_update_ctx(ctx, RTM_NEWNEIGH, buf, buflen); + break; + case DPLANE_OP_NEIGH_DELETE: + case DPLANE_OP_NEIGH_IP_DELETE: + ret = netlink_neigh_update_ctx(ctx, RTM_DELNEIGH, buf, buflen); + break; + case DPLANE_OP_VTEP_ADD: + ret = netlink_vxlan_flood_update_ctx(ctx, RTM_NEWNEIGH, buf, + buflen); + break; + case DPLANE_OP_VTEP_DELETE: + ret = netlink_vxlan_flood_update_ctx(ctx, RTM_DELNEIGH, buf, + buflen); + break; + case DPLANE_OP_NEIGH_TABLE_UPDATE: + ret = netlink_neigh_table_update_ctx(ctx, buf, buflen); + break; + case DPLANE_OP_ROUTE_INSTALL: + case DPLANE_OP_ROUTE_UPDATE: + case DPLANE_OP_ROUTE_DELETE: + case DPLANE_OP_ROUTE_NOTIFY: + case DPLANE_OP_NH_INSTALL: + case DPLANE_OP_NH_UPDATE: + case DPLANE_OP_NH_DELETE: + case DPLANE_OP_LSP_INSTALL: + case DPLANE_OP_LSP_UPDATE: + case DPLANE_OP_LSP_DELETE: + case DPLANE_OP_LSP_NOTIFY: + case DPLANE_OP_PW_INSTALL: + case DPLANE_OP_PW_UNINSTALL: + case DPLANE_OP_SYS_ROUTE_ADD: + case DPLANE_OP_SYS_ROUTE_DELETE: + 
case DPLANE_OP_ADDR_INSTALL: + case DPLANE_OP_ADDR_UNINSTALL: + case DPLANE_OP_MAC_INSTALL: + case DPLANE_OP_MAC_DELETE: + case DPLANE_OP_RULE_ADD: + case DPLANE_OP_RULE_DELETE: + case DPLANE_OP_RULE_UPDATE: + case DPLANE_OP_BR_PORT_UPDATE: + case DPLANE_OP_IPTABLE_ADD: + case DPLANE_OP_IPTABLE_DELETE: + case DPLANE_OP_IPSET_ADD: + case DPLANE_OP_IPSET_DELETE: + case DPLANE_OP_IPSET_ENTRY_ADD: + case DPLANE_OP_IPSET_ENTRY_DELETE: + case DPLANE_OP_GRE_SET: + case DPLANE_OP_INTF_ADDR_ADD: + case DPLANE_OP_INTF_ADDR_DEL: + case DPLANE_OP_INTF_NETCONFIG: + case DPLANE_OP_INTF_INSTALL: + case DPLANE_OP_INTF_UPDATE: + case DPLANE_OP_INTF_DELETE: + case DPLANE_OP_TC_QDISC_INSTALL: + case DPLANE_OP_TC_QDISC_UNINSTALL: + case DPLANE_OP_TC_CLASS_ADD: + case DPLANE_OP_TC_CLASS_DELETE: + case DPLANE_OP_TC_CLASS_UPDATE: + case DPLANE_OP_TC_FILTER_ADD: + case DPLANE_OP_TC_FILTER_DELETE: + case DPLANE_OP_TC_FILTER_UPDATE: + case DPLANE_OP_NONE: + case DPLANE_OP_STARTUP_STAGE: + ret = -1; + } + + return ret; +} + +/* + * Update MAC, using dataplane context object. + */ + +enum netlink_msg_status netlink_put_mac_update_msg(struct nl_batch *bth, + struct zebra_dplane_ctx *ctx) +{ + return netlink_batch_add_msg(bth, ctx, netlink_macfdb_update_ctx, + false); +} + +/* + * Update neighbor, VTEP flood entry or neighbor table, using dataplane + * context object. + */ +enum netlink_msg_status +netlink_put_neigh_update_msg(struct nl_batch *bth, struct zebra_dplane_ctx *ctx) +{ + return netlink_batch_add_msg(bth, ctx, netlink_neigh_msg_encoder, + false); +} + +/* + * MPLS label forwarding table change via netlink interface, using dataplane + * context information. 
+ */ +ssize_t netlink_mpls_multipath_msg_encode(int cmd, struct zebra_dplane_ctx *ctx, + void *buf, size_t buflen) +{ + mpls_lse_t lse; + const struct nhlfe_list_head *head; + const struct zebra_nhlfe *nhlfe; + struct nexthop *nexthop = NULL; + unsigned int nexthop_num; + const char *routedesc; + int route_type; + struct prefix p = {0}; + struct nlsock *nl = + kernel_netlink_nlsock_lookup(dplane_ctx_get_ns_sock(ctx)); + + struct { + struct nlmsghdr n; + struct rtmsg r; + char buf[0]; + } *req = buf; + + if (buflen < sizeof(*req)) + return 0; + + memset(req, 0, sizeof(*req)); + + /* + * Count # nexthops so we can decide whether to use singlepath + * or multipath case. + */ + nexthop_num = 0; + head = dplane_ctx_get_nhlfe_list(ctx); + frr_each(nhlfe_list_const, head, nhlfe) { + nexthop = nhlfe->nexthop; + if (!nexthop) + continue; + if (cmd == RTM_NEWROUTE) { + /* Count all selected NHLFEs */ + if (CHECK_FLAG(nhlfe->flags, NHLFE_FLAG_SELECTED) + && CHECK_FLAG(nexthop->flags, NEXTHOP_FLAG_ACTIVE)) + nexthop_num++; + } else { /* DEL */ + /* Count all installed NHLFEs */ + if (CHECK_FLAG(nhlfe->flags, NHLFE_FLAG_INSTALLED) + && CHECK_FLAG(nexthop->flags, NEXTHOP_FLAG_FIB)) + nexthop_num++; + } + } + + if ((nexthop_num == 0) || + (!dplane_ctx_get_best_nhlfe(ctx) && (cmd != RTM_DELROUTE))) + return 0; + + req->n.nlmsg_len = NLMSG_LENGTH(sizeof(struct rtmsg)); + req->n.nlmsg_flags = NLM_F_CREATE | NLM_F_REQUEST; + req->n.nlmsg_type = cmd; + req->n.nlmsg_pid = nl->snl.nl_pid; + + req->r.rtm_family = AF_MPLS; + req->r.rtm_table = rt_table_main_id; + req->r.rtm_dst_len = MPLS_LABEL_LEN_BITS; + req->r.rtm_scope = RT_SCOPE_UNIVERSE; + req->r.rtm_type = RTN_UNICAST; + + if (cmd == RTM_NEWROUTE) { + /* We do a replace to handle update. 
*/ + req->n.nlmsg_flags |= NLM_F_REPLACE; + + /* set the protocol value if installing */ + route_type = re_type_from_lsp_type( + dplane_ctx_get_best_nhlfe(ctx)->type); + req->r.rtm_protocol = zebra2proto(route_type); + } + + /* Fill destination */ + lse = mpls_lse_encode(dplane_ctx_get_in_label(ctx), 0, 0, 1); + if (!nl_attr_put(&req->n, buflen, RTA_DST, &lse, sizeof(mpls_lse_t))) + return 0; + + /* Fill nexthops (paths) based on single-path or multipath. The paths + * chosen depend on the operation. + */ + if (nexthop_num == 1) { + routedesc = "single-path"; + _netlink_mpls_debug(cmd, dplane_ctx_get_in_label(ctx), + routedesc); + + nexthop_num = 0; + frr_each(nhlfe_list_const, head, nhlfe) { + nexthop = nhlfe->nexthop; + if (!nexthop) + continue; + + if ((cmd == RTM_NEWROUTE + && (CHECK_FLAG(nhlfe->flags, NHLFE_FLAG_SELECTED) + && CHECK_FLAG(nexthop->flags, + NEXTHOP_FLAG_ACTIVE))) + || (cmd == RTM_DELROUTE + && (CHECK_FLAG(nhlfe->flags, + NHLFE_FLAG_INSTALLED) + && CHECK_FLAG(nexthop->flags, + NEXTHOP_FLAG_FIB)))) { + /* Add the gateway */ + if (!_netlink_mpls_build_singlepath( + &p, routedesc, nhlfe, &req->n, + &req->r, buflen, cmd)) + return false; + + nexthop_num++; + break; + } + } + } else { /* Multipath case */ + struct rtattr *nest; + const union g_addr *src1 = NULL; + + nest = nl_attr_nest(&req->n, buflen, RTA_MULTIPATH); + if (!nest) + return 0; + + routedesc = "multipath"; + _netlink_mpls_debug(cmd, dplane_ctx_get_in_label(ctx), + routedesc); + + nexthop_num = 0; + frr_each(nhlfe_list_const, head, nhlfe) { + nexthop = nhlfe->nexthop; + if (!nexthop) + continue; + + if ((cmd == RTM_NEWROUTE + && (CHECK_FLAG(nhlfe->flags, NHLFE_FLAG_SELECTED) + && CHECK_FLAG(nexthop->flags, + NEXTHOP_FLAG_ACTIVE))) + || (cmd == RTM_DELROUTE + && (CHECK_FLAG(nhlfe->flags, + NHLFE_FLAG_INSTALLED) + && CHECK_FLAG(nexthop->flags, + NEXTHOP_FLAG_FIB)))) { + nexthop_num++; + + /* Build the multipath */ + if (!_netlink_mpls_build_multipath( + &p, routedesc, nhlfe, &req->n, + 
buflen, &req->r, &src1)) + return 0; + } + } + + /* Add the multipath */ + nl_attr_nest_end(&req->n, nest); + } + + return NLMSG_ALIGN(req->n.nlmsg_len); +} + +/**************************************************************************** +* This code was developed in a branch that didn't have dplane APIs for +* MAC updates. Hence the use of the legacy style. It will be moved to +* the new dplane style pre-merge to master. XXX +*/ +static int netlink_fdb_nh_update(uint32_t nh_id, struct in_addr vtep_ip) +{ + struct { + struct nlmsghdr n; + struct nhmsg nhm; + char buf[256]; + } req; + int cmd = RTM_NEWNEXTHOP; + struct zebra_vrf *zvrf; + struct zebra_ns *zns; + + zvrf = zebra_vrf_get_evpn(); + zns = zvrf->zns; + + memset(&req, 0, sizeof(req)); + + req.n.nlmsg_len = NLMSG_LENGTH(sizeof(struct nhmsg)); + req.n.nlmsg_flags = NLM_F_REQUEST; + req.n.nlmsg_flags |= (NLM_F_CREATE | NLM_F_REPLACE); + req.n.nlmsg_type = cmd; + req.nhm.nh_family = AF_INET; + + if (!nl_attr_put32(&req.n, sizeof(req), NHA_ID, nh_id)) + return -1; + if (!nl_attr_put(&req.n, sizeof(req), NHA_FDB, NULL, 0)) + return -1; + if (!nl_attr_put(&req.n, sizeof(req), NHA_GATEWAY, + &vtep_ip, IPV4_MAX_BYTELEN)) + return -1; + + if (IS_ZEBRA_DEBUG_KERNEL || IS_ZEBRA_DEBUG_EVPN_MH_NH) { + zlog_debug("Tx %s fdb-nh 0x%x %pI4", + nl_msg_type_to_str(cmd), nh_id, &vtep_ip); + } + + return netlink_talk(netlink_talk_filter, &req.n, &zns->netlink_cmd, zns, + false); +} + +static int netlink_fdb_nh_del(uint32_t nh_id) +{ + struct { + struct nlmsghdr n; + struct nhmsg nhm; + char buf[256]; + } req; + int cmd = RTM_DELNEXTHOP; + struct zebra_vrf *zvrf; + struct zebra_ns *zns; + + zvrf = zebra_vrf_get_evpn(); + zns = zvrf->zns; + + memset(&req, 0, sizeof(req)); + + req.n.nlmsg_len = NLMSG_LENGTH(sizeof(struct nhmsg)); + req.n.nlmsg_flags = NLM_F_REQUEST; + req.n.nlmsg_type = cmd; + req.nhm.nh_family = AF_UNSPEC; + + if (!nl_attr_put32(&req.n, sizeof(req), NHA_ID, nh_id)) + return -1; + + if (IS_ZEBRA_DEBUG_KERNEL || 
IS_ZEBRA_DEBUG_EVPN_MH_NH) { + zlog_debug("Tx %s fdb-nh 0x%x", + nl_msg_type_to_str(cmd), nh_id); + } + + return netlink_talk(netlink_talk_filter, &req.n, &zns->netlink_cmd, zns, + false); +} + +static int netlink_fdb_nhg_update(uint32_t nhg_id, uint32_t nh_cnt, + struct nh_grp *nh_ids) +{ + struct { + struct nlmsghdr n; + struct nhmsg nhm; + char buf[256]; + } req; + int cmd = RTM_NEWNEXTHOP; + struct zebra_vrf *zvrf; + struct zebra_ns *zns; + struct nexthop_grp grp[nh_cnt]; + uint32_t i; + + zvrf = zebra_vrf_get_evpn(); + zns = zvrf->zns; + + memset(&req, 0, sizeof(req)); + + req.n.nlmsg_len = NLMSG_LENGTH(sizeof(struct nhmsg)); + req.n.nlmsg_flags = NLM_F_REQUEST; + req.n.nlmsg_flags |= (NLM_F_CREATE | NLM_F_REPLACE); + req.n.nlmsg_type = cmd; + req.nhm.nh_family = AF_UNSPEC; + + if (!nl_attr_put32(&req.n, sizeof(req), NHA_ID, nhg_id)) + return -1; + if (!nl_attr_put(&req.n, sizeof(req), NHA_FDB, NULL, 0)) + return -1; + memset(&grp, 0, sizeof(grp)); + for (i = 0; i < nh_cnt; ++i) { + grp[i].id = nh_ids[i].id; + grp[i].weight = nh_ids[i].weight; + } + if (!nl_attr_put(&req.n, sizeof(req), NHA_GROUP, + grp, nh_cnt * sizeof(struct nexthop_grp))) + return -1; + + + if (IS_ZEBRA_DEBUG_KERNEL || IS_ZEBRA_DEBUG_EVPN_MH_NH) { + char vtep_str[ES_VTEP_LIST_STR_SZ]; + char nh_buf[16]; + + vtep_str[0] = '\0'; + for (i = 0; i < nh_cnt; ++i) { + snprintf(nh_buf, sizeof(nh_buf), "%u ", + grp[i].id); + strlcat(vtep_str, nh_buf, sizeof(vtep_str)); + } + + zlog_debug("Tx %s fdb-nhg 0x%x %s", + nl_msg_type_to_str(cmd), nhg_id, vtep_str); + } + + return netlink_talk(netlink_talk_filter, &req.n, &zns->netlink_cmd, zns, + false); +} + +static int netlink_fdb_nhg_del(uint32_t nhg_id) +{ + return netlink_fdb_nh_del(nhg_id); +} + +int kernel_upd_mac_nh(uint32_t nh_id, struct in_addr vtep_ip) +{ + return netlink_fdb_nh_update(nh_id, vtep_ip); +} + +int kernel_del_mac_nh(uint32_t nh_id) +{ + return netlink_fdb_nh_del(nh_id); +} + +int kernel_upd_mac_nhg(uint32_t nhg_id, uint32_t nh_cnt, 
+ struct nh_grp *nh_ids) +{ + return netlink_fdb_nhg_update(nhg_id, nh_cnt, nh_ids); +} + +int kernel_del_mac_nhg(uint32_t nhg_id) +{ + return netlink_fdb_nhg_del(nhg_id); +} + +#endif /* HAVE_NETLINK */ diff --git a/zebra/rt_netlink.h b/zebra/rt_netlink.h new file mode 100644 index 0000000..d51944f --- /dev/null +++ b/zebra/rt_netlink.h @@ -0,0 +1,152 @@ +// SPDX-License-Identifier: GPL-2.0-or-later +/* Header file exported by rt_netlink.c to zebra. + * Copyright (C) 1997, 98, 99 Kunihiro Ishiguro + */ + +#ifndef _ZEBRA_RT_NETLINK_H +#define _ZEBRA_RT_NETLINK_H + +#ifdef HAVE_NETLINK + +#include "zebra/zebra_mpls.h" +#include "zebra/zebra_dplane.h" + +#ifdef __cplusplus +extern "C" { +#endif + + +/* + * Additional protocol strings to push into routes + * If we add anything new here please make sure + * to update: + * zebra2proto Function + * proto2zebra Function + * is_selfroute Function + * tools/frr To flush the route upon exit + * + * Finally update this file to allow iproute2 to + * know about this new route. + * tools/etc/iproute2/rt_protos.d + */ +#define RTPROT_BGP 186 +#define RTPROT_ISIS 187 +#define RTPROT_OSPF 188 +#define RTPROT_RIP 189 +#define RTPROT_RIPNG 190 +#if !defined(RTPROT_BABEL) +#define RTPROT_BABEL 42 +#endif +#define RTPROT_NHRP 191 +#define RTPROT_EIGRP 192 +#define RTPROT_LDP 193 +#define RTPROT_SHARP 194 +#define RTPROT_PBR 195 +#define RTPROT_ZSTATIC 196 +#define RTPROT_OPENFABRIC 197 +#define RTPROT_SRTE 198 + +void rt_netlink_init(void); + +/* MPLS label forwarding table change, using dataplane context information. 
*/ +extern ssize_t netlink_mpls_multipath_msg_encode(int cmd, + struct zebra_dplane_ctx *ctx, + void *buf, size_t buflen); + +extern ssize_t netlink_route_multipath_msg_encode(int cmd, + struct zebra_dplane_ctx *ctx, + uint8_t *data, size_t datalen, + bool fpm, bool force_nhg, + bool force_rr); +extern ssize_t netlink_macfdb_update_ctx(struct zebra_dplane_ctx *ctx, + void *data, size_t datalen); + +extern int netlink_route_change(struct nlmsghdr *h, ns_id_t ns_id, int startup); +extern int netlink_route_read(struct zebra_ns *zns); + +extern int netlink_nexthop_change(struct nlmsghdr *h, ns_id_t ns_id, + int startup); +extern int netlink_nexthop_read(struct zebra_ns *zns); +extern ssize_t netlink_nexthop_msg_encode(uint16_t cmd, + const struct zebra_dplane_ctx *ctx, + void *buf, size_t buflen, bool fpm); + +extern ssize_t netlink_lsp_msg_encoder(struct zebra_dplane_ctx *ctx, void *buf, + size_t buflen); + +extern int netlink_neigh_change(struct nlmsghdr *h, ns_id_t ns_id); +extern int netlink_macfdb_read(struct zebra_ns *zns); +extern int netlink_macfdb_read_for_bridge(struct zebra_ns *zns, + struct interface *ifp, + struct interface *br_if, + vlanid_t vid); +extern int netlink_macfdb_read_mcast_for_vni(struct zebra_ns *zns, + struct interface *ifp, vni_t vni); +extern int netlink_neigh_read(struct zebra_ns *zns); +extern int netlink_neigh_read_for_vlan(struct zebra_ns *zns, + struct interface *vlan_if); +extern int netlink_macfdb_read_specific_mac(struct zebra_ns *zns, + struct interface *br_if, + const struct ethaddr *mac, + uint16_t vid); +extern int netlink_neigh_read_specific_ip(const struct ipaddr *ip, + struct interface *vlan_if); + +struct nl_batch; +extern enum netlink_msg_status +netlink_put_route_update_msg(struct nl_batch *bth, + struct zebra_dplane_ctx *ctx); +extern enum netlink_msg_status +netlink_put_nexthop_update_msg(struct nl_batch *bth, + struct zebra_dplane_ctx *ctx); +extern enum netlink_msg_status +netlink_put_mac_update_msg(struct nl_batch 
*bth, struct zebra_dplane_ctx *ctx); +extern enum netlink_msg_status +netlink_put_neigh_update_msg(struct nl_batch *bth, + struct zebra_dplane_ctx *ctx); +extern enum netlink_msg_status +netlink_put_lsp_update_msg(struct nl_batch *bth, struct zebra_dplane_ctx *ctx); +extern enum netlink_msg_status +netlink_put_pw_update_msg(struct nl_batch *bth, struct zebra_dplane_ctx *ctx); + +int netlink_route_change_read_unicast_internal(struct nlmsghdr *h, + ns_id_t ns_id, int startup, + struct zebra_dplane_ctx *ctx); + +#ifdef NETLINK_DEBUG +const char *nlmsg_type2str(uint16_t type); +const char *af_type2str(int type); +const char *ifi_type2str(int type); +const char *rta_type2str(int type); +const char *rtm_type2str(int type); +const char *ifla_pdr_type2str(int type); +const char *ifla_info_type2str(int type); +const char *rtm_protocol2str(int type); +const char *rtm_scope2str(int type); +const char *rtm_rta2str(int type); +const char *neigh_rta2str(int type); +const char *ifa_rta2str(int type); +const char *nhm_rta2str(int type); +const char *frh_rta2str(int type); +const char *frh_action2str(uint8_t action); +const char *nlmsg_flags2str(uint16_t flags, char *buf, size_t buflen); +const char *if_flags2str(uint32_t flags, char *buf, size_t buflen); +const char *rtm_flags2str(uint32_t flags, char *buf, size_t buflen); +const char *neigh_state2str(uint32_t flags, char *buf, size_t buflen); +const char *neigh_flags2str(uint32_t flags, char *buf, size_t buflen); +const char *ifa_flags2str(uint32_t flags, char *buf, size_t buflen); +const char *nh_flags2str(uint32_t flags, char *buf, size_t buflen); + +void nl_dump(void *msg, size_t msglen); + +extern int zebra2proto(int proto); + +#endif /* NETLINK_DEBUG */ + +#ifdef __cplusplus +} +#endif + +#endif /* HAVE_NETLINK */ + +#endif /* _ZEBRA_RT_NETLINK_H */ diff --git a/zebra/rt_socket.c b/zebra/rt_socket.c new file mode 100644 index 0000000..f9888b1 --- /dev/null +++ b/zebra/rt_socket.c @@ -0,0 +1,420 @@ +// 
SPDX-License-Identifier: GPL-2.0-or-later +/* + * Kernel routing table updates by routing socket. + * Copyright (C) 1997, 98 Kunihiro Ishiguro + */ + +#include + +#ifndef HAVE_NETLINK + +#ifdef __OpenBSD__ +#include +#endif + +#include "if.h" +#include "prefix.h" +#include "sockunion.h" +#include "log.h" +#include "privs.h" +#include "vxlan.h" +#include "lib_errors.h" + +#include "zebra/debug.h" +#include "zebra/rib.h" +#include "zebra/rt.h" +#include "zebra/kernel_socket.h" +#include "zebra/zebra_mpls.h" +#include "zebra/zebra_errors.h" + +extern struct zebra_privs_t zserv_privs; + +#ifdef __OpenBSD__ +static int kernel_rtm_add_labels(struct mpls_label_stack *nh_label, + struct sockaddr_mpls *smpls) +{ + if (nh_label->num_labels > 1) { + flog_warn(EC_ZEBRA_MAX_LABELS_PUSH, + "%s: can't push %u labels at once (maximum is 1)", + __func__, nh_label->num_labels); + return -1; + } + + memset(smpls, 0, sizeof(*smpls)); + smpls->smpls_len = sizeof(*smpls); + smpls->smpls_family = AF_MPLS; + smpls->smpls_label = htonl(nh_label->label[0] << MPLS_LABEL_OFFSET); + + return 0; +} +#endif + +/* Interface between zebra message and rtm message. */ +static int kernel_rtm(int cmd, const struct prefix *p, + const struct nexthop_group *ng, uint32_t metric) + +{ + union sockunion sin_dest, sin_mask, sin_gate; +#ifdef __OpenBSD__ + struct sockaddr_mpls smpls; +#endif + union sockunion *smplsp = NULL; + struct nexthop *nexthop; + int nexthop_num = 0; + ifindex_t ifindex = 0; + bool gate = false; + int error; + char gate_buf[INET6_BUFSIZ]; + enum blackhole_type bh_type = BLACKHOLE_UNSPEC; + + /* + * We only have the ability to ADD or DELETE at this point + * in time. 
+ */ + if (cmd != RTM_ADD && cmd != RTM_DELETE) { + if (IS_ZEBRA_DEBUG_KERNEL) + zlog_debug("%s: %pFX odd command %s", __func__, p, + lookup_msg(rtm_type_str, cmd, NULL)); + return 0; + } + + memset(&sin_dest, 0, sizeof(sin_dest)); + memset(&sin_gate, 0, sizeof(sin_gate)); + memset(&sin_mask, 0, sizeof(sin_mask)); + + switch (p->family) { + case AF_INET: + sin_dest.sin.sin_family = AF_INET; + sin_dest.sin.sin_addr = p->u.prefix4; + sin_gate.sin.sin_family = AF_INET; +#ifdef HAVE_STRUCT_SOCKADDR_SA_LEN + sin_dest.sin.sin_len = sizeof(struct sockaddr_in); + sin_gate.sin.sin_len = sizeof(struct sockaddr_in); +#endif /* HAVE_STRUCT_SOCKADDR_SA_LEN */ + break; + case AF_INET6: + sin_dest.sin6.sin6_family = AF_INET6; + sin_dest.sin6.sin6_addr = p->u.prefix6; + sin_gate.sin6.sin6_family = AF_INET6; +#ifdef HAVE_STRUCT_SOCKADDR_SA_LEN + sin_dest.sin6.sin6_len = sizeof(struct sockaddr_in6); + sin_gate.sin6.sin6_len = sizeof(struct sockaddr_in6); +#endif /* HAVE_STRUCT_SOCKADDR_SA_LEN */ + break; + } + + /* Make gateway. 
*/ + for (ALL_NEXTHOPS_PTR(ng, nexthop)) { + /* + * We only want to use the actual good nexthops + */ + if (CHECK_FLAG(nexthop->flags, NEXTHOP_FLAG_RECURSIVE) || + !CHECK_FLAG(nexthop->flags, NEXTHOP_FLAG_ACTIVE)) + continue; + + smplsp = NULL; + gate = false; + snprintf(gate_buf, sizeof(gate_buf), "NULL"); + + switch (nexthop->type) { + case NEXTHOP_TYPE_IPV4: + case NEXTHOP_TYPE_IPV4_IFINDEX: + sin_gate.sin.sin_addr = nexthop->gate.ipv4; + sin_gate.sin.sin_family = AF_INET; +#ifdef HAVE_STRUCT_SOCKADDR_SA_LEN + sin_gate.sin.sin_len = sizeof(struct sockaddr_in); +#endif /* HAVE_STRUCT_SOCKADDR_SA_LEN */ + ifindex = nexthop->ifindex; + gate = true; + break; + case NEXTHOP_TYPE_IPV6: + case NEXTHOP_TYPE_IPV6_IFINDEX: + sin_gate.sin6.sin6_addr = nexthop->gate.ipv6; + sin_gate.sin6.sin6_family = AF_INET6; +#ifdef HAVE_STRUCT_SOCKADDR_SA_LEN + sin_gate.sin6.sin6_len = sizeof(struct sockaddr_in6); +#endif /* HAVE_STRUCT_SOCKADDR_SA_LEN */ + ifindex = nexthop->ifindex; +/* Under kame set interface index to link local address */ +#ifdef KAME + +#define SET_IN6_LINKLOCAL_IFINDEX(a, i) \ + do { \ + (a).s6_addr[2] = ((i) >> 8) & 0xff; \ + (a).s6_addr[3] = (i)&0xff; \ + } while (0) + + if (IN6_IS_ADDR_LINKLOCAL(&sin_gate.sin6.sin6_addr)) + SET_IN6_LINKLOCAL_IFINDEX( + sin_gate.sin6.sin6_addr, + ifindex); +#endif /* KAME */ + + gate = true; + break; + case NEXTHOP_TYPE_IFINDEX: + ifindex = nexthop->ifindex; + break; + case NEXTHOP_TYPE_BLACKHOLE: + bh_type = nexthop->bh_type; + switch (p->family) { + case AF_INET: { + struct in_addr loopback; + + loopback.s_addr = htonl(INADDR_LOOPBACK); + sin_gate.sin.sin_addr = loopback; +#ifdef HAVE_STRUCT_SOCKADDR_SA_LEN + sin_gate.sin.sin_len = + sizeof(struct sockaddr_in); +#endif /* HAVE_STRUCT_SOCKADDR_SA_LEN */ + gate = true; + } break; + case AF_INET6: { + struct in6_addr loopback; + + inet_pton(AF_INET6, "::1", &loopback); + + sin_gate.sin6.sin6_addr = loopback; + sin_gate.sin6.sin6_family = AF_INET6; + +#ifdef 
HAVE_STRUCTSOCKADDR_SA_LEN + sin_gate.sin6.sin6_len = + sizeof(struct sockaddr_in6); +#endif /* HAVE_STRUCTSOCKADDR_SA_LEN */ + gate = true; + } break; + } + } + + switch (p->family) { + case AF_INET: + masklen2ip(p->prefixlen, &sin_mask.sin.sin_addr); + sin_mask.sin.sin_family = AF_INET; +#ifdef HAVE_STRUCT_SOCKADDR_SA_LEN + sin_mask.sin.sin_len = sizeof(struct sockaddr_in); +#endif /* HAVE_STRUCT_SOCKADDR_SA_LEN */ + break; + case AF_INET6: + masklen2ip6(p->prefixlen, &sin_mask.sin6.sin6_addr); + sin_mask.sin6.sin6_family = AF_INET6; +#ifdef HAVE_STRUCT_SOCKADDR_SA_LEN + sin_mask.sin6.sin6_len = sizeof(struct sockaddr_in6); +#endif /* HAVE_STRUCT_SOCKADDR_SA_LEN */ + break; + } + +#ifdef __OpenBSD__ + if (nexthop->nh_label) { + if (kernel_rtm_add_labels(nexthop->nh_label, + &smpls) != 0) + continue; + smplsp = (union sockunion *)&smpls; + } +#endif + error = rtm_write(cmd, &sin_dest, &sin_mask, + gate ? &sin_gate : NULL, smplsp, + ifindex, bh_type, metric); + + if (IS_ZEBRA_DEBUG_KERNEL) { + if (!gate) { + zlog_debug( + "%s: %pFX: attention! gate not found for re", + __func__, p); + } else { + switch (p->family) { + case AF_INET: + inet_ntop(AF_INET, + &sin_gate.sin.sin_addr, + gate_buf, sizeof(gate_buf)); + break; + + case AF_INET6: + inet_ntop(AF_INET6, + &sin_gate.sin6.sin6_addr, + gate_buf, sizeof(gate_buf)); + break; + + default: + snprintf(gate_buf, sizeof(gate_buf), + "(invalid-af)"); + break; + } + } + } + switch (error) { + /* We only flag nexthops as being in FIB if + * rtm_write() did its work. */ + case ZEBRA_ERR_NOERROR: + nexthop_num++; + if (IS_ZEBRA_DEBUG_KERNEL) + zlog_debug("%s: %pFX: successfully did NH %s", + __func__, p, gate_buf); + if (cmd == RTM_ADD) + SET_FLAG(nexthop->flags, NEXTHOP_FLAG_FIB); + break; + + /* The only valid case for this error is + * kernel's failure to install a multipath + * route, which is common for FreeBSD. This + * should be ignored silently, but logged as an error + * otherwise. 
+ */ + case ZEBRA_ERR_RTEXIST: + if (cmd != RTM_ADD) + flog_err(EC_LIB_SYSTEM_CALL, + "%s: rtm_write() returned %d for command %d", + __func__, error, cmd); + continue; + + /* Note any unexpected status returns */ + case ZEBRA_ERR_RTNOEXIST: + if (cmd != RTM_DELETE) + flog_err(EC_LIB_SYSTEM_CALL, + "%s: rtm_write() returned %d for command %d", + __func__, error, cmd); + break; + default: + flog_err( + EC_LIB_SYSTEM_CALL, + "%s: %pFX: rtm_write() unexpectedly returned %d for command %s", + __func__, p, error, + lookup_msg(rtm_type_str, cmd, NULL)); + break; + } + } /* for (ALL_NEXTHOPS(...))*/ + + /* If there was no useful nexthop, then complain. */ + if (nexthop_num == 0) { + if (IS_ZEBRA_DEBUG_KERNEL) + zlog_debug( + "%s: No useful nexthops were found in RIB prefix %pFX", + __func__, p); + return 1; + } + + return 0; /*XXX*/ +} + +/* + * Update or delete a prefix from the kernel, + * using info from a dataplane context struct. + */ +enum zebra_dplane_result kernel_route_update(struct zebra_dplane_ctx *ctx) +{ + enum zebra_dplane_result res = ZEBRA_DPLANE_REQUEST_SUCCESS; + uint32_t type, old_type; + + if (dplane_ctx_get_src(ctx) != NULL) { + zlog_err("route add: IPv6 sourcedest routes unsupported!"); + return ZEBRA_DPLANE_REQUEST_FAILURE; + } + + type = dplane_ctx_get_type(ctx); + old_type = dplane_ctx_get_old_type(ctx); + + frr_with_privs(&zserv_privs) { + + if (dplane_ctx_get_op(ctx) == DPLANE_OP_ROUTE_DELETE) { + if (!RSYSTEM_ROUTE(type)) + kernel_rtm(RTM_DELETE, dplane_ctx_get_dest(ctx), + dplane_ctx_get_ng(ctx), + dplane_ctx_get_metric(ctx)); + } else if (dplane_ctx_get_op(ctx) == DPLANE_OP_ROUTE_INSTALL) { + if (!RSYSTEM_ROUTE(type)) + kernel_rtm(RTM_ADD, dplane_ctx_get_dest(ctx), + dplane_ctx_get_ng(ctx), + dplane_ctx_get_metric(ctx)); + } else if (dplane_ctx_get_op(ctx) == DPLANE_OP_ROUTE_UPDATE) { + /* Must do delete and add separately - + * no update available + */ + if (!RSYSTEM_ROUTE(old_type)) + kernel_rtm(RTM_DELETE, dplane_ctx_get_dest(ctx), + 
/*
 * Neighbor add/delete request. Not implemented in this file, which is only
 * built when HAVE_NETLINK is undefined: every argument is ignored and 0 is
 * returned unconditionally.
 */
int kernel_neigh_update(int add, int ifindex, void *addr, char *lla, int llalen,
			ns_id_t ns_id, uint8_t family, bool permanent)
{
	/* TODO */
	return 0;
}
/*
 * Return the interface speed as already recorded in ifp->speed; nothing is
 * queried here. 'error' is never written by this implementation, so the
 * caller's value is left as it was.
 */
uint32_t kernel_get_speed(struct interface *ifp, int *error)
{
	return ifp->speed;
}
/*
 * Ordering callback for the adv_if_list sorted list: compares two entries
 * by their interface name via if_cmp_name_func() and returns its result.
 */
static int adv_if_cmp(const struct adv_if *a, const struct adv_if *b)
{
	return if_cmp_name_func(a->name, b->name);
}
*/ +static const char *const rtadv_pref_strs[] = { + "medium", "high", "INVALID", "low", 0 +}; + +enum rtadv_event { + RTADV_START, + RTADV_STOP, + RTADV_TIMER, + RTADV_TIMER_MSEC, + RTADV_READ +}; + +static void rtadv_event(struct zebra_vrf *, enum rtadv_event, int); + +static int if_join_all_router(int, struct interface *); +static int if_leave_all_router(int, struct interface *); + +static struct zebra_vrf *rtadv_interface_get_zvrf(const struct interface *ifp) +{ + /* We use the default vrf for rtadv handling except in netns */ + if (!vrf_is_backend_netns()) + return vrf_info_lookup(VRF_DEFAULT); + + return ifp->vrf->info; +} + +static int rtadv_increment_received(struct zebra_vrf *zvrf, ifindex_t *ifindex) +{ + int ret = -1; + struct interface *iface; + struct zebra_if *zif; + + iface = if_lookup_by_index(*ifindex, zvrf->vrf->vrf_id); + if (iface && iface->info) { + zif = iface->info; + zif->ra_rcvd++; + ret = 0; + } + return ret; +} + +static int rtadv_recv_packet(struct zebra_vrf *zvrf, int sock, uint8_t *buf, + int buflen, struct sockaddr_in6 *from, + ifindex_t *ifindex, int *hoplimit) +{ + int ret; + struct msghdr msg; + struct iovec iov; + struct cmsghdr *cmsgptr; + struct in6_addr dst; + + char adata[1024]; + + /* Fill in message and iovec. */ + memset(&msg, 0, sizeof(msg)); + msg.msg_name = (void *)from; + msg.msg_namelen = sizeof(struct sockaddr_in6); + msg.msg_iov = &iov; + msg.msg_iovlen = 1; + msg.msg_control = (void *)adata; + msg.msg_controllen = sizeof(adata); + iov.iov_base = buf; + iov.iov_len = buflen; + + /* If recvmsg fail return minus value. */ + ret = recvmsg(sock, &msg, 0); + if (ret < 0) + return ret; + + for (cmsgptr = CMSG_FIRSTHDR(&msg); cmsgptr != NULL; + cmsgptr = CMSG_NXTHDR(&msg, cmsgptr)) { + /* I want interface index which this packet comes from. 
*/ + if (cmsgptr->cmsg_level == IPPROTO_IPV6 + && cmsgptr->cmsg_type == IPV6_PKTINFO) { + struct in6_pktinfo *ptr; + + ptr = (struct in6_pktinfo *)CMSG_DATA(cmsgptr); + *ifindex = ptr->ipi6_ifindex; + memcpy(&dst, &ptr->ipi6_addr, sizeof(ptr->ipi6_addr)); + } + + /* Incoming packet's hop limit. */ + if (cmsgptr->cmsg_level == IPPROTO_IPV6 + && cmsgptr->cmsg_type == IPV6_HOPLIMIT) { + int *hoptr = (int *)CMSG_DATA(cmsgptr); + *hoplimit = *hoptr; + } + } + + rtadv_increment_received(zvrf, ifindex); + return ret; +} + +#define RTADV_MSG_SIZE 4096 + +/* Send router advertisement packet. */ +static void rtadv_send_packet(int sock, struct interface *ifp, + enum ipv6_nd_suppress_ra_status stop) +{ + struct msghdr msg; + struct iovec iov; + struct cmsghdr *cmsgptr; + struct in6_pktinfo *pkt; + struct sockaddr_in6 addr; + static void *adata = NULL; + unsigned char buf[RTADV_MSG_SIZE]; + struct nd_router_advert *rtadv; + int ret; + int len = 0; + struct zebra_if *zif; + struct rtadv_prefix *rprefix; + uint8_t all_nodes_addr[] = {0xff, 0x02, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 1}; + struct listnode *node; + uint16_t pkt_RouterLifetime; + + /* + * Allocate control message bufffer. This is dynamic because + * CMSG_SPACE is not guaranteed not to call a function. Note that + * the size will be different on different architectures due to + * differing alignment rules. + */ + if (adata == NULL) { + /* XXX Free on shutdown. */ + adata = calloc(1, CMSG_SPACE(sizeof(struct in6_pktinfo))); + + if (adata == NULL) { + zlog_debug("%s: can't malloc control data", __func__); + exit(-1); + } + } + + /* Logging of packet. */ + if (IS_ZEBRA_DEBUG_PACKET) + zlog_debug("%s(%s:%u): Tx RA, socket %u", ifp->name, + ifp->vrf->name, ifp->ifindex, sock); + + /* Fill in sockaddr_in6. 
*/ + memset(&addr, 0, sizeof(struct sockaddr_in6)); + addr.sin6_family = AF_INET6; +#ifdef SIN6_LEN + addr.sin6_len = sizeof(struct sockaddr_in6); +#endif /* SIN6_LEN */ + addr.sin6_port = htons(IPPROTO_ICMPV6); + IPV6_ADDR_COPY(&addr.sin6_addr, all_nodes_addr); + + /* Fetch interface information. */ + zif = ifp->info; + + /* Make router advertisement message. */ + rtadv = (struct nd_router_advert *)buf; + + rtadv->nd_ra_type = ND_ROUTER_ADVERT; + rtadv->nd_ra_code = 0; + rtadv->nd_ra_cksum = 0; + + rtadv->nd_ra_curhoplimit = zif->rtadv.AdvCurHopLimit; + + /* RFC4191: Default Router Preference is 0 if Router Lifetime is 0. */ + rtadv->nd_ra_flags_reserved = zif->rtadv.AdvDefaultLifetime == 0 + ? 0 + : zif->rtadv.DefaultPreference; + rtadv->nd_ra_flags_reserved <<= 3; + + if (zif->rtadv.AdvManagedFlag) + rtadv->nd_ra_flags_reserved |= ND_RA_FLAG_MANAGED; + if (zif->rtadv.AdvOtherConfigFlag) + rtadv->nd_ra_flags_reserved |= ND_RA_FLAG_OTHER; + if (zif->rtadv.AdvHomeAgentFlag) + rtadv->nd_ra_flags_reserved |= ND_RA_FLAG_HOME_AGENT; + /* Note that according to Neighbor Discovery (RFC 4861 [18]), + * AdvDefaultLifetime is by default based on the value of + * MaxRtrAdvInterval. AdvDefaultLifetime is used in the Router Lifetime + * field of Router Advertisements. Given that this field is expressed + * in seconds, a small MaxRtrAdvInterval value can result in a zero + * value for this field. To prevent this, routers SHOULD keep + * AdvDefaultLifetime in at least one second, even if the use of + * MaxRtrAdvInterval would result in a smaller value. -- RFC6275, 7.5 */ + pkt_RouterLifetime = + zif->rtadv.AdvDefaultLifetime != -1 + ? zif->rtadv.AdvDefaultLifetime + : MAX(1, 0.003 * zif->rtadv.MaxRtrAdvInterval); + + /* send RA lifetime of 0 before stopping. rfc4861/6.2.5 */ + rtadv->nd_ra_router_lifetime = + (stop == RA_SUPPRESS) ? 
htons(0) : htons(pkt_RouterLifetime); + rtadv->nd_ra_reachable = htonl(zif->rtadv.AdvReachableTime); + rtadv->nd_ra_retransmit = htonl(zif->rtadv.AdvRetransTimer); + + len = sizeof(struct nd_router_advert); + + /* If both the Home Agent Preference and Home Agent Lifetime are set to + * their default values specified above, this option SHOULD NOT be + * included in the Router Advertisement messages sent by this home + * agent. -- RFC6275, 7.4 */ + if (zif->rtadv.AdvHomeAgentFlag + && (zif->rtadv.HomeAgentPreference + || zif->rtadv.HomeAgentLifetime != -1)) { + struct nd_opt_homeagent_info *ndopt_hai = + (struct nd_opt_homeagent_info *)(buf + len); + ndopt_hai->nd_opt_hai_type = ND_OPT_HA_INFORMATION; + ndopt_hai->nd_opt_hai_len = 1; + ndopt_hai->nd_opt_hai_reserved = 0; + ndopt_hai->nd_opt_hai_preference = + htons(zif->rtadv.HomeAgentPreference); + /* 16-bit unsigned integer. The lifetime associated with the + * home + * agent in units of seconds. The default value is the same as + * the + * Router Lifetime, as specified in the main body of the Router + * Advertisement. The maximum value corresponds to 18.2 hours. + * A + * value of 0 MUST NOT be used. -- RFC6275, 7.5 */ + ndopt_hai->nd_opt_hai_lifetime = + htons(zif->rtadv.HomeAgentLifetime != -1 + ? zif->rtadv.HomeAgentLifetime + : MAX(1, pkt_RouterLifetime) /* 0 is OK + for RL, + but not + for HAL*/ + ); + len += sizeof(struct nd_opt_homeagent_info); + } + + if (zif->rtadv.AdvIntervalOption) { + struct nd_opt_adv_interval *ndopt_adv = + (struct nd_opt_adv_interval *)(buf + len); + ndopt_adv->nd_opt_ai_type = ND_OPT_ADV_INTERVAL; + ndopt_adv->nd_opt_ai_len = 1; + ndopt_adv->nd_opt_ai_reserved = 0; + ndopt_adv->nd_opt_ai_interval = + htonl(zif->rtadv.MaxRtrAdvInterval); + len += sizeof(struct nd_opt_adv_interval); + } + + /* Fill in prefix. 
*/ + frr_each (rtadv_prefixes, zif->rtadv.prefixes, rprefix) { + struct nd_opt_prefix_info *pinfo; + + pinfo = (struct nd_opt_prefix_info *)(buf + len); + + pinfo->nd_opt_pi_type = ND_OPT_PREFIX_INFORMATION; + pinfo->nd_opt_pi_len = 4; + pinfo->nd_opt_pi_prefix_len = rprefix->prefix.prefixlen; + + pinfo->nd_opt_pi_flags_reserved = 0; + if (rprefix->AdvOnLinkFlag) + pinfo->nd_opt_pi_flags_reserved |= + ND_OPT_PI_FLAG_ONLINK; + if (rprefix->AdvAutonomousFlag) + pinfo->nd_opt_pi_flags_reserved |= ND_OPT_PI_FLAG_AUTO; + if (rprefix->AdvRouterAddressFlag) + pinfo->nd_opt_pi_flags_reserved |= ND_OPT_PI_FLAG_RADDR; + + pinfo->nd_opt_pi_valid_time = htonl(rprefix->AdvValidLifetime); + pinfo->nd_opt_pi_preferred_time = + htonl(rprefix->AdvPreferredLifetime); + pinfo->nd_opt_pi_reserved2 = 0; + + IPV6_ADDR_COPY(&pinfo->nd_opt_pi_prefix, + &rprefix->prefix.prefix); + + len += sizeof(struct nd_opt_prefix_info); + } + + /* Hardware address. */ + if (ifp->hw_addr_len != 0) { + buf[len++] = ND_OPT_SOURCE_LINKADDR; + + /* Option length should be rounded up to next octet if + the link address does not end on an octet boundary. */ + buf[len++] = (ifp->hw_addr_len + 9) >> 3; + + memcpy(buf + len, ifp->hw_addr, ifp->hw_addr_len); + len += ifp->hw_addr_len; + + /* Pad option to end on an octet boundary. */ + memset(buf + len, 0, -(ifp->hw_addr_len + 2) & 0x7); + len += -(ifp->hw_addr_len + 2) & 0x7; + } + + /* MTU */ + if (zif->rtadv.AdvLinkMTU) { + struct nd_opt_mtu *opt = (struct nd_opt_mtu *)(buf + len); + opt->nd_opt_mtu_type = ND_OPT_MTU; + opt->nd_opt_mtu_len = 1; + opt->nd_opt_mtu_reserved = 0; + opt->nd_opt_mtu_mtu = htonl(zif->rtadv.AdvLinkMTU); + len += sizeof(struct nd_opt_mtu); + } + + /* + * There is no limit on the number of configurable recursive DNS + * servers or search list entries. We don't want the RA message + * to exceed the link's MTU (risking fragmentation) or even + * blow the stack buffer allocated for it. 
+ */ + size_t max_len = MIN(ifp->mtu6 - 40, sizeof(buf)); + + /* Recursive DNS servers */ + struct rtadv_rdnss *rdnss; + + for (ALL_LIST_ELEMENTS_RO(zif->rtadv.AdvRDNSSList, node, rdnss)) { + size_t opt_len = + sizeof(struct nd_opt_rdnss) + sizeof(struct in6_addr); + + if (len + opt_len > max_len) { + zlog_warn( + "%s(%s:%u): Tx RA: RDNSS option would exceed MTU, omitting it", + ifp->name, ifp->vrf->name, ifp->ifindex); + goto no_more_opts; + } + struct nd_opt_rdnss *opt = (struct nd_opt_rdnss *)(buf + len); + + opt->nd_opt_rdnss_type = ND_OPT_RDNSS; + opt->nd_opt_rdnss_len = opt_len / 8; + opt->nd_opt_rdnss_reserved = 0; + opt->nd_opt_rdnss_lifetime = htonl( + rdnss->lifetime_set + ? rdnss->lifetime + : MAX(1, 0.003 * zif->rtadv.MaxRtrAdvInterval)); + + len += sizeof(struct nd_opt_rdnss); + + IPV6_ADDR_COPY(buf + len, &rdnss->addr); + len += sizeof(struct in6_addr); + } + + /* DNS search list */ + struct rtadv_dnssl *dnssl; + + for (ALL_LIST_ELEMENTS_RO(zif->rtadv.AdvDNSSLList, node, dnssl)) { + size_t opt_len = sizeof(struct nd_opt_dnssl) + + ((dnssl->encoded_len + 7) & ~7); + + if (len + opt_len > max_len) { + zlog_warn( + "%s(%u): Tx RA: DNSSL option would exceed MTU, omitting it", + ifp->name, ifp->ifindex); + goto no_more_opts; + } + struct nd_opt_dnssl *opt = (struct nd_opt_dnssl *)(buf + len); + + opt->nd_opt_dnssl_type = ND_OPT_DNSSL; + opt->nd_opt_dnssl_len = opt_len / 8; + opt->nd_opt_dnssl_reserved = 0; + opt->nd_opt_dnssl_lifetime = htonl( + dnssl->lifetime_set + ? 
dnssl->lifetime + : MAX(1, 0.003 * zif->rtadv.MaxRtrAdvInterval)); + + len += sizeof(struct nd_opt_dnssl); + + memcpy(buf + len, dnssl->encoded_name, dnssl->encoded_len); + len += dnssl->encoded_len; + + /* Zero-pad to 8-octet boundary */ + while (len % 8) + buf[len++] = '\0'; + } + +no_more_opts: + + msg.msg_name = (void *)&addr; + msg.msg_namelen = sizeof(struct sockaddr_in6); + msg.msg_iov = &iov; + msg.msg_iovlen = 1; + msg.msg_control = (void *)adata; + msg.msg_controllen = CMSG_SPACE(sizeof(struct in6_pktinfo)); + msg.msg_flags = 0; + iov.iov_base = buf; + iov.iov_len = len; + + cmsgptr = CMSG_FIRSTHDR(&msg); + cmsgptr->cmsg_len = CMSG_LEN(sizeof(struct in6_pktinfo)); + cmsgptr->cmsg_level = IPPROTO_IPV6; + cmsgptr->cmsg_type = IPV6_PKTINFO; + + pkt = (struct in6_pktinfo *)CMSG_DATA(cmsgptr); + memset(&pkt->ipi6_addr, 0, sizeof(struct in6_addr)); + pkt->ipi6_ifindex = ifp->ifindex; + + ret = sendmsg(sock, &msg, 0); + if (ret < 0) { + flog_err_sys(EC_LIB_SOCKET, + "%s(%u): Tx RA failed, socket %u error %d (%s)", + ifp->name, ifp->ifindex, sock, errno, + safe_strerror(errno)); + } else + zif->ra_sent++; +} + +static void rtadv_timer(struct event *thread) +{ + struct zebra_vrf *zvrf = EVENT_ARG(thread); + struct vrf *vrf; + struct interface *ifp; + struct zebra_if *zif; + int period; + + zvrf->rtadv.ra_timer = NULL; + if (adv_if_list_count(&zvrf->rtadv.adv_msec_if) == 0) { + period = 1000; /* 1 s */ + rtadv_event(zvrf, RTADV_TIMER, 1 /* 1 s */); + } else { + period = 10; /* 10 ms */ + rtadv_event(zvrf, RTADV_TIMER_MSEC, 10 /* 10 ms */); + } + + RB_FOREACH (vrf, vrf_id_head, &vrfs_by_id) + FOR_ALL_INTERFACES (vrf, ifp) { + if (if_is_loopback(ifp) || !if_is_operative(ifp) || + IS_ZEBRA_IF_BRIDGE_SLAVE(ifp) || + !connected_get_linklocal(ifp) || + (vrf_is_backend_netns() && + ifp->vrf->vrf_id != zvrf->vrf->vrf_id)) + continue; + + zif = ifp->info; + + if (zif->rtadv.AdvSendAdvertisements) { + if (zif->rtadv.inFastRexmit + && zif->rtadv.UseFastRexmit) { + /* We 
assume we fast rexmit every sec so + * no + * additional vars */ + if (--zif->rtadv.NumFastReXmitsRemain + <= 0) + zif->rtadv.inFastRexmit = 0; + + if (IS_ZEBRA_DEBUG_SEND) + zlog_debug( + "Fast RA Rexmit on interface %s(%s:%u)", + ifp->name, + ifp->vrf->name, + ifp->ifindex); + + rtadv_send_packet(zvrf->rtadv.sock, ifp, + RA_ENABLE); + } else { + zif->rtadv.AdvIntervalTimer -= period; + if (zif->rtadv.AdvIntervalTimer <= 0) { + /* FIXME: using + MaxRtrAdvInterval each + time isn't what section + 6.2.4 of RFC4861 tells to do. + */ + zif->rtadv.AdvIntervalTimer = + zif->rtadv + .MaxRtrAdvInterval; + rtadv_send_packet( + zvrf->rtadv.sock, ifp, + RA_ENABLE); + } + } + } + } +} + +static void rtadv_process_solicit(struct interface *ifp) +{ + struct zebra_vrf *zvrf; + struct zebra_if *zif; + + zvrf = rtadv_interface_get_zvrf(ifp); + assert(zvrf); + zif = ifp->info; + + /* + * If FastRetransmit is enabled, send the RA immediately. + * If not enabled but it has been more than MIN_DELAY_BETWEEN_RAS + * (3 seconds) since the last RA was sent, send it now and reset + * the timer to start at the max (configured) again. + * If not enabled and it is less than 3 seconds since the last + * RA packet was sent, set the timer for 3 seconds so the next + * one will be sent with a minimum of 3 seconds between RAs. 
+ * RFC4861 sec 6.2.6 + */ + if ((zif->rtadv.UseFastRexmit) + || (zif->rtadv.AdvIntervalTimer <= + (zif->rtadv.MaxRtrAdvInterval - MIN_DELAY_BETWEEN_RAS))) { + rtadv_send_packet(zvrf->rtadv.sock, ifp, RA_ENABLE); + zif->rtadv.AdvIntervalTimer = zif->rtadv.MaxRtrAdvInterval; + } else + zif->rtadv.AdvIntervalTimer = MIN_DELAY_BETWEEN_RAS; +} + +static const char *rtadv_optionalhdr2str(uint8_t opt_type) +{ + switch (opt_type) { + case ND_OPT_SOURCE_LINKADDR: + return "Optional Source Link Address"; + case ND_OPT_TARGET_LINKADDR: + return "Optional Target Link Address"; + case ND_OPT_PREFIX_INFORMATION: + return "Optional Prefix Information"; + case ND_OPT_REDIRECTED_HEADER: + return "Optional Redirected Header"; + case ND_OPT_MTU: + return "Optional MTU"; + case ND_OPT_RTR_ADV_INTERVAL: + return "Optional Advertisement Interval"; + case ND_OPT_HOME_AGENT_INFO: + return "Optional Home Agent Information"; + } + + return "Unknown Optional Type"; +} + +/* + * This function processes optional attributes off of + * end of a RA packet received. At this point in + * time we only care about this in one situation + * which is when a interface does not have a LL + * v6 address. 
We still need to be able to install + * the mac address for v4 to v6 resolution + */ +static void rtadv_process_optional(uint8_t *optional, unsigned int len, + struct interface *ifp, + struct sockaddr_in6 *addr) +{ + char *mac; + + while (len > 0) { + struct nd_opt_hdr *opt_hdr = (struct nd_opt_hdr *)optional; + + switch(opt_hdr->nd_opt_type) { + case ND_OPT_SOURCE_LINKADDR: + mac = (char *)(optional+2); + if_nbr_mac_to_ipv4ll_neigh_update(ifp, mac, + &addr->sin6_addr, 1); + break; + default: + if (IS_ZEBRA_DEBUG_PACKET) + zlog_debug( + "%s:Received Packet with optional Header type %s(%u) that is being ignored", + __func__, + rtadv_optionalhdr2str( + opt_hdr->nd_opt_type), + opt_hdr->nd_opt_type); + break; + } + + len -= 8 * opt_hdr->nd_opt_len; + optional += 8 * opt_hdr->nd_opt_len; + } +} + +static void rtadv_process_advert(uint8_t *msg, unsigned int len, + struct interface *ifp, + struct sockaddr_in6 *addr) +{ + struct nd_router_advert *radvert; + char addr_str[INET6_ADDRSTRLEN]; + struct zebra_if *zif; + struct prefix p; + + zif = ifp->info; + + inet_ntop(AF_INET6, &addr->sin6_addr, addr_str, INET6_ADDRSTRLEN); + + if (len < sizeof(struct nd_router_advert)) { + if (IS_ZEBRA_DEBUG_PACKET) + zlog_debug( + "%s(%s:%u): Rx RA with invalid length %d from %s", + ifp->name, ifp->vrf->name, ifp->ifindex, len, + addr_str); + return; + } + + if (!IN6_IS_ADDR_LINKLOCAL(&addr->sin6_addr)) { + rtadv_process_optional(msg + sizeof(struct nd_router_advert), + len - sizeof(struct nd_router_advert), + ifp, addr); + if (IS_ZEBRA_DEBUG_PACKET) + zlog_debug( + "%s(%s:%u): Rx RA with non-linklocal source address from %s", + ifp->name, ifp->vrf->name, ifp->ifindex, + addr_str); + return; + } + + radvert = (struct nd_router_advert *)msg; + +#define SIXHOUR2USEC (int64_t)6 * 60 * 60 * 1000000 + + if ((radvert->nd_ra_curhoplimit && zif->rtadv.AdvCurHopLimit) && + (radvert->nd_ra_curhoplimit != zif->rtadv.AdvCurHopLimit) && + (monotime_since(&zif->rtadv.lastadvcurhoplimit, NULL) > + 
SIXHOUR2USEC || + zif->rtadv.lastadvcurhoplimit.tv_sec == 0)) { + flog_warn( + EC_ZEBRA_RA_PARAM_MISMATCH, + "%s(%u): Rx RA - our AdvCurHopLimit (%u) doesn't agree with %s (%u)", + ifp->name, ifp->ifindex, zif->rtadv.AdvCurHopLimit, + addr_str, radvert->nd_ra_curhoplimit); + monotime(&zif->rtadv.lastadvcurhoplimit); + } + + if ((radvert->nd_ra_flags_reserved & ND_RA_FLAG_MANAGED) && + !zif->rtadv.AdvManagedFlag && + (monotime_since(&zif->rtadv.lastadvmanagedflag, NULL) > + SIXHOUR2USEC || + zif->rtadv.lastadvmanagedflag.tv_sec == 0)) { + flog_warn( + EC_ZEBRA_RA_PARAM_MISMATCH, + "%s(%u): Rx RA - our AdvManagedFlag (%u) doesn't agree with %s (%u)", + ifp->name, ifp->ifindex, zif->rtadv.AdvManagedFlag, + addr_str, + !!CHECK_FLAG(radvert->nd_ra_flags_reserved, + ND_RA_FLAG_MANAGED)); + monotime(&zif->rtadv.lastadvmanagedflag); + } + + if ((radvert->nd_ra_flags_reserved & ND_RA_FLAG_OTHER) && + !zif->rtadv.AdvOtherConfigFlag && + (monotime_since(&zif->rtadv.lastadvotherconfigflag, NULL) > + SIXHOUR2USEC || + zif->rtadv.lastadvotherconfigflag.tv_sec == 0)) { + flog_warn( + EC_ZEBRA_RA_PARAM_MISMATCH, + "%s(%u): Rx RA - our AdvOtherConfigFlag (%u) doesn't agree with %s (%u)", + ifp->name, ifp->ifindex, zif->rtadv.AdvOtherConfigFlag, + addr_str, + !!CHECK_FLAG(radvert->nd_ra_flags_reserved, + ND_RA_FLAG_OTHER)); + monotime(&zif->rtadv.lastadvotherconfigflag); + } + + if ((radvert->nd_ra_reachable && zif->rtadv.AdvReachableTime) && + (ntohl(radvert->nd_ra_reachable) != zif->rtadv.AdvReachableTime) && + (monotime_since(&zif->rtadv.lastadvreachabletime, NULL) > + SIXHOUR2USEC || + zif->rtadv.lastadvreachabletime.tv_sec == 0)) { + flog_warn( + EC_ZEBRA_RA_PARAM_MISMATCH, + "%s(%u): Rx RA - our AdvReachableTime (%u) doesn't agree with %s (%u)", + ifp->name, ifp->ifindex, zif->rtadv.AdvReachableTime, + addr_str, ntohl(radvert->nd_ra_reachable)); + monotime(&zif->rtadv.lastadvreachabletime); + } + + if ((radvert->nd_ra_retransmit && zif->rtadv.AdvRetransTimer) && + 
(ntohl(radvert->nd_ra_retransmit) != + (unsigned int)zif->rtadv.AdvRetransTimer) && + (monotime_since(&zif->rtadv.lastadvretranstimer, NULL) > + SIXHOUR2USEC || + zif->rtadv.lastadvretranstimer.tv_sec == 0)) { + flog_warn( + EC_ZEBRA_RA_PARAM_MISMATCH, + "%s(%u): Rx RA - our AdvRetransTimer (%u) doesn't agree with %s (%u)", + ifp->name, ifp->ifindex, zif->rtadv.AdvRetransTimer, + addr_str, ntohl(radvert->nd_ra_retransmit)); + monotime(&zif->rtadv.lastadvretranstimer); + } + + /* Create entry for neighbor if not known. */ + p.family = AF_INET6; + IPV6_ADDR_COPY(&p.u.prefix6, &addr->sin6_addr); + p.prefixlen = IPV6_MAX_BITLEN; + + if (!nbr_connected_check(ifp, &p)) + nbr_connected_add_ipv6(ifp, &addr->sin6_addr); +} + + +static void rtadv_process_packet(uint8_t *buf, unsigned int len, + ifindex_t ifindex, int hoplimit, + struct sockaddr_in6 *from, + struct zebra_vrf *zvrf) +{ + struct icmp6_hdr *icmph; + struct interface *ifp; + struct zebra_if *zif; + char addr_str[INET6_ADDRSTRLEN]; + + inet_ntop(AF_INET6, &from->sin6_addr, addr_str, INET6_ADDRSTRLEN); + + /* Interface search. */ + ifp = if_lookup_by_index(ifindex, zvrf->vrf->vrf_id); + if (ifp == NULL) { + flog_warn(EC_ZEBRA_UNKNOWN_INTERFACE, + "RA/RS received on unknown IF %u from %s", ifindex, + addr_str); + return; + } + + if (IS_ZEBRA_DEBUG_PACKET) + zlog_debug("%s(%s:%u): Rx RA/RS len %d from %s", ifp->name, + ifp->vrf->name, ifp->ifindex, len, addr_str); + + if (if_is_loopback(ifp)) + return; + + /* Check interface configuration. */ + zif = ifp->info; + if (!zif->rtadv.AdvSendAdvertisements) + return; + + /* ICMP message length check. */ + if (len < sizeof(struct icmp6_hdr)) { + zlog_debug( + "%s(%s:%u): Rx RA with Invalid ICMPV6 packet length %d", + ifp->name, ifp->vrf->name, ifp->ifindex, len); + return; + } + + icmph = (struct icmp6_hdr *)buf; + + /* ICMP message type check. 
*/ + if (icmph->icmp6_type != ND_ROUTER_SOLICIT + && icmph->icmp6_type != ND_ROUTER_ADVERT) { + zlog_debug("%s(%s:%u): Rx RA - Unwanted ICMPV6 message type %d", + ifp->name, ifp->vrf->name, ifp->ifindex, + icmph->icmp6_type); + return; + } + + /* Hoplimit check. */ + if (hoplimit >= 0 && hoplimit != 255) { + zlog_debug("%s(%s:%u): Rx RA - Invalid hoplimit %d", ifp->name, + ifp->vrf->name, ifp->ifindex, hoplimit); + return; + } + + /* Check ICMP message type. */ + if (icmph->icmp6_type == ND_ROUTER_SOLICIT) + rtadv_process_solicit(ifp); + else if (icmph->icmp6_type == ND_ROUTER_ADVERT) + rtadv_process_advert(buf, len, ifp, from); + + return; +} + +static void rtadv_read(struct event *thread) +{ + int sock; + int len; + uint8_t buf[RTADV_MSG_SIZE]; + struct sockaddr_in6 from; + ifindex_t ifindex = 0; + int hoplimit = -1; + struct zebra_vrf *zvrf = EVENT_ARG(thread); + + sock = EVENT_FD(thread); + zvrf->rtadv.ra_read = NULL; + + /* Register myself. */ + rtadv_event(zvrf, RTADV_READ, 0); + + len = rtadv_recv_packet(zvrf, sock, buf, sizeof(buf), &from, &ifindex, + &hoplimit); + + if (len < 0) { + flog_err_sys(EC_LIB_SOCKET, + "RA/RS recv failed, socket %u error %s", sock, + safe_strerror(errno)); + return; + } + + rtadv_process_packet(buf, (unsigned)len, ifindex, hoplimit, &from, zvrf); +} + +static int rtadv_make_socket(ns_id_t ns_id) +{ + int sock = -1; + int ret = 0; + struct icmp6_filter filter; + int error; + + frr_with_privs(&zserv_privs) { + + sock = ns_socket(AF_INET6, SOCK_RAW, IPPROTO_ICMPV6, ns_id); + /* + * with privs might set errno too if it fails save + * to the side + */ + error = errno; + } + + if (sock < 0) { + zlog_warn("RTADV socket for ns: %u failure to create: %s(%u)", + ns_id, safe_strerror(error), error); + return -1; + } + + ret = setsockopt_ipv6_pktinfo(sock, 1); + if (ret < 0) { + zlog_warn("RTADV failure to set Packet Information"); + close(sock); + return ret; + } + ret = setsockopt_ipv6_multicast_loop(sock, 0); + if (ret < 0) { + 
zlog_warn("RTADV failure to set multicast Loop detection"); + close(sock); + return ret; + } + ret = setsockopt_ipv6_unicast_hops(sock, 255); + if (ret < 0) { + zlog_warn("RTADV failure to set maximum unicast hops"); + close(sock); + return ret; + } + ret = setsockopt_ipv6_multicast_hops(sock, 255); + if (ret < 0) { + zlog_warn("RTADV failure to set maximum multicast hops"); + close(sock); + return ret; + } + ret = setsockopt_ipv6_hoplimit(sock, 1); + if (ret < 0) { + zlog_warn("RTADV failure to set maximum incoming hop limit"); + close(sock); + return ret; + } + + ICMP6_FILTER_SETBLOCKALL(&filter); + ICMP6_FILTER_SETPASS(ND_ROUTER_SOLICIT, &filter); + ICMP6_FILTER_SETPASS(ND_ROUTER_ADVERT, &filter); + + ret = setsockopt(sock, IPPROTO_ICMPV6, ICMP6_FILTER, &filter, + sizeof(struct icmp6_filter)); + if (ret < 0) { + zlog_info("ICMP6_FILTER set fail: %s", safe_strerror(errno)); + close(sock); + return ret; + } + + return sock; +} + +static struct adv_if *adv_if_new(const char *name) +{ + struct adv_if *new; + + new = XCALLOC(MTYPE_ADV_IF, sizeof(struct adv_if)); + + strlcpy(new->name, name, sizeof(new->name)); + + return new; +} + +static void adv_if_free(struct adv_if *adv_if) +{ + XFREE(MTYPE_ADV_IF, adv_if); +} + +static bool adv_if_is_empty_internal(const struct adv_if_list_head *adv_if_head) +{ + return adv_if_list_count(adv_if_head) ? 
false : true; +} + +static struct adv_if *adv_if_add_internal(struct adv_if_list_head *adv_if_head, + const char *name) +{ + struct adv_if adv_if_lookup = {}; + struct adv_if *adv_if = NULL; + + strlcpy(adv_if_lookup.name, name, sizeof(adv_if_lookup.name)); + adv_if = adv_if_list_find(adv_if_head, &adv_if_lookup); + + if (adv_if != NULL) + return adv_if; + + adv_if = adv_if_new(adv_if_lookup.name); + adv_if_list_add(adv_if_head, adv_if); + + return NULL; +} + +static struct adv_if *adv_if_del_internal(struct adv_if_list_head *adv_if_head, + const char *name) +{ + struct adv_if adv_if_lookup = {}; + struct adv_if *adv_if = NULL; + + strlcpy(adv_if_lookup.name, name, sizeof(adv_if_lookup.name)); + adv_if = adv_if_list_find(adv_if_head, &adv_if_lookup); + + if (adv_if == NULL) + return NULL; + + adv_if_list_del(adv_if_head, adv_if); + + return adv_if; +} + +static void adv_if_clean_internal(struct adv_if_list_head *adv_if_head) +{ + struct adv_if *node = NULL; + + if (!adv_if_is_empty_internal(adv_if_head)) { + frr_each_safe (adv_if_list, adv_if_head, node) { + adv_if_list_del(adv_if_head, node); + adv_if_free(node); + } + } + + adv_if_list_fini(adv_if_head); +} + + +/* + * Add to list. On Success, return NULL, otherwise return already existing + * adv_if. + */ +static struct adv_if *adv_if_add(struct zebra_vrf *zvrf, const char *name) +{ + struct adv_if *adv_if = NULL; + + adv_if = adv_if_add_internal(&zvrf->rtadv.adv_if, name); + + if (adv_if != NULL) + return adv_if; + + if (IS_ZEBRA_DEBUG_EVENT) { + struct vrf *vrf = zvrf->vrf; + + zlog_debug("%s: %s:%u IF %s count: %zu", __func__, + VRF_LOGNAME(vrf), zvrf_id(zvrf), name, + adv_if_list_count(&zvrf->rtadv.adv_if)); + } + + return NULL; +} + +/* + * Del from list. On Success, return the adv_if, otherwise return NULL. Caller + * frees. 
+ */ +static struct adv_if *adv_if_del(struct zebra_vrf *zvrf, const char *name) +{ + struct adv_if *adv_if = NULL; + + adv_if = adv_if_del_internal(&zvrf->rtadv.adv_if, name); + + if (adv_if == NULL) + return NULL; + + if (IS_ZEBRA_DEBUG_EVENT) { + struct vrf *vrf = zvrf->vrf; + + zlog_debug("%s: %s:%u IF %s count: %zu", __func__, + VRF_LOGNAME(vrf), zvrf_id(zvrf), name, + adv_if_list_count(&zvrf->rtadv.adv_if)); + } + + return adv_if; +} + +/* + * Add to list. On Success, return NULL, otherwise return already existing + * adv_if. + */ +static struct adv_if *adv_msec_if_add(struct zebra_vrf *zvrf, const char *name) +{ + struct adv_if *adv_if = NULL; + + adv_if = adv_if_add_internal(&zvrf->rtadv.adv_msec_if, name); + + if (adv_if != NULL) + return adv_if; + + if (IS_ZEBRA_DEBUG_EVENT) { + struct vrf *vrf = zvrf->vrf; + + zlog_debug("%s: %s:%u IF %s count: %zu", __func__, + VRF_LOGNAME(vrf), zvrf_id(zvrf), name, + adv_if_list_count(&zvrf->rtadv.adv_msec_if)); + } + + return NULL; +} + +/* + * Del from list. On Success, return the adv_if, otherwise return NULL. Caller + * frees. 
+ */ +static struct adv_if *adv_msec_if_del(struct zebra_vrf *zvrf, const char *name) +{ + struct adv_if *adv_if = NULL; + + adv_if = adv_if_del_internal(&zvrf->rtadv.adv_msec_if, name); + + if (adv_if == NULL) + return NULL; + + if (IS_ZEBRA_DEBUG_EVENT) { + struct vrf *vrf = zvrf->vrf; + + zlog_debug("%s: %s:%u IF %s count: %zu", __func__, + VRF_LOGNAME(vrf), zvrf_id(zvrf), name, + adv_if_list_count(&zvrf->rtadv.adv_msec_if)); + } + + return adv_if; +} + +/* Clean adv_if list, called on vrf terminate */ +static void adv_if_clean(struct zebra_vrf *zvrf) +{ + if (IS_ZEBRA_DEBUG_EVENT) { + struct vrf *vrf = zvrf->vrf; + + zlog_debug("%s: %s:%u count: %zu -> 0", __func__, + VRF_LOGNAME(vrf), zvrf_id(zvrf), + adv_if_list_count(&zvrf->rtadv.adv_if)); + } + + adv_if_clean_internal(&zvrf->rtadv.adv_if); +} + +/* Clean adv_msec_if list, called on vrf terminate */ +static void adv_msec_if_clean(struct zebra_vrf *zvrf) +{ + if (IS_ZEBRA_DEBUG_EVENT) { + struct vrf *vrf = zvrf->vrf; + + zlog_debug("%s: %s:%u count: %zu -> 0", __func__, + VRF_LOGNAME(vrf), zvrf_id(zvrf), + adv_if_list_count(&zvrf->rtadv.adv_msec_if)); + } + + adv_if_clean_internal(&zvrf->rtadv.adv_msec_if); +} + +static struct rtadv_prefix *rtadv_prefix_new(void) +{ + return XCALLOC(MTYPE_RTADV_PREFIX, sizeof(struct rtadv_prefix)); +} + +static void rtadv_prefix_free(struct rtadv_prefix *rtadv_prefix) +{ + XFREE(MTYPE_RTADV_PREFIX, rtadv_prefix); +} + +static struct rtadv_prefix *rtadv_prefix_get(struct rtadv_prefixes_head *list, + struct prefix_ipv6 *p) +{ + struct rtadv_prefix *rprefix, ref; + + ref.prefix = *p; + + rprefix = rtadv_prefixes_find(list, &ref); + if (rprefix) + return rprefix; + + rprefix = rtadv_prefix_new(); + memcpy(&rprefix->prefix, p, sizeof(struct prefix_ipv6)); + rtadv_prefixes_add(list, rprefix); + + return rprefix; +} + +static void rtadv_prefix_set_defaults(struct rtadv_prefix *rp) +{ + rp->AdvAutonomousFlag = 1; + rp->AdvOnLinkFlag = 1; + rp->AdvRouterAddressFlag = 0; + 
rp->AdvPreferredLifetime = RTADV_PREFERRED_LIFETIME; + rp->AdvValidLifetime = RTADV_VALID_LIFETIME; +} + +static void rtadv_prefix_set(struct zebra_if *zif, struct rtadv_prefix *rp) +{ + struct rtadv_prefix *rprefix; + + rprefix = rtadv_prefix_get(zif->rtadv.prefixes, &rp->prefix); + + /* + * Set parameters based on where the prefix is created. + * If auto-created based on kernel address addition, set the + * default values. If created from a manual "ipv6 nd prefix" + * command, take the parameters from the manual command. Note + * that if the manual command exists, the default values will + * not overwrite the manual values. + */ + if (rp->AdvPrefixCreate == PREFIX_SRC_MANUAL) { + if (rprefix->AdvPrefixCreate == PREFIX_SRC_AUTO) + rprefix->AdvPrefixCreate = PREFIX_SRC_BOTH; + else + rprefix->AdvPrefixCreate = PREFIX_SRC_MANUAL; + + rprefix->AdvAutonomousFlag = rp->AdvAutonomousFlag; + rprefix->AdvOnLinkFlag = rp->AdvOnLinkFlag; + rprefix->AdvRouterAddressFlag = rp->AdvRouterAddressFlag; + rprefix->AdvPreferredLifetime = rp->AdvPreferredLifetime; + rprefix->AdvValidLifetime = rp->AdvValidLifetime; + } else if (rp->AdvPrefixCreate == PREFIX_SRC_AUTO) { + if (rprefix->AdvPrefixCreate == PREFIX_SRC_MANUAL) + rprefix->AdvPrefixCreate = PREFIX_SRC_BOTH; + else { + rprefix->AdvPrefixCreate = PREFIX_SRC_AUTO; + rtadv_prefix_set_defaults(rprefix); + } + } +} + +static int rtadv_prefix_reset(struct zebra_if *zif, struct rtadv_prefix *rp) +{ + struct rtadv_prefix *rprefix; + + rprefix = rtadv_prefixes_find(zif->rtadv.prefixes, rp); + if (rprefix != NULL) { + + /* + * When deleting an address from the list, need to take care + * it wasn't defined both automatically via kernel + * address addition as well as manually by vtysh cli. If both, + * we don't actually delete but may change the parameters + * back to default if a manually defined entry is deleted. 
+ */ + if (rp->AdvPrefixCreate == PREFIX_SRC_MANUAL) { + if (rprefix->AdvPrefixCreate == PREFIX_SRC_BOTH) { + rprefix->AdvPrefixCreate = PREFIX_SRC_AUTO; + rtadv_prefix_set_defaults(rprefix); + return 1; + } + } else if (rp->AdvPrefixCreate == PREFIX_SRC_AUTO) { + if (rprefix->AdvPrefixCreate == PREFIX_SRC_BOTH) { + rprefix->AdvPrefixCreate = PREFIX_SRC_MANUAL; + return 1; + } + } + + rtadv_prefixes_del(zif->rtadv.prefixes, rprefix); + rtadv_prefix_free(rprefix); + return 1; + } else + return 0; +} + +/* Add IPv6 prefixes learned from the kernel to the RA prefix list */ +void rtadv_add_prefix(struct zebra_if *zif, const struct prefix_ipv6 *p) +{ + struct rtadv_prefix rp; + + rp.prefix = *p; + apply_mask_ipv6(&rp.prefix); + rp.AdvPrefixCreate = PREFIX_SRC_AUTO; + rtadv_prefix_set(zif, &rp); +} + +/* Delete IPv6 prefixes removed by the kernel from the RA prefix list */ +void rtadv_delete_prefix(struct zebra_if *zif, const struct prefix *p) +{ + struct rtadv_prefix rp; + + rp.prefix = *((struct prefix_ipv6 *)p); + apply_mask_ipv6(&rp.prefix); + rp.AdvPrefixCreate = PREFIX_SRC_AUTO; + rtadv_prefix_reset(zif, &rp); +} + +static void rtadv_start_interface_events(struct zebra_vrf *zvrf, + struct zebra_if *zif) +{ + struct adv_if *adv_if = NULL; + + if (zif->ifp->ifindex == IFINDEX_INTERNAL) { + if (IS_ZEBRA_DEBUG_EVENT) + zlog_debug( + "%s(%s) has not configured an ifindex yet, delaying until we have one", + zif->ifp->name, zvrf->vrf->name); + return; + } + + adv_if = adv_if_add(zvrf, zif->ifp->name); + if (adv_if != NULL) + return; /* Already added */ + + if_join_all_router(zvrf->rtadv.sock, zif->ifp); + + if (adv_if_list_count(&zvrf->rtadv.adv_if) == 1) + rtadv_event(zvrf, RTADV_START, 0); +} + +static void ipv6_nd_suppress_ra_set(struct interface *ifp, + enum ipv6_nd_suppress_ra_status status) +{ + struct zebra_if *zif; + struct zebra_vrf *zvrf; + struct adv_if *adv_if = NULL; + + zif = ifp->info; + + zvrf = rtadv_interface_get_zvrf(ifp); + + if (status == RA_SUPPRESS) 
{ + /* RA is currently enabled */ + if (zif->rtadv.AdvSendAdvertisements) { + rtadv_send_packet(zvrf->rtadv.sock, ifp, RA_SUPPRESS); + zif->rtadv.AdvSendAdvertisements = 0; + zif->rtadv.AdvIntervalTimer = 0; + + adv_if = adv_if_del(zvrf, ifp->name); + if (adv_if == NULL) + return; /* Nothing to delete */ + + adv_if_free(adv_if); + + if_leave_all_router(zvrf->rtadv.sock, ifp); + + if (adv_if_list_count(&zvrf->rtadv.adv_if) == 0) + rtadv_event(zvrf, RTADV_STOP, 0); + } + } else { + if (!zif->rtadv.AdvSendAdvertisements) { + zif->rtadv.AdvSendAdvertisements = 1; + zif->rtadv.AdvIntervalTimer = 0; + if ((zif->rtadv.MaxRtrAdvInterval >= 1000) + && zif->rtadv.UseFastRexmit) { + /* + * Enable Fast RA only when RA interval is in + * secs and Fast RA retransmit is enabled + */ + zif->rtadv.inFastRexmit = 1; + zif->rtadv.NumFastReXmitsRemain = + RTADV_NUM_FAST_REXMITS; + } + + rtadv_start_interface_events(zvrf, zif); + } + } +} + +/* + * Handle client (BGP) message to enable or disable IPv6 RA on an interface. + * Note that while the client could request RA on an interface on which the + * operator has not enabled RA, RA won't be disabled upon client request + * if the operator has explicitly enabled RA. The enable request can also + * specify a RA interval (in seconds). + */ +static void zebra_interface_radv_set(ZAPI_HANDLER_ARGS, int enable) +{ + struct stream *s; + ifindex_t ifindex; + struct interface *ifp; + struct zebra_if *zif; + uint32_t ra_interval; + + s = msg; + + /* Get interface index and RA interval. */ + STREAM_GETL(s, ifindex); + STREAM_GETL(s, ra_interval); + + if (IS_ZEBRA_DEBUG_EVENT) { + struct vrf *vrf = zvrf->vrf; + + zlog_debug("%s:%u: IF %u RA %s from client %s, interval %ums", + VRF_LOGNAME(vrf), zvrf_id(zvrf), ifindex, + enable ? "enable" : "disable", + zebra_route_string(client->proto), ra_interval); + } + + /* Locate interface and check VRF match. 
*/ + ifp = if_lookup_by_index(ifindex, zvrf->vrf->vrf_id); + if (!ifp) { + struct vrf *vrf = zvrf->vrf; + + flog_warn(EC_ZEBRA_UNKNOWN_INTERFACE, + "%s:%u: IF %u RA %s client %s - interface unknown", + VRF_LOGNAME(vrf), zvrf_id(zvrf), ifindex, + enable ? "enable" : "disable", + zebra_route_string(client->proto)); + return; + } + if (vrf_is_backend_netns() && ifp->vrf->vrf_id != zvrf_id(zvrf)) { + zlog_debug( + "%s:%u: IF %u RA %s client %s - VRF mismatch, IF VRF %u", + ifp->vrf->name, zvrf_id(zvrf), ifindex, + enable ? "enable" : "disable", + zebra_route_string(client->proto), ifp->vrf->vrf_id); + return; + } + + zif = ifp->info; + if (enable) { + if (!CHECK_FLAG(zif->rtadv.ra_configured, BGP_RA_CONFIGURED)) + interfaces_configured_for_ra_from_bgp++; + + SET_FLAG(zif->rtadv.ra_configured, BGP_RA_CONFIGURED); + ipv6_nd_suppress_ra_set(ifp, RA_ENABLE); + if (ra_interval + && (ra_interval * 1000) < (unsigned int) zif->rtadv.MaxRtrAdvInterval + && !CHECK_FLAG(zif->rtadv.ra_configured, + VTY_RA_INTERVAL_CONFIGURED)) + zif->rtadv.MaxRtrAdvInterval = ra_interval * 1000; + } else { + if (CHECK_FLAG(zif->rtadv.ra_configured, BGP_RA_CONFIGURED)) + interfaces_configured_for_ra_from_bgp--; + + UNSET_FLAG(zif->rtadv.ra_configured, BGP_RA_CONFIGURED); + if (!CHECK_FLAG(zif->rtadv.ra_configured, + VTY_RA_INTERVAL_CONFIGURED)) + zif->rtadv.MaxRtrAdvInterval = + RTADV_MAX_RTR_ADV_INTERVAL; + if (!CHECK_FLAG(zif->rtadv.ra_configured, VTY_RA_CONFIGURED)) + ipv6_nd_suppress_ra_set(ifp, RA_SUPPRESS); + } +stream_failure: + return; +} + +/* + * send router lifetime value of zero in RAs on this interface since we're + * ceasing to advertise and want to let our neighbors know. 
+ * RFC 4861 secion 6.2.5 + */ +void rtadv_stop_ra(struct interface *ifp) +{ + struct zebra_if *zif; + struct zebra_vrf *zvrf; + + zif = ifp->info; + zvrf = rtadv_interface_get_zvrf(ifp); + + if (zif->rtadv.AdvSendAdvertisements) + rtadv_send_packet(zvrf->rtadv.sock, ifp, RA_SUPPRESS); +} + +/* + * Send router lifetime value of zero in RAs on all interfaces since we're + * ceasing to advertise globally and want to let all of our neighbors know + * RFC 4861 secion 6.2.5 + * + * Delete all ipv6 global prefixes added to the router advertisement prefix + * lists prior to ceasing. + */ +void rtadv_stop_ra_all(void) +{ + struct vrf *vrf; + struct interface *ifp; + struct zebra_if *zif; + struct rtadv_prefix *rprefix; + + RB_FOREACH (vrf, vrf_name_head, &vrfs_by_name) + FOR_ALL_INTERFACES (vrf, ifp) { + zif = ifp->info; + + frr_each_safe (rtadv_prefixes, zif->rtadv.prefixes, + rprefix) + rtadv_prefix_reset(zif, rprefix); + + rtadv_stop_ra(ifp); + } +} + +void zebra_interface_radv_disable(ZAPI_HANDLER_ARGS) +{ + zebra_interface_radv_set(client, hdr, msg, zvrf, 0); +} +void zebra_interface_radv_enable(ZAPI_HANDLER_ARGS) +{ + zebra_interface_radv_set(client, hdr, msg, zvrf, 1); +} + +static void show_zvrf_rtadv_adv_if_helper(struct vty *vty, + struct adv_if_list_head *adv_if_head) +{ + struct adv_if *node = NULL; + + if (!adv_if_is_empty_internal(adv_if_head)) { + frr_each (adv_if_list, adv_if_head, node) { + vty_out(vty, " %s\n", node->name); + } + } + + vty_out(vty, "\n"); +} + +static void show_zvrf_rtadv_helper(struct vty *vty, struct zebra_vrf *zvrf) +{ + vty_out(vty, "VRF: %s\n", zvrf_name(zvrf)); + vty_out(vty, " Interfaces:\n"); + show_zvrf_rtadv_adv_if_helper(vty, &zvrf->rtadv.adv_if); + + vty_out(vty, " Interfaces(msec):\n"); + show_zvrf_rtadv_adv_if_helper(vty, &zvrf->rtadv.adv_msec_if); +} + +DEFPY(show_ipv6_nd_ra_if, show_ipv6_nd_ra_if_cmd, + "show ipv6 nd ra-interfaces [vrf]", + SHOW_STR IP6_STR + "Neighbor discovery\n" + "Route Advertisement Interfaces\n" 
VRF_FULL_CMD_HELP_STR) +{ + struct zebra_vrf *zvrf = NULL; + + if (!vrf_is_backend_netns() && (vrf_name || vrf_all)) { + vty_out(vty, + "%% VRF subcommand only applicable for netns-based vrfs.\n"); + return CMD_WARNING; + } + + if (vrf_all) { + struct vrf *vrf; + + RB_FOREACH (vrf, vrf_name_head, &vrfs_by_name) { + struct zebra_vrf *zvrf; + + zvrf = vrf->info; + if (!zvrf) + continue; + + show_zvrf_rtadv_helper(vty, zvrf); + } + + return CMD_SUCCESS; + } + + if (vrf_name) + zvrf = zebra_vrf_lookup_by_name(vrf_name); + else + zvrf = zebra_vrf_lookup_by_name(VRF_DEFAULT_NAME); + + if (!zvrf) { + vty_out(vty, "%% VRF '%s' specified does not exist\n", + vrf_name); + return CMD_WARNING; + } + + show_zvrf_rtadv_helper(vty, zvrf); + + return CMD_SUCCESS; +} + +DEFUN (ipv6_nd_ra_fast_retrans, + ipv6_nd_ra_fast_retrans_cmd, + "ipv6 nd ra-fast-retrans", + "Interface IPv6 config commands\n" + "Neighbor discovery\n" + "Fast retransmit of RA packets\n") +{ + VTY_DECLVAR_CONTEXT(interface, ifp); + struct zebra_if *zif = ifp->info; + + if (if_is_loopback(ifp)) { + vty_out(vty, + "Cannot configure IPv6 Router Advertisements on this interface\n"); + return CMD_WARNING_CONFIG_FAILED; + } + + zif->rtadv.UseFastRexmit = true; + + return CMD_SUCCESS; +} + +DEFUN (no_ipv6_nd_ra_fast_retrans, + no_ipv6_nd_ra_fast_retrans_cmd, + "no ipv6 nd ra-fast-retrans", + NO_STR + "Interface IPv6 config commands\n" + "Neighbor discovery\n" + "Fast retransmit of RA packets\n") +{ + VTY_DECLVAR_CONTEXT(interface, ifp); + struct zebra_if *zif = ifp->info; + + if (if_is_loopback(ifp)) { + vty_out(vty, + "Cannot configure IPv6 Router Advertisements on this interface\n"); + return CMD_WARNING_CONFIG_FAILED; + } + + zif->rtadv.UseFastRexmit = false; + + return CMD_SUCCESS; +} + +DEFPY (ipv6_nd_ra_hop_limit, + ipv6_nd_ra_hop_limit_cmd, + "ipv6 nd ra-hop-limit (0-255)$hopcount", + "Interface IPv6 config commands\n" + "Neighbor discovery\n" + "Advertisement Hop Limit\n" + "Advertisement Hop Limit in hops 
(default:64)\n") +{ + VTY_DECLVAR_CONTEXT(interface, ifp); + struct zebra_if *zif = ifp->info; + + if (if_is_loopback(ifp)) { + vty_out(vty, + "Cannot configure IPv6 Router Advertisements on this interface\n"); + return CMD_WARNING_CONFIG_FAILED; + } + + zif->rtadv.AdvCurHopLimit = hopcount; + + return CMD_SUCCESS; +} + +DEFPY (no_ipv6_nd_ra_hop_limit, + no_ipv6_nd_ra_hop_limit_cmd, + "no ipv6 nd ra-hop-limit [(0-255)]", + NO_STR + "Interface IPv6 config commands\n" + "Neighbor discovery\n" + "Advertisement Hop Limit\n" + "Advertisement Hop Limit in hops\n") +{ + VTY_DECLVAR_CONTEXT(interface, ifp); + struct zebra_if *zif = ifp->info; + + if (if_is_loopback(ifp)) { + vty_out(vty, + "Cannot configure IPv6 Router Advertisements on this interface\n"); + return CMD_WARNING_CONFIG_FAILED; + } + + zif->rtadv.AdvCurHopLimit = RTADV_DEFAULT_HOPLIMIT; + + return CMD_SUCCESS; +} + +DEFPY (ipv6_nd_ra_retrans_interval, + ipv6_nd_ra_retrans_interval_cmd, + "ipv6 nd ra-retrans-interval (0-4294967295)$interval", + "Interface IPv6 config commands\n" + "Neighbor discovery\n" + "Advertisement Retransmit Interval\n" + "Advertisement Retransmit Interval in msec\n") +{ + VTY_DECLVAR_CONTEXT(interface, ifp); + struct zebra_if *zif = ifp->info; + + if (if_is_loopback(ifp)) { + vty_out(vty, + "Cannot configure IPv6 Router Advertisements on loopback interface\n"); + return CMD_WARNING_CONFIG_FAILED; + } + + zif->rtadv.AdvRetransTimer = interval; + + return CMD_SUCCESS; +} + +DEFPY (no_ipv6_nd_ra_retrans_interval, + no_ipv6_nd_ra_retrans_interval_cmd, + "no ipv6 nd ra-retrans-interval [(0-4294967295)]", + NO_STR + "Interface IPv6 config commands\n" + "Neighbor discovery\n" + "Advertisement Retransmit Interval\n" + "Advertisement Retransmit Interval in msec\n") +{ + VTY_DECLVAR_CONTEXT(interface, ifp); + struct zebra_if *zif = ifp->info; + + if (if_is_loopback(ifp)) { + vty_out(vty, + "Cannot remove IPv6 Router Advertisements on loopback interface\n"); + return CMD_WARNING_CONFIG_FAILED; + } 
+ + zif->rtadv.AdvRetransTimer = 0; + + return CMD_SUCCESS; +} + +DEFUN (ipv6_nd_suppress_ra, + ipv6_nd_suppress_ra_cmd, + "ipv6 nd suppress-ra", + "Interface IPv6 config commands\n" + "Neighbor discovery\n" + "Suppress Router Advertisement\n") +{ + VTY_DECLVAR_CONTEXT(interface, ifp); + struct zebra_if *zif = ifp->info; + + if (if_is_loopback(ifp)) { + vty_out(vty, + "Cannot configure IPv6 Router Advertisements on this interface\n"); + return CMD_WARNING_CONFIG_FAILED; + } + + if (!CHECK_FLAG(zif->rtadv.ra_configured, BGP_RA_CONFIGURED)) + ipv6_nd_suppress_ra_set(ifp, RA_SUPPRESS); + + UNSET_FLAG(zif->rtadv.ra_configured, VTY_RA_CONFIGURED); + return CMD_SUCCESS; +} + +DEFUN (no_ipv6_nd_suppress_ra, + no_ipv6_nd_suppress_ra_cmd, + "no ipv6 nd suppress-ra", + NO_STR + "Interface IPv6 config commands\n" + "Neighbor discovery\n" + "Suppress Router Advertisement\n") +{ + VTY_DECLVAR_CONTEXT(interface, ifp); + struct zebra_if *zif = ifp->info; + + if (if_is_loopback(ifp)) { + vty_out(vty, + "Cannot configure IPv6 Router Advertisements on this interface\n"); + return CMD_WARNING_CONFIG_FAILED; + } + + ipv6_nd_suppress_ra_set(ifp, RA_ENABLE); + SET_FLAG(zif->rtadv.ra_configured, VTY_RA_CONFIGURED); + return CMD_SUCCESS; +} + +DEFUN (ipv6_nd_ra_interval_msec, + ipv6_nd_ra_interval_msec_cmd, + "ipv6 nd ra-interval msec (70-1800000)", + "Interface IPv6 config commands\n" + "Neighbor discovery\n" + "Router Advertisement interval\n" + "Router Advertisement interval in milliseconds\n" + "Router Advertisement interval in milliseconds\n") +{ + int idx_number = 4; + VTY_DECLVAR_CONTEXT(interface, ifp); + unsigned interval; + struct zebra_if *zif = ifp->info; + struct zebra_vrf *zvrf; + struct adv_if *adv_if; + + zvrf = rtadv_interface_get_zvrf(ifp); + + interval = strtoul(argv[idx_number]->arg, NULL, 10); + if ((zif->rtadv.AdvDefaultLifetime != -1 + && interval > (unsigned)zif->rtadv.AdvDefaultLifetime * 1000)) { + vty_out(vty, + "This ra-interval would conflict with configured 
ra-lifetime!\n"); + return CMD_WARNING_CONFIG_FAILED; + } + + if (zif->rtadv.MaxRtrAdvInterval % 1000) { + adv_if = adv_msec_if_del(zvrf, ifp->name); + if (adv_if != NULL) + adv_if_free(adv_if); + } + + if (interval % 1000) + (void)adv_msec_if_add(zvrf, ifp->name); + + SET_FLAG(zif->rtadv.ra_configured, VTY_RA_INTERVAL_CONFIGURED); + zif->rtadv.MaxRtrAdvInterval = interval; + zif->rtadv.MinRtrAdvInterval = 0.33 * interval; + zif->rtadv.AdvIntervalTimer = 0; + + return CMD_SUCCESS; +} + +DEFUN (ipv6_nd_ra_interval, + ipv6_nd_ra_interval_cmd, + "ipv6 nd ra-interval (1-1800)", + "Interface IPv6 config commands\n" + "Neighbor discovery\n" + "Router Advertisement interval\n" + "Router Advertisement interval in seconds\n") +{ + int idx_number = 3; + VTY_DECLVAR_CONTEXT(interface, ifp); + unsigned interval; + struct zebra_if *zif = ifp->info; + struct zebra_vrf *zvrf; + struct adv_if *adv_if; + + zvrf = rtadv_interface_get_zvrf(ifp); + + interval = strtoul(argv[idx_number]->arg, NULL, 10); + if ((zif->rtadv.AdvDefaultLifetime != -1 + && interval > (unsigned)zif->rtadv.AdvDefaultLifetime)) { + vty_out(vty, + "This ra-interval would conflict with configured ra-lifetime!\n"); + return CMD_WARNING_CONFIG_FAILED; + } + + if (zif->rtadv.MaxRtrAdvInterval % 1000) { + adv_if = adv_msec_if_del(zvrf, ifp->name); + if (adv_if != NULL) + adv_if_free(adv_if); + } + + /* convert to milliseconds */ + interval = interval * 1000; + + SET_FLAG(zif->rtadv.ra_configured, VTY_RA_INTERVAL_CONFIGURED); + zif->rtadv.MaxRtrAdvInterval = interval; + zif->rtadv.MinRtrAdvInterval = 0.33 * interval; + zif->rtadv.AdvIntervalTimer = 0; + + return CMD_SUCCESS; +} + +DEFUN (no_ipv6_nd_ra_interval, + no_ipv6_nd_ra_interval_cmd, + "no ipv6 nd ra-interval [<(1-1800)|msec (1-1800000)>]", + NO_STR + "Interface IPv6 config commands\n" + "Neighbor discovery\n" + "Router Advertisement interval\n" + "Router Advertisement interval in seconds\n" + "Specify millisecond router advertisement interval\n" + "Router 
Advertisement interval in milliseconds\n") +{ + VTY_DECLVAR_CONTEXT(interface, ifp); + struct zebra_if *zif = ifp->info; + struct zebra_vrf *zvrf = NULL; + struct adv_if *adv_if; + + zvrf = rtadv_interface_get_zvrf(ifp); + + if (zif->rtadv.MaxRtrAdvInterval % 1000) { + adv_if = adv_msec_if_del(zvrf, ifp->name); + if (adv_if != NULL) + adv_if_free(adv_if); + } + + UNSET_FLAG(zif->rtadv.ra_configured, VTY_RA_INTERVAL_CONFIGURED); + + if (CHECK_FLAG(zif->rtadv.ra_configured, BGP_RA_CONFIGURED)) + zif->rtadv.MaxRtrAdvInterval = 10000; + else + zif->rtadv.MaxRtrAdvInterval = RTADV_MAX_RTR_ADV_INTERVAL; + + zif->rtadv.AdvIntervalTimer = zif->rtadv.MaxRtrAdvInterval; + zif->rtadv.MinRtrAdvInterval = RTADV_MIN_RTR_ADV_INTERVAL; + + return CMD_SUCCESS; +} + +DEFUN (ipv6_nd_ra_lifetime, + ipv6_nd_ra_lifetime_cmd, + "ipv6 nd ra-lifetime (0-9000)", + "Interface IPv6 config commands\n" + "Neighbor discovery\n" + "Router lifetime\n" + "Router lifetime in seconds (0 stands for a non-default gw)\n") +{ + int idx_number = 3; + VTY_DECLVAR_CONTEXT(interface, ifp); + struct zebra_if *zif = ifp->info; + int lifetime; + + lifetime = strtoul(argv[idx_number]->arg, NULL, 10); + + /* The value to be placed in the Router Lifetime field + * of Router Advertisements sent from the interface, + * in seconds. MUST be either zero or between + * MaxRtrAdvInterval and 9000 seconds. 
-- RFC4861, 6.2.1 */ + if ((lifetime != 0 && lifetime * 1000 < zif->rtadv.MaxRtrAdvInterval)) { + vty_out(vty, + "This ra-lifetime would conflict with configured ra-interval\n"); + return CMD_WARNING_CONFIG_FAILED; + } + + zif->rtadv.AdvDefaultLifetime = lifetime; + + return CMD_SUCCESS; +} + +DEFUN (no_ipv6_nd_ra_lifetime, + no_ipv6_nd_ra_lifetime_cmd, + "no ipv6 nd ra-lifetime [(0-9000)]", + NO_STR + "Interface IPv6 config commands\n" + "Neighbor discovery\n" + "Router lifetime\n" + "Router lifetime in seconds (0 stands for a non-default gw)\n") +{ + VTY_DECLVAR_CONTEXT(interface, ifp); + struct zebra_if *zif = ifp->info; + + zif->rtadv.AdvDefaultLifetime = -1; + + return CMD_SUCCESS; +} + +DEFUN (ipv6_nd_reachable_time, + ipv6_nd_reachable_time_cmd, + "ipv6 nd reachable-time (1-3600000)", + "Interface IPv6 config commands\n" + "Neighbor discovery\n" + "Reachable time\n" + "Reachable time in milliseconds\n") +{ + int idx_number = 3; + VTY_DECLVAR_CONTEXT(interface, ifp); + struct zebra_if *zif = ifp->info; + zif->rtadv.AdvReachableTime = strtoul(argv[idx_number]->arg, NULL, 10); + return CMD_SUCCESS; +} + +DEFUN (no_ipv6_nd_reachable_time, + no_ipv6_nd_reachable_time_cmd, + "no ipv6 nd reachable-time [(1-3600000)]", + NO_STR + "Interface IPv6 config commands\n" + "Neighbor discovery\n" + "Reachable time\n" + "Reachable time in milliseconds\n") +{ + VTY_DECLVAR_CONTEXT(interface, ifp); + struct zebra_if *zif = ifp->info; + + zif->rtadv.AdvReachableTime = 0; + + return CMD_SUCCESS; +} + +DEFUN (ipv6_nd_homeagent_preference, + ipv6_nd_homeagent_preference_cmd, + "ipv6 nd home-agent-preference (0-65535)", + "Interface IPv6 config commands\n" + "Neighbor discovery\n" + "Home Agent preference\n" + "preference value (default is 0, least preferred)\n") +{ + int idx_number = 3; + VTY_DECLVAR_CONTEXT(interface, ifp); + struct zebra_if *zif = ifp->info; + zif->rtadv.HomeAgentPreference = + strtoul(argv[idx_number]->arg, NULL, 10); + return CMD_SUCCESS; +} + +DEFUN 
(no_ipv6_nd_homeagent_preference, + no_ipv6_nd_homeagent_preference_cmd, + "no ipv6 nd home-agent-preference [(0-65535)]", + NO_STR + "Interface IPv6 config commands\n" + "Neighbor discovery\n" + "Home Agent preference\n" + "preference value (default is 0, least preferred)\n") +{ + VTY_DECLVAR_CONTEXT(interface, ifp); + struct zebra_if *zif = ifp->info; + + zif->rtadv.HomeAgentPreference = 0; + + return CMD_SUCCESS; +} + +DEFUN (ipv6_nd_homeagent_lifetime, + ipv6_nd_homeagent_lifetime_cmd, + "ipv6 nd home-agent-lifetime (0-65520)", + "Interface IPv6 config commands\n" + "Neighbor discovery\n" + "Home Agent lifetime\n" + "Home Agent lifetime in seconds (0 to track ra-lifetime)\n") +{ + int idx_number = 3; + VTY_DECLVAR_CONTEXT(interface, ifp); + struct zebra_if *zif = ifp->info; + zif->rtadv.HomeAgentLifetime = strtoul(argv[idx_number]->arg, NULL, 10); + return CMD_SUCCESS; +} + +DEFUN (no_ipv6_nd_homeagent_lifetime, + no_ipv6_nd_homeagent_lifetime_cmd, + "no ipv6 nd home-agent-lifetime [(0-65520)]", + NO_STR + "Interface IPv6 config commands\n" + "Neighbor discovery\n" + "Home Agent lifetime\n" + "Home Agent lifetime in seconds (0 to track ra-lifetime)\n") +{ + VTY_DECLVAR_CONTEXT(interface, ifp); + struct zebra_if *zif = ifp->info; + + zif->rtadv.HomeAgentLifetime = -1; + + return CMD_SUCCESS; +} + +DEFUN (ipv6_nd_managed_config_flag, + ipv6_nd_managed_config_flag_cmd, + "ipv6 nd managed-config-flag", + "Interface IPv6 config commands\n" + "Neighbor discovery\n" + "Managed address configuration flag\n") +{ + VTY_DECLVAR_CONTEXT(interface, ifp); + struct zebra_if *zif = ifp->info; + + zif->rtadv.AdvManagedFlag = 1; + + return CMD_SUCCESS; +} + +DEFUN (no_ipv6_nd_managed_config_flag, + no_ipv6_nd_managed_config_flag_cmd, + "no ipv6 nd managed-config-flag", + NO_STR + "Interface IPv6 config commands\n" + "Neighbor discovery\n" + "Managed address configuration flag\n") +{ + VTY_DECLVAR_CONTEXT(interface, ifp); + struct zebra_if *zif = ifp->info; + + 
zif->rtadv.AdvManagedFlag = 0; + + return CMD_SUCCESS; +} + +DEFUN (ipv6_nd_homeagent_config_flag, + ipv6_nd_homeagent_config_flag_cmd, + "ipv6 nd home-agent-config-flag", + "Interface IPv6 config commands\n" + "Neighbor discovery\n" + "Home Agent configuration flag\n") +{ + VTY_DECLVAR_CONTEXT(interface, ifp); + struct zebra_if *zif = ifp->info; + + zif->rtadv.AdvHomeAgentFlag = 1; + + return CMD_SUCCESS; +} + +DEFUN (no_ipv6_nd_homeagent_config_flag, + no_ipv6_nd_homeagent_config_flag_cmd, + "no ipv6 nd home-agent-config-flag", + NO_STR + "Interface IPv6 config commands\n" + "Neighbor discovery\n" + "Home Agent configuration flag\n") +{ + VTY_DECLVAR_CONTEXT(interface, ifp); + struct zebra_if *zif = ifp->info; + + zif->rtadv.AdvHomeAgentFlag = 0; + + return CMD_SUCCESS; +} + +DEFUN (ipv6_nd_adv_interval_config_option, + ipv6_nd_adv_interval_config_option_cmd, + "ipv6 nd adv-interval-option", + "Interface IPv6 config commands\n" + "Neighbor discovery\n" + "Advertisement Interval Option\n") +{ + VTY_DECLVAR_CONTEXT(interface, ifp); + struct zebra_if *zif = ifp->info; + + zif->rtadv.AdvIntervalOption = 1; + + return CMD_SUCCESS; +} + +DEFUN (no_ipv6_nd_adv_interval_config_option, + no_ipv6_nd_adv_interval_config_option_cmd, + "no ipv6 nd adv-interval-option", + NO_STR + "Interface IPv6 config commands\n" + "Neighbor discovery\n" + "Advertisement Interval Option\n") +{ + VTY_DECLVAR_CONTEXT(interface, ifp); + struct zebra_if *zif = ifp->info; + + zif->rtadv.AdvIntervalOption = 0; + + return CMD_SUCCESS; +} + +DEFUN (ipv6_nd_other_config_flag, + ipv6_nd_other_config_flag_cmd, + "ipv6 nd other-config-flag", + "Interface IPv6 config commands\n" + "Neighbor discovery\n" + "Other statefull configuration flag\n") +{ + VTY_DECLVAR_CONTEXT(interface, ifp); + struct zebra_if *zif = ifp->info; + + zif->rtadv.AdvOtherConfigFlag = 1; + + return CMD_SUCCESS; +} + +DEFUN (no_ipv6_nd_other_config_flag, + no_ipv6_nd_other_config_flag_cmd, + "no ipv6 nd other-config-flag", + NO_STR + 
"Interface IPv6 config commands\n" + "Neighbor discovery\n" + "Other statefull configuration flag\n") +{ + VTY_DECLVAR_CONTEXT(interface, ifp); + struct zebra_if *zif = ifp->info; + + zif->rtadv.AdvOtherConfigFlag = 0; + + return CMD_SUCCESS; +} + +DEFUN (ipv6_nd_prefix, + ipv6_nd_prefix_cmd, + "ipv6 nd prefix X:X::X:X/M [<(0-4294967295)|infinite> <(0-4294967295)|infinite>] []", + "Interface IPv6 config commands\n" + "Neighbor discovery\n" + "Prefix information\n" + "IPv6 prefix\n" + "Valid lifetime in seconds\n" + "Infinite valid lifetime\n" + "Preferred lifetime in seconds\n" + "Infinite preferred lifetime\n" + "Set Router Address flag\n" + "Do not use prefix for onlink determination\n" + "Do not use prefix for autoconfiguration\n" + "Do not use prefix for autoconfiguration\n" + "Do not use prefix for onlink determination\n") +{ + /* prelude */ + char *prefix = argv[3]->arg; + int lifetimes = (argc > 4) && (argv[4]->type == RANGE_TKN + || strmatch(argv[4]->text, "infinite")); + int routeropts = lifetimes ? argc > 6 : argc > 4; + + int idx_routeropts = routeropts ? (lifetimes ? 6 : 4) : 0; + + char *lifetime = NULL, *preflifetime = NULL; + int routeraddr = 0, offlink = 0, noautoconf = 0; + if (lifetimes) { + lifetime = argv[4]->type == RANGE_TKN ? argv[4]->arg + : argv[4]->text; + preflifetime = argv[5]->type == RANGE_TKN ? 
argv[5]->arg + : argv[5]->text; + } + if (routeropts) { + routeraddr = + strmatch(argv[idx_routeropts]->text, "router-address"); + if (!routeraddr) { + offlink = (argc > idx_routeropts + 1 + || strmatch(argv[idx_routeropts]->text, + "off-link")); + noautoconf = (argc > idx_routeropts + 1 + || strmatch(argv[idx_routeropts]->text, + "no-autoconfig")); + } + } + + /* business */ + VTY_DECLVAR_CONTEXT(interface, ifp); + struct zebra_if *zebra_if = ifp->info; + int ret; + struct rtadv_prefix rp; + + ret = str2prefix_ipv6(prefix, &rp.prefix); + if (!ret) { + vty_out(vty, "Malformed IPv6 prefix\n"); + return CMD_WARNING_CONFIG_FAILED; + } + apply_mask_ipv6(&rp.prefix); /* RFC4861 4.6.2 */ + rp.AdvOnLinkFlag = !offlink; + rp.AdvAutonomousFlag = !noautoconf; + rp.AdvRouterAddressFlag = routeraddr; + rp.AdvValidLifetime = RTADV_VALID_LIFETIME; + rp.AdvPreferredLifetime = RTADV_PREFERRED_LIFETIME; + rp.AdvPrefixCreate = PREFIX_SRC_MANUAL; + + if (lifetimes) { + rp.AdvValidLifetime = strmatch(lifetime, "infinite") + ? UINT32_MAX + : strtoll(lifetime, NULL, 10); + rp.AdvPreferredLifetime = + strmatch(preflifetime, "infinite") + ? 
UINT32_MAX + : strtoll(preflifetime, NULL, 10); + if (rp.AdvPreferredLifetime > rp.AdvValidLifetime) { + vty_out(vty, "Invalid preferred lifetime\n"); + return CMD_WARNING_CONFIG_FAILED; + } + } + + rtadv_prefix_set(zebra_if, &rp); + + return CMD_SUCCESS; +} + +DEFUN (no_ipv6_nd_prefix, + no_ipv6_nd_prefix_cmd, + "no ipv6 nd prefix X:X::X:X/M [<(0-4294967295)|infinite> <(0-4294967295)|infinite>] []", + NO_STR + "Interface IPv6 config commands\n" + "Neighbor discovery\n" + "Prefix information\n" + "IPv6 prefix\n" + "Valid lifetime in seconds\n" + "Infinite valid lifetime\n" + "Preferred lifetime in seconds\n" + "Infinite preferred lifetime\n" + "Set Router Address flag\n" + "Do not use prefix for onlink determination\n" + "Do not use prefix for autoconfiguration\n" + "Do not use prefix for autoconfiguration\n" + "Do not use prefix for onlink determination\n") +{ + VTY_DECLVAR_CONTEXT(interface, ifp); + struct zebra_if *zebra_if = ifp->info; + int ret; + struct rtadv_prefix rp; + char *prefix = argv[4]->arg; + + ret = str2prefix_ipv6(prefix, &rp.prefix); + if (!ret) { + vty_out(vty, "Malformed IPv6 prefix\n"); + return CMD_WARNING_CONFIG_FAILED; + } + apply_mask_ipv6(&rp.prefix); /* RFC4861 4.6.2 */ + rp.AdvPrefixCreate = PREFIX_SRC_MANUAL; + + ret = rtadv_prefix_reset(zebra_if, &rp); + if (!ret) { + vty_out(vty, "Non-existant IPv6 prefix\n"); + return CMD_WARNING_CONFIG_FAILED; + } + + return CMD_SUCCESS; +} + +DEFUN (ipv6_nd_router_preference, + ipv6_nd_router_preference_cmd, + "ipv6 nd router-preference ", + "Interface IPv6 config commands\n" + "Neighbor discovery\n" + "Default router preference\n" + "High default router preference\n" + "Medium default router preference (default)\n" + "Low default router preference\n") +{ + int idx_high_medium_low = 3; + VTY_DECLVAR_CONTEXT(interface, ifp); + struct zebra_if *zif = ifp->info; + int i = 0; + + while (0 != rtadv_pref_strs[i]) { + if (strncmp(argv[idx_high_medium_low]->arg, rtadv_pref_strs[i], + 1) + == 0) { + 
zif->rtadv.DefaultPreference = i; + return CMD_SUCCESS; + } + i++; + } + + return CMD_ERR_NO_MATCH; +} + +DEFUN (no_ipv6_nd_router_preference, + no_ipv6_nd_router_preference_cmd, + "no ipv6 nd router-preference []", + NO_STR + "Interface IPv6 config commands\n" + "Neighbor discovery\n" + "Default router preference\n" + "High default router preference\n" + "Medium default router preference (default)\n" + "Low default router preference\n") +{ + VTY_DECLVAR_CONTEXT(interface, ifp); + struct zebra_if *zif = ifp->info; + + zif->rtadv.DefaultPreference = + RTADV_PREF_MEDIUM; /* Default per RFC4191. */ + + return CMD_SUCCESS; +} + +DEFUN (ipv6_nd_mtu, + ipv6_nd_mtu_cmd, + "ipv6 nd mtu (1-65535)", + "Interface IPv6 config commands\n" + "Neighbor discovery\n" + "Advertised MTU\n" + "MTU in bytes\n") +{ + int idx_number = 3; + VTY_DECLVAR_CONTEXT(interface, ifp); + struct zebra_if *zif = ifp->info; + zif->rtadv.AdvLinkMTU = strtoul(argv[idx_number]->arg, NULL, 10); + return CMD_SUCCESS; +} + +DEFUN (no_ipv6_nd_mtu, + no_ipv6_nd_mtu_cmd, + "no ipv6 nd mtu [(1-65535)]", + NO_STR + "Interface IPv6 config commands\n" + "Neighbor discovery\n" + "Advertised MTU\n" + "MTU in bytes\n") +{ + VTY_DECLVAR_CONTEXT(interface, ifp); + struct zebra_if *zif = ifp->info; + zif->rtadv.AdvLinkMTU = 0; + return CMD_SUCCESS; +} + +static struct rtadv_rdnss *rtadv_rdnss_new(void) +{ + return XCALLOC(MTYPE_RTADV_RDNSS, sizeof(struct rtadv_rdnss)); +} + +static void rtadv_rdnss_free(struct rtadv_rdnss *rdnss) +{ + XFREE(MTYPE_RTADV_RDNSS, rdnss); +} + +static struct rtadv_rdnss *rtadv_rdnss_lookup(struct list *list, + struct rtadv_rdnss *rdnss) +{ + struct listnode *node; + struct rtadv_rdnss *p; + + for (ALL_LIST_ELEMENTS_RO(list, node, p)) + if (IPV6_ADDR_SAME(&p->addr, &rdnss->addr)) + return p; + return NULL; +} + +static struct rtadv_rdnss *rtadv_rdnss_get(struct list *list, + struct rtadv_rdnss *rdnss) +{ + struct rtadv_rdnss *p; + + p = rtadv_rdnss_lookup(list, rdnss); + if (p) + return p; + 
+ /* No entry for this address yet: append a copy of the caller's entry. */ + p = rtadv_rdnss_new(); + memcpy(p, rdnss, sizeof(struct rtadv_rdnss)); + listnode_add(list, p); + + return p; +} +/* + * Create or update the entry for rdnss in the interface's AdvRDNSSList; + * lifetime and lifetime_set are taken from rdnss. + */ +static void rtadv_rdnss_set(struct zebra_if *zif, struct rtadv_rdnss *rdnss) +{ + struct rtadv_rdnss *p; + + p = rtadv_rdnss_get(zif->rtadv.AdvRDNSSList, rdnss); + p->lifetime = rdnss->lifetime; + p->lifetime_set = rdnss->lifetime_set; +} +/* + * Remove and free the AdvRDNSSList entry found by rtadv_rdnss_lookup(). + * Returns 1 if an entry was removed, 0 if none was found. + */ +static int rtadv_rdnss_reset(struct zebra_if *zif, struct rtadv_rdnss *rdnss) +{ + struct rtadv_rdnss *p; + + p = rtadv_rdnss_lookup(zif->rtadv.AdvRDNSSList, rdnss); + if (p) { + listnode_delete(zif->rtadv.AdvRDNSSList, p); + rtadv_rdnss_free(p); + return 1; + } + + return 0; +} +/* Allocate a DNSSL entry with XCALLOC under MTYPE_RTADV_DNSSL. */ +static struct rtadv_dnssl *rtadv_dnssl_new(void) +{ + return XCALLOC(MTYPE_RTADV_DNSSL, sizeof(struct rtadv_dnssl)); +} +/* Release a DNSSL entry allocated under MTYPE_RTADV_DNSSL. */ +static void rtadv_dnssl_free(struct rtadv_dnssl *dnssl) +{ + XFREE(MTYPE_RTADV_DNSSL, dnssl); +} +/* + * Find the list entry whose name equals dnssl->name, compared + * case-insensitively with strcasecmp(). Returns NULL when there is none. + */ +static struct rtadv_dnssl *rtadv_dnssl_lookup(struct list *list, + struct rtadv_dnssl *dnssl) +{ + struct listnode *node; + struct rtadv_dnssl *p; + + for (ALL_LIST_ELEMENTS_RO(list, node, p)) + if (!strcasecmp(p->name, dnssl->name)) + return p; + return NULL; +} +/* + * Return the existing entry for dnssl->name, or append a newly allocated + * copy of dnssl to list and return that copy. + */ +static struct rtadv_dnssl *rtadv_dnssl_get(struct list *list, + struct rtadv_dnssl *dnssl) +{ + struct rtadv_dnssl *p; + + p = rtadv_dnssl_lookup(list, dnssl); + if (p) + return p; + + p = rtadv_dnssl_new(); + memcpy(p, dnssl, sizeof(struct rtadv_dnssl)); + listnode_add(list, p); + + return p; +} +/* + * Create the entry for dnssl->name in the interface's AdvDNSSLList if it is + * missing, then overwrite it with the full contents of dnssl. + */ +static void rtadv_dnssl_set(struct zebra_if *zif, struct rtadv_dnssl *dnssl) +{ + struct rtadv_dnssl *p; + + p = rtadv_dnssl_get(zif->rtadv.AdvDNSSLList, dnssl); + memcpy(p, dnssl, sizeof(struct rtadv_dnssl)); +} +/* + * Remove and free the AdvDNSSLList entry found by rtadv_dnssl_lookup(). + * Returns 1 if an entry was removed, 0 if none was found. + */ +static int rtadv_dnssl_reset(struct zebra_if *zif, struct rtadv_dnssl *dnssl) +{ + struct rtadv_dnssl *p; + + p = rtadv_dnssl_lookup(zif->rtadv.AdvDNSSLList, dnssl); + if (p) { + listnode_delete(zif->rtadv.AdvDNSSLList, p); + rtadv_dnssl_free(p); + return 1; + } + + return 0; +} + +/* + * Convert dotted domain name (with or without trailing root 
zone dot) to + * sequence of length-prefixed labels, as described in [RFC1035 3.1]. Write up + * to strlen(in) + 2 octets to out. + * + * Returns the number of octets written to out or -1 if in does not constitute + * a valid domain name: a label longer than 63 octets, or an empty label + * (leading dot or two consecutive dots). An empty label would be written + * as a zero length octet, which terminates the name on the wire and makes + * the labels after it read as a separate name. + */ +static int rtadv_dnssl_encode(uint8_t *out, const char *in) +{ + const char *label_start, *label_end; + size_t outp; + + outp = 0; + label_start = in; + + while (*label_start) { + size_t label_len; + + label_end = strchr(label_start, '.'); + if (label_end == NULL) + label_end = label_start + strlen(label_start); + + label_len = label_end - label_start; + if (label_len == 0) + return -1; /* empty label: leading or doubled dot */ + if (label_len >= 64) + return -1; /* labels must be 63 octets or less */ + + out[outp++] = (uint8_t)label_len; + memcpy(out + outp, label_start, label_len); + outp += label_len; + label_start += label_len; + /* Skip the separator; a single trailing dot simply ends the loop. */ + if (*label_start == '.') + label_start++; + } + + /* The zero-length root label terminates the encoded name. */ + out[outp++] = '\0'; + return outp; +} + +DEFUN(ipv6_nd_rdnss, + ipv6_nd_rdnss_cmd, + "ipv6 nd rdnss X:X::X:X [<(0-4294967295)|infinite>]", + "Interface IPv6 config commands\n" + "Neighbor discovery\n" + "Recursive DNS server information\n" + "IPv6 address\n" + "Valid lifetime in seconds\n" + "Infinite valid lifetime\n") +{ + VTY_DECLVAR_CONTEXT(interface, ifp); + struct zebra_if *zif = ifp->info; + struct rtadv_rdnss rdnss = {}; + + if (inet_pton(AF_INET6, argv[3]->arg, &rdnss.addr) != 1) { + vty_out(vty, "Malformed IPv6 address\n"); + return CMD_WARNING_CONFIG_FAILED; + } + if (argc > 4) { + char *lifetime = argv[4]->type == RANGE_TKN ? argv[4]->arg + : argv[4]->text; + rdnss.lifetime = strmatch(lifetime, "infinite") + ? 
UINT32_MAX + : strtoll(lifetime, NULL, 10); + rdnss.lifetime_set = 1; + } + + rtadv_rdnss_set(zif, &rdnss); + + return CMD_SUCCESS; +} + +DEFUN(no_ipv6_nd_rdnss, + no_ipv6_nd_rdnss_cmd, + "no ipv6 nd rdnss X:X::X:X [<(0-4294967295)|infinite>]", + NO_STR + "Interface IPv6 config commands\n" + "Neighbor discovery\n" + "Recursive DNS server information\n" + "IPv6 address\n" + "Valid lifetime in seconds\n" + "Infinite valid lifetime\n") +{ + VTY_DECLVAR_CONTEXT(interface, ifp); + struct zebra_if *zif = ifp->info; + struct rtadv_rdnss rdnss = {}; + + if (inet_pton(AF_INET6, argv[4]->arg, &rdnss.addr) != 1) { + vty_out(vty, "Malformed IPv6 address\n"); + return CMD_WARNING_CONFIG_FAILED; + } + if (rtadv_rdnss_reset(zif, &rdnss) != 1) { + vty_out(vty, "Non-existant RDNSS address\n"); + return CMD_WARNING_CONFIG_FAILED; + } + + return CMD_SUCCESS; +} + +DEFUN(ipv6_nd_dnssl, + ipv6_nd_dnssl_cmd, + "ipv6 nd dnssl SUFFIX [<(0-4294967295)|infinite>]", + "Interface IPv6 config commands\n" + "Neighbor discovery\n" + "DNS search list information\n" + "Domain name suffix\n" + "Valid lifetime in seconds\n" + "Infinite valid lifetime\n") +{ + VTY_DECLVAR_CONTEXT(interface, ifp); + struct zebra_if *zif = ifp->info; + struct rtadv_dnssl dnssl = {}; + size_t len; + int ret; + + len = strlcpy(dnssl.name, argv[3]->arg, sizeof(dnssl.name)); + if (len == 0 || len >= sizeof(dnssl.name)) { + vty_out(vty, "Malformed DNS search domain\n"); + return CMD_WARNING_CONFIG_FAILED; + } + if (dnssl.name[len - 1] == '.') { + /* + * Allow, but don't require, a trailing dot signifying the root + * zone. Canonicalize by cutting it off if present. + */ + dnssl.name[len - 1] = '\0'; + len--; + } + if (argc > 4) { + char *lifetime = argv[4]->type == RANGE_TKN ? argv[4]->arg + : argv[4]->text; + dnssl.lifetime = strmatch(lifetime, "infinite") + ? 
UINT32_MAX + : strtoll(lifetime, NULL, 10); + dnssl.lifetime_set = 1; + } + + ret = rtadv_dnssl_encode(dnssl.encoded_name, dnssl.name); + if (ret < 0) { + vty_out(vty, "Malformed DNS search domain\n"); + return CMD_WARNING_CONFIG_FAILED; + } + dnssl.encoded_len = ret; + rtadv_dnssl_set(zif, &dnssl); + + return CMD_SUCCESS; +} + +DEFUN(no_ipv6_nd_dnssl, + no_ipv6_nd_dnssl_cmd, + "no ipv6 nd dnssl SUFFIX [<(0-4294967295)|infinite>]", + NO_STR + "Interface IPv6 config commands\n" + "Neighbor discovery\n" + "DNS search list information\n" + "Domain name suffix\n" + "Valid lifetime in seconds\n" + "Infinite valid lifetime\n") +{ + VTY_DECLVAR_CONTEXT(interface, ifp); + struct zebra_if *zif = ifp->info; + struct rtadv_dnssl dnssl = {}; + size_t len; + + len = strlcpy(dnssl.name, argv[4]->arg, sizeof(dnssl.name)); + if (len == 0 || len >= sizeof(dnssl.name)) { + vty_out(vty, "Malformed DNS search domain\n"); + return CMD_WARNING_CONFIG_FAILED; + } + if (dnssl.name[len - 1] == '.') { + dnssl.name[len - 1] = '\0'; + len--; + } + if (rtadv_dnssl_reset(zif, &dnssl) != 1) { + vty_out(vty, "Non-existant DNS search domain\n"); + return CMD_WARNING_CONFIG_FAILED; + } + + return CMD_SUCCESS; +} + + +/* Dump interface ND information to vty. 
*/ +static int nd_dump_vty(struct vty *vty, struct interface *ifp) +{ + struct zebra_if *zif; + struct rtadvconf *rtadv; + int interval; + + zif = (struct zebra_if *)ifp->info; + rtadv = &zif->rtadv; + + if (rtadv->AdvSendAdvertisements) { + vty_out(vty, + " ND advertised reachable time is %d milliseconds\n", + rtadv->AdvReachableTime); + vty_out(vty, + " ND advertised retransmit interval is %u milliseconds\n", + rtadv->AdvRetransTimer); + vty_out(vty, " ND advertised hop-count limit is %d hops\n", + rtadv->AdvCurHopLimit); + vty_out(vty, " ND router advertisements sent: %d rcvd: %d\n", + zif->ra_sent, zif->ra_rcvd); + interval = rtadv->MaxRtrAdvInterval; + if (interval % 1000) + vty_out(vty, + " ND router advertisements are sent every %d milliseconds\n", + interval); + else + vty_out(vty, + " ND router advertisements are sent every %d seconds\n", + interval / 1000); + if (!rtadv->UseFastRexmit) + vty_out(vty, + " ND router advertisements do not use fast retransmit\n"); + + if (rtadv->AdvDefaultLifetime != -1) + vty_out(vty, + " ND router advertisements live for %d seconds\n", + rtadv->AdvDefaultLifetime); + else + vty_out(vty, + " ND router advertisements lifetime tracks ra-interval\n"); + vty_out(vty, + " ND router advertisement default router preference is %s\n", + rtadv_pref_strs[rtadv->DefaultPreference]); + if (rtadv->AdvManagedFlag) + vty_out(vty, + " Hosts use DHCP to obtain routable addresses.\n"); + else + vty_out(vty, + " Hosts use stateless autoconfig for addresses.\n"); + if (rtadv->AdvHomeAgentFlag) { + vty_out(vty, + " ND router advertisements with Home Agent flag bit set.\n"); + if (rtadv->HomeAgentLifetime != -1) + vty_out(vty, + " Home Agent lifetime is %u seconds\n", + rtadv->HomeAgentLifetime); + else + vty_out(vty, + " Home Agent lifetime tracks ra-lifetime\n"); + vty_out(vty, " Home Agent preference is %u\n", + rtadv->HomeAgentPreference); + } + if (rtadv->AdvIntervalOption) + vty_out(vty, + " ND router advertisements with Adv. 
Interval option.\n"); + } + return 0; +} + + +/* Write configuration about router advertisement. */ +static int rtadv_config_write(struct vty *vty, struct interface *ifp) +{ + struct zebra_if *zif; + struct listnode *node; + struct rtadv_prefix *rprefix; + struct rtadv_rdnss *rdnss; + struct rtadv_dnssl *dnssl; + int interval; + + zif = ifp->info; + + if (!if_is_loopback(ifp)) { + if (zif->rtadv.AdvSendAdvertisements + && CHECK_FLAG(zif->rtadv.ra_configured, VTY_RA_CONFIGURED)) + vty_out(vty, " no ipv6 nd suppress-ra\n"); + } + + interval = zif->rtadv.MaxRtrAdvInterval; + if (CHECK_FLAG(zif->rtadv.ra_configured, VTY_RA_INTERVAL_CONFIGURED)) { + if (interval % 1000) + vty_out(vty, " ipv6 nd ra-interval msec %d\n", + interval); + else if (interval != RTADV_MAX_RTR_ADV_INTERVAL) + vty_out(vty, " ipv6 nd ra-interval %d\n", + interval / 1000); + } + + if (zif->rtadv.AdvIntervalOption) + vty_out(vty, " ipv6 nd adv-interval-option\n"); + + if (!zif->rtadv.UseFastRexmit) + vty_out(vty, " no ipv6 nd ra-fast-retrans\n"); + + if (zif->rtadv.AdvRetransTimer != 0) + vty_out(vty, " ipv6 nd ra-retrans-interval %u\n", + zif->rtadv.AdvRetransTimer); + + if (zif->rtadv.AdvCurHopLimit != RTADV_DEFAULT_HOPLIMIT) + vty_out(vty, " ipv6 nd ra-hop-limit %d\n", + zif->rtadv.AdvCurHopLimit); + + if (zif->rtadv.AdvDefaultLifetime != -1) + vty_out(vty, " ipv6 nd ra-lifetime %d\n", + zif->rtadv.AdvDefaultLifetime); + + if (zif->rtadv.HomeAgentPreference) + vty_out(vty, " ipv6 nd home-agent-preference %u\n", + zif->rtadv.HomeAgentPreference); + + if (zif->rtadv.HomeAgentLifetime != -1) + vty_out(vty, " ipv6 nd home-agent-lifetime %u\n", + zif->rtadv.HomeAgentLifetime); + + if (zif->rtadv.AdvHomeAgentFlag) + vty_out(vty, " ipv6 nd home-agent-config-flag\n"); + + if (zif->rtadv.AdvReachableTime) + vty_out(vty, " ipv6 nd reachable-time %d\n", + zif->rtadv.AdvReachableTime); + + if (zif->rtadv.AdvManagedFlag) + vty_out(vty, " ipv6 nd managed-config-flag\n"); + + if (zif->rtadv.AdvOtherConfigFlag) + 
vty_out(vty, " ipv6 nd other-config-flag\n"); + + if (zif->rtadv.DefaultPreference != RTADV_PREF_MEDIUM) + vty_out(vty, " ipv6 nd router-preference %s\n", + rtadv_pref_strs[zif->rtadv.DefaultPreference]); + + if (zif->rtadv.AdvLinkMTU) + vty_out(vty, " ipv6 nd mtu %d\n", zif->rtadv.AdvLinkMTU); + + frr_each (rtadv_prefixes, zif->rtadv.prefixes, rprefix) { + if ((rprefix->AdvPrefixCreate == PREFIX_SRC_MANUAL) + || (rprefix->AdvPrefixCreate == PREFIX_SRC_BOTH)) { + vty_out(vty, " ipv6 nd prefix %pFX", &rprefix->prefix); + if ((rprefix->AdvValidLifetime != RTADV_VALID_LIFETIME) + || (rprefix->AdvPreferredLifetime + != RTADV_PREFERRED_LIFETIME)) { + if (rprefix->AdvValidLifetime == UINT32_MAX) + vty_out(vty, " infinite"); + else + vty_out(vty, " %u", + rprefix->AdvValidLifetime); + if (rprefix->AdvPreferredLifetime == UINT32_MAX) + vty_out(vty, " infinite"); + else + vty_out(vty, " %u", + rprefix->AdvPreferredLifetime); + } + if (!rprefix->AdvOnLinkFlag) + vty_out(vty, " off-link"); + if (!rprefix->AdvAutonomousFlag) + vty_out(vty, " no-autoconfig"); + if (rprefix->AdvRouterAddressFlag) + vty_out(vty, " router-address"); + vty_out(vty, "\n"); + } + } + + for (ALL_LIST_ELEMENTS_RO(zif->rtadv.AdvRDNSSList, node, rdnss)) { + char buf[INET6_ADDRSTRLEN]; + + vty_out(vty, " ipv6 nd rdnss %s", + inet_ntop(AF_INET6, &rdnss->addr, buf, sizeof(buf))); + if (rdnss->lifetime_set) { + if (rdnss->lifetime == UINT32_MAX) + vty_out(vty, " infinite"); + else + vty_out(vty, " %u", rdnss->lifetime); + } + vty_out(vty, "\n"); + } + for (ALL_LIST_ELEMENTS_RO(zif->rtadv.AdvDNSSLList, node, dnssl)) { + vty_out(vty, " ipv6 nd dnssl %s", dnssl->name); + if (dnssl->lifetime_set) { + if (dnssl->lifetime == UINT32_MAX) + vty_out(vty, " infinite"); + else + vty_out(vty, " %u", dnssl->lifetime); + } + vty_out(vty, "\n"); + } + return 0; +} + + +static void rtadv_event(struct zebra_vrf *zvrf, enum rtadv_event event, int val) +{ + struct rtadv *rtadv; + + if (IS_ZEBRA_DEBUG_EVENT) { + struct vrf *vrf 
= zvrf->vrf; + + zlog_debug("%s(%s) with event: %d and val: %d", __func__, + VRF_LOGNAME(vrf), event, val); + } + + rtadv = &zvrf->rtadv; + + switch (event) { + case RTADV_START: + event_add_read(zrouter.master, rtadv_read, zvrf, rtadv->sock, + &rtadv->ra_read); + event_add_event(zrouter.master, rtadv_timer, zvrf, 0, + &rtadv->ra_timer); + break; + case RTADV_STOP: + EVENT_OFF(rtadv->ra_timer); + EVENT_OFF(rtadv->ra_read); + break; + case RTADV_TIMER: + event_add_timer(zrouter.master, rtadv_timer, zvrf, val, + &rtadv->ra_timer); + break; + case RTADV_TIMER_MSEC: + event_add_timer_msec(zrouter.master, rtadv_timer, zvrf, val, + &rtadv->ra_timer); + break; + case RTADV_READ: + event_add_read(zrouter.master, rtadv_read, zvrf, rtadv->sock, + &rtadv->ra_read); + break; + default: + break; + } + return; +} + +void rtadv_if_up(struct zebra_if *zif) +{ + struct zebra_vrf *zvrf = rtadv_interface_get_zvrf(zif->ifp); + + /* Enable fast tx of RA if enabled && RA interval is not in msecs */ + if (zif->rtadv.AdvSendAdvertisements && + (zif->rtadv.MaxRtrAdvInterval >= 1000) && + zif->rtadv.UseFastRexmit) { + zif->rtadv.inFastRexmit = 1; + zif->rtadv.NumFastReXmitsRemain = RTADV_NUM_FAST_REXMITS; + } + + /* + * startup the state machine, if it hasn't been already + * due to a delayed ifindex on startup ordering + */ + if (zif->rtadv.AdvSendAdvertisements) + rtadv_start_interface_events(zvrf, zif); +} + +void rtadv_if_init(struct zebra_if *zif) +{ + /* Set default router advertise values. 
*/ + struct rtadvconf *rtadv; + + rtadv = &zif->rtadv; + + rtadv->AdvSendAdvertisements = 0; + rtadv->MaxRtrAdvInterval = RTADV_MAX_RTR_ADV_INTERVAL; + rtadv->MinRtrAdvInterval = RTADV_MIN_RTR_ADV_INTERVAL; + rtadv->AdvIntervalTimer = 0; + rtadv->AdvManagedFlag = 0; + rtadv->AdvOtherConfigFlag = 0; + rtadv->AdvHomeAgentFlag = 0; + rtadv->AdvLinkMTU = 0; + rtadv->AdvReachableTime = 0; + rtadv->AdvRetransTimer = 0; + rtadv->AdvCurHopLimit = RTADV_DEFAULT_HOPLIMIT; + memset(&rtadv->lastadvcurhoplimit, 0, + sizeof(rtadv->lastadvcurhoplimit)); + memset(&rtadv->lastadvmanagedflag, 0, + sizeof(rtadv->lastadvmanagedflag)); + memset(&rtadv->lastadvotherconfigflag, 0, + sizeof(rtadv->lastadvotherconfigflag)); + memset(&rtadv->lastadvreachabletime, 0, + sizeof(rtadv->lastadvreachabletime)); + memset(&rtadv->lastadvretranstimer, 0, + sizeof(rtadv->lastadvretranstimer)); + rtadv->AdvDefaultLifetime = -1; /* derive from MaxRtrAdvInterval */ + rtadv->HomeAgentPreference = 0; + rtadv->HomeAgentLifetime = -1; /* derive from AdvDefaultLifetime */ + rtadv->AdvIntervalOption = 0; + rtadv->UseFastRexmit = true; + rtadv->DefaultPreference = RTADV_PREF_MEDIUM; + + rtadv_prefixes_init(rtadv->prefixes); + + rtadv->AdvRDNSSList = list_new(); + rtadv->AdvDNSSLList = list_new(); +} + +void rtadv_if_fini(struct zebra_if *zif) +{ + struct rtadvconf *rtadv; + struct rtadv_prefix *rp; + + rtadv = &zif->rtadv; + + while ((rp = rtadv_prefixes_pop(rtadv->prefixes))) + rtadv_prefix_free(rp); + + list_delete(&rtadv->AdvRDNSSList); + list_delete(&rtadv->AdvDNSSLList); +} + +void rtadv_vrf_init(struct zebra_vrf *zvrf) +{ + if (!vrf_is_backend_netns() && (zvrf_id(zvrf) != VRF_DEFAULT)) + return; + + zvrf->rtadv.sock = rtadv_make_socket(zvrf->zns->ns_id); +} + +void rtadv_vrf_terminate(struct zebra_vrf *zvrf) +{ + if (!vrf_is_backend_netns() && (zvrf_id(zvrf) != VRF_DEFAULT)) + return; + + rtadv_event(zvrf, RTADV_STOP, 0); + if (zvrf->rtadv.sock >= 0) { + close(zvrf->rtadv.sock); + zvrf->rtadv.sock = -1; 
+ } + + adv_if_clean(zvrf); + adv_msec_if_clean(zvrf); +} + +void rtadv_cmd_init(void) +{ + interfaces_configured_for_ra_from_bgp = 0; + + hook_register(zebra_if_extra_info, nd_dump_vty); + hook_register(zebra_if_config_wr, rtadv_config_write); + + install_element(VIEW_NODE, &show_ipv6_nd_ra_if_cmd); + + install_element(INTERFACE_NODE, &ipv6_nd_ra_fast_retrans_cmd); + install_element(INTERFACE_NODE, &no_ipv6_nd_ra_fast_retrans_cmd); + install_element(INTERFACE_NODE, &ipv6_nd_ra_retrans_interval_cmd); + install_element(INTERFACE_NODE, &no_ipv6_nd_ra_retrans_interval_cmd); + install_element(INTERFACE_NODE, &ipv6_nd_ra_hop_limit_cmd); + install_element(INTERFACE_NODE, &no_ipv6_nd_ra_hop_limit_cmd); + install_element(INTERFACE_NODE, &ipv6_nd_suppress_ra_cmd); + install_element(INTERFACE_NODE, &no_ipv6_nd_suppress_ra_cmd); + install_element(INTERFACE_NODE, &ipv6_nd_ra_interval_cmd); + install_element(INTERFACE_NODE, &ipv6_nd_ra_interval_msec_cmd); + install_element(INTERFACE_NODE, &no_ipv6_nd_ra_interval_cmd); + install_element(INTERFACE_NODE, &ipv6_nd_ra_lifetime_cmd); + install_element(INTERFACE_NODE, &no_ipv6_nd_ra_lifetime_cmd); + install_element(INTERFACE_NODE, &ipv6_nd_reachable_time_cmd); + install_element(INTERFACE_NODE, &no_ipv6_nd_reachable_time_cmd); + install_element(INTERFACE_NODE, &ipv6_nd_managed_config_flag_cmd); + install_element(INTERFACE_NODE, &no_ipv6_nd_managed_config_flag_cmd); + install_element(INTERFACE_NODE, &ipv6_nd_other_config_flag_cmd); + install_element(INTERFACE_NODE, &no_ipv6_nd_other_config_flag_cmd); + install_element(INTERFACE_NODE, &ipv6_nd_homeagent_config_flag_cmd); + install_element(INTERFACE_NODE, &no_ipv6_nd_homeagent_config_flag_cmd); + install_element(INTERFACE_NODE, &ipv6_nd_homeagent_preference_cmd); + install_element(INTERFACE_NODE, &no_ipv6_nd_homeagent_preference_cmd); + install_element(INTERFACE_NODE, &ipv6_nd_homeagent_lifetime_cmd); + install_element(INTERFACE_NODE, &no_ipv6_nd_homeagent_lifetime_cmd); + 
install_element(INTERFACE_NODE, + &ipv6_nd_adv_interval_config_option_cmd); + install_element(INTERFACE_NODE, + &no_ipv6_nd_adv_interval_config_option_cmd); + install_element(INTERFACE_NODE, &ipv6_nd_prefix_cmd); + install_element(INTERFACE_NODE, &no_ipv6_nd_prefix_cmd); + install_element(INTERFACE_NODE, &ipv6_nd_router_preference_cmd); + install_element(INTERFACE_NODE, &no_ipv6_nd_router_preference_cmd); + install_element(INTERFACE_NODE, &ipv6_nd_mtu_cmd); + install_element(INTERFACE_NODE, &no_ipv6_nd_mtu_cmd); + install_element(INTERFACE_NODE, &ipv6_nd_rdnss_cmd); + install_element(INTERFACE_NODE, &no_ipv6_nd_rdnss_cmd); + install_element(INTERFACE_NODE, &ipv6_nd_dnssl_cmd); + install_element(INTERFACE_NODE, &no_ipv6_nd_dnssl_cmd); +} + +static int if_join_all_router(int sock, struct interface *ifp) +{ + int ret; + + struct ipv6_mreq mreq; + + memset(&mreq, 0, sizeof(mreq)); + inet_pton(AF_INET6, ALLROUTER, &mreq.ipv6mr_multiaddr); + mreq.ipv6mr_interface = ifp->ifindex; + + ret = setsockopt(sock, IPPROTO_IPV6, IPV6_JOIN_GROUP, (char *)&mreq, + sizeof(mreq)); + if (ret < 0) + flog_err_sys(EC_LIB_SOCKET, + "%s(%u): Failed to join group, socket %u error %s", + ifp->name, ifp->ifindex, sock, + safe_strerror(errno)); + + if (IS_ZEBRA_DEBUG_EVENT) + zlog_debug( + "%s(%s:%u): Join All-Routers multicast group, socket %u", + ifp->name, ifp->vrf->name, ifp->ifindex, sock); + + return 0; +} + +static int if_leave_all_router(int sock, struct interface *ifp) +{ + int ret; + + struct ipv6_mreq mreq; + + memset(&mreq, 0, sizeof(mreq)); + inet_pton(AF_INET6, ALLROUTER, &mreq.ipv6mr_multiaddr); + mreq.ipv6mr_interface = ifp->ifindex; + + ret = setsockopt(sock, IPPROTO_IPV6, IPV6_LEAVE_GROUP, (char *)&mreq, + sizeof(mreq)); + if (ret < 0) + flog_err_sys( + EC_LIB_SOCKET, + "%s(%s:%u): Failed to leave group, socket %u error %s", + ifp->name, ifp->vrf->name, ifp->ifindex, sock, + safe_strerror(errno)); + + if (IS_ZEBRA_DEBUG_EVENT) + zlog_debug( + "%s(%s:%u): Leave All-Routers 
multicast group, socket %u", + ifp->name, ifp->vrf->name, ifp->ifindex, sock); + + return 0; +} + +bool rtadv_compiled_in(void) +{ + return true; +} + +#else /* !HAVE_RTADV */ +/* + * If the end user does not have RADV enabled we should + * handle this better + */ +void zebra_interface_radv_disable(ZAPI_HANDLER_ARGS) +{ + if (IS_ZEBRA_DEBUG_PACKET) + zlog_debug( + "Received %s command, but ZEBRA is not compiled with Router Advertisements on", + zserv_command_string(hdr->command)); + + return; +} + +void zebra_interface_radv_enable(ZAPI_HANDLER_ARGS) +{ + if (IS_ZEBRA_DEBUG_PACKET) + zlog_debug( + "Received %s command, but ZEBRA is not compiled with Router Advertisements on", + zserv_command_string(hdr->command)); + + return; +} + +bool rtadv_compiled_in(void) +{ + return false; +} + +#endif /* HAVE_RTADV */ + +uint32_t rtadv_get_interfaces_configured_from_bgp(void) +{ + return interfaces_configured_for_ra_from_bgp; +} diff --git a/zebra/rtadv.h b/zebra/rtadv.h new file mode 100644 index 0000000..1ec376a --- /dev/null +++ b/zebra/rtadv.h @@ -0,0 +1,443 @@ +// SPDX-License-Identifier: GPL-2.0-or-later +/* Router advertisement + * Copyright (C) 2005 6WIND + * Copyright (C) 1999 Kunihiro Ishiguro + */ + +#ifndef _ZEBRA_RTADV_H +#define _ZEBRA_RTADV_H + +#include "zebra.h" +#include "vty.h" +#include "typesafe.h" + +#include "zebra/zserv.h" + +#ifdef __cplusplus +extern "C" { +#endif + +struct interface; +struct zebra_if; + +#if defined(HAVE_RTADV) + +PREDECL_SORTLIST_UNIQ(adv_if_list); +/* Structure which hold status of router advertisement. */ +struct rtadv { + int sock; + + struct adv_if_list_head adv_if; + struct adv_if_list_head adv_msec_if; + + struct event *ra_read; + struct event *ra_timer; +}; + +PREDECL_RBTREE_UNIQ(rtadv_prefixes); + +/* Router advertisement parameter. From RFC4861, RFC6275 and RFC4191. */ +struct rtadvconf { + /* A flag indicating whether or not the router sends periodic Router + Advertisements and responds to Router Solicitations. 
+ Default: false */ + int AdvSendAdvertisements; + + /* The maximum time allowed between sending unsolicited multicast + Router Advertisements from the interface, in milliseconds. + MUST be no less than 70 ms [RFC6275 7.5] and no greater + than 1800000 ms [RFC4861 6.2.1]. + + Default: 600000 milliseconds */ + int MaxRtrAdvInterval; +#define RTADV_MAX_RTR_ADV_INTERVAL 600000 + + /* The minimum time allowed between sending unsolicited multicast + Router Advertisements from the interface, in milliseconds. + MUST be no less than 30 ms [RFC6275 7.5]. + MUST be no greater than .75 * MaxRtrAdvInterval. + + Default: 0.33 * MaxRtrAdvInterval */ + int MinRtrAdvInterval; /* This field is currently unused. */ +#define RTADV_MIN_RTR_ADV_INTERVAL (0.33 * RTADV_MAX_RTR_ADV_INTERVAL) + + /* Unsolicited Router Advertisements' interval timer. */ + int AdvIntervalTimer; + + /* The true/false value to be placed in the "Managed address + configuration" flag field in the Router Advertisement. See + [ADDRCONF]. + + Default: false */ + int AdvManagedFlag; + struct timeval lastadvmanagedflag; + + + /* The true/false value to be placed in the "Other stateful + configuration" flag field in the Router Advertisement. See + [ADDRCONF]. + + Default: false */ + int AdvOtherConfigFlag; + struct timeval lastadvotherconfigflag; + + /* The value to be placed in MTU options sent by the router. A + value of zero indicates that no MTU options are sent. + + Default: 0 */ + int AdvLinkMTU; + + + /* The value to be placed in the Reachable Time field in the Router + Advertisement messages sent by the router. The value zero means + unspecified (by this router). MUST be no greater than 3,600,000 + milliseconds (1 hour). + + Default: 0 */ + uint32_t AdvReachableTime; +#define RTADV_MAX_REACHABLE_TIME 3600000 + struct timeval lastadvreachabletime; + + /* The value to be placed in the Retrans Timer field in the Router + Advertisement messages sent by the router. 
The value zero means + unspecified (by this router). + + Default: 0 */ + int AdvRetransTimer; + struct timeval lastadvretranstimer; + + /* The default value to be placed in the Cur Hop Limit field in the + Router Advertisement messages sent by the router. The value + should be set to the current diameter of the Internet. The + value zero means unspecified (by this router). + + Default: The value specified in the "Assigned Numbers" RFC + [ASSIGNED] that was in effect at the time of implementation. */ + int AdvCurHopLimit; + struct timeval lastadvcurhoplimit; + +#define RTADV_DEFAULT_HOPLIMIT 64 /* 64 hops */ + + /* The value to be placed in the Router Lifetime field of Router + Advertisements sent from the interface, in seconds. MUST be + either zero or between MaxRtrAdvInterval and 9000 seconds. A + value of zero indicates that the router is not to be used as a + default router. + + Default: 3 * MaxRtrAdvInterval (stored here as -1) */ + int AdvDefaultLifetime; +#define RTADV_MAX_RTRLIFETIME 9000 /* 2.5 hours */ + + /* A list of prefixes to be placed in Prefix Information options in + Router Advertisement messages sent from the interface. + + Default: all prefixes that the router advertises via routing + protocols as being on-link for the interface from which the + advertisement is sent. The link-local prefix SHOULD NOT be + included in the list of advertised prefixes. */ + struct rtadv_prefixes_head prefixes[1]; + + /* The true/false value to be placed in the "Home agent" + flag field in the Router Advertisement. See [RFC6275 7.1]. + + Default: false */ + int AdvHomeAgentFlag; +#ifndef ND_RA_FLAG_HOME_AGENT +#define ND_RA_FLAG_HOME_AGENT 0x20 +#endif + + /* The value to be placed in Home Agent Information option if Home + Flag is set. + Default: 0 */ + int HomeAgentPreference; + + /* The value to be placed in Home Agent Information option if Home + Flag is set. Lifetime (seconds) MUST not be greater than 18.2 + hours.
+ The value -1 has special meaning: use of AdvDefaultLifetime value. + + Default: -1 */ + int HomeAgentLifetime; +#define RTADV_MAX_HALIFETIME 65520 /* 18.2 hours */ + + /* The true/false value to insert or not an Advertisement Interval + option. See [RFC 6275 7.3] + + Default: false */ + int AdvIntervalOption; + + /* The value to be placed in the Default Router Preference field of + a router advertisement. See [RFC 4191 2.1 & 2.2] + + Default: 0 (medium) */ + int DefaultPreference; +#define RTADV_PREF_MEDIUM 0x0 /* Per RFC4191. */ + + /* + * List of recursive DNS servers to include in the RDNSS option. + * See [RFC8106 5.1] + * + * Default: empty list; do not emit RDNSS option + */ + struct list *AdvRDNSSList; + + /* + * List of DNS search domains to include in the DNSSL option. + * See [RFC8106 5.2] + * + * Default: empty list; do not emit DNSSL option + */ + struct list *AdvDNSSLList; + + /* + * rfc4861 states RAs must be sent at least 3 seconds apart. + * We allow faster retransmits to speed up convergence but can + * turn that capability off to meet the rfc if needed. + */ + bool UseFastRexmit; /* True if fast rexmits are enabled */ + + uint8_t inFastRexmit; /* True if we're retransmitting faster than usual */ + + /* Track if RA was configured by BGP or by the Operator or both */ + uint8_t ra_configured; /* Was RA configured? */ +#define BGP_RA_CONFIGURED (1 << 0) /* BGP configured RA? */ +#define VTY_RA_CONFIGURED (1 << 1) /* Operator configured RA? */ +#define VTY_RA_INTERVAL_CONFIGURED \ + (1 << 2) /* Operator configured RA interval */ + int NumFastReXmitsRemain; /* Loaded first with number of fast + rexmits to do */ + +#define RTADV_FAST_REXMIT_PERIOD 1 /* 1 sec */ +#define RTADV_NUM_FAST_REXMITS 4 /* Fast Rexmit RA 4 times on certain events \ + */ +}; + +struct rtadv_rdnss { + /* Address of recursive DNS server to advertise */ + struct in6_addr addr; + + /* + * Lifetime in seconds; all-ones means infinity, zero + * stop using it.
+ */ + uint32_t lifetime; + + /* If lifetime not set, use a default of 3*MaxRtrAdvInterval */ + int lifetime_set; +}; + +/* + * [RFC1035 2.3.4] sets the maximum length of a domain name (a sequence of + * labels, each prefixed by a length octet) at 255 octets. + */ +#define RTADV_MAX_ENCODED_DOMAIN_NAME 255 + +struct rtadv_dnssl { + /* Domain name without trailing root zone dot (NUL-terminated) */ + char name[RTADV_MAX_ENCODED_DOMAIN_NAME - 1]; + + /* Name encoded as in [RFC1035 3.1] */ + uint8_t encoded_name[RTADV_MAX_ENCODED_DOMAIN_NAME]; + + /* Actual length of encoded_name */ + size_t encoded_len; + + /* Lifetime as for RDNSS */ + uint32_t lifetime; + int lifetime_set; +}; + +/* Router advertisement prefix. */ +struct rtadv_prefix { + struct rtadv_prefixes_item item; + + /* Prefix to be advertised. */ + struct prefix_ipv6 prefix; + + /* The prefix was manually/automatically defined. */ + int AdvPrefixCreate; + + /* The value to be placed in the Valid Lifetime in the Prefix */ + uint32_t AdvValidLifetime; +#define RTADV_VALID_LIFETIME 2592000 + + /* The value to be placed in the on-link flag */ + int AdvOnLinkFlag; + + /* The value to be placed in the Preferred Lifetime in the Prefix + Information option, in seconds.*/ + uint32_t AdvPreferredLifetime; +#define RTADV_PREFERRED_LIFETIME 604800 + + /* The value to be placed in the Autonomous Flag. */ + int AdvAutonomousFlag; + + /* The value to be placed in the Router Address Flag [RFC6275 7.2]. 
*/ + int AdvRouterAddressFlag; +#ifndef ND_OPT_PI_FLAG_RADDR +#define ND_OPT_PI_FLAG_RADDR 0x20 +#endif +}; + +/* RFC4861 minimum delay between RAs */ +#ifndef MIN_DELAY_BETWEEN_RAS +#define MIN_DELAY_BETWEEN_RAS 3000 +#endif + +/* RFC4584 Extension to Sockets API for Mobile IPv6 */ + +#ifndef ND_OPT_ADV_INTERVAL +#define ND_OPT_ADV_INTERVAL 7 /* Adv Interval Option */ +#endif +#ifndef ND_OPT_HA_INFORMATION +#define ND_OPT_HA_INFORMATION 8 /* HA Information Option */ +#endif + + +#ifndef HAVE_STRUCT_ND_OPT_ADV_INTERVAL +struct nd_opt_adv_interval { /* Advertisement interval option */ + uint8_t nd_opt_ai_type; + uint8_t nd_opt_ai_len; + uint16_t nd_opt_ai_reserved; + uint32_t nd_opt_ai_interval; +} __attribute__((__packed__)); +#else +#ifndef HAVE_STRUCT_ND_OPT_ADV_INTERVAL_ND_OPT_AI_TYPE +/* fields may have to be renamed */ +#define nd_opt_ai_type nd_opt_adv_interval_type +#define nd_opt_ai_len nd_opt_adv_interval_len +#define nd_opt_ai_reserved nd_opt_adv_interval_reserved +#define nd_opt_ai_interval nd_opt_adv_interval_ival +#endif +#endif +#ifndef ND_OPT_RTR_ADV_INTERVAL +#define ND_OPT_RTR_ADV_INTERVAL 7 +#endif +#ifndef ND_OPT_HOME_AGENT_INFO +#define ND_OPT_HOME_AGENT_INFO 8 +#endif + +#ifndef HAVE_STRUCT_ND_OPT_HOMEAGENT_INFO +struct nd_opt_homeagent_info { /* Home Agent info */ + uint8_t nd_opt_hai_type; + uint8_t nd_opt_hai_len; + uint16_t nd_opt_hai_reserved; + uint16_t nd_opt_hai_preference; + uint16_t nd_opt_hai_lifetime; +} __attribute__((__packed__)); +#endif + +#ifndef ND_OPT_RDNSS +#define ND_OPT_RDNSS 25 +#endif +#ifndef ND_OPT_DNSSL +#define ND_OPT_DNSSL 31 +#endif + +#ifndef HAVE_STRUCT_ND_OPT_RDNSS +struct nd_opt_rdnss { /* Recursive DNS server option [RFC8106 5.1] */ + uint8_t nd_opt_rdnss_type; + uint8_t nd_opt_rdnss_len; + uint16_t nd_opt_rdnss_reserved; + uint32_t nd_opt_rdnss_lifetime; + /* Followed by one or more IPv6 addresses */ +} __attribute__((__packed__)); +#endif + +#ifndef HAVE_STRUCT_ND_OPT_DNSSL +struct nd_opt_dnssl { /* DNS 
search list option [RFC8106 5.2] */ + uint8_t nd_opt_dnssl_type; + uint8_t nd_opt_dnssl_len; + uint16_t nd_opt_dnssl_reserved; + uint32_t nd_opt_dnssl_lifetime; + /* + * Followed by one or more domain names encoded as in [RFC1035 3.1]. + * Multiple domain names are concatenated after encoding. In any case, + * the result is zero-padded to a multiple of 8 octets. + */ +} __attribute__((__packed__)); +#endif + +/* + * ipv6 nd prefixes can be manually defined, derived from the kernel interface + * configs or both. If both, manual flag/timer settings are used. + */ +enum ipv6_nd_prefix_source { + PREFIX_SRC_NONE = 0, + PREFIX_SRC_MANUAL, + PREFIX_SRC_AUTO, + PREFIX_SRC_BOTH, +}; + +enum ipv6_nd_suppress_ra_status { + RA_ENABLE = 0, + RA_SUPPRESS, +}; + +extern void rtadv_vrf_init(struct zebra_vrf *zvrf); +extern void rtadv_vrf_terminate(struct zebra_vrf *zvrf); +extern void rtadv_stop_ra(struct interface *ifp); +extern void rtadv_stop_ra_all(void); +extern void rtadv_cmd_init(void); +extern void rtadv_if_init(struct zebra_if *zif); +extern void rtadv_if_up(struct zebra_if *zif); +extern void rtadv_if_fini(struct zebra_if *zif); +extern void rtadv_add_prefix(struct zebra_if *zif, const struct prefix_ipv6 *p); +extern void rtadv_delete_prefix(struct zebra_if *zif, const struct prefix *p); + +#else /* !HAVE_RTADV */ +struct rtadv { + /* empty structs aren't valid ISO C */ + char dummy; +}; + +struct rtadvconf { + /* same again, empty structs aren't valid ISO C */ + char dummy; +}; + +static inline void rtadv_vrf_init(struct zebra_vrf *zvrf) +{ +} +static inline void rtadv_vrf_terminate(struct zebra_vrf *zvrf) +{ +} +static inline void rtadv_cmd_init(void) +{ +} +static inline void rtadv_if_init(struct zebra_if *zif) +{ +} +static inline void rtadv_if_up(struct zebra_if *zif) +{ +} +static inline void rtadv_if_fini(struct zebra_if *zif) +{ +} +static inline void rtadv_add_prefix(struct zebra_if *zif, + const struct prefix_ipv6 *p) +{ +} +static inline void 
rtadv_delete_prefix(struct zebra_if *zif, + const struct prefix *p) +{ +} +static inline void rtadv_stop_ra(struct interface *ifp) +{ +} +static inline void rtadv_stop_ra_all(void) +{ +} +#endif + +extern void zebra_interface_radv_disable(ZAPI_HANDLER_ARGS); +extern void zebra_interface_radv_enable(ZAPI_HANDLER_ARGS); + +extern uint32_t rtadv_get_interfaces_configured_from_bgp(void); +extern bool rtadv_compiled_in(void); + +#ifdef __cplusplus +} +#endif + +#endif /* _ZEBRA_RTADV_H */ diff --git a/zebra/rtread_netlink.c b/zebra/rtread_netlink.c new file mode 100644 index 0000000..39daed2 --- /dev/null +++ b/zebra/rtread_netlink.c @@ -0,0 +1,78 @@ +// SPDX-License-Identifier: GPL-2.0-or-later +/* + * Kernel routing table readup by netlink + * Copyright (C) 1998 Kunihiro Ishiguro + */ + +#include + +#ifdef GNU_LINUX + +#include "vty.h" +#include "zebra/rt.h" +#include "zebra/zebra_pbr.h" +#include "zebra/zebra_tc.h" +#include "zebra/rt_netlink.h" +#include "zebra/if_netlink.h" +#include "zebra/rule_netlink.h" +#include "zebra/tc_netlink.h" + +void route_read(struct zebra_ns *zns) +{ + netlink_route_read(zns); +} + +void macfdb_read(struct zebra_ns *zns) +{ + netlink_macfdb_read(zns); +} + +void macfdb_read_for_bridge(struct zebra_ns *zns, struct interface *ifp, + struct interface *br_if, vlanid_t vid) +{ + netlink_macfdb_read_for_bridge(zns, ifp, br_if, vid); +} + +void macfdb_read_mcast_entry_for_vni(struct zebra_ns *zns, + struct interface *ifp, vni_t vni) +{ + netlink_macfdb_read_mcast_for_vni(zns, ifp, vni); +} + +void macfdb_read_specific_mac(struct zebra_ns *zns, struct interface *br_if, + const struct ethaddr *mac, vlanid_t vid) +{ + netlink_macfdb_read_specific_mac(zns, br_if, mac, vid); +} + +void neigh_read(struct zebra_ns *zns) +{ + netlink_neigh_read(zns); +} + +void neigh_read_for_vlan(struct zebra_ns *zns, struct interface *vlan_if) +{ + netlink_neigh_read_for_vlan(zns, vlan_if); +} + +void neigh_read_specific_ip(const struct ipaddr *ip, struct interface 
*vlan_if) +{ + netlink_neigh_read_specific_ip(ip, vlan_if); +} + +void kernel_read_pbr_rules(struct zebra_ns *zns) +{ + netlink_rules_read(zns); +} + +void kernel_read_tc_qdisc(struct zebra_ns *zns) +{ + netlink_qdisc_read(zns); +} + +void vlan_read(struct zebra_ns *zns) +{ + netlink_vlan_read(zns); +} + +#endif /* GNU_LINUX */ diff --git a/zebra/rtread_sysctl.c b/zebra/rtread_sysctl.c new file mode 100644 index 0000000..ef1e21b --- /dev/null +++ b/zebra/rtread_sysctl.c @@ -0,0 +1,110 @@ +// SPDX-License-Identifier: GPL-2.0-or-later +/* + * Kernel routing table read by sysctl function. + * Copyright (C) 1997, 98 Kunihiro Ishiguro + */ + +#include + +#if !defined(GNU_LINUX) + +#include "memory.h" +#include "log.h" +#include "vrf.h" + +#include "zebra/rt.h" +#include "zebra/kernel_socket.h" +#include "zebra/zebra_pbr.h" +#include "zebra/zebra_tc.h" +#include "zebra/zebra_errors.h" + +/* Kernel routing table read up by sysctl function. */ +void route_read(struct zebra_ns *zns) +{ + caddr_t buf, end, ref; + size_t bufsiz; + struct rt_msghdr *rtm; + +#define MIBSIZ 6 + int mib[MIBSIZ] = {CTL_NET, PF_ROUTE, 0, 0, NET_RT_DUMP, 0}; + + if (zns->ns_id != NS_DEFAULT) + return; + + /* Get buffer size. */ + if (sysctl(mib, MIBSIZ, NULL, &bufsiz, NULL, 0) < 0) { + flog_warn(EC_ZEBRA_SYSCTL_FAILED, "sysctl fail: %s", + safe_strerror(errno)); + return; + } + + /* Allocate buffer. */ + ref = buf = XMALLOC(MTYPE_TMP, bufsiz); + + /* Read routing table information by calling sysctl(). */ + if (sysctl(mib, MIBSIZ, buf, &bufsiz, NULL, 0) < 0) { + flog_warn(EC_ZEBRA_SYSCTL_FAILED, "sysctl() fail by %s", + safe_strerror(errno)); + XFREE(MTYPE_TMP, ref); + return; + } + + for (end = buf + bufsiz; buf < end; buf += rtm->rtm_msglen) { + rtm = (struct rt_msghdr *)buf; + /* We must set RTF_DONE here, so rtm_read() doesn't ignore the + * message. */ + SET_FLAG(rtm->rtm_flags, RTF_DONE); + rtm_read(rtm); + } + + /* Free buffer. 
*/ + XFREE(MTYPE_TMP, ref); + + return; +} + +/* Only implemented for the netlink method. */ +void macfdb_read(struct zebra_ns *zns) +{ +} + +void macfdb_read_for_bridge(struct zebra_ns *zns, struct interface *ifp, + struct interface *br_if, vlanid_t vid) +{ +} + +void macfdb_read_mcast_entry_for_vni(struct zebra_ns *zns, + struct interface *ifp, vni_t vni) +{ +} + +void macfdb_read_specific_mac(struct zebra_ns *zns, struct interface *br_if, + const struct ethaddr *mac, vlanid_t vid) +{ +} + +void neigh_read(struct zebra_ns *zns) +{ +} + +void neigh_read_for_vlan(struct zebra_ns *zns, struct interface *vlan_if) +{ +} + +void neigh_read_specific_ip(const struct ipaddr *ip, struct interface *vlan_if) +{ +} + +void kernel_read_pbr_rules(struct zebra_ns *zns) +{ +} + +void kernel_read_tc_qdisc(struct zebra_ns *zns) +{ +} + +void vlan_read(struct zebra_ns *zns) +{ +} + +#endif /* !defined(GNU_LINUX) */ diff --git a/zebra/rule_netlink.c b/zebra/rule_netlink.c new file mode 100644 index 0000000..f00aef5 --- /dev/null +++ b/zebra/rule_netlink.c @@ -0,0 +1,431 @@ +// SPDX-License-Identifier: GPL-2.0-or-later +/* + * Zebra Policy Based Routing (PBR) interaction with the kernel using + * netlink. + * Copyright (C) 2018 Cumulus Networks, Inc. + */ + +#include + +#ifdef HAVE_NETLINK + +#include "if.h" +#include "prefix.h" +#include "vrf.h" + +#include +#include "zebra/zserv.h" +#include "zebra/zebra_ns.h" +#include "zebra/zebra_vrf.h" +#include "zebra/rt.h" +#include "zebra/interface.h" +#include "zebra/debug.h" +#include "zebra/rtadv.h" +#include "zebra/kernel_netlink.h" +#include "zebra/rule_netlink.h" +#include "zebra/zebra_pbr.h" +#include "zebra/zebra_errors.h" +#include "zebra/zebra_dplane.h" +#include "zebra/zebra_trace.h" + +/* definitions */ + +/* static function declarations */ + +/* Private functions */ + + +/* + * netlink_rule_msg_encode + * + * Encodes netlink RTM_NEWRULE/RTM_DELRULE message to buffer buf of size buflen.
+ * + * Returns -1 on failure, 0 when the msg doesn't fit entirely in the buffer + * or the number of bytes written to buf. + */ +static ssize_t netlink_rule_msg_encode( + int cmd, const struct zebra_dplane_ctx *ctx, uint32_t filter_bm, + uint32_t priority, uint32_t table, const struct prefix *src_ip, + const struct prefix *dst_ip, uint32_t fwmark, uint8_t dsfield, + uint8_t ip_protocol, void *buf, size_t buflen) +{ + uint8_t protocol = RTPROT_ZEBRA; + int family; + int bytelen; + struct { + struct nlmsghdr n; + struct fib_rule_hdr frh; + char buf[]; + } *req = buf; + + const char *ifname = dplane_ctx_rule_get_ifname(ctx); + + if (buflen < sizeof(*req)) + return 0; + memset(req, 0, sizeof(*req)); + + /* Assume ipv4 if no src/dst set, we only support ipv4/ipv6 */ + if (PREFIX_FAMILY(src_ip)) + family = PREFIX_FAMILY(src_ip); + else if (PREFIX_FAMILY(dst_ip)) + family = PREFIX_FAMILY(dst_ip); + else + family = AF_INET; + + bytelen = (family == AF_INET ? 4 : 16); + + req->n.nlmsg_type = cmd; + req->n.nlmsg_len = NLMSG_LENGTH(sizeof(struct rtmsg)); + req->n.nlmsg_flags = NLM_F_REQUEST; + + req->frh.family = family; + req->frh.action = FR_ACT_TO_TBL; + + if (!nl_attr_put(&req->n, buflen, FRA_PROTOCOL, &protocol, + sizeof(protocol))) + return 0; + + /* rule's pref # */ + if (!nl_attr_put32(&req->n, buflen, FRA_PRIORITY, priority)) + return 0; + + /* interface on which applied */ + if (!nl_attr_put(&req->n, buflen, FRA_IFNAME, ifname, + strlen(ifname) + 1)) + return 0; + + /* source IP, if specified */ + if (filter_bm & PBR_FILTER_SRC_IP) { + req->frh.src_len = src_ip->prefixlen; + if (!nl_attr_put(&req->n, buflen, FRA_SRC, &src_ip->u.prefix, + bytelen)) + return 0; + } + + /* destination IP, if specified */ + if (filter_bm & PBR_FILTER_DST_IP) { + req->frh.dst_len = dst_ip->prefixlen; + if (!nl_attr_put(&req->n, buflen, FRA_DST, &dst_ip->u.prefix, + bytelen)) + return 0; + } + + /* fwmark, if specified */ + if (filter_bm & PBR_FILTER_FWMARK) { + if 
(!nl_attr_put32(&req->n, buflen, FRA_FWMARK, fwmark)) + return 0; + } + + /* dsfield, if specified; mask off the ECN bits */ + if (filter_bm & PBR_FILTER_DSCP) + req->frh.tos = dsfield & PBR_DSFIELD_DSCP; + + /* protocol to match on */ + if (filter_bm & PBR_FILTER_IP_PROTOCOL) + nl_attr_put8(&req->n, buflen, FRA_IP_PROTO, ip_protocol); + + /* Route table to use to forward, if filter criteria matches. */ + if (table < 256) + req->frh.table = table; + else { + req->frh.table = RT_TABLE_UNSPEC; + if (!nl_attr_put32(&req->n, buflen, FRA_TABLE, table)) + return 0; + } + + if (IS_ZEBRA_DEBUG_KERNEL) + zlog_debug( + "Tx %s family %s IF %s Pref %u Fwmark %u Src %pFX Dst %pFX Table %u", + nl_msg_type_to_str(cmd), nl_family_to_str(family), + ifname, priority, fwmark, src_ip, dst_ip, table); + + return NLMSG_ALIGN(req->n.nlmsg_len); +} + +static ssize_t netlink_rule_msg_encoder(struct zebra_dplane_ctx *ctx, void *buf, + size_t buflen) +{ + int cmd = RTM_NEWRULE; + + if (dplane_ctx_get_op(ctx) == DPLANE_OP_RULE_DELETE) + cmd = RTM_DELRULE; + + return netlink_rule_msg_encode( + cmd, ctx, dplane_ctx_rule_get_filter_bm(ctx), + dplane_ctx_rule_get_priority(ctx), + dplane_ctx_rule_get_table(ctx), dplane_ctx_rule_get_src_ip(ctx), + dplane_ctx_rule_get_dst_ip(ctx), + dplane_ctx_rule_get_fwmark(ctx), + dplane_ctx_rule_get_dsfield(ctx), + dplane_ctx_rule_get_ipproto(ctx), buf, buflen); +} + +static ssize_t netlink_oldrule_msg_encoder(struct zebra_dplane_ctx *ctx, + void *buf, size_t buflen) +{ + return netlink_rule_msg_encode( + RTM_DELRULE, ctx, dplane_ctx_rule_get_old_filter_bm(ctx), + dplane_ctx_rule_get_old_priority(ctx), + dplane_ctx_rule_get_old_table(ctx), + dplane_ctx_rule_get_old_src_ip(ctx), + dplane_ctx_rule_get_old_dst_ip(ctx), + dplane_ctx_rule_get_old_fwmark(ctx), + dplane_ctx_rule_get_old_dsfield(ctx), + dplane_ctx_rule_get_old_ipproto(ctx), buf, buflen); +} + +/* + * Identify valid rule actions for netlink - other actions can't be installed + */ +static bool 
nl_rule_valid_action(uint32_t action) +{ + if (action == PBR_ACTION_TABLE) + return true; + else + return false; +} + +/* Public functions */ + +enum netlink_msg_status +netlink_put_rule_update_msg(struct nl_batch *bth, struct zebra_dplane_ctx *ctx) +{ + enum dplane_op_e op; + enum netlink_msg_status ret; + struct pbr_rule rule = {}; + + op = dplane_ctx_get_op(ctx); + if (!(op == DPLANE_OP_RULE_ADD || op == DPLANE_OP_RULE_UPDATE + || op == DPLANE_OP_RULE_DELETE)) { + flog_err( + EC_ZEBRA_PBR_RULE_UPDATE, + "Context received for kernel rule update with incorrect OP code (%u)", + op); + return FRR_NETLINK_ERROR; + } + + /* TODO -- special handling for rules that include actions that + * netlink cannot install. Some of the rule attributes are not + * available in netlink: only try to install valid actions. + */ + dplane_ctx_rule_get(ctx, &rule, NULL); + if (!nl_rule_valid_action(rule.action.flags)) { + if (IS_ZEBRA_DEBUG_KERNEL) + zlog_debug("%s: skip invalid action %#x", __func__, + rule.action.flags); + return 0; + } + + ret = netlink_batch_add_msg(bth, ctx, netlink_rule_msg_encoder, false); + + /** + * Delete the old one. + * + * Don't care about this result right? + */ + if (op == DPLANE_OP_RULE_UPDATE) + netlink_batch_add_msg(bth, ctx, netlink_oldrule_msg_encoder, + true); + + return ret; +} + +/* + * Handle netlink notification informing a rule add or delete. + * Handling of an ADD is TBD. + * DELs are notified up, if other attributes indicate it may be a + * notification of interest. The expectation is that if this corresponds + * to a PBR rule added by FRR, it will be readded. + * + * If startup and we see a rule we created, delete it as its leftover + * from a previous instance and should have been removed on shutdown. 
+ * + */ +int netlink_rule_change(struct nlmsghdr *h, ns_id_t ns_id, int startup) +{ + struct zebra_ns *zns; + struct fib_rule_hdr *frh; + struct rtattr *tb[FRA_MAX + 1]; + int len; + char *ifname; + struct zebra_pbr_rule rule = {}; + uint8_t proto = 0; + uint8_t ip_proto = 0; + + frrtrace(3, frr_zebra, netlink_rule_change, h, ns_id, startup); + + /* Basic validation followed by extracting attributes. */ + if (h->nlmsg_type != RTM_NEWRULE && h->nlmsg_type != RTM_DELRULE) + return 0; + + len = h->nlmsg_len - NLMSG_LENGTH(sizeof(struct fib_rule_hdr)); + if (len < 0) { + zlog_err( + "%s: Message received from netlink is of a broken size: %d %zu", + __func__, h->nlmsg_len, + (size_t)NLMSG_LENGTH(sizeof(struct fib_rule_hdr))); + return -1; + } + + frh = NLMSG_DATA(h); + + if (frh->family != AF_INET && frh->family != AF_INET6) { + if (frh->family == RTNL_FAMILY_IPMR + || frh->family == RTNL_FAMILY_IP6MR) { + if (IS_ZEBRA_DEBUG_KERNEL) + zlog_debug( + "Received rule netlink that we are ignoring for family %u, rule change: %u", + frh->family, h->nlmsg_type); + return 0; + } + flog_warn( + EC_ZEBRA_NETLINK_INVALID_AF, + "Invalid address family: %u received from kernel rule change: %u", + frh->family, h->nlmsg_type); + return 0; + } + if (frh->action != FR_ACT_TO_TBL) + return 0; + + memset(tb, 0, sizeof(tb)); + netlink_parse_rtattr(tb, FRA_MAX, RTM_RTA(frh), len); + + if (tb[FRA_PRIORITY]) + rule.rule.priority = *(uint32_t *)RTA_DATA(tb[FRA_PRIORITY]); + + if (tb[FRA_SRC]) { + if (frh->family == AF_INET) + memcpy(&rule.rule.filter.src_ip.u.prefix4, + RTA_DATA(tb[FRA_SRC]), 4); + else + memcpy(&rule.rule.filter.src_ip.u.prefix6, + RTA_DATA(tb[FRA_SRC]), 16); + rule.rule.filter.src_ip.prefixlen = frh->src_len; + rule.rule.filter.src_ip.family = frh->family; + rule.rule.filter.filter_bm |= PBR_FILTER_SRC_IP; + } + + if (tb[FRA_DST]) { + if (frh->family == AF_INET) + memcpy(&rule.rule.filter.dst_ip.u.prefix4, + RTA_DATA(tb[FRA_DST]), 4); + else + 
memcpy(&rule.rule.filter.dst_ip.u.prefix6, + RTA_DATA(tb[FRA_DST]), 16); + rule.rule.filter.dst_ip.prefixlen = frh->dst_len; + rule.rule.filter.dst_ip.family = frh->family; + rule.rule.filter.filter_bm |= PBR_FILTER_DST_IP; + } + + if (tb[FRA_TABLE]) + rule.rule.action.table = *(uint32_t *)RTA_DATA(tb[FRA_TABLE]); + else + rule.rule.action.table = frh->table; + + /* TBD: We don't care about rules not specifying an IIF. */ + if (tb[FRA_IFNAME] == NULL) + return 0; + + if (tb[FRA_PROTOCOL]) + proto = *(uint8_t *)RTA_DATA(tb[FRA_PROTOCOL]); + + if (tb[FRA_IP_PROTO]) + ip_proto = *(uint8_t *)RTA_DATA(tb[FRA_IP_PROTO]); + + ifname = (char *)RTA_DATA(tb[FRA_IFNAME]); + strlcpy(rule.ifname, ifname, sizeof(rule.ifname)); + + if (h->nlmsg_type == RTM_NEWRULE) { + /* + * If we see a rule at startup we created, delete it now. + * It should have been flushed on a previous shutdown. + */ + if (startup && proto == RTPROT_ZEBRA) { + enum zebra_dplane_result ret; + + ret = dplane_pbr_rule_delete(&rule); + + zlog_debug( + "%s: %s leftover rule: family %s IF %s Pref %u Src %pFX Dst %pFX Table %u ip-proto: %u", + __func__, + ((ret == ZEBRA_DPLANE_REQUEST_FAILURE) + ? "Failed to remove" + : "Removed"), + nl_family_to_str(frh->family), rule.ifname, + rule.rule.priority, &rule.rule.filter.src_ip, + &rule.rule.filter.dst_ip, + rule.rule.action.table, ip_proto); + } + + /* TBD */ + return 0; + } + + zns = zebra_ns_lookup(ns_id); + + /* If we don't know the interface, we don't care. 
*/ + if (!if_lookup_by_name_per_ns(zns, ifname)) + return 0; + + if (IS_ZEBRA_DEBUG_KERNEL) + zlog_debug( + "Rx %s family %s IF %s Pref %u Src %pFX Dst %pFX Table %u ip-proto: %u", + nl_msg_type_to_str(h->nlmsg_type), + nl_family_to_str(frh->family), rule.ifname, + rule.rule.priority, &rule.rule.filter.src_ip, + &rule.rule.filter.dst_ip, rule.rule.action.table, + ip_proto); + + return kernel_pbr_rule_del(&rule); +} + +/* + * Request rules from the kernel + */ +static int netlink_request_rules(struct zebra_ns *zns, int family, int type) +{ + struct { + struct nlmsghdr n; + struct fib_rule_hdr frh; + char buf[NL_PKT_BUF_SIZE]; + } req; + + memset(&req, 0, sizeof(req)); + req.n.nlmsg_type = type; + req.n.nlmsg_flags = NLM_F_ROOT | NLM_F_MATCH | NLM_F_REQUEST; + req.n.nlmsg_len = NLMSG_LENGTH(sizeof(struct fib_rule_hdr)); + req.frh.family = family; + + return netlink_request(&zns->netlink_cmd, &req); +} + +/* + * Get to know existing PBR rules in the kernel - typically called at startup. + */ +int netlink_rules_read(struct zebra_ns *zns) +{ + int ret; + struct zebra_dplane_info dp_info; + + zebra_dplane_info_from_zns(&dp_info, zns, true); + + ret = netlink_request_rules(zns, AF_INET, RTM_GETRULE); + if (ret < 0) + return ret; + + ret = netlink_parse_info(netlink_rule_change, &zns->netlink_cmd, + &dp_info, 0, true); + if (ret < 0) + return ret; + + ret = netlink_request_rules(zns, AF_INET6, RTM_GETRULE); + if (ret < 0) + return ret; + + ret = netlink_parse_info(netlink_rule_change, &zns->netlink_cmd, + &dp_info, 0, true); + + return ret; +} + +#endif /* HAVE_NETLINK */ diff --git a/zebra/rule_netlink.h b/zebra/rule_netlink.h new file mode 100644 index 0000000..8ffca49 --- /dev/null +++ b/zebra/rule_netlink.h @@ -0,0 +1,36 @@ +// SPDX-License-Identifier: GPL-2.0-or-later +/* + * Zebra Policy Based Routing (PBR) interaction with the kernel using + * netlink - public definitions and function declarations. + * Copyright (C) 2018 Cumulus Networks, Inc. 
+ */ + +#ifndef _ZEBRA_RULE_NETLINK_H +#define _ZEBRA_RULE_NETLINK_H + +#ifdef HAVE_NETLINK + +#ifdef __cplusplus +extern "C" { +#endif + +/* + * Handle netlink notification informing a rule add or delete. + */ +extern int netlink_rule_change(struct nlmsghdr *h, ns_id_t ns_id, int startup); + +/* + * Get to know existing PBR rules in the kernel - typically called at startup. + */ +extern int netlink_rules_read(struct zebra_ns *zns); + +extern enum netlink_msg_status +netlink_put_rule_update_msg(struct nl_batch *bth, struct zebra_dplane_ctx *ctx); + +#ifdef __cplusplus +} +#endif + +#endif /* HAVE_NETLINK */ + +#endif /* _ZEBRA_RULE_NETLINK_H */ diff --git a/zebra/rule_socket.c b/zebra/rule_socket.c new file mode 100644 index 0000000..d013749 --- /dev/null +++ b/zebra/rule_socket.c @@ -0,0 +1,37 @@ +// SPDX-License-Identifier: GPL-2.0-or-later +/* + * Zebra Policy Based Routing (PBR) interaction with the kernel using + * netlink. + * Copyright (C) 2018 Cumulus Networks, Inc. + * Donald Sharp + */ + +#include + +#ifndef HAVE_NETLINK + +#include "if.h" +#include "prefix.h" +#include "vrf.h" +#include "lib_errors.h" + +#include "zebra/zserv.h" +#include "zebra/zebra_ns.h" +#include "zebra/zebra_vrf.h" +#include "zebra/rt.h" +#include "zebra/interface.h" +#include "zebra/debug.h" +#include "zebra/rtadv.h" +#include "zebra/kernel_netlink.h" +#include "zebra/rule_netlink.h" +#include "zebra/zebra_pbr.h" +#include "zebra/zebra_errors.h" + +enum zebra_dplane_result kernel_pbr_rule_update(struct zebra_dplane_ctx *ctx) +{ + flog_err(EC_LIB_UNAVAILABLE, "%s not Implemented for this platform", + __func__); + return ZEBRA_DPLANE_REQUEST_FAILURE; +} + +#endif diff --git a/zebra/sample_plugin.c b/zebra/sample_plugin.c new file mode 100644 index 0000000..b5f7b88 --- /dev/null +++ b/zebra/sample_plugin.c @@ -0,0 +1,120 @@ +// SPDX-License-Identifier: GPL-2.0-or-later +/* + * Sample plugin for the FRR zebra dataplane. + * + * Copyright (c) 2019 Volta Networks, Inc. 
+ */ + +/* + * Should be possible to build this plugin using this sort of command: + * + * gcc -I ~/work/frr/ -I ~/work/frr/lib -I ~/work/frr/zebra \ + * -g -O0 -o sample_plugin.so -shared -fPIC sample_plugin.c + * + * where 'frr' is a configured and built frr sandbox. + * + * Run zebra with '-M /path/to/sample_plugin.so' to load the module. + */ + +#include "config.h" /* Include this explicitly */ +#include "lib/zebra.h" +#include "lib/libfrr.h" +#include "zebra/zebra_dplane.h" +#include "zebra/debug.h" + +static const char *plugin_name = "SAMPLE"; + +static struct zebra_dplane_provider *prov_p; + +/* + * Startup/init callback, called from the dataplane. + */ +static int sample_start(struct zebra_dplane_provider *prov) +{ + /* Nothing special to do - we don't allocate anything. */ + return 0; +} + + +/* + * Shutdown/cleanup callback, called from the dataplane pthread. + */ +static int sample_fini(struct zebra_dplane_provider *prov, bool early) +{ + /* Nothing special to do. */ + return 0; +} + +/* + * Callback from the dataplane to process incoming work; this runs in the + * dplane pthread. + */ +static int sample_process(struct zebra_dplane_provider *prov) +{ + int counter, limit; + struct zebra_dplane_ctx *ctx; + + limit = dplane_provider_get_work_limit(prov_p); + + /* Respect the configured limit on the amount of work to do in + * any one call. + */ + for (counter = 0; counter < limit; counter++) { + ctx = dplane_provider_dequeue_in_ctx(prov_p); + if (!ctx) + break; + + /* Just set 'success' status and return to the dataplane */ + dplane_ctx_set_status(ctx, ZEBRA_DPLANE_REQUEST_SUCCESS); + dplane_provider_enqueue_out_ctx(prov_p, ctx); + } + + return 0; +} + +/* + * Init entry point called during zebra startup. This is registered during + * module init. + */ +static int init_sample_plugin(struct event_loop *tm) +{ + int ret; + + /* Note that we don't use or store the thread_master 'tm'. 
We + * don't use the zebra main pthread: our plugin code will run in + * the zebra dataplane pthread context. + */ + + /* Register the plugin with the dataplane infrastructure. We + * register to be called before the kernel, and we register + * our init, process work, and shutdown callbacks. + */ + ret = dplane_provider_register(plugin_name, DPLANE_PRIO_PRE_KERNEL, + DPLANE_PROV_FLAGS_DEFAULT, + sample_start, + sample_process, + sample_fini, + NULL, + &prov_p); + + if (IS_ZEBRA_DEBUG_DPLANE) + zlog_debug("sample plugin register => %d", ret); + + return 0; +} + +/* + * Base FRR loadable module info: basic info including module entry-point. + */ +static int module_init(void) +{ + hook_register(frr_late_init, init_sample_plugin); + return 0; +} + +FRR_MODULE_SETUP( + .name = "dplane_sample", + .version = "0.0.1", + .description = "Dataplane Sample Plugin", + .init = module_init, +); diff --git a/zebra/subdir.am b/zebra/subdir.am new file mode 100644 index 0000000..b3bd9be --- /dev/null +++ b/zebra/subdir.am @@ -0,0 +1,259 @@ +# +# zebra +# + +if ZEBRA +sbin_PROGRAMS += zebra/zebra +vtysh_daemons += zebra + +if IRDP +module_LTLIBRARIES += zebra/zebra_irdp.la +endif +if SNMP +module_LTLIBRARIES += zebra/zebra_snmp.la +endif +if FPM +module_LTLIBRARIES += zebra/zebra_fpm.la +endif +if LINUX +module_LTLIBRARIES += zebra/zebra_cumulus_mlag.la +endif + +# Dataplane sample plugin +if DEV_BUILD +module_LTLIBRARIES += zebra/dplane_sample_plugin.la +endif + +man8 += $(MANBUILD)/frr-zebra.8 +## endif ZEBRA +endif + +zebra_zebra_LDADD = lib/libfrr.la $(LIBCAP) $(UST_LIBS) +if HAVE_PROTOBUF3 +zebra_zebra_LDADD += mlag/libmlag_pb.la $(PROTOBUF_C_LIBS) +zebra/zebra_mlag.$(OBJEXT): mlag/mlag.pb-c.h +endif +zebra_zebra_SOURCES = \ + zebra/connected.c \ + zebra/debug.c \ + zebra/if_ioctl.c \ + zebra/if_netlink.c \ + zebra/if_socket.c \ + zebra/if_sysctl.c \ + zebra/interface.c \ + zebra/ioctl.c \ + zebra/ipforward_proc.c \ + zebra/ipforward_sysctl.c \ + zebra/kernel_netlink.c \ + 
zebra/kernel_socket.c \ + zebra/label_manager.c \ + zebra/main.c \ + zebra/netconf_netlink.c \ + zebra/redistribute.c \ + zebra/router-id.c \ + zebra/rt_netlink.c \ + zebra/rt_socket.c \ + zebra/rtadv.c \ + zebra/rtread_netlink.c \ + zebra/rtread_sysctl.c \ + zebra/rule_netlink.c \ + zebra/rule_socket.c \ + zebra/table_manager.c \ + zebra/tc_netlink.c \ + zebra/tc_socket.c \ + zebra/zapi_msg.c \ + zebra/zebra_affinitymap.c \ + zebra/zebra_dplane.c \ + zebra/zebra_errors.c \ + zebra/zebra_gr.c \ + zebra/zebra_l2.c \ + zebra/zebra_l2_bridge_if.c \ + zebra/zebra_evpn.c \ + zebra/zebra_evpn_mac.c \ + zebra/zebra_evpn_neigh.c \ + zebra/zebra_mlag.c \ + zebra/zebra_mlag_vty.c \ + zebra/zebra_mpls.c \ + zebra/zebra_mpls_netlink.c \ + zebra/zebra_mpls_openbsd.c \ + zebra/zebra_mpls_null.c \ + zebra/zebra_mpls_vty.c \ + zebra/zebra_srv6.c \ + zebra/zebra_srv6_vty.c \ + zebra/zebra_mroute.c \ + zebra/zebra_nb.c \ + zebra/zebra_nb_config.c \ + zebra/zebra_nb_rpcs.c \ + zebra/zebra_nb_state.c \ + zebra/zebra_netns_id.c \ + zebra/zebra_netns_notify.c \ + zebra/zebra_nhg.c \ + zebra/zebra_ns.c \ + zebra/zebra_opaque.c \ + zebra/zebra_pbr.c \ + zebra/zebra_ptm.c \ + zebra/zebra_ptm_redistribute.c \ + zebra/zebra_pw.c \ + zebra/zebra_rib.c \ + zebra/zebra_router.c \ + zebra/zebra_rnh.c \ + zebra/zebra_routemap.c \ + zebra/zebra_routemap_nb.c \ + zebra/zebra_routemap_nb_config.c \ + zebra/zebra_script.c \ + zebra/zebra_srte.c \ + zebra/zebra_tc.c \ + zebra/zebra_trace.c \ + zebra/zebra_vrf.c \ + zebra/zebra_vty.c \ + zebra/zebra_vxlan.c \ + zebra/zebra_vxlan_if.c \ + zebra/zebra_evpn_mh.c \ + zebra/zebra_neigh.c \ + zebra/zserv.c \ + # end + +clippy_scan += \ + zebra/debug.c \ + zebra/interface.c \ + zebra/rtadv.c \ + zebra/zebra_evpn_mh.c \ + zebra/zebra_mlag_vty.c \ + zebra/zebra_routemap.c \ + zebra/zebra_vty.c \ + zebra/zebra_srv6_vty.c \ + zebra/zebra_vrf.c \ + zebra/dpdk/zebra_dplane_dpdk_vty.c \ + zebra/label_manager.c \ + # end + +noinst_HEADERS += \ + zebra/connected.h \ + 
zebra/debug.h \ + zebra/if_netlink.h \ + zebra/interface.h \ + zebra/ioctl.h \ + zebra/ipforward.h \ + zebra/irdp.h \ + zebra/kernel_netlink.h \ + zebra/kernel_socket.h \ + zebra/label_manager.h \ + zebra/netconf_netlink.h \ + zebra/redistribute.h \ + zebra/rib.h \ + zebra/router-id.h \ + zebra/rt.h \ + zebra/rt_netlink.h \ + zebra/rtadv.h \ + zebra/rule_netlink.h \ + zebra/table_manager.h \ + zebra/tc_netlink.h \ + zebra/zapi_msg.h \ + zebra/zebra_affinitymap.h \ + zebra/zebra_dplane.h \ + zebra/zebra_errors.h \ + zebra/zebra_evpn.h \ + zebra/zebra_evpn_mac.h \ + zebra/zebra_evpn_neigh.h \ + zebra/zebra_evpn_vxlan.h \ + zebra/zebra_fpm_private.h \ + zebra/zebra_l2.h \ + zebra/zebra_mlag.h \ + zebra/zebra_mlag_vty.h \ + zebra/zebra_mpls.h \ + zebra/zebra_srv6.h \ + zebra/zebra_srv6_vty.h \ + zebra/zebra_mroute.h \ + zebra/zebra_nb.h \ + zebra/zebra_netns_id.h \ + zebra/zebra_netns_notify.h \ + zebra/zebra_nhg.h \ + zebra/zebra_nhg_private.h \ + zebra/zebra_ns.h \ + zebra/zebra_opaque.h \ + zebra/zebra_pbr.h \ + zebra/zebra_ptm.h \ + zebra/zebra_ptm_redistribute.h \ + zebra/zebra_pw.h \ + zebra/zebra_rnh.h \ + zebra/zebra_routemap.h \ + zebra/zebra_routemap_nb.h \ + zebra/zebra_router.h \ + zebra/zebra_script.h \ + zebra/zebra_srte.h \ + zebra/zebra_tc.h \ + zebra/zebra_trace.h \ + zebra/zebra_vrf.h \ + zebra/zebra_vxlan.h \ + zebra/zebra_vxlan_private.h \ + zebra/zebra_evpn_mh.h \ + zebra/zebra_neigh.h \ + zebra/zebra_l2_bridge_if.h \ + zebra/zebra_vxlan_if.h \ + zebra/zserv.h \ + zebra/dpdk/zebra_dplane_dpdk.h \ + zebra/dpdk/zebra_dplane_dpdk_private.h \ + # end + +zebra_zebra_irdp_la_SOURCES = \ + zebra/irdp_interface.c \ + zebra/irdp_main.c \ + zebra/irdp_packet.c \ + # end +zebra_zebra_irdp_la_LDFLAGS = $(MODULE_LDFLAGS) + +zebra_zebra_snmp_la_SOURCES = zebra/zebra_snmp.c +zebra_zebra_snmp_la_CFLAGS = $(AM_CFLAGS) $(SNMP_CFLAGS) -std=gnu11 +zebra_zebra_snmp_la_LDFLAGS = $(MODULE_LDFLAGS) +zebra_zebra_snmp_la_LIBADD = lib/libfrrsnmp.la + 
+zebra_zebra_fpm_la_LDFLAGS = $(MODULE_LDFLAGS) +zebra_zebra_fpm_la_LIBADD = +zebra_zebra_fpm_la_SOURCES = zebra/zebra_fpm.c +zebra_zebra_fpm_la_SOURCES += zebra/zebra_fpm_netlink.c +if HAVE_PROTOBUF +zebra_zebra_fpm_la_LIBADD += fpm/libfrrfpm_pb.la qpb/libfrr_pb.la $(PROTOBUF_C_LIBS) +zebra_zebra_fpm_la_SOURCES += zebra/zebra_fpm_protobuf.c +zebra/zebra_fpm_protobuf.lo: fpm/fpm.pb-c.h qpb/qpb.pb-c.h +if DEV_BUILD +zebra_zebra_fpm_la_SOURCES += zebra/zebra_fpm_dt.c +zebra/zebra_fpm_dt.lo: fpm/fpm.pb-c.h qpb/qpb.pb-c.h +endif +endif + +# Sample dataplane plugin +if DEV_BUILD +zebra_dplane_sample_plugin_la_SOURCES = zebra/sample_plugin.c +zebra_dplane_sample_plugin_la_LDFLAGS = $(MODULE_LDFLAGS) +endif + +nodist_zebra_zebra_SOURCES = \ + yang/frr-zebra.yang.c \ + yang/frr-zebra-route-map.yang.c \ + # end + +zebra_zebra_cumulus_mlag_la_SOURCES = zebra/zebra_mlag_private.c +zebra_zebra_cumulus_mlag_la_LDFLAGS = $(MODULE_LDFLAGS) + +if LINUX +module_LTLIBRARIES += zebra/dplane_fpm_nl.la + +zebra_dplane_fpm_nl_la_SOURCES = zebra/dplane_fpm_nl.c +zebra_dplane_fpm_nl_la_LDFLAGS = $(MODULE_LDFLAGS) +zebra_dplane_fpm_nl_la_LIBADD = +endif + +if NETLINK_DEBUG +zebra_zebra_SOURCES += \ + zebra/debug_nl.c \ + # end +endif + +if DP_DPDK +module_LTLIBRARIES += zebra/zebra_dplane_dpdk.la +endif + +zebra_zebra_dplane_dpdk_la_SOURCES = zebra/dpdk/zebra_dplane_dpdk.c zebra/dpdk/zebra_dplane_dpdk_vty.c +zebra_zebra_dplane_dpdk_la_LDFLAGS = -avoid-version -module -shared -export-dynamic -L/usr/local/lib -v +zebra_zebra_dplane_dpdk_la_CFLAGS = $(DPDK_CFLAGS) +zebra_zebra_dplane_dpdk_la_LIBADD = $(DPDK_LIBS) diff --git a/zebra/table_manager.c b/zebra/table_manager.c new file mode 100644 index 0000000..512508b --- /dev/null +++ b/zebra/table_manager.c @@ -0,0 +1,330 @@ +// SPDX-License-Identifier: GPL-2.0-or-later +/* zebra table Manager for routing table identifier management + * Copyright (C) 2018 6WIND + */ + +#include "zebra.h" + +#include +#include +#include + +#include "lib/log.h" 
+#include "lib/memory.h" +#include "lib/table.h" +#include "lib/network.h" +#include "lib/stream.h" +#include "lib/zclient.h" +#include "lib/libfrr.h" +#include "lib/vrf.h" + +#include "zebra/zserv.h" +#include "zebra/zebra_vrf.h" +#include "zebra/label_manager.h" /* for NO_PROTO */ +#include "zebra/table_manager.h" +#include "zebra/zebra_errors.h" + +/* routing table identifiers + * + */ +#if !defined(GNU_LINUX) +/* BSD systems + */ +#else +/* Linux Systems + */ +#define RT_TABLE_ID_LOCAL 255 +#define RT_TABLE_ID_MAIN 254 +#define RT_TABLE_ID_DEFAULT 253 +#define RT_TABLE_ID_COMPAT 252 +#define RT_TABLE_ID_UNSPEC 0 +#endif /* !def(GNU_LINUX) */ +#define RT_TABLE_ID_UNRESERVED_MIN 1 +#define RT_TABLE_ID_UNRESERVED_MAX 0xffffffff + +DEFINE_MGROUP(TABLE_MGR, "Table Manager"); +DEFINE_MTYPE_STATIC(TABLE_MGR, TM_CHUNK, "Table Manager Chunk"); +DEFINE_MTYPE_STATIC(TABLE_MGR, TM_TABLE, "Table Manager Context"); + +static void delete_table_chunk(void *val) +{ + XFREE(MTYPE_TM_CHUNK, val); +} + +/** + * Init table manager + */ +void table_manager_enable(struct zebra_vrf *zvrf) +{ + + if (zvrf->tbl_mgr) + return; + if (!vrf_is_backend_netns() + && strcmp(zvrf_name(zvrf), VRF_DEFAULT_NAME)) { + struct zebra_vrf *def = zebra_vrf_lookup_by_id(VRF_DEFAULT); + + zvrf->tbl_mgr = def->tbl_mgr; + return; + } + zvrf->tbl_mgr = XCALLOC(MTYPE_TM_TABLE, sizeof(struct table_manager)); + zvrf->tbl_mgr->lc_list = list_new(); + zvrf->tbl_mgr->lc_list->del = delete_table_chunk; +} + +/** + * Core function, assigns table chunks + * + * It first searches through the list to check if there's one available + * (previously released). 
Otherwise it creates and assigns a new one + * + * @param proto Daemon protocol of client, to identify the owner + * @param instance Instance, to identify the owner + * @para size Size of the table chunk + * @return Pointer to the assigned table chunk + */ +struct table_manager_chunk *assign_table_chunk(uint8_t proto, uint16_t instance, + uint32_t size, + struct zebra_vrf *zvrf) +{ + struct table_manager_chunk *tmc; + struct listnode *node; + uint32_t start; + bool manual_conf = false; + + if (!zvrf) + return NULL; + + /* first check if there's one available */ + for (ALL_LIST_ELEMENTS_RO(zvrf->tbl_mgr->lc_list, node, tmc)) { + if (tmc->proto == NO_PROTO + && tmc->end - tmc->start + 1 == size) { + tmc->proto = proto; + tmc->instance = instance; + return tmc; + } + } + /* otherwise create a new one */ + tmc = XCALLOC(MTYPE_TM_CHUNK, sizeof(struct table_manager_chunk)); + + if (zvrf->tbl_mgr->start || zvrf->tbl_mgr->end) + manual_conf = true; + /* table RT IDs range are [1;252] and [256;0xffffffff] + * - check if the requested range can be within the first range, + * otherwise elect second one + * - TODO : vrf-lites have their own table identifier. + * In that case, table_id should be removed from the table range. + */ + if (list_isempty(zvrf->tbl_mgr->lc_list)) { + if (!manual_conf) + start = RT_TABLE_ID_UNRESERVED_MIN; + else + start = zvrf->tbl_mgr->start; + } else + start = ((struct table_manager_chunk *)listgetdata( + listtail(zvrf->tbl_mgr->lc_list))) + ->end + + 1; + + if (!manual_conf) { + +#if !defined(GNU_LINUX) +/* BSD systems + */ +#else +/* Linux Systems + */ + /* if not enough room space between MIN and COMPAT, + * then begin after LOCAL + */ + if (start < RT_TABLE_ID_COMPAT + && (size > RT_TABLE_ID_COMPAT - RT_TABLE_ID_UNRESERVED_MIN)) + start = RT_TABLE_ID_LOCAL + 1; +#endif /* !def(GNU_LINUX) */ + tmc->start = start; + if (RT_TABLE_ID_UNRESERVED_MAX - size + 1 < start) { + flog_err(EC_ZEBRA_TM_EXHAUSTED_IDS, + "Reached max table id. 
Start/Size %u/%u", + start, size); + XFREE(MTYPE_TM_CHUNK, tmc); + return NULL; + } + } else { + tmc->start = start; + if (zvrf->tbl_mgr->end - size + 1 < start) { + flog_err(EC_ZEBRA_TM_EXHAUSTED_IDS, + "Reached max table id. Start/Size %u/%u", + start, size); + XFREE(MTYPE_TM_CHUNK, tmc); + return NULL; + } + } + tmc->end = tmc->start + size - 1; + tmc->proto = proto; + tmc->instance = instance; + listnode_add(zvrf->tbl_mgr->lc_list, tmc); + + return tmc; +} + +/** + * Core function, release no longer used table chunks + * + * @param proto Daemon protocol of client, to identify the owner + * @param instance Instance, to identify the owner + * @param start First table RT ID of the chunk + * @param end Last table RT ID of the chunk + * @return 0 on success, -1 otherwise + */ +int release_table_chunk(uint8_t proto, uint16_t instance, uint32_t start, + uint32_t end, struct zebra_vrf *zvrf) +{ + struct listnode *node; + struct table_manager_chunk *tmc; + int ret = -1; + struct table_manager *tbl_mgr; + + if (!zvrf) + return -1; + + tbl_mgr = zvrf->tbl_mgr; + if (!tbl_mgr) + return ret; + /* check that size matches */ + zlog_debug("Releasing table chunk: %u - %u", start, end); + /* find chunk and disown */ + for (ALL_LIST_ELEMENTS_RO(tbl_mgr->lc_list, node, tmc)) { + if (tmc->start != start) + continue; + if (tmc->end != end) + continue; + if (tmc->proto != proto || tmc->instance != instance) { + flog_err(EC_ZEBRA_TM_DAEMON_MISMATCH, + "%s: Daemon mismatch!!", __func__); + continue; + } + tmc->proto = NO_PROTO; + tmc->instance = 0; + ret = 0; + break; + } + if (ret != 0) + flog_err(EC_ZEBRA_TM_UNRELEASED_CHUNK, + "%s: Table chunk not released!!", __func__); + + return ret; +} + +/** + * Release table chunks from a client. + * + * Called on client disconnection or reconnection. It only releases chunks + * with empty keep value. 
+ * + * @param client the client to release chunks from + * @return Number of chunks released + */ +int release_daemon_table_chunks(struct zserv *client) +{ + uint8_t proto = client->proto; + uint16_t instance = client->instance; + struct listnode *node; + struct table_manager_chunk *tmc; + int count = 0; + int ret; + struct vrf *vrf; + struct zebra_vrf *zvrf; + + RB_FOREACH (vrf, vrf_name_head, &vrfs_by_name) { + zvrf = vrf->info; + + if (!zvrf) + continue; + if (!vrf_is_backend_netns() && vrf->vrf_id != VRF_DEFAULT) + continue; + for (ALL_LIST_ELEMENTS_RO(zvrf->tbl_mgr->lc_list, node, tmc)) { + if (tmc->proto == proto && tmc->instance == instance) { + ret = release_table_chunk( + tmc->proto, tmc->instance, tmc->start, + tmc->end, zvrf); + if (ret == 0) + count++; + } + } + } + zlog_debug("%s: Released %d table chunks", __func__, count); + + return count; +} + +static void table_range_add(struct zebra_vrf *zvrf, uint32_t start, + uint32_t end) +{ + if (!zvrf->tbl_mgr) + return; + zvrf->tbl_mgr->start = start; + zvrf->tbl_mgr->end = end; +} + +void table_manager_disable(struct zebra_vrf *zvrf) +{ + if (!zvrf->tbl_mgr) + return; + if (!vrf_is_backend_netns() + && strcmp(zvrf_name(zvrf), VRF_DEFAULT_NAME)) { + zvrf->tbl_mgr = NULL; + return; + } + list_delete(&zvrf->tbl_mgr->lc_list); + XFREE(MTYPE_TM_TABLE, zvrf->tbl_mgr); + zvrf->tbl_mgr = NULL; +} + +int table_manager_range(struct vty *vty, bool add, struct zebra_vrf *zvrf, + const char *start_table_str, const char *end_table_str) +{ + uint32_t start; + uint32_t end; + + if (add) { + if (!start_table_str || !end_table_str) { + vty_out(vty, "%% Labels not specified\n"); + return CMD_WARNING_CONFIG_FAILED; + } + start = atoi(start_table_str); + end = atoi(end_table_str); + if (end < start) { + vty_out(vty, "%% End table is less than Start table\n"); + return CMD_WARNING_CONFIG_FAILED; + } + +#if !defined(GNU_LINUX) +/* BSD systems + */ +#else + /* Linux Systems + */ + if ((start >= RT_TABLE_ID_COMPAT && start <= 
RT_TABLE_ID_LOCAL) + || (end >= RT_TABLE_ID_COMPAT + && end <= RT_TABLE_ID_LOCAL)) { + vty_out(vty, "%% Values forbidden in range [%u;%u]\n", + RT_TABLE_ID_COMPAT, RT_TABLE_ID_LOCAL); + return CMD_WARNING_CONFIG_FAILED; + } + if (start < RT_TABLE_ID_COMPAT && end > RT_TABLE_ID_LOCAL) { + vty_out(vty, + "%% Range overlaps range [%u;%u] forbidden\n", + RT_TABLE_ID_COMPAT, RT_TABLE_ID_LOCAL); + return CMD_WARNING_CONFIG_FAILED; + } +#endif + if (zvrf->tbl_mgr + && ((zvrf->tbl_mgr->start && zvrf->tbl_mgr->start != start) + || (zvrf->tbl_mgr->end && zvrf->tbl_mgr->end != end))) { + vty_out(vty, + "%% New range will be taken into account at restart\n"); + } + table_range_add(zvrf, start, end); + } else + table_range_add(zvrf, 0, 0); + return CMD_SUCCESS; +} diff --git a/zebra/table_manager.h b/zebra/table_manager.h new file mode 100644 index 0000000..f8e99a3 --- /dev/null +++ b/zebra/table_manager.h @@ -0,0 +1,66 @@ +// SPDX-License-Identifier: GPL-2.0-or-later +/* zebra table Manager for routing table identifier management + * Copyright (C) 2018 6WIND + */ + +#ifndef _TABLE_MANAGER_H +#define _TABLE_MANAGER_H + +#include + +#include "lib/linklist.h" +#include "frrevent.h" +#include "lib/ns.h" + +#include "zebra/zserv.h" + +#ifdef __cplusplus +extern "C" { +#endif + +/* + * Table chunk struct + * Client daemon which the chunk belongs to can be identified by either + * proto (daemon protocol) + instance + VRF. + * If the client then passes a non-empty value to keep field when it requests + * for chunks, the chunks won't be garbage collected and the client will be + * responsible of its release. 
+ * Otherwise, if the keep field is not set (value 0) for the chunk, it will be + * automatically released when the client disconnects or when it reconnects + * (in case it died unexpectedly, we can know it's the same because it will have + * the same proto and instance values) + */ +struct table_manager_chunk { + vrf_id_t vrf_id; + uint8_t proto; + uint16_t instance; + uint32_t start; /* First table RT ID of the chunk */ + uint32_t end; /* Last table RT ID of the chunk */ +}; + +/* + * Main table manager struct + * Holds a linked list of table chunks. + */ +struct table_manager { + struct list *lc_list; + uint32_t start; + uint32_t end; +}; + +void table_manager_enable(struct zebra_vrf *zvrf); +struct table_manager_chunk *assign_table_chunk(uint8_t proto, uint16_t instance, + uint32_t size, + struct zebra_vrf *zvrf); +int release_table_chunk(uint8_t proto, uint16_t instance, uint32_t start, + uint32_t end, struct zebra_vrf *zvrf); +int release_daemon_table_chunks(struct zserv *client); +void table_manager_disable(struct zebra_vrf *zvrf); +int table_manager_range(struct vty *vty, bool add, struct zebra_vrf *zvrf, + const char *min, const char *max); + +#ifdef __cplusplus +} +#endif + +#endif /* _TABLE_MANAGER_H */ diff --git a/zebra/tc_netlink.c b/zebra/tc_netlink.c new file mode 100644 index 0000000..d633c07 --- /dev/null +++ b/zebra/tc_netlink.c @@ -0,0 +1,873 @@ +// SPDX-License-Identifier: GPL-2.0-or-later +/* + * Zebra Traffic Control (TC) interaction with the kernel using netlink. 
+ * + * Copyright (C) 2022 Shichu Yang + */ + +#include + +#ifdef HAVE_NETLINK + +#include +#include +#include +#include + +#include "if.h" +#include "prefix.h" +#include "vrf.h" + +#include "zebra/zserv.h" +#include "zebra/zebra_ns.h" +#include "zebra/rt.h" +#include "zebra/interface.h" +#include "zebra/debug.h" +#include "zebra/kernel_netlink.h" +#include "zebra/tc_netlink.h" +#include "zebra/zebra_errors.h" +#include "zebra/zebra_dplane.h" +#include "zebra/zebra_tc.h" +#include "zebra/zebra_trace.h" + +#define TC_FREQ_DEFAULT (100) + +/* some magic number */ +#define TC_QDISC_MAJOR_ZEBRA (0xbeef0000u) +#define TC_MINOR_NOCLASS (0xffffu) + +#define TIME_UNITS_PER_SEC (1000000) +#define xmittime(r, s) (TIME_UNITS_PER_SEC * ((double)(s) / (double)(r))) + +static uint32_t tc_get_freq(void) +{ + int freq = 0; + FILE *fp = fopen("/proc/net/psched", "r"); + + if (fp) { + uint32_t nom, denom; + + if (fscanf(fp, "%*08x%*08x%08x%08x", &nom, &denom) == 2) { + if (nom == 1000000) + freq = denom; + } + fclose(fp); + } + + return freq == 0 ? 
TC_FREQ_DEFAULT : freq; +} + +static void tc_calc_rate_table(struct tc_ratespec *ratespec, uint32_t *table, + uint32_t mtu) +{ + if (mtu == 0) + mtu = 2047; + + int cell_log = -1; + + if (cell_log < 0) { + cell_log = 0; + while ((mtu >> cell_log) > 255) + cell_log++; + } + + for (int i = 0; i < 256; i++) + table[i] = xmittime(ratespec->rate, (i + 1) << cell_log); + + ratespec->cell_align = -1; + ratespec->cell_log = cell_log; + ratespec->linklayer = TC_LINKLAYER_ETHERNET; +} + +static int tc_flower_get_inet_prefix(const struct prefix *prefix, + struct inet_prefix *addr) +{ + addr->family = prefix->family; + + if (addr->family == AF_INET) { + addr->bytelen = 4; + addr->bitlen = prefix->prefixlen; + addr->flags = 0; + addr->flags |= PREFIXLEN_SPECIFIED; + addr->flags |= ADDRTYPE_INET; + memcpy(addr->data, prefix->u.val32, sizeof(prefix->u.val32)); + } else if (addr->family == AF_INET6) { + addr->bytelen = 16; + addr->bitlen = prefix->prefixlen; + addr->flags = 0; + addr->flags |= PREFIXLEN_SPECIFIED; + addr->flags |= ADDRTYPE_INET; + memcpy(addr->data, prefix->u.val, sizeof(prefix->u.val)); + } else { + return -1; + } + + return 0; +} + +static int tc_flower_get_inet_mask(const struct prefix *prefix, + struct inet_prefix *addr) +{ + addr->family = prefix->family; + + if (addr->family == AF_INET) { + addr->bytelen = 4; + addr->bitlen = prefix->prefixlen; + addr->flags = 0; + addr->flags |= PREFIXLEN_SPECIFIED; + addr->flags |= ADDRTYPE_INET; + } else if (addr->family == AF_INET6) { + addr->bytelen = 16; + addr->bitlen = prefix->prefixlen; + addr->flags = 0; + addr->flags |= PREFIXLEN_SPECIFIED; + addr->flags |= ADDRTYPE_INET; + } else { + return -1; + } + + memset(addr->data, 0xff, addr->bytelen); + + int rest = prefix->prefixlen; + + for (int i = 0; i < addr->bytelen / 4; i++) { + if (!rest) { + addr->data[i] = 0; + } else if (rest / 32 >= 1) { + rest -= 32; + } else { + addr->data[i] <<= 32 - rest; + addr->data[i] = htonl(addr->data[i]); + rest = 0; + } + } + + 
return 0; +} + +/* + * Traffic control queue discipline encoding (only "htb" supported) + * + * Returns the aligned length of the encoded message, or 0 when the message + * does not fit into datalen bytes. + */ +static ssize_t netlink_qdisc_msg_encode(int cmd, struct zebra_dplane_ctx *ctx, + void *data, size_t datalen) +{ + struct nlsock *nl; + const char *kind_str = NULL; + + struct rtattr *nest; + + struct { + struct nlmsghdr n; + struct tcmsg t; + char buf[0]; + } *req = data; + + if (datalen < sizeof(*req)) + return 0; + + nl = kernel_netlink_nlsock_lookup(dplane_ctx_get_ns_sock(ctx)); + + memset(req, 0, sizeof(*req)); + + req->n.nlmsg_len = NLMSG_LENGTH(sizeof(struct tcmsg)); + req->n.nlmsg_flags = NLM_F_CREATE | NLM_F_REQUEST; + + req->n.nlmsg_flags |= NLM_F_REPLACE; + + req->n.nlmsg_type = cmd; + + req->n.nlmsg_pid = nl->snl.nl_pid; + + req->t.tcm_family = AF_UNSPEC; + req->t.tcm_ifindex = dplane_ctx_get_ifindex(ctx); + req->t.tcm_info = 0; + req->t.tcm_handle = 0; + req->t.tcm_parent = TC_H_ROOT; + + if (cmd == RTM_NEWQDISC) { + req->t.tcm_handle = TC_H_MAKE(TC_QDISC_MAJOR_ZEBRA, 0); + + kind_str = dplane_ctx_tc_qdisc_get_kind_str(ctx); + + /* + * Same convention as the size check above: return 0 when an + * attribute does not fit, rather than emitting a truncated + * message or closing a nest that was never opened. + */ + if (!nl_attr_put(&req->n, datalen, TCA_KIND, kind_str, + strlen(kind_str) + 1)) + return 0; + + nest = nl_attr_nest(&req->n, datalen, TCA_OPTIONS); + if (nest == NULL) + return 0; + + switch (dplane_ctx_tc_qdisc_get_kind(ctx)) { + case TC_QDISC_HTB: { + struct tc_htb_glob htb_glob = { + .rate2quantum = 10, + .version = 3, + .defcls = TC_MINOR_NOCLASS}; + if (!nl_attr_put(&req->n, datalen, TCA_HTB_INIT, + &htb_glob, sizeof(htb_glob))) + return 0; + break; + } + case TC_QDISC_NOQUEUE: + break; + default: + break; + /* not implemented */ + } + + nl_attr_nest_end(&req->n, nest); + } else { + /* ifindex is enough for del/get qdisc */ + } + + return NLMSG_ALIGN(req->n.nlmsg_len); +} + +/* + * Traffic control class encoding + */ +static ssize_t netlink_tclass_msg_encode(int cmd, struct zebra_dplane_ctx *ctx, + void *data, size_t datalen) +{ + enum dplane_op_e op = dplane_ctx_get_op(ctx); + + struct nlsock *nl; + const char *kind_str = NULL; + + struct rtattr *nest; + + struct { + struct nlmsghdr n; + struct 
tcmsg t; + char buf[0]; + } *req = data; + + if (datalen < sizeof(*req)) + return 0; + + nl = kernel_netlink_nlsock_lookup(dplane_ctx_get_ns_sock(ctx)); + + memset(req, 0, sizeof(*req)); + + req->n.nlmsg_len = NLMSG_LENGTH(sizeof(struct tcmsg)); + req->n.nlmsg_flags = NLM_F_CREATE | NLM_F_REQUEST; + + if (op == DPLANE_OP_TC_CLASS_UPDATE) + req->n.nlmsg_flags |= NLM_F_REPLACE; + + req->n.nlmsg_type = cmd; + + req->n.nlmsg_pid = nl->snl.nl_pid; + + req->t.tcm_family = AF_UNSPEC; + req->t.tcm_ifindex = dplane_ctx_get_ifindex(ctx); + + req->t.tcm_handle = TC_H_MAKE(TC_QDISC_MAJOR_ZEBRA, + dplane_ctx_tc_class_get_handle(ctx)); + req->t.tcm_parent = TC_H_MAKE(TC_QDISC_MAJOR_ZEBRA, 0); + req->t.tcm_info = 0; + + kind_str = dplane_ctx_tc_class_get_kind_str(ctx); + + if (op == DPLANE_OP_TC_CLASS_ADD || op == DPLANE_OP_TC_CLASS_UPDATE) { + zlog_debug("netlink tclass encoder: op: %s kind: %s handle: %u", + op == DPLANE_OP_TC_CLASS_UPDATE ? "update" : "add", + kind_str, dplane_ctx_tc_class_get_handle(ctx)); + + nl_attr_put(&req->n, datalen, TCA_KIND, kind_str, + strlen(kind_str) + 1); + + nest = nl_attr_nest(&req->n, datalen, TCA_OPTIONS); + + switch (dplane_ctx_tc_class_get_kind(ctx)) { + case TC_QDISC_HTB: { + struct tc_htb_opt htb_opt = {}; + + uint64_t rate = dplane_ctx_tc_class_get_rate(ctx), + ceil = dplane_ctx_tc_class_get_ceil(ctx); + + uint64_t buffer, cbuffer; + + /* TODO: fetch mtu from interface */ + uint32_t mtu = 1500; + + uint32_t rtab[256]; + uint32_t ctab[256]; + + ceil = MAX(rate, ceil); + + htb_opt.rate.rate = (rate >> 32 != 0) ? ~0U : rate; + htb_opt.ceil.rate = (ceil >> 32 != 0) ? 
~0U : ceil; + + buffer = rate / tc_get_freq() + mtu; + cbuffer = ceil / tc_get_freq() + mtu; + + htb_opt.buffer = buffer; + htb_opt.cbuffer = cbuffer; + + tc_calc_rate_table(&htb_opt.rate, rtab, mtu); + tc_calc_rate_table(&htb_opt.ceil, ctab, mtu); + + htb_opt.ceil.mpu = htb_opt.rate.mpu = 0; + htb_opt.ceil.overhead = htb_opt.rate.overhead = 0; + + if (rate >> 32 != 0) { + nl_attr_put(&req->n, datalen, TCA_HTB_RATE64, + &rate, sizeof(rate)); + } + + if (ceil >> 32 != 0) { + nl_attr_put(&req->n, datalen, TCA_HTB_CEIL64, + &ceil, sizeof(ceil)); + } + + nl_attr_put(&req->n, datalen, TCA_HTB_PARMS, &htb_opt, + sizeof(htb_opt)); + + nl_attr_put(&req->n, datalen, TCA_HTB_RTAB, rtab, + sizeof(rtab)); + nl_attr_put(&req->n, datalen, TCA_HTB_CTAB, ctab, + sizeof(ctab)); + break; + } + default: + break; + } + + nl_attr_nest_end(&req->n, nest); + } + + return NLMSG_ALIGN(req->n.nlmsg_len); +} + +static int netlink_tfilter_flower_port_type(uint8_t ip_proto, bool src) +{ + if (ip_proto == IPPROTO_TCP) + return src ? TCA_FLOWER_KEY_TCP_SRC : TCA_FLOWER_KEY_TCP_DST; + else if (ip_proto == IPPROTO_UDP) + return src ? TCA_FLOWER_KEY_UDP_SRC : TCA_FLOWER_KEY_UDP_DST; + else if (ip_proto == IPPROTO_SCTP) + return src ? TCA_FLOWER_KEY_SCTP_SRC : TCA_FLOWER_KEY_SCTP_DST; + else + return -1; +} + +/* + * Append the flower filter match keys selected by the filter bitmap, followed + * by the class id, flags and EtherType attributes. + */ +static void netlink_tfilter_flower_put_options(struct nlmsghdr *n, + size_t datalen, + struct zebra_dplane_ctx *ctx) +{ + struct inet_prefix addr; + uint32_t flags = 0, classid; + /* htons() yields 16 bits; a uint8_t here would drop the upper byte */ + uint16_t protocol = htons(dplane_ctx_tc_filter_get_eth_proto(ctx)); + uint32_t filter_bm = dplane_ctx_tc_filter_get_filter_bm(ctx); + + if (filter_bm & TC_FLOWER_SRC_IP) { + const struct prefix *src_p = + dplane_ctx_tc_filter_get_src_ip(ctx); + + if (tc_flower_get_inet_prefix(src_p, &addr) != 0) + return; + + nl_attr_put(n, datalen, + (addr.family == AF_INET) ? 
TCA_FLOWER_KEY_IPV4_SRC + : TCA_FLOWER_KEY_IPV6_SRC, + addr.data, addr.bytelen); + + if (tc_flower_get_inet_mask(src_p, &addr) != 0) + return; + + nl_attr_put(n, datalen, + (addr.family == AF_INET) + ? TCA_FLOWER_KEY_IPV4_SRC_MASK + : TCA_FLOWER_KEY_IPV6_SRC_MASK, + addr.data, addr.bytelen); + } + + if (filter_bm & TC_FLOWER_DST_IP) { + const struct prefix *dst_p = + dplane_ctx_tc_filter_get_dst_ip(ctx); + + if (tc_flower_get_inet_prefix(dst_p, &addr) != 0) + return; + + nl_attr_put(n, datalen, + (addr.family == AF_INET) ? TCA_FLOWER_KEY_IPV4_DST + : TCA_FLOWER_KEY_IPV6_DST, + addr.data, addr.bytelen); + + if (tc_flower_get_inet_mask(dst_p, &addr) != 0) + return; + + nl_attr_put(n, datalen, + (addr.family == AF_INET) + ? TCA_FLOWER_KEY_IPV4_DST_MASK + : TCA_FLOWER_KEY_IPV6_DST_MASK, + addr.data, addr.bytelen); + } + + if (filter_bm & TC_FLOWER_IP_PROTOCOL) { + nl_attr_put8(n, datalen, TCA_FLOWER_KEY_IP_PROTO, + dplane_ctx_tc_filter_get_ip_proto(ctx)); + } + + if (filter_bm & TC_FLOWER_SRC_PORT) { + uint16_t min, max; + + min = dplane_ctx_tc_filter_get_src_port_min(ctx); + max = dplane_ctx_tc_filter_get_src_port_max(ctx); + + if (max > min) { + nl_attr_put16(n, datalen, TCA_FLOWER_KEY_PORT_SRC_MIN, + htons(min)); + + nl_attr_put16(n, datalen, TCA_FLOWER_KEY_PORT_SRC_MAX, + htons(max)); + } else { + int type = netlink_tfilter_flower_port_type( + dplane_ctx_tc_filter_get_ip_proto(ctx), true); + + if (type < 0) + return; + + nl_attr_put16(n, datalen, type, htons(min)); + } + } + + if (filter_bm & TC_FLOWER_DST_PORT) { + uint16_t min = dplane_ctx_tc_filter_get_dst_port_min(ctx), + max = dplane_ctx_tc_filter_get_dst_port_max(ctx); + + if (max > min) { + nl_attr_put16(n, datalen, TCA_FLOWER_KEY_PORT_DST_MIN, + htons(min)); + + nl_attr_put16(n, datalen, TCA_FLOWER_KEY_PORT_DST_MAX, + htons(max)); + } else { + int type = netlink_tfilter_flower_port_type( + dplane_ctx_tc_filter_get_ip_proto(ctx), false); + + if (type < 0) + return; + + nl_attr_put16(n, datalen, type, 
htons(min)); + } + } + + if (filter_bm & TC_FLOWER_DSFIELD) { + nl_attr_put8(n, datalen, TCA_FLOWER_KEY_IP_TOS, + dplane_ctx_tc_filter_get_dsfield(ctx)); + nl_attr_put8(n, datalen, TCA_FLOWER_KEY_IP_TOS_MASK, + dplane_ctx_tc_filter_get_dsfield_mask(ctx)); + } + + classid = TC_H_MAKE(TC_QDISC_MAJOR_ZEBRA, + dplane_ctx_tc_filter_get_classid(ctx)); + nl_attr_put32(n, datalen, TCA_FLOWER_CLASSID, classid); + + nl_attr_put32(n, datalen, TCA_FLOWER_FLAGS, flags); + + nl_attr_put16(n, datalen, TCA_FLOWER_KEY_ETH_TYPE, protocol); +} + +/* + * Traffic control filter encoding + * + * tcm_info carries the filter priority in its upper 16 bits and the + * network-order EtherType in its lower 16 bits. + */ +static ssize_t netlink_tfilter_msg_encode(int cmd, struct zebra_dplane_ctx *ctx, + void *data, size_t datalen) +{ + enum dplane_op_e op = dplane_ctx_get_op(ctx); + + struct nlsock *nl; + const char *kind_str = NULL; + + struct rtattr *nest; + + uint16_t priority; + uint16_t protocol; + + struct { + struct nlmsghdr n; + struct tcmsg t; + char buf[0]; + } *req = data; + + if (datalen < sizeof(*req)) + return 0; + + nl = kernel_netlink_nlsock_lookup(dplane_ctx_get_ns_sock(ctx)); + + memset(req, 0, sizeof(*req)); + + req->n.nlmsg_len = NLMSG_LENGTH(sizeof(struct tcmsg)); + req->n.nlmsg_flags = NLM_F_CREATE | NLM_F_REQUEST; + + if (op == DPLANE_OP_TC_FILTER_UPDATE) + req->n.nlmsg_flags |= NLM_F_REPLACE; + + req->n.nlmsg_type = cmd; + + req->n.nlmsg_pid = nl->snl.nl_pid; + + req->t.tcm_family = AF_UNSPEC; + req->t.tcm_ifindex = dplane_ctx_get_ifindex(ctx); + + priority = dplane_ctx_tc_filter_get_priority(ctx); + protocol = htons(dplane_ctx_tc_filter_get_eth_proto(ctx)); + + /* + * Widen before shifting: a uint16_t promotes to signed int, and + * shifting a value >= 0x8000 left by 16 overflows it. + */ + req->t.tcm_info = TC_H_MAKE((uint32_t)priority << 16, protocol); + req->t.tcm_handle = dplane_ctx_tc_filter_get_handle(ctx); + req->t.tcm_parent = TC_H_MAKE(TC_QDISC_MAJOR_ZEBRA, 0); + + kind_str = dplane_ctx_tc_filter_get_kind_str(ctx); + + if (op == DPLANE_OP_TC_FILTER_ADD || op == DPLANE_OP_TC_FILTER_UPDATE) { + nl_attr_put(&req->n, datalen, TCA_KIND, kind_str, + strlen(kind_str) + 1); + + zlog_debug( + "netlink tfilter encoder: op: %s 
priority: %u protocol: %u kind: %s handle: %u filter_bm: %u ip_proto: %u", + op == DPLANE_OP_TC_FILTER_UPDATE ? "update" : "add", + priority, protocol, kind_str, + dplane_ctx_tc_filter_get_handle(ctx), + dplane_ctx_tc_filter_get_filter_bm(ctx), + dplane_ctx_tc_filter_get_ip_proto(ctx)); + + nest = nl_attr_nest(&req->n, datalen, TCA_OPTIONS); + switch (dplane_ctx_tc_filter_get_kind(ctx)) { + case TC_FILTER_FLOWER: { + netlink_tfilter_flower_put_options(&req->n, datalen, + ctx); + break; + } + default: + break; + } + nl_attr_nest_end(&req->n, nest); + } + + return NLMSG_ALIGN(req->n.nlmsg_len); +} + +static ssize_t netlink_newqdisc_msg_encoder(struct zebra_dplane_ctx *ctx, + void *buf, size_t buflen) +{ + return netlink_qdisc_msg_encode(RTM_NEWQDISC, ctx, buf, buflen); +} + +static ssize_t netlink_delqdisc_msg_encoder(struct zebra_dplane_ctx *ctx, + void *buf, size_t buflen) +{ + return netlink_qdisc_msg_encode(RTM_DELQDISC, ctx, buf, buflen); +} + +static ssize_t netlink_newtclass_msg_encoder(struct zebra_dplane_ctx *ctx, + void *buf, size_t buflen) +{ + return netlink_tclass_msg_encode(RTM_NEWTCLASS, ctx, buf, buflen); +} + +static ssize_t netlink_deltclass_msg_encoder(struct zebra_dplane_ctx *ctx, + void *buf, size_t buflen) +{ + return netlink_tclass_msg_encode(RTM_DELTCLASS, ctx, buf, buflen); +} + +static ssize_t netlink_newtfilter_msg_encoder(struct zebra_dplane_ctx *ctx, + void *buf, size_t buflen) +{ + return netlink_tfilter_msg_encode(RTM_NEWTFILTER, ctx, buf, buflen); +} + +static ssize_t netlink_deltfilter_msg_encoder(struct zebra_dplane_ctx *ctx, + void *buf, size_t buflen) +{ + return netlink_tfilter_msg_encode(RTM_DELTFILTER, ctx, buf, buflen); +} + +enum netlink_msg_status +netlink_put_tc_qdisc_update_msg(struct nl_batch *bth, + struct zebra_dplane_ctx *ctx) +{ + enum dplane_op_e op; + enum netlink_msg_status ret; + + op = dplane_ctx_get_op(ctx); + + if (op == DPLANE_OP_TC_QDISC_INSTALL) { + ret = netlink_batch_add_msg( + bth, ctx, 
netlink_newqdisc_msg_encoder, false); + } else if (op == DPLANE_OP_TC_QDISC_UNINSTALL) { + ret = netlink_batch_add_msg( + bth, ctx, netlink_delqdisc_msg_encoder, false); + } else { + return FRR_NETLINK_ERROR; + } + + return ret; +} + +enum netlink_msg_status +netlink_put_tc_class_update_msg(struct nl_batch *bth, + struct zebra_dplane_ctx *ctx) +{ + enum dplane_op_e op; + enum netlink_msg_status ret; + + op = dplane_ctx_get_op(ctx); + + if (op == DPLANE_OP_TC_CLASS_ADD || op == DPLANE_OP_TC_CLASS_UPDATE) { + ret = netlink_batch_add_msg( + bth, ctx, netlink_newtclass_msg_encoder, false); + } else if (op == DPLANE_OP_TC_CLASS_DELETE) { + ret = netlink_batch_add_msg( + bth, ctx, netlink_deltclass_msg_encoder, false); + } else { + return FRR_NETLINK_ERROR; + } + + return ret; +} + +enum netlink_msg_status +netlink_put_tc_filter_update_msg(struct nl_batch *bth, + struct zebra_dplane_ctx *ctx) +{ + enum dplane_op_e op; + enum netlink_msg_status ret; + + op = dplane_ctx_get_op(ctx); + + if (op == DPLANE_OP_TC_FILTER_ADD) { + ret = netlink_batch_add_msg( + bth, ctx, netlink_newtfilter_msg_encoder, false); + } else if (op == DPLANE_OP_TC_FILTER_UPDATE) { + /* + * Replace will fail if either filter type or the number of + * filter options is changed, so DEL then NEW + * + * TFILTER may have refs to TCLASS. 
+ */ + + (void)netlink_batch_add_msg( + bth, ctx, netlink_deltfilter_msg_encoder, false); + ret = netlink_batch_add_msg( + bth, ctx, netlink_newtfilter_msg_encoder, false); + } else if (op == DPLANE_OP_TC_FILTER_DELETE) { + ret = netlink_batch_add_msg( + bth, ctx, netlink_deltfilter_msg_encoder, false); + } else { + return FRR_NETLINK_ERROR; + } + + return ret; +} + +/* + * Request filters from the kernel + */ +static int netlink_request_filters(struct zebra_ns *zns, int family, int type, + ifindex_t ifindex) +{ + struct { + struct nlmsghdr n; + struct tcmsg tc; + } req; + + memset(&req, 0, sizeof(req)); + req.n.nlmsg_type = type; + req.n.nlmsg_flags = NLM_F_ROOT | NLM_F_MATCH | NLM_F_REQUEST; + req.n.nlmsg_len = NLMSG_LENGTH(sizeof(struct tcmsg)); + req.tc.tcm_family = family; + req.tc.tcm_ifindex = ifindex; + + return netlink_request(&zns->netlink_cmd, &req); +} + +/* + * Request queue discipline from the kernel + */ +static int netlink_request_qdiscs(struct zebra_ns *zns, int family, int type) +{ + struct { + struct nlmsghdr n; + struct tcmsg tc; + } req; + + memset(&req, 0, sizeof(req)); + req.n.nlmsg_type = type; + req.n.nlmsg_flags = NLM_F_ROOT | NLM_F_MATCH | NLM_F_REQUEST; + req.n.nlmsg_len = NLMSG_LENGTH(sizeof(struct tcmsg)); + req.tc.tcm_family = family; + + return netlink_request(&zns->netlink_cmd, &req); +} + +int netlink_qdisc_change(struct nlmsghdr *h, ns_id_t ns_id, int startup) +{ + struct tcmsg *tcm; + struct zebra_tc_qdisc qdisc = {}; + enum tc_qdisc_kind kind = TC_QDISC_UNSPEC; + const char *kind_str = "Unknown"; + + int len; + struct rtattr *tb[TCA_MAX + 1]; + + frrtrace(3, frr_zebra, netlink_tc_qdisc_change, h, ns_id, startup); + + len = h->nlmsg_len - NLMSG_LENGTH(sizeof(struct tcmsg)); + + if (len < 0) { + zlog_err( + "%s: Message received from netlink is of a broken size %d %zu", + __func__, h->nlmsg_len, + (size_t)NLMSG_LENGTH(sizeof(struct tcmsg))); + return -1; + } + + tcm = NLMSG_DATA(h); + netlink_parse_rtattr(tb, TCA_MAX, 
TCA_RTA(tcm), len); + + /* + * Test the attribute pointer itself: RTA_DATA() only adds the header + * offset, so RTA_DATA(NULL) is non-NULL and cannot detect a missing + * TCA_KIND. + */ + if (tb[TCA_KIND] != NULL) { + kind_str = (const char *)RTA_DATA(tb[TCA_KIND]); + + kind = tc_qdisc_str2kind(kind_str); + } + + qdisc.qdisc.ifindex = tcm->tcm_ifindex; + + switch (kind) { + case TC_QDISC_NOQUEUE: + /* "noqueue" is the default qdisc */ + break; + case TC_QDISC_HTB: + case TC_QDISC_UNSPEC: + break; + } + + if (tb[TCA_OPTIONS] != NULL) { + struct rtattr *options[TCA_HTB_MAX + 1]; + + netlink_parse_rtattr_nested(options, TCA_HTB_MAX, + tb[TCA_OPTIONS]); + + /* TODO: more details */ + /* struct tc_htb_glob *glob = RTA_DATA(options[TCA_HTB_INIT]); + */ + } + + if (h->nlmsg_type == RTM_NEWQDISC) { + /* At startup, remove qdiscs carrying zebra's major handle */ + if (startup && + TC_H_MAJ(tcm->tcm_handle) == TC_QDISC_MAJOR_ZEBRA) { + enum zebra_dplane_result ret; + + ret = dplane_tc_qdisc_uninstall(&qdisc); + + zlog_debug("%s: %s leftover qdisc: ifindex %d kind %s", + __func__, + ((ret == ZEBRA_DPLANE_REQUEST_FAILURE) + ? "Failed to remove" + : "Removed"), + qdisc.qdisc.ifindex, kind_str); + } + } + + return 0; +} + +int netlink_tclass_change(struct nlmsghdr *h, ns_id_t ns_id, int startup) +{ + struct tcmsg *tcm; + + int len; + struct rtattr *tb[TCA_MAX + 1]; + + frrtrace(3, frr_zebra, netlink_tc_class_change, h, ns_id, startup); + + len = h->nlmsg_len - NLMSG_LENGTH(sizeof(struct tcmsg)); + + if (len < 0) { + zlog_err( + "%s: Message received from netlink is of a broken size %d %zu", + __func__, h->nlmsg_len, + (size_t)NLMSG_LENGTH(sizeof(struct tcmsg))); + return -1; + } + + tcm = NLMSG_DATA(h); + netlink_parse_rtattr(tb, TCA_MAX, TCA_RTA(tcm), len); + + + if (tb[TCA_OPTIONS] != NULL) { + struct rtattr *options[TCA_HTB_MAX + 1]; + + netlink_parse_rtattr_nested(options, TCA_HTB_MAX, + tb[TCA_OPTIONS]); + + /* TODO: more details */ + /* struct tc_htb_opt *opt = RTA_DATA(options[TCA_HTB_PARMS]); */ + } + + return 0; +} + +int netlink_tfilter_change(struct nlmsghdr *h, ns_id_t ns_id, int startup) +{ + struct tcmsg *tcm; + + int len; + struct rtattr *tb[TCA_MAX + 1]; + + frrtrace(3, frr_zebra, 
netlink_tc_filter_change, h, ns_id, startup); + + len = h->nlmsg_len - NLMSG_LENGTH(sizeof(struct tcmsg)); + + if (len < 0) { + zlog_err( + "%s: Message received from netlink is of a broken size %d %zu", + __func__, h->nlmsg_len, + (size_t)NLMSG_LENGTH(sizeof(struct tcmsg))); + return -1; + } + + tcm = NLMSG_DATA(h); + netlink_parse_rtattr(tb, TCA_MAX, TCA_RTA(tcm), len); + + return 0; +} + +int netlink_qdisc_read(struct zebra_ns *zns) +{ + int ret; + struct zebra_dplane_info dp_info; + + zebra_dplane_info_from_zns(&dp_info, zns, true); + + ret = netlink_request_qdiscs(zns, AF_UNSPEC, RTM_GETQDISC); + if (ret < 0) + return ret; + + ret = netlink_parse_info(netlink_qdisc_change, &zns->netlink_cmd, + &dp_info, 0, true); + if (ret < 0) + return ret; + + return 0; +} + +int netlink_tfilter_read_for_interface(struct zebra_ns *zns, ifindex_t ifindex) +{ + int ret; + struct zebra_dplane_info dp_info; + + zebra_dplane_info_from_zns(&dp_info, zns, true); + + ret = netlink_request_filters(zns, AF_UNSPEC, RTM_GETTFILTER, ifindex); + if (ret < 0) + return ret; + + ret = netlink_parse_info(netlink_tfilter_change, &zns->netlink_cmd, + &dp_info, 0, true); + if (ret < 0) + return ret; + + return 0; +} + +#endif /* HAVE_NETLINK */ diff --git a/zebra/tc_netlink.h b/zebra/tc_netlink.h new file mode 100644 index 0000000..5e95e6c --- /dev/null +++ b/zebra/tc_netlink.h @@ -0,0 +1,69 @@ +// SPDX-License-Identifier: GPL-2.0-or-later +/* + * Zebra Traffic Control (TC) interaction with the kernel using netlink. 
+ * + * Copyright (C) 2022 Shichu Yang + */ + +#ifndef _ZEBRA_TC_NETLINK_H +#define _ZEBRA_TC_NETLINK_H + +#ifdef HAVE_NETLINK + +#ifdef __cplusplus +extern "C" { +#endif + +/* Represent a prefixed address in flower filter */ + +struct inet_prefix { + uint16_t flags; + uint16_t bytelen; + uint16_t bitlen; + uint16_t family; + uint32_t data[64]; +}; + +enum { + PREFIXLEN_SPECIFIED = (1 << 0), + ADDRTYPE_INET = (1 << 1), + ADDRTYPE_UNSPEC = (1 << 2), + ADDRTYPE_MULTI = (1 << 3), + + ADDRTYPE_INET_UNSPEC = ADDRTYPE_INET | ADDRTYPE_UNSPEC, + ADDRTYPE_INET_MULTI = ADDRTYPE_INET | ADDRTYPE_MULTI +}; + +extern enum netlink_msg_status +netlink_put_tc_qdisc_update_msg(struct nl_batch *bth, + struct zebra_dplane_ctx *ctx); +extern enum netlink_msg_status +netlink_put_tc_class_update_msg(struct nl_batch *bth, + struct zebra_dplane_ctx *ctx); +extern enum netlink_msg_status +netlink_put_tc_filter_update_msg(struct nl_batch *bth, + struct zebra_dplane_ctx *ctx); + +/** + * "filter" & "class" in the following become "tfilter" & "tclass" for + * the sake of consistency with kernel message types (RTM_NEWTFILTER etc.) + */ + +extern int netlink_qdisc_read(struct zebra_ns *zns); +extern int netlink_tfilter_read_for_interface(struct zebra_ns *zns, + ifindex_t ifindex); + +extern int netlink_tfilter_change(struct nlmsghdr *h, ns_id_t ns_id, + int startup); +extern int netlink_tclass_change(struct nlmsghdr *h, ns_id_t ns_id, + int startup); +extern int netlink_qdisc_change(struct nlmsghdr *h, ns_id_t ns_id, int startup); + + +#ifdef __cplusplus +} +#endif + +#endif /* HAVE_NETLINK */ + +#endif /* _ZEBRA_TC_NETLINK_H */ diff --git a/zebra/tc_socket.c b/zebra/tc_socket.c new file mode 100644 index 0000000..1bd69ba --- /dev/null +++ b/zebra/tc_socket.c @@ -0,0 +1,26 @@ +// SPDX-License-Identifier: GPL-2.0-or-later +/* + * Zebra Traffic Control (TC) interaction with the kernel using socket. 
+ * + * Copyright (C) 2022 Shichu Yang + */ + +#include + +#ifndef HAVE_NETLINK + +#include "lib_errors.h" + +#include "zebra/rt.h" +#include "zebra/zebra_dplane.h" +#include "zebra/zebra_errors.h" +#include "zebra/zebra_tc.h" + +enum zebra_dplane_result kernel_tc_update(struct zebra_dplane_ctx *ctx) +{ + flog_err(EC_LIB_UNAVAILABLE, "%s not Implemented for this platform", + __func__); + return ZEBRA_DPLANE_REQUEST_FAILURE; +} + +#endif /* !HAVE_NETLINK */ diff --git a/zebra/testrib.conf b/zebra/testrib.conf new file mode 100644 index 0000000..0df7dc2 --- /dev/null +++ b/zebra/testrib.conf @@ -0,0 +1,76 @@ +! +! Zebra configuration saved from vty +! 2007/04/01 17:46:48 +! +password foo +log stdout +service advanced-vty +! +debug zebra rib +debug zebra kernel +! +interface eth0 + ip address 10.0.0.1/24 + ipv6 address 1::0:1/64 + state up +! +interface eth1 + ip address 10.0.1.1/24 + ipv6 address 1::1:1/64 +! +interface eth2 + ip address 10.0.2.1/24 + ipv6 address 1::2:1/64 +! +! Unnumbered +interface foo1 + ip address 192.168.1.1/32 + ipv6 address 2::1:1/128 +! +interface foo0 + ip address 192.168.1.1/32 + ip address 192.168.1.1/24 label foo + ipv6 address 2::1:1/128 + state up +! + +! statics that should be subsumed by connected routes, according to interface +! state +ip route 10.0.0.0/24 10.0.1.254 +ip route 10.0.1.0/24 10.0.2.254 +ip route 10.0.2.0/24 10.0.0.254 +ipv6 route 1::0:0/64 1::1:f +ipv6 route 1::1:0/64 1::2:f +ipv6 route 1::2:0/64 1::0:f + +! null route +ip route 10.1.0.1/32 null0 +ipv6 route 100::1:1/128 null0 + +! normalish routes +ip route 1.1.2.0/24 10.0.0.2 +ipv6 route 80::/64 1::0:e + +! different admin distances +ip route 1.1.0.2/32 10.0.0.3 10 +ip route 1.1.0.2/32 10.0.0.4 20 +ip route 1.1.0.2/32 10.0.1.3 30 + +ipv6 route 90::1/128 1::0:a 10 +ipv6 route 90::1/128 1::0:b 20 +ipv6 route 90::1/128 1::1:c 30 + +! multiple-nexthop + distance +ip route 1.1.0.2/32 10.0.0.5 10 +ipv6 route 90::1/128 1::0:d 10 + +! a recursive route, potentially. 
+ip route 1.1.3.0/24 10.0.0.2 +! double recursive, potentially +ip route 1.1.0.1/32 1.1.3.1 +! +ip route 1.1.1.0/24 1.1.2.2 + +line vty + exec-timeout 0 0 +! diff --git a/zebra/zapi_msg.c b/zebra/zapi_msg.c new file mode 100644 index 0000000..6754238 --- /dev/null +++ b/zebra/zapi_msg.c @@ -0,0 +1,3972 @@ +// SPDX-License-Identifier: GPL-2.0-or-later +/* + * Zebra API message creation & consumption. + * Portions: + * Copyright (C) 1997-1999 Kunihiro Ishiguro + * Copyright (C) 2015-2018 Cumulus Networks, Inc. + * et al. + * Copyright (c) 2021 The MITRE Corporation. + * Copyright (c) 2023 LabN Consulting, L.L.C. + */ + +#include +#include + +#include "lib/prefix.h" +#include "lib/stream.h" +#include "lib/memory.h" +#include "lib/table.h" +#include "lib/network.h" +#include "lib/log.h" +#include "lib/zclient.h" +#include "lib/privs.h" +#include "lib/nexthop.h" +#include "lib/vrf.h" +#include "lib/libfrr.h" +#include "lib/lib_errors.h" + +#include "zebra/zebra_router.h" +#include "zebra/rib.h" +#include "zebra/zebra_ns.h" +#include "zebra/zebra_vrf.h" +#include "zebra/router-id.h" +#include "zebra/redistribute.h" +#include "zebra/debug.h" +#include "zebra/zebra_rnh.h" +#include "zebra/interface.h" +#include "zebra/zebra_ptm.h" +#include "zebra/rtadv.h" +#include "zebra/zebra_mpls.h" +#include "zebra/zebra_mroute.h" +#include "zebra/zebra_vxlan.h" +#include "zebra/zebra_evpn_mh.h" +#include "zebra/rt.h" +#include "zebra/zebra_pbr.h" +#include "zebra/zebra_tc.h" +#include "zebra/table_manager.h" +#include "zebra/zapi_msg.h" +#include "zebra/zebra_errors.h" +#include "zebra/zebra_mlag.h" +#include "zebra/connected.h" +#include "zebra/zebra_opaque.h" +#include "zebra/zebra_srte.h" +#include "zebra/zebra_srv6.h" + +DEFINE_MTYPE_STATIC(ZEBRA, RE_OPAQUE, "Route Opaque Data"); + +static int zapi_nhg_decode(struct stream *s, int cmd, struct zapi_nhg *api_nhg); + +/* Encoding helpers -------------------------------------------------------- */ + +static void 
zserv_encode_interface(struct stream *s, struct interface *ifp) +{ + /* Interface information. */ + struct zebra_if *zif = ifp->info; + + stream_put(s, ifp->name, INTERFACE_NAMSIZ); + stream_putl(s, ifp->ifindex); + stream_putc(s, ifp->status); + stream_putq(s, ifp->flags); + stream_putc(s, ifp->ptm_enable); + stream_putc(s, ifp->ptm_status); + stream_putl(s, ifp->metric); + stream_putl(s, ifp->speed); + stream_putl(s, ifp->txqlen); + stream_putl(s, ifp->mtu); + stream_putl(s, ifp->mtu6); + stream_putl(s, ifp->bandwidth); + stream_putl(s, zif->link_ifindex); + stream_putl(s, ifp->ll_type); + stream_putl(s, ifp->hw_addr_len); + if (ifp->hw_addr_len) + stream_put(s, ifp->hw_addr, ifp->hw_addr_len); + + /* Then, Traffic Engineering parameters if any */ + if (HAS_LINK_PARAMS(ifp) && IS_LINK_PARAMS_SET(ifp->link_params)) { + stream_putc(s, 1); + zebra_interface_link_params_write(s, ifp); + } else + stream_putc(s, 0); + + /* Write packet size. */ + stream_putw_at(s, 0, stream_get_endp(s)); +} + +static void zserv_encode_vrf(struct stream *s, struct zebra_vrf *zvrf) +{ + struct vrf_data data; + const char *netns_name = zvrf_ns_name(zvrf); + + memset(&data, 0, sizeof(data)); + data.l.table_id = zvrf->table_id; + + if (netns_name) + strlcpy(data.l.netns_name, basename((char *)netns_name), + NS_NAMSIZ); + else + memset(data.l.netns_name, 0, NS_NAMSIZ); + /* Pass the tableid and the netns NAME */ + stream_put(s, &data, sizeof(struct vrf_data)); + /* Interface information. */ + stream_put(s, zvrf_name(zvrf), VRF_NAMSIZ); + /* Write packet size. 
*/ + stream_putw_at(s, 0, stream_get_endp(s)); +} + +static int zserv_encode_nexthop(struct stream *s, struct nexthop *nexthop) +{ + stream_putl(s, nexthop->vrf_id); + stream_putc(s, nexthop->type); + switch (nexthop->type) { + case NEXTHOP_TYPE_IPV4: + case NEXTHOP_TYPE_IPV4_IFINDEX: + stream_put_in_addr(s, &nexthop->gate.ipv4); + stream_putl(s, nexthop->ifindex); + break; + case NEXTHOP_TYPE_IPV6: + case NEXTHOP_TYPE_IPV6_IFINDEX: + stream_put(s, &nexthop->gate.ipv6, 16); + stream_putl(s, nexthop->ifindex); + break; + case NEXTHOP_TYPE_IFINDEX: + stream_putl(s, nexthop->ifindex); + break; + case NEXTHOP_TYPE_BLACKHOLE: + /* do nothing */ + break; + } + return 1; +} + +/* + * Zebra error addition adds error type. + * + * + * 0 1 + * 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 + * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ + * | enum zebra_error_types | + * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ + * + */ +static void zserv_encode_error(struct stream *s, enum zebra_error_types error) +{ + stream_put(s, &error, sizeof(error)); + + /* Write packet size. */ + stream_putw_at(s, 0, stream_get_endp(s)); +} + +/* Send handlers ----------------------------------------------------------- */ + +/* Interface is added. Send ZEBRA_INTERFACE_ADD to client. */ +/* + * This function is called in the following situations: + * - in response to a 3-byte ZEBRA_INTERFACE_ADD request + * from the client. + * - at startup, when zebra figures out the available interfaces + * - when an interface is added (where support for + * RTM_IFANNOUNCE or AF_NETLINK sockets is available), or when + * an interface is marked IFF_UP (i.e., an RTM_IFINFO message is + * received) + */ +int zsend_interface_add(struct zserv *client, struct interface *ifp) +{ + struct stream *s = stream_new(ZEBRA_MAX_PACKET_SIZ); + + zclient_create_header(s, ZEBRA_INTERFACE_ADD, ifp->vrf->vrf_id); + zserv_encode_interface(s, ifp); + + client->ifadd_cnt++; + return zserv_send_message(client, s); +} + +/* Interface deletion from zebra daemon. 
*/ +int zsend_interface_delete(struct zserv *client, struct interface *ifp) +{ + struct stream *s = stream_new(ZEBRA_MAX_PACKET_SIZ); + + zclient_create_header(s, ZEBRA_INTERFACE_DELETE, ifp->vrf->vrf_id); + zserv_encode_interface(s, ifp); + + client->ifdel_cnt++; + return zserv_send_message(client, s); +} + +int zsend_vrf_add(struct zserv *client, struct zebra_vrf *zvrf) +{ + struct stream *s = stream_new(ZEBRA_MAX_PACKET_SIZ); + + zclient_create_header(s, ZEBRA_VRF_ADD, zvrf_id(zvrf)); + zserv_encode_vrf(s, zvrf); + + client->vrfadd_cnt++; + return zserv_send_message(client, s); +} + +/* VRF deletion from zebra daemon. */ +int zsend_vrf_delete(struct zserv *client, struct zebra_vrf *zvrf) + +{ + struct stream *s = stream_new(ZEBRA_MAX_PACKET_SIZ); + + zclient_create_header(s, ZEBRA_VRF_DELETE, zvrf_id(zvrf)); + zserv_encode_vrf(s, zvrf); + + client->vrfdel_cnt++; + return zserv_send_message(client, s); +} + +int zsend_interface_link_params(struct zserv *client, struct interface *ifp) +{ + struct stream *s = stream_new(ZEBRA_MAX_PACKET_SIZ); + + zclient_create_header(s, ZEBRA_INTERFACE_LINK_PARAMS, ifp->vrf->vrf_id); + + /* Add Interface Index */ + stream_putl(s, ifp->ifindex); + + /* Then TE Link Parameters */ + if (zebra_interface_link_params_write(s, ifp) == 0) { + stream_free(s); + return 0; + } + + /* Write packet size. */ + stream_putw_at(s, 0, stream_get_endp(s)); + + return zserv_send_message(client, s); +} + +/* Interface address is added/deleted. Send ZEBRA_INTERFACE_ADDRESS_ADD or + * ZEBRA_INTERFACE_ADDRESS_DELETE to the client. 
+ * + * A ZEBRA_INTERFACE_ADDRESS_ADD is sent in the following situations: + * - in response to a 3-byte ZEBRA_INTERFACE_ADD request + * from the client, after the ZEBRA_INTERFACE_ADD has been + * sent from zebra to the client + * - redistribute new address info to all clients in the following situations + * - at startup, when zebra figures out the available interfaces + * - when an interface is added (where support for + * RTM_IFANNOUNCE or AF_NETLINK sockets is available), or when + * an interface is marked IFF_UP (i.e., an RTM_IFINFO message is + * received) + * - for the vty commands "ip address A.B.C.D/M [