diff options
Diffstat (no path filter applied)
152 files changed, 112526 insertions, 0 deletions
diff --git a/zebra/.gitignore b/zebra/.gitignore new file mode 100644 index 0000000..41a86e7 --- /dev/null +++ b/zebra/.gitignore @@ -0,0 +1,3 @@ +zebra +zebra.conf +client diff --git a/zebra/Makefile b/zebra/Makefile new file mode 100644 index 0000000..625a716 --- /dev/null +++ b/zebra/Makefile @@ -0,0 +1,10 @@ +all: ALWAYS + @$(MAKE) -s -C .. zebra/zebra +%: ALWAYS + @$(MAKE) -s -C .. zebra/$@ + +Makefile: + #nothing +ALWAYS: +.PHONY: ALWAYS makefiles +.SUFFIXES: diff --git a/zebra/connected.c b/zebra/connected.c new file mode 100644 index 0000000..c01be58 --- /dev/null +++ b/zebra/connected.c @@ -0,0 +1,639 @@ +/* + * Address linked list routine. + * Copyright (C) 1997, 98 Kunihiro Ishiguro + * + * This file is part of GNU Zebra. + * + * GNU Zebra is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License as published by the + * Free Software Foundation; either version 2, or (at your option) any + * later version. + * + * GNU Zebra is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. 
+ * + * You should have received a copy of the GNU General Public License along + * with this program; see the file COPYING; if not, write to the Free Software + * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA + */ + +#include <zebra.h> + +#include "prefix.h" +#include "linklist.h" +#include "if.h" +#include "table.h" +#include "rib.h" +#include "table.h" +#include "log.h" +#include "memory.h" + +#include "vty.h" +#include "zebra/debug.h" +#include "zebra/zserv.h" +#include "zebra/redistribute.h" +#include "zebra/interface.h" +#include "zebra/connected.h" +#include "zebra/rtadv.h" +#include "zebra/zebra_mpls.h" +#include "zebra/zebra_errors.h" +#include "zebra/zebra_router.h" + +/* communicate the withdrawal of a connected address */ +static void connected_withdraw(struct connected *ifc) +{ + if (!ifc) + return; + + /* Update interface address information to protocol daemon. */ + if (CHECK_FLAG(ifc->conf, ZEBRA_IFC_REAL)) { + zebra_interface_address_delete_update(ifc->ifp, ifc); + + if (ifc->address->family == AF_INET) + if_subnet_delete(ifc->ifp, ifc); + + connected_down(ifc->ifp, ifc); + + UNSET_FLAG(ifc->conf, ZEBRA_IFC_REAL); + } + + /* The address is not in the kernel anymore, so clear the flag */ + UNSET_FLAG(ifc->conf, ZEBRA_IFC_QUEUED); + + if (!CHECK_FLAG(ifc->conf, ZEBRA_IFC_CONFIGURED)) { + listnode_delete(ifc->ifp->connected, ifc); + connected_free(&ifc); + } +} + +static void connected_announce(struct interface *ifp, struct connected *ifc) +{ + if (!ifc) + return; + + if (!if_is_loopback(ifp) && ifc->address->family == AF_INET) { + if (ifc->address->prefixlen == IPV4_MAX_BITLEN) + SET_FLAG(ifc->flags, ZEBRA_IFA_UNNUMBERED); + else + UNSET_FLAG(ifc->flags, ZEBRA_IFA_UNNUMBERED); + } + + listnode_add(ifp->connected, ifc); + + /* Update interface address information to protocol daemon. 
*/ + if (ifc->address->family == AF_INET) + if_subnet_add(ifp, ifc); + + zebra_interface_address_add_update(ifp, ifc); + + if (if_is_operative(ifp)) { + connected_up(ifp, ifc); + } +} + +/* If same interface address is already exist... */ +struct connected *connected_check(struct interface *ifp, + union prefixconstptr pu) +{ + const struct prefix *p = pu.p; + struct connected *ifc; + struct listnode *node; + + for (ALL_LIST_ELEMENTS_RO(ifp->connected, node, ifc)) + if (prefix_same(ifc->address, p)) + return ifc; + + return NULL; +} + +/* same, but with peer address */ +struct connected *connected_check_ptp(struct interface *ifp, + union prefixconstptr pu, + union prefixconstptr du) +{ + const struct prefix *p = pu.p; + const struct prefix *d = du.p; + struct connected *ifc; + struct listnode *node; + + for (ALL_LIST_ELEMENTS_RO(ifp->connected, node, ifc)) { + if (!prefix_same(ifc->address, p)) + continue; + if (!CONNECTED_PEER(ifc) && !d) + return ifc; + if (CONNECTED_PEER(ifc) && d + && prefix_same(ifc->destination, d)) + return ifc; + } + + return NULL; +} + +/* Check if two ifc's describe the same address in the same state */ +static int connected_same(struct connected *ifc1, struct connected *ifc2) +{ + if (ifc1->ifp != ifc2->ifp) + return 0; + + if (ifc1->flags != ifc2->flags) + return 0; + + if (ifc1->conf != ifc2->conf) + return 0; + + if (ifc1->destination) + if (!ifc2->destination) + return 0; + if (ifc2->destination) + if (!ifc1->destination) + return 0; + + if (ifc1->destination && ifc2->destination) + if (!prefix_same(ifc1->destination, ifc2->destination)) + return 0; + + return 1; +} + +/* Handle changes to addresses and send the neccesary announcements + * to clients. */ +static void connected_update(struct interface *ifp, struct connected *ifc) +{ + struct connected *current; + + /* Check same connected route. 
*/ + current = connected_check_ptp(ifp, ifc->address, ifc->destination); + if (current) { + if (CHECK_FLAG(current->conf, ZEBRA_IFC_CONFIGURED)) + SET_FLAG(ifc->conf, ZEBRA_IFC_CONFIGURED); + + /* Avoid spurious withdraws, this might be just the kernel + * 'reflecting' + * back an address we have already added. + */ + if (connected_same(current, ifc)) { + /* nothing to do */ + connected_free(&ifc); + return; + } + + /* Clear the configured flag on the old ifc, so it will be freed + * by + * connected withdraw. */ + UNSET_FLAG(current->conf, ZEBRA_IFC_CONFIGURED); + connected_withdraw( + current); /* implicit withdraw - freebsd does this */ + } + + /* If the connected is new or has changed, announce it, if it is usable + */ + if (CHECK_FLAG(ifc->conf, ZEBRA_IFC_REAL)) + connected_announce(ifp, ifc); +} + +/* Called from if_up(). */ +void connected_up(struct interface *ifp, struct connected *ifc) +{ + afi_t afi; + struct prefix p; + struct nexthop nh = { + .type = NEXTHOP_TYPE_IFINDEX, + .ifindex = ifp->ifindex, + .vrf_id = ifp->vrf->vrf_id, + }; + struct zebra_vrf *zvrf; + uint32_t metric; + uint32_t flags = 0; + uint32_t count = 0; + struct listnode *cnode; + struct connected *c; + + zvrf = ifp->vrf->info; + if (!zvrf) { + flog_err( + EC_ZEBRA_VRF_NOT_FOUND, + "%s: Received Up for interface but no associated zvrf: %s(%d)", + __func__, ifp->vrf->name, ifp->vrf->vrf_id); + return; + } + if (!CHECK_FLAG(ifc->conf, ZEBRA_IFC_REAL)) + return; + + /* Ensure 'down' flag is cleared */ + UNSET_FLAG(ifc->conf, ZEBRA_IFC_DOWN); + + prefix_copy(&p, CONNECTED_PREFIX(ifc)); + + /* Apply mask to the network. */ + apply_mask(&p); + + afi = family2afi(p.family); + + switch (afi) { + case AFI_IP: + /* + * In case of connected address is 0.0.0.0/0 we treat it tunnel + * address. 
+ */ + if (prefix_ipv4_any((struct prefix_ipv4 *)&p)) + return; + break; + case AFI_IP6: +#ifndef GNU_LINUX + /* XXX: It is already done by rib_bogus_ipv6 within rib_add */ + if (IN6_IS_ADDR_UNSPECIFIED(&p.u.prefix6)) + return; +#endif + break; + default: + flog_warn(EC_ZEBRA_CONNECTED_AFI_UNKNOWN, + "Received unknown AFI: %s", afi2str(afi)); + return; + break; + } + + metric = (ifc->metric < (uint32_t)METRIC_MAX) ? + ifc->metric : ifp->metric; + + /* + * Since we are hand creating the connected routes + * in our main routing table, *if* we are working + * in an offloaded environment then we need to + * pretend like the route is offloaded so everything + * else will work + */ + if (zrouter.asic_offloaded) + flags |= ZEBRA_FLAG_OFFLOADED; + + /* + * It's possible to add the same network and mask + * to an interface over and over. This would + * result in an equivalent number of connected + * routes. Just add one connected route in + * for all the addresses on an interface that + * resolve to the same network and mask + */ + for (ALL_LIST_ELEMENTS_RO(ifp->connected, cnode, c)) { + struct prefix cp; + + prefix_copy(&cp, CONNECTED_PREFIX(c)); + apply_mask(&cp); + + if (prefix_same(&cp, &p) && + !CHECK_FLAG(c->conf, ZEBRA_IFC_DOWN)) + count++; + + if (count >= 2) + return; + } + + rib_add(afi, SAFI_UNICAST, zvrf->vrf->vrf_id, ZEBRA_ROUTE_CONNECT, 0, + flags, &p, NULL, &nh, 0, zvrf->table_id, metric, 0, 0, 0, + false); + + rib_add(afi, SAFI_MULTICAST, zvrf->vrf->vrf_id, ZEBRA_ROUTE_CONNECT, 0, + flags, &p, NULL, &nh, 0, zvrf->table_id, metric, 0, 0, 0, + false); + + /* Schedule LSP forwarding entries for processing, if appropriate. */ + if (zvrf->vrf->vrf_id == VRF_DEFAULT) { + if (IS_ZEBRA_DEBUG_MPLS) + zlog_debug( + "%u: IF %s IP %pFX address add/up, scheduling MPLS processing", + zvrf->vrf->vrf_id, ifp->name, &p); + mpls_mark_lsps_for_processing(zvrf, &p); + } +} + +/* Add connected IPv4 route to the interface. 
*/ +void connected_add_ipv4(struct interface *ifp, int flags, + const struct in_addr *addr, uint16_t prefixlen, + const struct in_addr *dest, const char *label, + uint32_t metric) +{ + struct prefix_ipv4 *p; + struct connected *ifc; + + if (ipv4_martian(addr)) + return; + + /* Make connected structure. */ + ifc = connected_new(); + ifc->ifp = ifp; + ifc->flags = flags; + ifc->metric = metric; + /* If we get a notification from the kernel, + * we can safely assume the address is known to the kernel */ + SET_FLAG(ifc->conf, ZEBRA_IFC_QUEUED); + if (!if_is_operative(ifp)) + SET_FLAG(ifc->conf, ZEBRA_IFC_DOWN); + + /* Allocate new connected address. */ + p = prefix_ipv4_new(); + p->family = AF_INET; + p->prefix = *addr; + p->prefixlen = + CHECK_FLAG(flags, ZEBRA_IFA_PEER) ? IPV4_MAX_BITLEN : prefixlen; + ifc->address = (struct prefix *)p; + + /* If there is a peer address. */ + if (CONNECTED_PEER(ifc)) { + /* validate the destination address */ + if (dest) { + p = prefix_ipv4_new(); + p->family = AF_INET; + p->prefix = *dest; + p->prefixlen = prefixlen; + ifc->destination = (struct prefix *)p; + + if (IPV4_ADDR_SAME(addr, dest)) + flog_warn( + EC_ZEBRA_IFACE_SAME_LOCAL_AS_PEER, + "interface %s has same local and peer address %pI4, routing protocols may malfunction", + ifp->name, addr); + } else { + zlog_debug( + "%s called for interface %s with peer flag set, but no peer address supplied", + __func__, ifp->name); + UNSET_FLAG(ifc->flags, ZEBRA_IFA_PEER); + } + } + + /* no destination address was supplied */ + if (!dest && (prefixlen == IPV4_MAX_BITLEN) && if_is_pointopoint(ifp)) + zlog_debug( + "PtP interface %s with addr %pI4/%d needs a peer address", + ifp->name, addr, prefixlen); + + /* Label of this address. */ + if (label) + ifc->label = XSTRDUP(MTYPE_CONNECTED_LABEL, label); + + /* For all that I know an IPv4 address is always ready when we receive + * the notification. So it should be safe to set the REAL flag here. 
*/ + SET_FLAG(ifc->conf, ZEBRA_IFC_REAL); + + connected_update(ifp, ifc); +} + +void connected_down(struct interface *ifp, struct connected *ifc) +{ + afi_t afi; + struct prefix p; + struct nexthop nh = { + .type = NEXTHOP_TYPE_IFINDEX, + .ifindex = ifp->ifindex, + .vrf_id = ifp->vrf->vrf_id, + }; + struct zebra_vrf *zvrf; + uint32_t count = 0; + struct listnode *cnode; + struct connected *c; + + zvrf = ifp->vrf->info; + if (!zvrf) { + flog_err( + EC_ZEBRA_VRF_NOT_FOUND, + "%s: Received Down for interface but no associated zvrf: %s(%d)", + __func__, ifp->vrf->name, ifp->vrf->vrf_id); + return; + } + + if (!CHECK_FLAG(ifc->conf, ZEBRA_IFC_REAL)) + return; + + /* Skip if we've already done this; this can happen if we have a + * config change that takes an interface down, then we receive kernel + * notifications about the downed interface and its addresses. + */ + if (CHECK_FLAG(ifc->conf, ZEBRA_IFC_DOWN)) { + if (IS_ZEBRA_DEBUG_RIB) + zlog_debug("%s: ifc %p, %pFX already DOWN", + __func__, ifc, ifc->address); + return; + } + + prefix_copy(&p, CONNECTED_PREFIX(ifc)); + + /* Apply mask to the network. */ + apply_mask(&p); + + afi = family2afi(p.family); + + switch (afi) { + case AFI_IP: + /* + * In case of connected address is 0.0.0.0/0 we treat it tunnel + * address. + */ + if (prefix_ipv4_any((struct prefix_ipv4 *)&p)) + return; + break; + case AFI_IP6: + if (IN6_IS_ADDR_UNSPECIFIED(&p.u.prefix6)) + return; + break; + default: + zlog_warn("Unknown AFI: %s", afi2str(afi)); + break; + } + + /* Mark the address as 'down' */ + SET_FLAG(ifc->conf, ZEBRA_IFC_DOWN); + + /* + * It's possible to have X number of addresses + * on a interface that all resolve to the same + * network and mask. Find them and just + * allow the deletion when are removing the last + * one. 
+ */ + for (ALL_LIST_ELEMENTS_RO(ifp->connected, cnode, c)) { + struct prefix cp; + + prefix_copy(&cp, CONNECTED_PREFIX(c)); + apply_mask(&cp); + + if (prefix_same(&p, &cp) && + !CHECK_FLAG(c->conf, ZEBRA_IFC_DOWN)) + count++; + + if (count >= 1) + return; + } + + /* + * Same logic as for connected_up(): push the changes into the + * head. + */ + rib_delete(afi, SAFI_UNICAST, zvrf->vrf->vrf_id, ZEBRA_ROUTE_CONNECT, 0, + 0, &p, NULL, &nh, 0, zvrf->table_id, 0, 0, false); + + rib_delete(afi, SAFI_MULTICAST, zvrf->vrf->vrf_id, ZEBRA_ROUTE_CONNECT, + 0, 0, &p, NULL, &nh, 0, zvrf->table_id, 0, 0, false); + + /* Schedule LSP forwarding entries for processing, if appropriate. */ + if (zvrf->vrf->vrf_id == VRF_DEFAULT) { + if (IS_ZEBRA_DEBUG_MPLS) + zlog_debug( + "%u: IF %s IP %pFX address down, scheduling MPLS processing", + zvrf->vrf->vrf_id, ifp->name, &p); + mpls_mark_lsps_for_processing(zvrf, &p); + } +} + +static void connected_delete_helper(struct connected *ifc, struct prefix *p) +{ + struct interface *ifp; + + if (!ifc) + return; + ifp = ifc->ifp; + + connected_withdraw(ifc); + + /* Schedule LSP forwarding entries for processing, if appropriate. */ + if (ifp->vrf->vrf_id == VRF_DEFAULT) { + if (IS_ZEBRA_DEBUG_MPLS) + zlog_debug( + "%u: IF %s IP %pFX address delete, scheduling MPLS processing", + ifp->vrf->vrf_id, ifp->name, p); + mpls_mark_lsps_for_processing(ifp->vrf->info, p); + } +} + +/* Delete connected IPv4 route to the interface. */ +void connected_delete_ipv4(struct interface *ifp, int flags, + const struct in_addr *addr, uint16_t prefixlen, + const struct in_addr *dest) +{ + struct prefix p, d; + struct connected *ifc; + + memset(&p, 0, sizeof(p)); + p.family = AF_INET; + p.u.prefix4 = *addr; + p.prefixlen = + CHECK_FLAG(flags, ZEBRA_IFA_PEER) ? 
IPV4_MAX_BITLEN : prefixlen; + + if (dest) { + memset(&d, 0, sizeof(d)); + d.family = AF_INET; + d.u.prefix4 = *dest; + d.prefixlen = prefixlen; + ifc = connected_check_ptp(ifp, &p, &d); + } else + ifc = connected_check_ptp(ifp, &p, NULL); + + connected_delete_helper(ifc, &p); +} + +/* Add connected IPv6 route to the interface. */ +void connected_add_ipv6(struct interface *ifp, int flags, + const struct in6_addr *addr, + const struct in6_addr *dest, uint16_t prefixlen, + const char *label, uint32_t metric) +{ + struct prefix_ipv6 *p; + struct connected *ifc; + + if (ipv6_martian(addr)) + return; + + /* Make connected structure. */ + ifc = connected_new(); + ifc->ifp = ifp; + ifc->flags = flags; + ifc->metric = metric; + /* If we get a notification from the kernel, + * we can safely assume the address is known to the kernel */ + SET_FLAG(ifc->conf, ZEBRA_IFC_QUEUED); + if (!if_is_operative(ifp)) + SET_FLAG(ifc->conf, ZEBRA_IFC_DOWN); + + /* Allocate new connected address. */ + p = prefix_ipv6_new(); + p->family = AF_INET6; + IPV6_ADDR_COPY(&p->prefix, addr); + p->prefixlen = prefixlen; + ifc->address = (struct prefix *)p; + + /* Add global ipv6 address to the RA prefix list */ + if (!IN6_IS_ADDR_LINKLOCAL(&p->prefix)) + rtadv_add_prefix(ifp->info, p); + + if (dest) { + p = prefix_ipv6_new(); + p->family = AF_INET6; + IPV6_ADDR_COPY(&p->prefix, dest); + p->prefixlen = prefixlen; + ifc->destination = (struct prefix *)p; + } else { + if (CHECK_FLAG(ifc->flags, ZEBRA_IFA_PEER)) { + zlog_debug( + "%s called for interface %s with peer flag set, but no peer address supplied", + __func__, ifp->name); + UNSET_FLAG(ifc->flags, ZEBRA_IFA_PEER); + } + } + + /* Label of this address. */ + if (label) + ifc->label = XSTRDUP(MTYPE_CONNECTED_LABEL, label); + + /* On Linux, we only get here when DAD is complete, therefore we can set + * ZEBRA_IFC_REAL. 
+ * + * On BSD, there currently doesn't seem to be a way to check for + * completion of + * DAD, so we replicate the old behaviour and set ZEBRA_IFC_REAL, + * although DAD + * might still be running. + */ + SET_FLAG(ifc->conf, ZEBRA_IFC_REAL); + connected_update(ifp, ifc); +} + +void connected_delete_ipv6(struct interface *ifp, + const struct in6_addr *address, + const struct in6_addr *dest, uint16_t prefixlen) +{ + struct prefix p, d; + struct connected *ifc; + + memset(&p, 0, sizeof(p)); + p.family = AF_INET6; + memcpy(&p.u.prefix6, address, sizeof(struct in6_addr)); + p.prefixlen = prefixlen; + + /* Delete global ipv6 address from RA prefix list */ + if (!IN6_IS_ADDR_LINKLOCAL(&p.u.prefix6)) + rtadv_delete_prefix(ifp->info, &p); + + if (dest) { + memset(&d, 0, sizeof(d)); + d.family = AF_INET6; + IPV6_ADDR_COPY(&d.u.prefix6, dest); + d.prefixlen = prefixlen; + ifc = connected_check_ptp(ifp, &p, &d); + } else + ifc = connected_check_ptp(ifp, &p, NULL); + + connected_delete_helper(ifc, &p); +} + +int connected_is_unnumbered(struct interface *ifp) +{ + struct connected *connected; + struct listnode *node; + + for (ALL_LIST_ELEMENTS_RO(ifp->connected, node, connected)) { + if (CHECK_FLAG(connected->conf, ZEBRA_IFC_REAL) + && connected->address->family == AF_INET) + return CHECK_FLAG(connected->flags, + ZEBRA_IFA_UNNUMBERED); + } + return 0; +} diff --git a/zebra/connected.h b/zebra/connected.h new file mode 100644 index 0000000..3ed9f6d --- /dev/null +++ b/zebra/connected.h @@ -0,0 +1,70 @@ +/* + * Interface's address and mask. + * Copyright (C) 1997 Kunihiro Ishiguro + * + * This file is part of GNU Zebra. + * + * GNU Zebra is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License as published by the + * Free Software Foundation; either version 2, or (at your option) any + * later version. 
+ * + * GNU Zebra is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. + * + * You should have received a copy of the GNU General Public License along + * with this program; see the file COPYING; if not, write to the Free Software + * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA + */ + +#ifndef _ZEBRA_CONNECTED_H +#define _ZEBRA_CONNECTED_H + +#include <zebra.h> +#include <stdint.h> + +#include "lib/if.h" +#include "lib/prefix.h" + +#ifdef __cplusplus +extern "C" { +#endif + +extern struct connected *connected_check(struct interface *ifp, + union prefixconstptr p); +extern struct connected *connected_check_ptp(struct interface *ifp, + union prefixconstptr p, + union prefixconstptr d); + +extern void connected_add_ipv4(struct interface *ifp, int flags, + const struct in_addr *addr, uint16_t prefixlen, + const struct in_addr *dest, const char *label, + uint32_t metric); + +extern void connected_delete_ipv4(struct interface *ifp, int flags, + const struct in_addr *addr, + uint16_t prefixlen, + const struct in_addr *dest); + +extern void connected_delete_ipv4_unnumbered(struct connected *ifc); + +extern void connected_up(struct interface *ifp, struct connected *ifc); +extern void connected_down(struct interface *ifp, struct connected *ifc); + +extern void connected_add_ipv6(struct interface *ifp, int flags, + const struct in6_addr *address, + const struct in6_addr *dest, uint16_t prefixlen, + const char *label, uint32_t metric); +extern void connected_delete_ipv6(struct interface *ifp, + const struct in6_addr *address, + const struct in6_addr *dest, + uint16_t prefixlen); + +extern int connected_is_unnumbered(struct interface *); + +#ifdef __cplusplus +} +#endif +#endif /*_ZEBRA_CONNECTED_H */ diff --git a/zebra/debug.c b/zebra/debug.c new file mode 100644 index 
0000000..69aaed3 --- /dev/null +++ b/zebra/debug.c @@ -0,0 +1,848 @@ +/* + * Zebra debug related function + * Copyright (C) 1999 Kunihiro Ishiguro + * + * This file is part of GNU Zebra. + * + * GNU Zebra is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License as published by the + * Free Software Foundation; either version 2, or (at your option) any + * later version. + * + * GNU Zebra is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. + * + * You should have received a copy of the GNU General Public License along + * with this program; see the file COPYING; if not, write to the Free Software + * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA + */ + +#include <zebra.h> +#include "command.h" +#include "debug.h" + +#ifndef VTYSH_EXTRACT_PL +#include "zebra/debug_clippy.c" +#endif + +/* For debug statement. 
*/ +unsigned long zebra_debug_event; +unsigned long zebra_debug_packet; +unsigned long zebra_debug_kernel; +unsigned long zebra_debug_rib; +unsigned long zebra_debug_fpm; +unsigned long zebra_debug_nht; +unsigned long zebra_debug_mpls; +unsigned long zebra_debug_vxlan; +unsigned long zebra_debug_pw; +unsigned long zebra_debug_dplane; +unsigned long zebra_debug_dplane_dpdk; +unsigned long zebra_debug_mlag; +unsigned long zebra_debug_nexthop; +unsigned long zebra_debug_evpn_mh; +unsigned long zebra_debug_pbr; +unsigned long zebra_debug_neigh; + +DEFINE_HOOK(zebra_debug_show_debugging, (struct vty *vty), (vty)); + +DEFUN_NOSH (show_debugging_zebra, + show_debugging_zebra_cmd, + "show debugging [zebra]", + SHOW_STR + "Debugging information\n" + "Zebra configuration\n") +{ + vty_out(vty, "Zebra debugging status:\n"); + + if (IS_ZEBRA_DEBUG_EVENT) + vty_out(vty, " Zebra event debugging is on\n"); + + if (IS_ZEBRA_DEBUG_PACKET) { + if (IS_ZEBRA_DEBUG_SEND && IS_ZEBRA_DEBUG_RECV) { + vty_out(vty, " Zebra packet%s debugging is on\n", + IS_ZEBRA_DEBUG_DETAIL ? " detail" : ""); + } else { + if (IS_ZEBRA_DEBUG_SEND) + vty_out(vty, + " Zebra packet send%s debugging is on\n", + IS_ZEBRA_DEBUG_DETAIL ? " detail" : ""); + else + vty_out(vty, + " Zebra packet receive%s debugging is on\n", + IS_ZEBRA_DEBUG_DETAIL ? 
" detail" : ""); + } + } + + if (IS_ZEBRA_DEBUG_KERNEL) + vty_out(vty, " Zebra kernel debugging is on\n"); + if (IS_ZEBRA_DEBUG_KERNEL_MSGDUMP_SEND) + vty_out(vty, + " Zebra kernel netlink message dumps (send) are on\n"); + if (IS_ZEBRA_DEBUG_KERNEL_MSGDUMP_RECV) + vty_out(vty, + " Zebra kernel netlink message dumps (recv) are on\n"); + + /* Check here using flags as the 'macro' does an OR */ + if (CHECK_FLAG(zebra_debug_rib, ZEBRA_DEBUG_RIB_DETAILED)) + vty_out(vty, " Zebra RIB detailed debugging is on\n"); + else if (CHECK_FLAG(zebra_debug_rib, ZEBRA_DEBUG_RIB)) + vty_out(vty, " Zebra RIB debugging is on\n"); + + if (IS_ZEBRA_DEBUG_FPM) + vty_out(vty, " Zebra FPM debugging is on\n"); + if (IS_ZEBRA_DEBUG_NHT_DETAILED) + vty_out(vty, " Zebra detailed next-hop tracking debugging is on\n"); + else if (IS_ZEBRA_DEBUG_NHT) + vty_out(vty, " Zebra next-hop tracking debugging is on\n"); + if (IS_ZEBRA_DEBUG_MPLS_DETAIL) + vty_out(vty, " Zebra detailed MPLS debugging is on\n"); + else if (IS_ZEBRA_DEBUG_MPLS) + vty_out(vty, " Zebra MPLS debugging is on\n"); + + if (IS_ZEBRA_DEBUG_VXLAN) + vty_out(vty, " Zebra VXLAN debugging is on\n"); + if (IS_ZEBRA_DEBUG_PW) + vty_out(vty, " Zebra pseudowire debugging is on\n"); + if (IS_ZEBRA_DEBUG_DPLANE_DETAIL) + vty_out(vty, " Zebra detailed dataplane debugging is on\n"); + else if (IS_ZEBRA_DEBUG_DPLANE) + vty_out(vty, " Zebra dataplane debugging is on\n"); + if (IS_ZEBRA_DEBUG_DPLANE_DPDK_DETAIL) + vty_out(vty, + " Zebra detailed dpdk dataplane debugging is on\n"); + else if (IS_ZEBRA_DEBUG_DPLANE_DPDK) + vty_out(vty, " Zebra dataplane dpdk debugging is on\n"); + if (IS_ZEBRA_DEBUG_MLAG) + vty_out(vty, " Zebra mlag debugging is on\n"); + if (IS_ZEBRA_DEBUG_NHG_DETAIL) + vty_out(vty, " Zebra detailed nexthop debugging is on\n"); + else if (IS_ZEBRA_DEBUG_NHG) + vty_out(vty, " Zebra nexthop debugging is on\n"); + + if (IS_ZEBRA_DEBUG_EVPN_MH_ES) + vty_out(vty, " Zebra EVPN-MH ethernet segment debugging is on\n"); + + if 
(IS_ZEBRA_DEBUG_EVPN_MH_NH) + vty_out(vty, " Zebra EVPN-MH nexthop debugging is on\n"); + + if (IS_ZEBRA_DEBUG_EVPN_MH_MAC) + vty_out(vty, " Zebra EVPN-MH MAC debugging is on\n"); + + if (IS_ZEBRA_DEBUG_EVPN_MH_NEIGH) + vty_out(vty, " Zebra EVPN-MH Neigh debugging is on\n"); + + if (IS_ZEBRA_DEBUG_PBR) + vty_out(vty, " Zebra PBR debugging is on\n"); + + hook_call(zebra_debug_show_debugging, vty); + return CMD_SUCCESS; +} + +DEFUN (debug_zebra_events, + debug_zebra_events_cmd, + "debug zebra events", + DEBUG_STR + "Zebra configuration\n" + "Debug option set for zebra events\n") +{ + zebra_debug_event = ZEBRA_DEBUG_EVENT; + return CMD_SUCCESS; +} + +DEFUN (debug_zebra_nht, + debug_zebra_nht_cmd, + "debug zebra nht [detailed]", + DEBUG_STR + "Zebra configuration\n" + "Debug option set for zebra next hop tracking\n" + "Debug option set for detailed info\n") +{ + int idx = 0; + + zebra_debug_nht = ZEBRA_DEBUG_NHT; + + if (argv_find(argv, argc, "detailed", &idx)) + zebra_debug_nht |= ZEBRA_DEBUG_NHT_DETAILED; + + return CMD_SUCCESS; +} + +DEFPY (debug_zebra_mpls, + debug_zebra_mpls_cmd, + "debug zebra mpls [detailed$detail]", + DEBUG_STR + "Zebra configuration\n" + "Debug option set for zebra MPLS LSPs\n" + "Debug option for detailed info\n") +{ + zebra_debug_mpls = ZEBRA_DEBUG_MPLS; + + if (detail) + zebra_debug_mpls |= ZEBRA_DEBUG_MPLS_DETAILED; + + return CMD_SUCCESS; +} + +DEFPY (debug_zebra_vxlan, + debug_zebra_vxlan_cmd, + "debug zebra vxlan", + DEBUG_STR + "Zebra configuration\n" + "Debug option set for zebra VxLAN (EVPN)\n") +{ + zebra_debug_vxlan = ZEBRA_DEBUG_VXLAN; + return CMD_SUCCESS; +} + +DEFUN (debug_zebra_pw, + debug_zebra_pw_cmd, + "[no] debug zebra pseudowires", + NO_STR + DEBUG_STR + "Zebra configuration\n" + "Debug option set for zebra pseudowires\n") +{ + if (strmatch(argv[0]->text, "no")) + UNSET_FLAG(zebra_debug_pw, ZEBRA_DEBUG_PW); + else + SET_FLAG(zebra_debug_pw, ZEBRA_DEBUG_PW); + return CMD_SUCCESS; +} + +DEFUN (debug_zebra_packet, + 
debug_zebra_packet_cmd, + "debug zebra packet [<recv|send>] [detail]", + DEBUG_STR + "Zebra configuration\n" + "Debug option set for zebra packet\n" + "Debug option set for receive packet\n" + "Debug option set for send packet\n" + "Debug option set for detailed info\n") +{ + int idx = 0; + zebra_debug_packet = ZEBRA_DEBUG_PACKET; + + if (argv_find(argv, argc, "send", &idx)) + SET_FLAG(zebra_debug_packet, ZEBRA_DEBUG_SEND); + else if (argv_find(argv, argc, "recv", &idx)) + SET_FLAG(zebra_debug_packet, ZEBRA_DEBUG_RECV); + else { + SET_FLAG(zebra_debug_packet, ZEBRA_DEBUG_SEND); + SET_FLAG(zebra_debug_packet, ZEBRA_DEBUG_RECV); + } + + if (argv_find(argv, argc, "detail", &idx)) + SET_FLAG(zebra_debug_packet, ZEBRA_DEBUG_DETAIL); + + return CMD_SUCCESS; +} + +DEFUN (debug_zebra_kernel, + debug_zebra_kernel_cmd, + "debug zebra kernel", + DEBUG_STR + "Zebra configuration\n" + "Debug option set for zebra between kernel interface\n") +{ + SET_FLAG(zebra_debug_kernel, ZEBRA_DEBUG_KERNEL); + + return CMD_SUCCESS; +} + +#if defined(HAVE_NETLINK) +DEFUN (debug_zebra_kernel_msgdump, + debug_zebra_kernel_msgdump_cmd, + "debug zebra kernel msgdump [<recv|send>]", + DEBUG_STR + "Zebra configuration\n" + "Debug option set for zebra between kernel interface\n" + "Dump raw netlink messages, sent and received\n" + "Dump raw netlink messages received\n" + "Dump raw netlink messages sent\n") +{ + int idx = 0; + + if (argv_find(argv, argc, "recv", &idx)) + SET_FLAG(zebra_debug_kernel, ZEBRA_DEBUG_KERNEL_MSGDUMP_RECV); + else if (argv_find(argv, argc, "send", &idx)) + SET_FLAG(zebra_debug_kernel, ZEBRA_DEBUG_KERNEL_MSGDUMP_SEND); + else { + SET_FLAG(zebra_debug_kernel, ZEBRA_DEBUG_KERNEL_MSGDUMP_RECV); + SET_FLAG(zebra_debug_kernel, ZEBRA_DEBUG_KERNEL_MSGDUMP_SEND); + } + + return CMD_SUCCESS; +} +#endif + +DEFUN (debug_zebra_rib, + debug_zebra_rib_cmd, + "debug zebra rib [detailed]", + DEBUG_STR + "Zebra configuration\n" + "Debug RIB events\n" + "Detailed debugs\n") +{ + int idx = 0; + 
SET_FLAG(zebra_debug_rib, ZEBRA_DEBUG_RIB); + + if (argv_find(argv, argc, "detailed", &idx)) + SET_FLAG(zebra_debug_rib, ZEBRA_DEBUG_RIB_DETAILED); + + return CMD_SUCCESS; +} + +DEFUN (debug_zebra_fpm, + debug_zebra_fpm_cmd, + "debug zebra fpm", + DEBUG_STR + "Zebra configuration\n" + "Debug zebra FPM events\n") +{ + SET_FLAG(zebra_debug_fpm, ZEBRA_DEBUG_FPM); + return CMD_SUCCESS; +} + +DEFUN (debug_zebra_dplane, + debug_zebra_dplane_cmd, + "debug zebra dplane [detailed]", + DEBUG_STR + "Zebra configuration\n" + "Debug zebra dataplane events\n" + "Detailed debug information\n") +{ + int idx = 0; + + SET_FLAG(zebra_debug_dplane, ZEBRA_DEBUG_DPLANE); + + if (argv_find(argv, argc, "detailed", &idx)) + SET_FLAG(zebra_debug_dplane, ZEBRA_DEBUG_DPLANE_DETAILED); + + return CMD_SUCCESS; +} + +DEFPY(debug_zebra_dplane_dpdk, debug_zebra_dplane_dpdk_cmd, + "[no$no] debug zebra dplane dpdk [detailed$detail]", + NO_STR DEBUG_STR + "Zebra configuration\n" + "Debug zebra dataplane events\n" + "Debug zebra DPDK offload events\n" + "Detailed debug information\n") +{ + if (no) { + UNSET_FLAG(zebra_debug_dplane_dpdk, ZEBRA_DEBUG_DPLANE_DPDK); + UNSET_FLAG(zebra_debug_dplane_dpdk, + ZEBRA_DEBUG_DPLANE_DPDK_DETAIL); + } else { + SET_FLAG(zebra_debug_dplane_dpdk, ZEBRA_DEBUG_DPLANE_DPDK); + + if (detail) + SET_FLAG(zebra_debug_dplane, + ZEBRA_DEBUG_DPLANE_DPDK_DETAIL); + } + + return CMD_SUCCESS; +} + +DEFUN (debug_zebra_pbr, + debug_zebra_pbr_cmd, + "debug zebra pbr", + DEBUG_STR + "Zebra configuration\n" + "Debug zebra pbr events\n") +{ + SET_FLAG(zebra_debug_pbr, ZEBRA_DEBUG_PBR); + return CMD_SUCCESS; +} + +DEFPY (debug_zebra_neigh, + debug_zebra_neigh_cmd, + "[no$no] debug zebra neigh", + NO_STR + DEBUG_STR + "Zebra configuration\n" + "Debug zebra neigh events\n") +{ + if (no) + UNSET_FLAG(zebra_debug_neigh, ZEBRA_DEBUG_NEIGH); + else + SET_FLAG(zebra_debug_neigh, ZEBRA_DEBUG_NEIGH); + + return CMD_SUCCESS; +} + +DEFPY (debug_zebra_mlag, + debug_zebra_mlag_cmd, + "[no$no] debug 
zebra mlag", + NO_STR + DEBUG_STR + "Zebra configuration\n" + "Debug option set for mlag events\n") +{ + if (no) + UNSET_FLAG(zebra_debug_mlag, ZEBRA_DEBUG_MLAG); + else + SET_FLAG(zebra_debug_mlag, ZEBRA_DEBUG_MLAG); + return CMD_SUCCESS; +} + +DEFPY (debug_zebra_evpn_mh, + debug_zebra_evpn_mh_cmd, + "[no$no] debug zebra evpn mh <es$es|mac$mac|neigh$neigh|nh$nh>", + NO_STR + DEBUG_STR + "Zebra configuration\n" + "EVPN\n" + "Multihoming\n" + "Ethernet Segment Debugging\n" + "MAC Debugging\n" + "Neigh Debugging\n" + "Nexthop Debugging\n") +{ + if (es) { + if (no) + UNSET_FLAG(zebra_debug_evpn_mh, ZEBRA_DEBUG_EVPN_MH_ES); + else + SET_FLAG(zebra_debug_evpn_mh, ZEBRA_DEBUG_EVPN_MH_ES); + } + + if (mac) { + if (no) + UNSET_FLAG(zebra_debug_evpn_mh, + ZEBRA_DEBUG_EVPN_MH_MAC); + else + SET_FLAG(zebra_debug_evpn_mh, ZEBRA_DEBUG_EVPN_MH_MAC); + } + + if (neigh) { + if (no) + UNSET_FLAG(zebra_debug_evpn_mh, + ZEBRA_DEBUG_EVPN_MH_NEIGH); + else + SET_FLAG(zebra_debug_evpn_mh, + ZEBRA_DEBUG_EVPN_MH_NEIGH); + } + + if (nh) { + if (no) + UNSET_FLAG(zebra_debug_evpn_mh, ZEBRA_DEBUG_EVPN_MH_NH); + else + SET_FLAG(zebra_debug_evpn_mh, ZEBRA_DEBUG_EVPN_MH_NH); + } + + return CMD_SUCCESS; +} + +DEFUN (no_debug_zebra_events, + no_debug_zebra_events_cmd, + "no debug zebra events", + NO_STR + DEBUG_STR + "Zebra configuration\n" + "Debug option set for zebra events\n") +{ + zebra_debug_event = 0; + return CMD_SUCCESS; +} + +DEFUN (no_debug_zebra_nht, + no_debug_zebra_nht_cmd, + "no debug zebra nht [detailed]", + NO_STR + DEBUG_STR + "Zebra configuration\n" + "Debug option set for zebra next hop tracking\n" + "Debug option set for detailed info\n") +{ + zebra_debug_nht = 0; + return CMD_SUCCESS; +} + +DEFUN (no_debug_zebra_mpls, + no_debug_zebra_mpls_cmd, + "no debug zebra mpls [detailed]", + NO_STR + DEBUG_STR + "Zebra configuration\n" + "Debug option set for zebra MPLS LSPs\n" + "Debug option for zebra detailed info\n") +{ + zebra_debug_mpls = 0; + return CMD_SUCCESS; +} + +DEFUN 
(no_debug_zebra_vxlan, + no_debug_zebra_vxlan_cmd, + "no debug zebra vxlan", + NO_STR + DEBUG_STR + "Zebra configuration\n" + "Debug option set for zebra VxLAN (EVPN)\n") +{ + zebra_debug_vxlan = 0; + return CMD_SUCCESS; +} + +DEFUN (no_debug_zebra_packet, + no_debug_zebra_packet_cmd, + "no debug zebra packet [<recv|send>] [detail]", + NO_STR + DEBUG_STR + "Zebra configuration\n" + "Debug option set for zebra packet\n" + "Debug option set for receive packet\n" + "Debug option set for send packet\n" + "Debug option set for detailed info\n") +{ + zebra_debug_packet = 0; + return CMD_SUCCESS; +} + +DEFUN (no_debug_zebra_kernel, + no_debug_zebra_kernel_cmd, + "no debug zebra kernel", + NO_STR + DEBUG_STR + "Zebra configuration\n" + "Debug option set for zebra between kernel interface\n") +{ + UNSET_FLAG(zebra_debug_kernel, ZEBRA_DEBUG_KERNEL); + + return CMD_SUCCESS; +} + +#if defined(HAVE_NETLINK) +DEFUN (no_debug_zebra_kernel_msgdump, + no_debug_zebra_kernel_msgdump_cmd, + "no debug zebra kernel msgdump [<recv|send>]", + NO_STR + DEBUG_STR + "Zebra configuration\n" + "Debug option set for zebra between kernel interface\n" + "Dump raw netlink messages, sent and received\n" + "Dump raw netlink messages received\n" + "Dump raw netlink messages sent\n") +{ + int idx = 0; + + if (argv_find(argv, argc, "recv", &idx)) + UNSET_FLAG(zebra_debug_kernel, ZEBRA_DEBUG_KERNEL_MSGDUMP_RECV); + else if (argv_find(argv, argc, "send", &idx)) + UNSET_FLAG(zebra_debug_kernel, ZEBRA_DEBUG_KERNEL_MSGDUMP_SEND); + else { + UNSET_FLAG(zebra_debug_kernel, ZEBRA_DEBUG_KERNEL_MSGDUMP_RECV); + UNSET_FLAG(zebra_debug_kernel, ZEBRA_DEBUG_KERNEL_MSGDUMP_SEND); + } + + return CMD_SUCCESS; +} +#endif + +DEFUN (no_debug_zebra_rib, + no_debug_zebra_rib_cmd, + "no debug zebra rib [detailed]", + NO_STR + DEBUG_STR + "Zebra configuration\n" + "Debug zebra RIB\n" + "Detailed debugs\n") +{ + zebra_debug_rib = 0; + return CMD_SUCCESS; +} + +DEFUN (no_debug_zebra_fpm, + no_debug_zebra_fpm_cmd, + "no debug 
zebra fpm", + NO_STR + DEBUG_STR + "Zebra configuration\n" + "Debug zebra FPM events\n") +{ + zebra_debug_fpm = 0; + return CMD_SUCCESS; +} + +DEFUN (no_debug_zebra_dplane, + no_debug_zebra_dplane_cmd, + "no debug zebra dplane", + NO_STR + DEBUG_STR + "Zebra configuration\n" + "Debug zebra dataplane events\n") +{ + zebra_debug_dplane = 0; + return CMD_SUCCESS; +} + +DEFUN (no_debug_zebra_pbr, + no_debug_zebra_pbr_cmd, + "no debug zebra pbr", + NO_STR + DEBUG_STR + "Zebra configuration\n" + "Debug zebra pbr events\n") +{ + zebra_debug_pbr = 0; + return CMD_SUCCESS; +} + +DEFPY (debug_zebra_nexthop, + debug_zebra_nexthop_cmd, + "[no$no] debug zebra nexthop [detail$detail]", + NO_STR + DEBUG_STR + "Zebra configuration\n" + "Debug zebra nexthop events\n" + "Detailed information\n") +{ + if (no) + zebra_debug_nexthop = 0; + else { + SET_FLAG(zebra_debug_nexthop, ZEBRA_DEBUG_NHG); + + if (detail) + SET_FLAG(zebra_debug_nexthop, + ZEBRA_DEBUG_NHG_DETAILED); + } + + return CMD_SUCCESS; +} + +/* Debug node. */ +static int config_write_debug(struct vty *vty); +struct cmd_node debug_node = { + .name = "debug", + .node = DEBUG_NODE, + .prompt = "", + .config_write = config_write_debug, +}; + +static int config_write_debug(struct vty *vty) +{ + int write = 0; + + if (IS_ZEBRA_DEBUG_EVENT) { + vty_out(vty, "debug zebra events\n"); + write++; + } + if (IS_ZEBRA_DEBUG_PACKET) { + if (IS_ZEBRA_DEBUG_SEND && IS_ZEBRA_DEBUG_RECV) { + vty_out(vty, "debug zebra packet%s\n", + IS_ZEBRA_DEBUG_DETAIL ? " detail" : ""); + write++; + } else { + if (IS_ZEBRA_DEBUG_SEND) + vty_out(vty, "debug zebra packet send%s\n", + IS_ZEBRA_DEBUG_DETAIL ? " detail" : ""); + else + vty_out(vty, "debug zebra packet recv%s\n", + IS_ZEBRA_DEBUG_DETAIL ? 
" detail" : ""); + write++; + } + } + + if (IS_ZEBRA_DEBUG_KERNEL_MSGDUMP_SEND + && IS_ZEBRA_DEBUG_KERNEL_MSGDUMP_RECV) { + vty_out(vty, "debug zebra kernel msgdump\n"); + write++; + } else if (IS_ZEBRA_DEBUG_KERNEL_MSGDUMP_RECV) { + vty_out(vty, "debug zebra kernel msgdump recv\n"); + write++; + } else if (IS_ZEBRA_DEBUG_KERNEL_MSGDUMP_SEND) { + vty_out(vty, "debug zebra kernel msgdump send\n"); + write++; + } + + if (IS_ZEBRA_DEBUG_KERNEL) { + vty_out(vty, "debug zebra kernel\n"); + write++; + } + + if (CHECK_FLAG(zebra_debug_rib, ZEBRA_DEBUG_RIB_DETAILED)) { + vty_out(vty, "debug zebra rib detailed\n"); + write++; + } else if (CHECK_FLAG(zebra_debug_rib, ZEBRA_DEBUG_RIB)) { + vty_out(vty, "debug zebra rib\n"); + write++; + } + + if (IS_ZEBRA_DEBUG_FPM) { + vty_out(vty, "debug zebra fpm\n"); + write++; + } + + if (IS_ZEBRA_DEBUG_NHT_DETAILED) { + vty_out(vty, "debug zebra nht detailed\n"); + write++; + } else if (IS_ZEBRA_DEBUG_NHT) { + vty_out(vty, "debug zebra nht\n"); + write++; + } + + if (IS_ZEBRA_DEBUG_MPLS_DETAIL) { + vty_out(vty, "debug zebra mpls detailed\n"); + write++; + } else if (IS_ZEBRA_DEBUG_MPLS) { + vty_out(vty, "debug zebra mpls\n"); + write++; + } + + if (IS_ZEBRA_DEBUG_VXLAN) { + vty_out(vty, "debug zebra vxlan\n"); + write++; + } + if (IS_ZEBRA_DEBUG_MLAG) { + vty_out(vty, "debug zebra mlag\n"); + write++; + } + if (IS_ZEBRA_DEBUG_EVPN_MH_ES) { + vty_out(vty, "debug zebra evpn mh es\n"); + write++; + } + if (IS_ZEBRA_DEBUG_EVPN_MH_NH) { + vty_out(vty, "debug zebra evpn mh nh\n"); + write++; + } + if (IS_ZEBRA_DEBUG_EVPN_MH_MAC) { + vty_out(vty, "debug zebra evpn mh mac\n"); + write++; + } + if (IS_ZEBRA_DEBUG_EVPN_MH_NEIGH) { + vty_out(vty, "debug zebra evpn mh neigh\n"); + write++; + } + if (IS_ZEBRA_DEBUG_PW) { + vty_out(vty, "debug zebra pseudowires\n"); + write++; + } + + if (CHECK_FLAG(zebra_debug_dplane, ZEBRA_DEBUG_DPLANE_DETAILED)) { + vty_out(vty, "debug zebra dplane detailed\n"); + write++; + } else if 
(CHECK_FLAG(zebra_debug_dplane, ZEBRA_DEBUG_DPLANE)) { + vty_out(vty, "debug zebra dplane\n"); + write++; + } + + if (CHECK_FLAG(zebra_debug_dplane, ZEBRA_DEBUG_DPLANE_DPDK_DETAIL)) { + vty_out(vty, "debug zebra dplane dpdk detailed\n"); + write++; + } else if (CHECK_FLAG(zebra_debug_dplane, ZEBRA_DEBUG_DPLANE_DPDK)) { + vty_out(vty, "debug zebra dplane dpdk\n"); + write++; + } + + if (CHECK_FLAG(zebra_debug_nexthop, ZEBRA_DEBUG_NHG_DETAILED)) { + vty_out(vty, "debug zebra nexthop detail\n"); + write++; + } else if (CHECK_FLAG(zebra_debug_nexthop, ZEBRA_DEBUG_NHG)) { + vty_out(vty, "debug zebra nexthop\n"); + write++; + } + + if (IS_ZEBRA_DEBUG_PBR) { + vty_out(vty, "debug zebra pbr\n"); + write++; + } + + if (IS_ZEBRA_DEBUG_NEIGH) { + vty_out(vty, "debug zebra neigh\n"); + write++; + } + + return write; +} + +void zebra_debug_init(void) +{ + zebra_debug_event = 0; + zebra_debug_packet = 0; + zebra_debug_kernel = 0; + zebra_debug_rib = 0; + zebra_debug_fpm = 0; + zebra_debug_mpls = 0; + zebra_debug_vxlan = 0; + zebra_debug_pw = 0; + zebra_debug_dplane = 0; + zebra_debug_dplane_dpdk = 0; + zebra_debug_mlag = 0; + zebra_debug_evpn_mh = 0; + zebra_debug_nht = 0; + zebra_debug_nexthop = 0; + zebra_debug_pbr = 0; + zebra_debug_neigh = 0; + + install_node(&debug_node); + + install_element(ENABLE_NODE, &show_debugging_zebra_cmd); + + install_element(ENABLE_NODE, &debug_zebra_events_cmd); + install_element(ENABLE_NODE, &debug_zebra_nht_cmd); + install_element(ENABLE_NODE, &debug_zebra_mpls_cmd); + install_element(ENABLE_NODE, &debug_zebra_vxlan_cmd); + install_element(ENABLE_NODE, &debug_zebra_pw_cmd); + install_element(ENABLE_NODE, &debug_zebra_packet_cmd); + install_element(ENABLE_NODE, &debug_zebra_kernel_cmd); +#if defined(HAVE_NETLINK) + install_element(ENABLE_NODE, &debug_zebra_kernel_msgdump_cmd); +#endif + install_element(ENABLE_NODE, &debug_zebra_rib_cmd); + install_element(ENABLE_NODE, &debug_zebra_fpm_cmd); + install_element(ENABLE_NODE, 
&debug_zebra_dplane_cmd); + install_element(ENABLE_NODE, &debug_zebra_mlag_cmd); + install_element(ENABLE_NODE, &debug_zebra_nexthop_cmd); + install_element(ENABLE_NODE, &debug_zebra_pbr_cmd); + install_element(ENABLE_NODE, &debug_zebra_neigh_cmd); + install_element(ENABLE_NODE, &debug_zebra_dplane_dpdk_cmd); + install_element(ENABLE_NODE, &no_debug_zebra_events_cmd); + install_element(ENABLE_NODE, &no_debug_zebra_nht_cmd); + install_element(ENABLE_NODE, &no_debug_zebra_mpls_cmd); + install_element(ENABLE_NODE, &no_debug_zebra_vxlan_cmd); + install_element(ENABLE_NODE, &no_debug_zebra_packet_cmd); + install_element(ENABLE_NODE, &no_debug_zebra_kernel_cmd); +#if defined(HAVE_NETLINK) + install_element(ENABLE_NODE, &no_debug_zebra_kernel_msgdump_cmd); +#endif + install_element(ENABLE_NODE, &no_debug_zebra_rib_cmd); + install_element(ENABLE_NODE, &no_debug_zebra_fpm_cmd); + install_element(ENABLE_NODE, &no_debug_zebra_dplane_cmd); + install_element(ENABLE_NODE, &no_debug_zebra_pbr_cmd); + install_element(ENABLE_NODE, &debug_zebra_evpn_mh_cmd); + + install_element(CONFIG_NODE, &debug_zebra_events_cmd); + install_element(CONFIG_NODE, &debug_zebra_nht_cmd); + install_element(CONFIG_NODE, &debug_zebra_mpls_cmd); + install_element(CONFIG_NODE, &debug_zebra_vxlan_cmd); + install_element(CONFIG_NODE, &debug_zebra_pw_cmd); + install_element(CONFIG_NODE, &debug_zebra_packet_cmd); + install_element(CONFIG_NODE, &debug_zebra_kernel_cmd); +#if defined(HAVE_NETLINK) + install_element(CONFIG_NODE, &debug_zebra_kernel_msgdump_cmd); +#endif + install_element(CONFIG_NODE, &debug_zebra_rib_cmd); + install_element(CONFIG_NODE, &debug_zebra_fpm_cmd); + install_element(CONFIG_NODE, &debug_zebra_dplane_cmd); + install_element(CONFIG_NODE, &debug_zebra_dplane_dpdk_cmd); + install_element(CONFIG_NODE, &debug_zebra_nexthop_cmd); + install_element(CONFIG_NODE, &debug_zebra_pbr_cmd); + install_element(CONFIG_NODE, &debug_zebra_neigh_cmd); + + install_element(CONFIG_NODE, 
&no_debug_zebra_events_cmd); + install_element(CONFIG_NODE, &no_debug_zebra_nht_cmd); + install_element(CONFIG_NODE, &no_debug_zebra_mpls_cmd); + install_element(CONFIG_NODE, &no_debug_zebra_vxlan_cmd); + install_element(CONFIG_NODE, &no_debug_zebra_packet_cmd); + install_element(CONFIG_NODE, &no_debug_zebra_kernel_cmd); +#if defined(HAVE_NETLINK) + install_element(CONFIG_NODE, &no_debug_zebra_kernel_msgdump_cmd); +#endif + install_element(CONFIG_NODE, &no_debug_zebra_rib_cmd); + install_element(CONFIG_NODE, &no_debug_zebra_fpm_cmd); + install_element(CONFIG_NODE, &no_debug_zebra_dplane_cmd); + install_element(CONFIG_NODE, &no_debug_zebra_pbr_cmd); + install_element(CONFIG_NODE, &debug_zebra_mlag_cmd); + install_element(CONFIG_NODE, &debug_zebra_evpn_mh_cmd); +} diff --git a/zebra/debug.h b/zebra/debug.h new file mode 100644 index 0000000..73546de --- /dev/null +++ b/zebra/debug.h @@ -0,0 +1,161 @@ +/* + * Zebra debug related function + * Copyright (C) 1999 Kunihiro Ishiguro + * + * This file is part of GNU Zebra. + * + * GNU Zebra is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License as published by the + * Free Software Foundation; either version 2, or (at your option) any + * later version. + * + * GNU Zebra is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. + * + * You should have received a copy of the GNU General Public License along + * with this program; see the file COPYING; if not, write to the Free Software + * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA + */ + +#ifndef _ZEBRA_DEBUG_H +#define _ZEBRA_DEBUG_H + +#include "lib/vty.h" + +#ifdef __cplusplus +extern "C" { +#endif + +/* Debug flags. 
*/ +#define ZEBRA_DEBUG_EVENT 0x01 + +#define ZEBRA_DEBUG_PACKET 0x01 +#define ZEBRA_DEBUG_SEND 0x20 +#define ZEBRA_DEBUG_RECV 0x40 +#define ZEBRA_DEBUG_DETAIL 0x80 + +#define ZEBRA_DEBUG_KERNEL 0x01 +#define ZEBRA_DEBUG_KERNEL_MSGDUMP_SEND 0x20 +#define ZEBRA_DEBUG_KERNEL_MSGDUMP_RECV 0x40 + +#define ZEBRA_DEBUG_RIB 0x01 +#define ZEBRA_DEBUG_RIB_DETAILED 0x02 + +#define ZEBRA_DEBUG_FPM 0x01 + +#define ZEBRA_DEBUG_NHT 0x01 +#define ZEBRA_DEBUG_NHT_DETAILED 0x02 + +#define ZEBRA_DEBUG_MPLS 0x01 +#define ZEBRA_DEBUG_MPLS_DETAILED 0x02 + +#define ZEBRA_DEBUG_VXLAN 0x01 + +#define ZEBRA_DEBUG_PW 0x01 + +#define ZEBRA_DEBUG_DPLANE 0x01 +#define ZEBRA_DEBUG_DPLANE_DETAILED 0x02 + +#define ZEBRA_DEBUG_DPLANE_DPDK 0x01 +#define ZEBRA_DEBUG_DPLANE_DPDK_DETAIL 0x02 + +#define ZEBRA_DEBUG_MLAG 0x01 + +#define ZEBRA_DEBUG_NHG 0x01 +#define ZEBRA_DEBUG_NHG_DETAILED 0x02 + +#define ZEBRA_DEBUG_EVPN_MH_ES 0x01 +#define ZEBRA_DEBUG_EVPN_MH_NH 0x02 +#define ZEBRA_DEBUG_EVPN_MH_MAC 0x04 +#define ZEBRA_DEBUG_EVPN_MH_NEIGH 0x08 + +#define ZEBRA_DEBUG_PBR 0x01 + +#define ZEBRA_DEBUG_NEIGH 0x01 + +/* Debug related macro. 
*/ +#define IS_ZEBRA_DEBUG_EVENT (zebra_debug_event & ZEBRA_DEBUG_EVENT) + +#define IS_ZEBRA_DEBUG_PACKET (zebra_debug_packet & ZEBRA_DEBUG_PACKET) +#define IS_ZEBRA_DEBUG_SEND (zebra_debug_packet & ZEBRA_DEBUG_SEND) +#define IS_ZEBRA_DEBUG_RECV (zebra_debug_packet & ZEBRA_DEBUG_RECV) +#define IS_ZEBRA_DEBUG_DETAIL (zebra_debug_packet & ZEBRA_DEBUG_DETAIL) + +#define IS_ZEBRA_DEBUG_KERNEL (zebra_debug_kernel & ZEBRA_DEBUG_KERNEL) +#define IS_ZEBRA_DEBUG_KERNEL_MSGDUMP_SEND \ + (zebra_debug_kernel & ZEBRA_DEBUG_KERNEL_MSGDUMP_SEND) +#define IS_ZEBRA_DEBUG_KERNEL_MSGDUMP_RECV \ + (zebra_debug_kernel & ZEBRA_DEBUG_KERNEL_MSGDUMP_RECV) + +#define IS_ZEBRA_DEBUG_RIB \ + (zebra_debug_rib & (ZEBRA_DEBUG_RIB | ZEBRA_DEBUG_RIB_DETAILED)) +#define IS_ZEBRA_DEBUG_RIB_DETAILED (zebra_debug_rib & ZEBRA_DEBUG_RIB_DETAILED) + +#define IS_ZEBRA_DEBUG_FPM (zebra_debug_fpm & ZEBRA_DEBUG_FPM) + +#define IS_ZEBRA_DEBUG_NHT (zebra_debug_nht & ZEBRA_DEBUG_NHT) +#define IS_ZEBRA_DEBUG_NHT_DETAILED (zebra_debug_nht & ZEBRA_DEBUG_NHT_DETAILED) + +#define IS_ZEBRA_DEBUG_MPLS (zebra_debug_mpls & ZEBRA_DEBUG_MPLS) +#define IS_ZEBRA_DEBUG_MPLS_DETAIL \ + (zebra_debug_mpls & ZEBRA_DEBUG_MPLS_DETAILED) +#define IS_ZEBRA_DEBUG_VXLAN (zebra_debug_vxlan & ZEBRA_DEBUG_VXLAN) +#define IS_ZEBRA_DEBUG_PW (zebra_debug_pw & ZEBRA_DEBUG_PW) + +#define IS_ZEBRA_DEBUG_DPLANE (zebra_debug_dplane & ZEBRA_DEBUG_DPLANE) +#define IS_ZEBRA_DEBUG_DPLANE_DETAIL \ + (zebra_debug_dplane & ZEBRA_DEBUG_DPLANE_DETAILED) + +#define IS_ZEBRA_DEBUG_DPLANE_DPDK \ + (zebra_debug_dplane & ZEBRA_DEBUG_DPLANE_DPDK) +#define IS_ZEBRA_DEBUG_DPLANE_DPDK_DETAIL \ + (zebra_debug_dplane & ZEBRA_DEBUG_DPLANE_DPDK_DETAIL) + +#define IS_ZEBRA_DEBUG_MLAG (zebra_debug_mlag & ZEBRA_DEBUG_MLAG) + +#define IS_ZEBRA_DEBUG_NHG (zebra_debug_nexthop & ZEBRA_DEBUG_NHG) + +#define IS_ZEBRA_DEBUG_NHG_DETAIL \ + (zebra_debug_nexthop & ZEBRA_DEBUG_NHG_DETAILED) + +#define IS_ZEBRA_DEBUG_EVPN_MH_ES \ + (zebra_debug_evpn_mh & ZEBRA_DEBUG_EVPN_MH_ES) 
+#define IS_ZEBRA_DEBUG_EVPN_MH_NH \ + (zebra_debug_evpn_mh & ZEBRA_DEBUG_EVPN_MH_NH) +#define IS_ZEBRA_DEBUG_EVPN_MH_MAC \ + (zebra_debug_evpn_mh & ZEBRA_DEBUG_EVPN_MH_MAC) +#define IS_ZEBRA_DEBUG_EVPN_MH_NEIGH \ + (zebra_debug_evpn_mh & ZEBRA_DEBUG_EVPN_MH_NEIGH) + +#define IS_ZEBRA_DEBUG_PBR (zebra_debug_pbr & ZEBRA_DEBUG_PBR) + +#define IS_ZEBRA_DEBUG_NEIGH (zebra_debug_neigh & ZEBRA_DEBUG_NEIGH) + +extern unsigned long zebra_debug_event; +extern unsigned long zebra_debug_packet; +extern unsigned long zebra_debug_kernel; +extern unsigned long zebra_debug_rib; +extern unsigned long zebra_debug_fpm; +extern unsigned long zebra_debug_nht; +extern unsigned long zebra_debug_mpls; +extern unsigned long zebra_debug_vxlan; +extern unsigned long zebra_debug_pw; +extern unsigned long zebra_debug_dplane; +extern unsigned long zebra_debug_dplane_dpdk; +extern unsigned long zebra_debug_mlag; +extern unsigned long zebra_debug_nexthop; +extern unsigned long zebra_debug_evpn_mh; +extern unsigned long zebra_debug_pbr; +extern unsigned long zebra_debug_neigh; + +extern void zebra_debug_init(void); + +DECLARE_HOOK(zebra_debug_show_debugging, (struct vty *vty), (vty)); + +#ifdef __cplusplus +} +#endif + +#endif /* _ZEBRA_DEBUG_H */ diff --git a/zebra/debug_nl.c b/zebra/debug_nl.c new file mode 100644 index 0000000..afefab6 --- /dev/null +++ b/zebra/debug_nl.c @@ -0,0 +1,1784 @@ +/* + * Copyright (c) 2018 Rafael Zalamena + * + * Permission to use, copy, modify, and/or distribute this software for any + * purpose with or without fee is hereby granted, provided that the above + * copyright notice and this permission notice appear in all copies. + * + * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES + * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF + * MERCHANTABILITY AND FITNESS. 
IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR + * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES + * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN + * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF + * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. + */ + +#include <zebra.h> + +#if defined(HAVE_NETLINK) && defined(NETLINK_DEBUG) + +#include <sys/socket.h> + +#include <linux/netconf.h> +#include <linux/netlink.h> +#include <linux/nexthop.h> +#include <linux/rtnetlink.h> +#include <net/if_arp.h> +#include <linux/fib_rules.h> +#include <linux/lwtunnel.h> + +#include <stdio.h> +#include <stdint.h> + +#include "zebra/rt_netlink.h" +#include "zebra/kernel_netlink.h" +#include "lib/vxlan.h" + +const char *nlmsg_type2str(uint16_t type) +{ + switch (type) { + /* Generic */ + case NLMSG_NOOP: + return "NOOP"; + case NLMSG_ERROR: + return "ERROR"; + case NLMSG_DONE: + return "DONE"; + case NLMSG_OVERRUN: + return "OVERRUN"; + + /* RTM */ + case RTM_NEWLINK: + return "NEWLINK"; + case RTM_DELLINK: + return "DELLINK"; + case RTM_GETLINK: + return "GETLINK"; + case RTM_SETLINK: + return "SETLINK"; + + case RTM_NEWADDR: + return "NEWADDR"; + case RTM_DELADDR: + return "DELADDR"; + case RTM_GETADDR: + return "GETADDR"; + + case RTM_NEWROUTE: + return "NEWROUTE"; + case RTM_DELROUTE: + return "DELROUTE"; + case RTM_GETROUTE: + return "GETROUTE"; + + case RTM_NEWNEIGH: + return "NEWNEIGH"; + case RTM_DELNEIGH: + return "DELNEIGH"; + case RTM_GETNEIGH: + return "GETNEIGH"; + + case RTM_NEWRULE: + return "NEWRULE"; + case RTM_DELRULE: + return "DELRULE"; + case RTM_GETRULE: + return "GETRULE"; + + case RTM_NEWNEXTHOP: + return "NEWNEXTHOP"; + case RTM_DELNEXTHOP: + return "DELNEXTHOP"; + case RTM_GETNEXTHOP: + return "GETNEXTHOP"; + + case RTM_NEWTUNNEL: + return "NEWTUNNEL"; + case RTM_DELTUNNEL: + return "DELTUNNEL"; + case RTM_GETTUNNEL: + return "GETTUNNEL"; + + case RTM_NEWNETCONF: + return 
"RTM_NEWNETCONF"; + case RTM_DELNETCONF: + return "RTM_DELNETCONF"; + + default: + return "UNKNOWN"; + } +} + +const char *af_type2str(int type) +{ + switch (type) { + case AF_UNSPEC: + return "AF_UNSPEC"; + case AF_UNIX: + return "AF_UNIX"; + case AF_INET: + return "AF_INET"; + case AF_INET6: + return "AF_INET6"; + case AF_BRIDGE: + return "AF_BRIDGE"; + case AF_NETLINK: + return "AF_NETLINK"; +#ifdef AF_MPLS + case AF_MPLS: + return "AF_MPLS"; +#endif /* AF_MPLS */ + case AF_BLUETOOTH: + return "AF_BLUETOOTH"; + case AF_VSOCK: + return "AF_VSOCK"; + case AF_KEY: + return "AF_KEY"; + case AF_PACKET: + return "AF_PACKET"; + default: + return "UNKNOWN"; + } +} + +const char *ifi_type2str(int type) +{ + switch (type) { + case ARPHRD_ETHER: + return "ETHER"; + case ARPHRD_EETHER: + return "EETHER"; + case ARPHRD_NETROM: + return "NETROM"; + case ARPHRD_AX25: + return "AX25"; + case ARPHRD_PRONET: + return "PRONET"; + case ARPHRD_CHAOS: + return "CHAOS"; + case ARPHRD_IEEE802: + return "IEEE802"; + case ARPHRD_ARCNET: + return "ARCNET"; + case ARPHRD_APPLETLK: + return "APPLETLK"; + case ARPHRD_DLCI: + return "DLCI"; + case ARPHRD_ATM: + return "ATM"; + case ARPHRD_METRICOM: + return "METRICOM"; + case ARPHRD_IEEE1394: + return "IEEE1394"; + case ARPHRD_EUI64: + return "EUI64"; + case ARPHRD_INFINIBAND: + return "INFINIBAND"; + case ARPHRD_SLIP: + return "SLIP"; + case ARPHRD_CSLIP: + return "CSLIP"; + case ARPHRD_SLIP6: + return "SLIP6"; + case ARPHRD_CSLIP6: + return "CSLIP6"; + case ARPHRD_RSRVD: + return "RSRVD"; + case ARPHRD_ADAPT: + return "ADAPT"; + case ARPHRD_ROSE: + return "ROSE"; + case ARPHRD_X25: + return "X25"; + case ARPHRD_PPP: + return "PPP"; + case ARPHRD_HDLC: + return "HDLC"; + case ARPHRD_LAPB: + return "LAPB"; + case ARPHRD_DDCMP: + return "DDCMP"; + case ARPHRD_RAWHDLC: + return "RAWHDLC"; + case ARPHRD_TUNNEL: + return "TUNNEL"; + case ARPHRD_TUNNEL6: + return "TUNNEL6"; + case ARPHRD_FRAD: + return "FRAD"; + case ARPHRD_SKIP: + return "SKIP"; 
+ case ARPHRD_LOOPBACK: + return "LOOPBACK"; + case ARPHRD_LOCALTLK: + return "LOCALTLK"; + case ARPHRD_FDDI: + return "FDDI"; + case ARPHRD_BIF: + return "BIF"; + case ARPHRD_SIT: + return "SIT"; + case ARPHRD_IPDDP: + return "IPDDP"; + case ARPHRD_IPGRE: + return "IPGRE"; + case ARPHRD_PIMREG: + return "PIMREG"; + case ARPHRD_HIPPI: + return "HIPPI"; + case ARPHRD_ASH: + return "ASH"; + case ARPHRD_ECONET: + return "ECONET"; + case ARPHRD_IRDA: + return "IRDA"; + case ARPHRD_FCPP: + return "FCPP"; + case ARPHRD_FCAL: + return "FCAL"; + case ARPHRD_FCPL: + return "FCPL"; + case ARPHRD_FCFABRIC: + return "FCFABRIC"; + case ARPHRD_IEEE802_TR: + return "IEEE802_TR"; + case ARPHRD_IEEE80211: + return "IEEE80211"; + case ARPHRD_IEEE80211_PRISM: + return "IEEE80211_PRISM"; + case ARPHRD_IEEE80211_RADIOTAP: + return "IEEE80211_RADIOTAP"; + case ARPHRD_IEEE802154: + return "IEEE802154"; +#ifdef ARPHRD_VSOCKMON + case ARPHRD_VSOCKMON: + return "VSOCKMON"; +#endif /* ARPHRD_VSOCKMON */ + case ARPHRD_VOID: + return "VOID"; + case ARPHRD_NONE: + return "NONE"; + default: + return "UNKNOWN"; + } +} + +const char *ifla_pdr_type2str(int type) +{ + switch (type) { + case IFLA_PROTO_DOWN_REASON_UNSPEC: + return "UNSPEC"; + case IFLA_PROTO_DOWN_REASON_MASK: + return "MASK"; + case IFLA_PROTO_DOWN_REASON_VALUE: + return "VALUE"; + default: + return "UNKNOWN"; + } +} + +const char *ifla_info_type2str(int type) +{ + switch (type) { + case IFLA_INFO_UNSPEC: + return "UNSPEC"; + case IFLA_INFO_KIND: + return "KIND"; + case IFLA_INFO_DATA: + return "DATA"; + case IFLA_INFO_XSTATS: + return "XSTATS"; + case IFLA_INFO_SLAVE_KIND: + return "SLAVE_KIND"; + case IFLA_INFO_SLAVE_DATA: + return "SLAVE_DATA"; + default: + return "UNKNOWN"; + } +} + +const char *rta_type2str(int type) +{ + switch (type) { + case IFLA_UNSPEC: + return "UNSPEC"; + case IFLA_ADDRESS: + return "ADDRESS"; + case IFLA_BROADCAST: + return "BROADCAST"; + case IFLA_IFNAME: + return "IFNAME"; + case IFLA_MTU: + return 
"MTU"; + case IFLA_LINK: + return "LINK"; + case IFLA_QDISC: + return "QDISC"; + case IFLA_STATS: + return "STATS"; + case IFLA_COST: + return "COST"; + case IFLA_PRIORITY: + return "PRIORITY"; + case IFLA_MASTER: + return "MASTER"; + case IFLA_WIRELESS: + return "WIRELESS"; + case IFLA_PROTINFO: + return "PROTINFO"; + case IFLA_TXQLEN: + return "TXQLEN"; + case IFLA_MAP: + return "MAP"; + case IFLA_WEIGHT: + return "WEIGHT"; + case IFLA_OPERSTATE: + return "OPERSTATE"; + case IFLA_LINKMODE: + return "LINKMODE"; + case IFLA_LINKINFO: + return "LINKINFO"; + case IFLA_NET_NS_PID: + return "NET_NS_PID"; + case IFLA_IFALIAS: + return "IFALIAS"; + case IFLA_NUM_VF: + return "NUM_VF"; + case IFLA_VFINFO_LIST: + return "VFINFO_LIST"; + case IFLA_STATS64: + return "STATS64"; + case IFLA_VF_PORTS: + return "VF_PORTS"; + case IFLA_PORT_SELF: + return "PORT_SELF"; + case IFLA_AF_SPEC: + return "AF_SPEC"; + case IFLA_GROUP: + return "GROUP"; + case IFLA_NET_NS_FD: + return "NET_NS_FD"; + case IFLA_EXT_MASK: + return "EXT_MASK"; + case IFLA_PROMISCUITY: + return "PROMISCUITY"; + case IFLA_NUM_TX_QUEUES: + return "NUM_TX_QUEUES"; + case IFLA_NUM_RX_QUEUES: + return "NUM_RX_QUEUES"; + case IFLA_CARRIER: + return "CARRIER"; + case IFLA_PHYS_PORT_ID: + return "PHYS_PORT_ID"; + case IFLA_CARRIER_CHANGES: + return "CARRIER_CHANGES"; + case IFLA_PHYS_SWITCH_ID: + return "PHYS_SWITCH_ID"; + case IFLA_LINK_NETNSID: + return "LINK_NETNSID"; + case IFLA_PHYS_PORT_NAME: + return "PHYS_PORT_NAME"; + case IFLA_PROTO_DOWN: + return "PROTO_DOWN"; +#ifdef IFLA_GSO_MAX_SEGS + case IFLA_GSO_MAX_SEGS: + return "GSO_MAX_SEGS"; +#endif /* IFLA_GSO_MAX_SEGS */ +#ifdef IFLA_GSO_MAX_SIZE + case IFLA_GSO_MAX_SIZE: + return "GSO_MAX_SIZE"; +#endif /* IFLA_GSO_MAX_SIZE */ +#ifdef IFLA_PAD + case IFLA_PAD: + return "PAD"; +#endif /* IFLA_PAD */ +#ifdef IFLA_XDP + case IFLA_XDP: + return "XDP"; +#endif /* IFLA_XDP */ +#ifdef IFLA_EVENT + case IFLA_EVENT: + return "EVENT"; +#endif /* IFLA_EVENT */ + case 
IFLA_PROTO_DOWN_REASON: + return "PROTO_DOWN_REASON"; + default: + return "UNKNOWN"; + } +} + +const char *rtm_type2str(int type) +{ + switch (type) { + case RTN_UNSPEC: + return "UNSPEC"; + case RTN_UNICAST: + return "UNICAST"; + case RTN_LOCAL: + return "LOCAL"; + case RTN_BROADCAST: + return "BROADCAST"; + case RTN_ANYCAST: + return "ANYCAST"; + case RTN_MULTICAST: + return "MULTICAST"; + case RTN_BLACKHOLE: + return "BLACKHOLE"; + case RTN_UNREACHABLE: + return "UNREACHABLE"; + case RTN_PROHIBIT: + return "PROHIBIT"; + case RTN_THROW: + return "THROW"; + case RTN_NAT: + return "NAT"; + case RTN_XRESOLVE: + return "XRESOLVE"; + default: + return "UNKNOWN"; + } +} + +const char *rtm_protocol2str(int type) +{ + switch (type) { + case RTPROT_UNSPEC: + return "UNSPEC"; + case RTPROT_REDIRECT: + return "REDIRECT"; + case RTPROT_KERNEL: + return "KERNEL"; + case RTPROT_BOOT: + return "BOOT"; + case RTPROT_STATIC: + return "STATIC"; + case RTPROT_GATED: + return "GATED"; + case RTPROT_RA: + return "RA"; + case RTPROT_MRT: + return "MRT"; + case RTPROT_ZEBRA: + return "ZEBRA"; + case RTPROT_BGP: + return "BGP"; + case RTPROT_ISIS: + return "ISIS"; + case RTPROT_OSPF: + return "OSPF"; + case RTPROT_BIRD: + return "BIRD"; + case RTPROT_DNROUTED: + return "DNROUTED"; + case RTPROT_XORP: + return "XORP"; + case RTPROT_NTK: + return "NTK"; + case RTPROT_DHCP: + return "DHCP"; + case RTPROT_MROUTED: + return "MROUTED"; + case RTPROT_BABEL: + return "BABEL"; + default: + return "UNKNOWN"; + } +} + +const char *rtm_scope2str(int type) +{ + switch (type) { + case RT_SCOPE_UNIVERSE: + return "UNIVERSE"; + case RT_SCOPE_SITE: + return "SITE"; + case RT_SCOPE_LINK: + return "LINK"; + case RT_SCOPE_HOST: + return "HOST"; + case RT_SCOPE_NOWHERE: + return "NOWHERE"; + default: + return "UNKNOWN"; + } +} + +const char *rtm_rta2str(int type) +{ + switch (type) { + case RTA_UNSPEC: + return "UNSPEC"; + case RTA_DST: + return "DST"; + case RTA_SRC: + return "SRC"; + case RTA_IIF: + 
return "IIF"; + case RTA_OIF: + return "OIF"; + case RTA_GATEWAY: + return "GATEWAY"; + case RTA_PRIORITY: + return "PRIORITY"; + case RTA_PREF: + return "PREF"; + case RTA_PREFSRC: + return "PREFSRC"; + case RTA_MARK: + return "MARK"; + case RTA_METRICS: + return "METRICS"; + case RTA_MULTIPATH: + return "MULTIPATH"; + case RTA_PROTOINFO: + return "PROTOINFO"; + case RTA_FLOW: + return "FLOW"; + case RTA_CACHEINFO: + return "CACHEINFO"; + case RTA_TABLE: + return "TABLE"; + case RTA_MFC_STATS: + return "MFC_STATS"; + case RTA_NH_ID: + return "NH_ID"; + case RTA_EXPIRES: + return "EXPIRES"; + default: + return "UNKNOWN"; + } +} + +const char *neigh_rta2str(int type) +{ + switch (type) { + case NDA_UNSPEC: + return "UNSPEC"; + case NDA_DST: + return "DST"; + case NDA_LLADDR: + return "LLADDR"; + case NDA_CACHEINFO: + return "CACHEINFO"; + case NDA_PROBES: + return "PROBES"; + case NDA_VLAN: + return "VLAN"; + case NDA_PORT: + return "PORT"; + case NDA_VNI: + return "VNI"; + case NDA_IFINDEX: + return "IFINDEX"; + case NDA_MASTER: + return "MASTER"; + case NDA_LINK_NETNSID: + return "LINK_NETNSID"; + default: + return "UNKNOWN"; + } +} + +const char *ifa_rta2str(int type) +{ + switch (type) { + case IFA_UNSPEC: + return "UNSPEC"; + case IFA_ADDRESS: + return "ADDRESS"; + case IFA_LOCAL: + return "LOCAL"; + case IFA_LABEL: + return "LABEL"; + case IFA_BROADCAST: + return "BROADCAST"; + case IFA_ANYCAST: + return "ANYCAST"; + case IFA_CACHEINFO: + return "CACHEINFO"; + case IFA_MULTICAST: + return "MULTICAST"; + case IFA_FLAGS: + return "FLAGS"; + default: + return "UNKNOWN"; + } +} + +const char *nhm_rta2str(int type) +{ + switch (type) { + case NHA_UNSPEC: + return "UNSPEC"; + case NHA_ID: + return "ID"; + case NHA_GROUP: + return "GROUP"; + case NHA_GROUP_TYPE: + return "GROUP_TYPE"; + case NHA_BLACKHOLE: + return "BLACKHOLE"; + case NHA_OIF: + return "OIF"; + case NHA_GATEWAY: + return "GATEWAY"; + case NHA_ENCAP_TYPE: + return "ENCAP_TYPE"; + case NHA_ENCAP: + 
return "ENCAP"; + case NHA_GROUPS: + return "GROUPS"; + case NHA_MASTER: + return "MASTER"; + default: + return "UNKNOWN"; + } +} + +const char *frh_rta2str(int type) +{ + switch (type) { + case FRA_DST: + return "DST"; + case FRA_SRC: + return "SRC"; + case FRA_IIFNAME: + return "IIFNAME"; + case FRA_GOTO: + return "GOTO"; + case FRA_UNUSED2: + return "UNUSED2"; + case FRA_PRIORITY: + return "PRIORITY"; + case FRA_UNUSED3: + return "UNUSED3"; + case FRA_UNUSED4: + return "UNUSED4"; + case FRA_UNUSED5: + return "UNUSED5"; + case FRA_FWMARK: + return "FWMARK"; + case FRA_FLOW: + return "FLOW"; + case FRA_TUN_ID: + return "TUN_ID"; + case FRA_SUPPRESS_IFGROUP: + return "SUPPRESS_IFGROUP"; + case FRA_SUPPRESS_PREFIXLEN: + return "SUPPRESS_PREFIXLEN"; + case FRA_TABLE: + return "TABLE"; + case FRA_FWMASK: + return "FWMASK"; + case FRA_OIFNAME: + return "OIFNAME"; + case FRA_PAD: + return "PAD"; + case FRA_L3MDEV: + return "L3MDEV"; + case FRA_UID_RANGE: + return "UID_RANGE"; + case FRA_PROTOCOL: + return "PROTOCOL"; + case FRA_IP_PROTO: + return "IP_PROTO"; + case FRA_SPORT_RANGE: + return "SPORT_RANGE"; + case FRA_DPORT_RANGE: + return "DPORT_RANGE"; + default: + return "UNKNOWN"; + } +} + +const char *frh_action2str(uint8_t action) +{ + switch (action) { + case FR_ACT_TO_TBL: + return "TO_TBL"; + case FR_ACT_GOTO: + return "GOTO"; + case FR_ACT_NOP: + return "NOP"; + case FR_ACT_RES3: + return "RES3"; + case FR_ACT_RES4: + return "RES4"; + case FR_ACT_BLACKHOLE: + return "BLACKHOLE"; + case FR_ACT_UNREACHABLE: + return "UNREACHABLE"; + case FR_ACT_PROHIBIT: + return "PROHIBIT"; + default: + return "UNKNOWN"; + } +} + +static const char *ncm_rta2str(int type) +{ + switch (type) { + case NETCONFA_UNSPEC: + return "UNSPEC"; + case NETCONFA_IFINDEX: + return "IFINDEX"; + case NETCONFA_FORWARDING: + return "FORWARDING"; + case NETCONFA_RP_FILTER: + return "RP_FILTER"; + case NETCONFA_MC_FORWARDING: + return "MCAST"; + case NETCONFA_PROXY_NEIGH: + return "PROXY_NEIGH"; + 
case NETCONFA_IGNORE_ROUTES_WITH_LINKDOWN: + return "IGNORE_LINKDOWN"; + case NETCONFA_INPUT: + return "MPLS"; + case NETCONFA_BC_FORWARDING: + return "BCAST"; + default: + return "UNKNOWN"; + } +} + +static void dump_on_off(uint32_t ival, const char *prefix) +{ + zlog_debug("%s%s", prefix, (ival != 0) ? "on" : "off"); +} + +static inline void flag_write(int flags, int flag, const char *flagstr, + char *buf, size_t buflen) +{ + if (CHECK_FLAG(flags, flag) == 0) + return; + + if (buf[0]) + strlcat(buf, ",", buflen); + + strlcat(buf, flagstr, buflen); +} + +const char *nlmsg_flags2str(uint16_t flags, char *buf, size_t buflen) +{ + const char *bufp = buf; + + *buf = 0; + /* Specific flags. */ + flag_write(flags, NLM_F_REQUEST, "REQUEST", buf, buflen); + flag_write(flags, NLM_F_MULTI, "MULTI", buf, buflen); + flag_write(flags, NLM_F_ACK, "ACK", buf, buflen); + flag_write(flags, NLM_F_ECHO, "ECHO", buf, buflen); + flag_write(flags, NLM_F_DUMP, "DUMP", buf, buflen); + + /* Netlink family type dependent. 
*/ + flag_write(flags, 0x0100, "(ROOT|REPLACE|CAPPED)", buf, buflen); + flag_write(flags, 0x0200, "(MATCH|EXCLUDE|ACK_TLVS)", buf, buflen); + flag_write(flags, 0x0400, "(ATOMIC|CREATE)", buf, buflen); + flag_write(flags, 0x0800, "(DUMP|APPEND)", buf, buflen); + + return (bufp); +} + +const char *if_flags2str(uint32_t flags, char *buf, size_t buflen) +{ + const char *bufp = buf; + + *buf = 0; + flag_write(flags, IFF_UP, "UP", buf, buflen); + flag_write(flags, IFF_BROADCAST, "BROADCAST", buf, buflen); + flag_write(flags, IFF_DEBUG, "DEBUG", buf, buflen); + flag_write(flags, IFF_LOOPBACK, "LOOPBACK", buf, buflen); + flag_write(flags, IFF_POINTOPOINT, "POINTOPOINT", buf, buflen); + flag_write(flags, IFF_NOTRAILERS, "NOTRAILERS", buf, buflen); + flag_write(flags, IFF_RUNNING, "RUNNING", buf, buflen); + flag_write(flags, IFF_NOARP, "NOARP", buf, buflen); + flag_write(flags, IFF_PROMISC, "PROMISC", buf, buflen); + flag_write(flags, IFF_ALLMULTI, "ALLMULTI", buf, buflen); + flag_write(flags, IFF_MASTER, "MASTER", buf, buflen); + flag_write(flags, IFF_SLAVE, "SLAVE", buf, buflen); + flag_write(flags, IFF_MULTICAST, "MULTICAST", buf, buflen); + flag_write(flags, IFF_PORTSEL, "PORTSEL", buf, buflen); + flag_write(flags, IFF_AUTOMEDIA, "AUTOMEDIA", buf, buflen); + flag_write(flags, IFF_DYNAMIC, "DYNAMIC", buf, buflen); + + return (bufp); +} + +const char *rtm_flags2str(uint32_t flags, char *buf, size_t buflen) +{ + const char *bufp = buf; + + *buf = 0; + flag_write(flags, RTM_F_NOTIFY, "NOTIFY", buf, buflen); + flag_write(flags, RTM_F_CLONED, "CLONED", buf, buflen); + flag_write(flags, RTM_F_EQUALIZE, "EQUALIZE", buf, buflen); + + return (bufp); +} + +const char *neigh_state2str(uint32_t flags, char *buf, size_t buflen) +{ + const char *bufp = buf; + + *buf = 0; + flag_write(flags, NUD_INCOMPLETE, "INCOMPLETE", buf, buflen); + flag_write(flags, NUD_REACHABLE, "REACHABLE", buf, buflen); + flag_write(flags, NUD_STALE, "STALE", buf, buflen); + flag_write(flags, NUD_DELAY, 
"DELAY", buf, buflen); + flag_write(flags, NUD_PROBE, "PROBE", buf, buflen); + flag_write(flags, NUD_FAILED, "FAILED", buf, buflen); + flag_write(flags, NUD_NOARP, "NOARP", buf, buflen); + flag_write(flags, NUD_PERMANENT, "PERMANENT", buf, buflen); + + return (bufp); +} + +const char *neigh_flags2str(uint32_t flags, char *buf, size_t buflen) +{ + const char *bufp = buf; + + *buf = 0; + flag_write(flags, NTF_USE, "USE", buf, buflen); + flag_write(flags, NTF_SELF, "SELF", buf, buflen); + flag_write(flags, NTF_MASTER, "MASTER", buf, buflen); + flag_write(flags, NTF_PROXY, "PROXY", buf, buflen); + flag_write(flags, NTF_EXT_LEARNED, "EXT_LEARNED", buf, buflen); +#ifdef NTF_OFFLOADED + flag_write(flags, NTF_OFFLOADED, "OFFLOADED", buf, buflen); +#endif /* NTF_OFFLOADED */ + flag_write(flags, NTF_ROUTER, "ROUTER", buf, buflen); + + return (bufp); +} + +const char *ifa_flags2str(uint32_t flags, char *buf, size_t buflen) +{ + const char *bufp = buf; + + *buf = 0; + flag_write(flags, IFA_F_SECONDARY, "SECONDARY", buf, buflen); + flag_write(flags, IFA_F_NODAD, "NODAD", buf, buflen); + flag_write(flags, IFA_F_OPTIMISTIC, "OPTIMISTIC", buf, buflen); + flag_write(flags, IFA_F_DADFAILED, "DADFAILED", buf, buflen); + flag_write(flags, IFA_F_HOMEADDRESS, "HOMEADDRESS", buf, buflen); + flag_write(flags, IFA_F_DEPRECATED, "DEPRECATED", buf, buflen); + flag_write(flags, IFA_F_TENTATIVE, "TENTATIVE", buf, buflen); + flag_write(flags, IFA_F_PERMANENT, "PERMANENT", buf, buflen); + flag_write(flags, IFA_F_MANAGETEMPADDR, "MANAGETEMPADDR", buf, buflen); + flag_write(flags, IFA_F_NOPREFIXROUTE, "NOPREFIXROUTE", buf, buflen); + flag_write(flags, IFA_F_MCAUTOJOIN, "MCAUTOJOIN", buf, buflen); + flag_write(flags, IFA_F_STABLE_PRIVACY, "STABLE_PRIVACY", buf, buflen); + + return (bufp); +} + +const char *nh_flags2str(uint32_t flags, char *buf, size_t buflen) +{ + const char *bufp = buf; + + *buf = 0; + flag_write(flags, RTNH_F_DEAD, "DEAD", buf, buflen); + flag_write(flags, RTNH_F_PERVASIVE, 
"PERVASIVE", buf, buflen); + flag_write(flags, RTNH_F_ONLINK, "ONLINK", buf, buflen); + flag_write(flags, RTNH_F_OFFLOAD, "OFFLOAD", buf, buflen); + flag_write(flags, RTNH_F_LINKDOWN, "LINKDOWN", buf, buflen); + flag_write(flags, RTNH_F_UNRESOLVED, "UNRESOLVED", buf, buflen); + + return (bufp); +} + +/* + * Netlink abstractions. + */ +static void nllink_pdr_dump(struct rtattr *rta, size_t msglen) +{ + size_t plen; + uint32_t u32v; + +next_rta: + /* Check the header for valid length and for outbound access. */ + if (RTA_OK(rta, msglen) == 0) + return; + + plen = RTA_PAYLOAD(rta); + zlog_debug(" linkinfo [len=%d (payload=%zu) type=(%d) %s]", + rta->rta_len, plen, rta->rta_type, + ifla_pdr_type2str(rta->rta_type)); + switch (rta->rta_type) { + case IFLA_PROTO_DOWN_REASON_MASK: + case IFLA_PROTO_DOWN_REASON_VALUE: + if (plen < sizeof(uint32_t)) { + zlog_debug(" invalid length"); + break; + } + + u32v = *(uint32_t *)RTA_DATA(rta); + zlog_debug(" %u", u32v); + break; + + default: + /* NOTHING: unhandled. */ + break; + } + + /* Get next pointer and start iteration again. */ + rta = RTA_NEXT(rta, msglen); + goto next_rta; +} + +static void nllink_linkinfo_dump(struct rtattr *rta, size_t msglen) +{ + size_t plen; + char dbuf[128]; + +next_rta: + /* Check the header for valid length and for outbound access. */ + if (RTA_OK(rta, msglen) == 0) + return; + + plen = RTA_PAYLOAD(rta); + zlog_debug(" linkinfo [len=%d (payload=%zu) type=(%d) %s]", + rta->rta_len, plen, rta->rta_type, + ifla_info_type2str(rta->rta_type)); + switch (rta->rta_type) { + case IFLA_INFO_KIND: + if (plen == 0) { + zlog_debug(" invalid length"); + break; + } + + snprintf(dbuf, sizeof(dbuf), "%s", (char *)RTA_DATA(rta)); + zlog_debug(" %s", dbuf); + break; + case IFLA_INFO_SLAVE_KIND: + if (plen == 0) { + zlog_debug(" invalid length"); + break; + } + + snprintf(dbuf, sizeof(dbuf), "%s", (char *)RTA_DATA(rta)); + zlog_debug(" %s", dbuf); + break; + + default: + /* NOTHING: unhandled. 
*/ + break; + } + + /* Get next pointer and start iteration again. */ + rta = RTA_NEXT(rta, msglen); + goto next_rta; +} + +static void nllink_dump(struct ifinfomsg *ifi, size_t msglen) +{ + uint8_t *datap; + struct rtattr *rta; + size_t plen, it; + uint32_t u32v; + uint8_t u8v; + char bytestr[16]; + char dbuf[128]; + unsigned short rta_type; + + /* Get the first attribute and go from there. */ + rta = IFLA_RTA(ifi); +next_rta: + /* Check the header for valid length and for outbound access. */ + if (RTA_OK(rta, msglen) == 0) + return; + + plen = RTA_PAYLOAD(rta); + rta_type = rta->rta_type & ~NLA_F_NESTED; + zlog_debug(" rta [len=%d (payload=%zu) type=(%d) %s]", rta->rta_len, + plen, rta_type, rta_type2str(rta_type)); + switch (rta_type) { + case IFLA_IFALIAS: + if (plen == 0) { + zlog_debug(" invalid length"); + break; + } + + snprintf(dbuf, sizeof(dbuf), "%s", (char *)RTA_DATA(rta)); + zlog_debug(" %s", dbuf); + break; + + case IFLA_MTU: + case IFLA_TXQLEN: + case IFLA_NUM_TX_QUEUES: + case IFLA_NUM_RX_QUEUES: + case IFLA_GROUP: + case IFLA_PROMISCUITY: +#ifdef IFLA_GSO_MAX_SEGS + case IFLA_GSO_MAX_SEGS: +#endif /* IFLA_GSO_MAX_SEGS */ +#ifdef IFLA_GSO_MAX_SIZE + case IFLA_GSO_MAX_SIZE: +#endif /* IFLA_GSO_MAX_SIZE */ + case IFLA_CARRIER_CHANGES: + case IFLA_MASTER: + case IFLA_LINK: + if (plen < sizeof(uint32_t)) { + zlog_debug(" invalid length"); + break; + } + + u32v = *(uint32_t *)RTA_DATA(rta); + zlog_debug(" %u", u32v); + break; + + case IFLA_PROTO_DOWN: + if (plen < sizeof(uint8_t)) { + zlog_debug(" invalid length"); + break; + } + + u8v = *(uint8_t *)RTA_DATA(rta); + zlog_debug(" %u", u8v); + break; + case IFLA_ADDRESS: + datap = RTA_DATA(rta); + dbuf[0] = 0; + for (it = 0; it < plen; it++) { + snprintf(bytestr, sizeof(bytestr), "%02X:", *datap); + strlcat(dbuf, bytestr, sizeof(dbuf)); + datap++; + } + /* Remove trailing ':'. */ + if (dbuf[0]) + dbuf[strlen(dbuf) - 1] = 0; + + zlog_debug(" %s", dbuf[0] ? 
dbuf : "<empty>"); + break; + + case IFLA_LINKINFO: + nllink_linkinfo_dump(RTA_DATA(rta), plen); + break; + + case IFLA_PROTO_DOWN_REASON: + nllink_pdr_dump(RTA_DATA(rta), plen); + break; + + default: + /* NOTHING: unhandled. */ + break; + } + + /* Get next pointer and start iteration again. */ + rta = RTA_NEXT(rta, msglen); + goto next_rta; +} + +static void nlroute_dump(struct rtmsg *rtm, size_t msglen) +{ + struct rta_mfc_stats *mfc_stats; + struct rtattr *rta; + size_t plen; + uint32_t u32v; + uint64_t u64v; + + /* Get the first attribute and go from there. */ + rta = RTM_RTA(rtm); +next_rta: + /* Check the header for valid length and for outbound access. */ + if (RTA_OK(rta, msglen) == 0) + return; + + plen = RTA_PAYLOAD(rta); + zlog_debug(" rta [len=%d (payload=%zu) type=(%d) %s]", rta->rta_len, + plen, rta->rta_type & NLA_TYPE_MASK, + rtm_rta2str(rta->rta_type & NLA_TYPE_MASK)); + switch (rta->rta_type & NLA_TYPE_MASK) { + case RTA_IIF: + case RTA_OIF: + case RTA_PRIORITY: + case RTA_TABLE: + case RTA_NH_ID: + u32v = *(uint32_t *)RTA_DATA(rta); + zlog_debug(" %u", u32v); + break; + + case RTA_EXPIRES: + u64v = *(uint64_t *)RTA_DATA(rta); + zlog_debug(" %" PRIu64, u64v); + break; + + case RTA_GATEWAY: + case RTA_DST: + case RTA_SRC: + case RTA_PREFSRC: + switch (plen) { + case sizeof(struct in_addr): + zlog_debug(" %pI4", + (struct in_addr *)RTA_DATA(rta)); + break; + case sizeof(struct in6_addr): + zlog_debug(" %pI6", + (struct in6_addr *)RTA_DATA(rta)); + break; + default: + break; + } + break; + + case RTA_MFC_STATS: + mfc_stats = (struct rta_mfc_stats *)RTA_DATA(rta); + zlog_debug(" pkts=%ju bytes=%ju wrong_if=%ju", + (uintmax_t)mfc_stats->mfcs_packets, + (uintmax_t)mfc_stats->mfcs_bytes, + (uintmax_t)mfc_stats->mfcs_wrong_if); + break; + + default: + /* NOTHING: unhandled. */ + break; + } + + /* Get next pointer and start iteration again. 
*/ + rta = RTA_NEXT(rta, msglen); + goto next_rta; +} + +static void nlneigh_dump(struct ndmsg *ndm, size_t msglen) +{ + struct rtattr *rta; + uint8_t *datap; + size_t plen, it; + uint16_t vid; + char bytestr[16]; + char dbuf[128]; + unsigned short rta_type; + +#ifndef NDA_RTA +#define NDA_RTA(ndm) \ + /* struct ndmsg *ndm; */ \ + ((struct rtattr *)(((uint8_t *)(ndm)) \ + + NLMSG_ALIGN(sizeof(struct ndmsg)))) +#endif /* NDA_RTA */ + + /* Get the first attribute and go from there. */ + rta = NDA_RTA(ndm); +next_rta: + /* Check the header for valid length and for outbound access. */ + if (RTA_OK(rta, msglen) == 0) + return; + + plen = RTA_PAYLOAD(rta); + rta_type = rta->rta_type & ~NLA_F_NESTED; + zlog_debug(" rta [len=%d (payload=%zu) type=(%d) %s]", rta->rta_len, + plen, rta->rta_type, neigh_rta2str(rta_type)); + switch (rta_type) { + case NDA_LLADDR: + datap = RTA_DATA(rta); + dbuf[0] = 0; + for (it = 0; it < plen; it++) { + snprintf(bytestr, sizeof(bytestr), "%02X:", *datap); + strlcat(dbuf, bytestr, sizeof(dbuf)); + datap++; + } + /* Remove trailing ':'. */ + if (dbuf[0]) + dbuf[strlen(dbuf) - 1] = 0; + + zlog_debug(" %s", dbuf[0] ? dbuf : "<empty>"); + break; + + case NDA_DST: + switch (plen) { + case sizeof(struct in_addr): + zlog_debug(" %pI4", + (struct in_addr *)RTA_DATA(rta)); + break; + case sizeof(struct in6_addr): + zlog_debug(" %pI6", + (struct in6_addr *)RTA_DATA(rta)); + break; + default: + break; + } + break; + + case NDA_VLAN: + vid = *(uint16_t *)RTA_DATA(rta); + zlog_debug(" %d", vid); + break; + + default: + /* NOTHING: unhandled. */ + break; + } + + /* Get next pointer and start iteration again. */ + rta = RTA_NEXT(rta, msglen); + goto next_rta; +} + +static void nlifa_dump(struct ifaddrmsg *ifa, size_t msglen) +{ + struct rtattr *rta; + size_t plen; + uint32_t u32v; + + /* Get the first attribute and go from there. */ + rta = IFA_RTA(ifa); +next_rta: + /* Check the header for valid length and for outbound access. 
*/ + if (RTA_OK(rta, msglen) == 0) + return; + + plen = RTA_PAYLOAD(rta); + zlog_debug(" rta [len=%d (payload=%zu) type=(%d) %s]", rta->rta_len, + plen, rta->rta_type, ifa_rta2str(rta->rta_type)); + switch (rta->rta_type) { + case IFA_UNSPEC: + u32v = *(uint32_t *)RTA_DATA(rta); + zlog_debug(" %u", u32v); + break; + + case IFA_LABEL: + zlog_debug(" %s", (const char *)RTA_DATA(rta)); + break; + + case IFA_ADDRESS: + case IFA_LOCAL: + case IFA_BROADCAST: + switch (plen) { + case 4: + zlog_debug(" %pI4", + (struct in_addr *)RTA_DATA(rta)); + break; + case 16: + zlog_debug(" %pI6", + (struct in6_addr *)RTA_DATA(rta)); + break; + default: + break; + } + break; + + default: + /* NOTHING: unhandled. */ + break; + } + + /* Get next pointer and start iteration again. */ + rta = RTA_NEXT(rta, msglen); + goto next_rta; +} + +static void nltnl_dump(struct tunnel_msg *tnlm, size_t msglen) +{ + struct rtattr *attr; + vni_t vni_start = 0, vni_end = 0; + struct rtattr *ttb[VXLAN_VNIFILTER_ENTRY_MAX + 1]; + uint8_t rta_type; + + attr = TUNNEL_RTA(tnlm); +next_attr: + /* Check the header for valid length and for outbound access. 
*/ + if (RTA_OK(attr, msglen) == 0) + return; + + rta_type = attr->rta_type & NLA_TYPE_MASK; + + if (rta_type != VXLAN_VNIFILTER_ENTRY) { + attr = RTA_NEXT(attr, msglen); + goto next_attr; + } + + memset(ttb, 0, sizeof(ttb)); + + netlink_parse_rtattr_flags(ttb, VXLAN_VNIFILTER_ENTRY_MAX, + RTA_DATA(attr), RTA_PAYLOAD(attr), + NLA_F_NESTED); + + if (ttb[VXLAN_VNIFILTER_ENTRY_START]) + vni_start = + *(uint32_t *)RTA_DATA(ttb[VXLAN_VNIFILTER_ENTRY_START]); + + if (ttb[VXLAN_VNIFILTER_ENTRY_END]) + vni_end = *(uint32_t *)RTA_DATA(ttb[VXLAN_VNIFILTER_ENTRY_END]); + zlog_debug(" vni_start %u, vni_end %u", vni_start, vni_end); + + attr = RTA_NEXT(attr, msglen); + goto next_attr; +} + +static const char *lwt_type2str(uint16_t type) +{ + switch (type) { + case LWTUNNEL_ENCAP_NONE: + return "NONE"; + case LWTUNNEL_ENCAP_MPLS: + return "MPLS"; + case LWTUNNEL_ENCAP_IP: + return "IPv4"; + case LWTUNNEL_ENCAP_ILA: + return "ILA"; + case LWTUNNEL_ENCAP_IP6: + return "IPv6"; + case LWTUNNEL_ENCAP_SEG6: + return "SEG6"; + case LWTUNNEL_ENCAP_BPF: + return "BPF"; + case LWTUNNEL_ENCAP_SEG6_LOCAL: + return "SEG6_LOCAL"; + default: + return "UNKNOWN"; + } +} + +static const char *nhg_type2str(uint16_t type) +{ + switch (type) { + case NEXTHOP_GRP_TYPE_MPATH: + return "MULTIPATH"; + case NEXTHOP_GRP_TYPE_RES: + return "RESILIENT MULTIPATH"; + default: + return "UNKNOWN"; + } +} + +static void nlnh_dump(struct nhmsg *nhm, size_t msglen) +{ + struct rtattr *rta; + int ifindex; + size_t plen; + uint16_t u16v; + uint32_t u32v; + unsigned long count, i; + struct nexthop_grp *nhgrp; + unsigned short rta_type; + + rta = RTM_NHA(nhm); + +next_rta: + /* Check the header for valid length and for outbound access. 
*/ + if (RTA_OK(rta, msglen) == 0) + return; + + plen = RTA_PAYLOAD(rta); + rta_type = rta->rta_type & ~NLA_F_NESTED; + zlog_debug(" rta [len=%d (payload=%zu) type=(%d) %s]", rta->rta_len, + plen, rta->rta_type, nhm_rta2str(rta_type)); + switch (rta_type) { + case NHA_ID: + u32v = *(uint32_t *)RTA_DATA(rta); + zlog_debug(" %u", u32v); + break; + case NHA_GROUP: + nhgrp = (struct nexthop_grp *)RTA_DATA(rta); + count = (RTA_PAYLOAD(rta) / sizeof(*nhgrp)); + if (count == 0 + || (count * sizeof(*nhgrp)) != RTA_PAYLOAD(rta)) { + zlog_debug(" invalid nexthop group received"); + return; + } + + for (i = 0; i < count; i++) + zlog_debug(" id %d weight %d", nhgrp[i].id, + nhgrp[i].weight); + break; + case NHA_ENCAP_TYPE: + u16v = *(uint16_t *)RTA_DATA(rta); + zlog_debug(" %s", lwt_type2str(u16v)); + break; + case NHA_GROUP_TYPE: + u16v = *(uint16_t *)RTA_DATA(rta); + zlog_debug(" %s", nhg_type2str(u16v)); + break; + case NHA_BLACKHOLE: + /* NOTHING */ + break; + case NHA_OIF: + ifindex = *(int *)RTA_DATA(rta); + zlog_debug(" %d", ifindex); + break; + case NHA_GATEWAY: + switch (nhm->nh_family) { + case AF_INET: + zlog_debug(" %pI4", + (struct in_addr *)RTA_DATA(rta)); + break; + case AF_INET6: + zlog_debug(" %pI6", + (struct in6_addr *)RTA_DATA(rta)); + break; + + default: + zlog_debug(" invalid family %d", nhm->nh_family); + break; + } + break; + case NHA_ENCAP: + /* TODO: handle MPLS labels. */ + zlog_debug(" unparsed MPLS labels"); + break; + case NHA_GROUPS: + /* TODO: handle this message. */ + zlog_debug(" unparsed GROUPS message"); + break; + + default: + /* NOTHING: unhandled. */ + break; + } + + /* Get next pointer and start iteration again. 
*/ + rta = RTA_NEXT(rta, msglen); + goto next_rta; +} + +static void nlrule_dump(struct fib_rule_hdr *frh, size_t msglen) +{ + struct rtattr *rta; + size_t plen; + uint8_t u8v; + uint32_t u32v; + int32_t s32v; + uint64_t u64v; + char dbuf[128]; + struct fib_rule_uid_range *u_range; + struct fib_rule_port_range *p_range; + + /* Get the first attribute and go from there. */ + rta = RTM_RTA(frh); +next_rta: + /* Check the header for valid length and for outbound access. */ + if (RTA_OK(rta, msglen) == 0) + return; + + plen = RTA_PAYLOAD(rta); + zlog_debug(" rta [len=%d (payload=%zu) type=(%d) %s]", rta->rta_len, + plen, rta->rta_type, frh_rta2str(rta->rta_type)); + switch (rta->rta_type) { + case FRA_DST: + case FRA_SRC: + switch (plen) { + case sizeof(struct in_addr): + zlog_debug(" %pI4", + (struct in_addr *)RTA_DATA(rta)); + break; + case sizeof(struct in6_addr): + zlog_debug(" %pI6", + (struct in6_addr *)RTA_DATA(rta)); + break; + default: + break; + } + break; + + case FRA_IIFNAME: + case FRA_OIFNAME: + snprintf(dbuf, sizeof(dbuf), "%s", (char *)RTA_DATA(rta)); + zlog_debug(" %s", dbuf); + break; + + case FRA_GOTO: + case FRA_UNUSED2: + case FRA_PRIORITY: + case FRA_UNUSED3: + case FRA_UNUSED4: + case FRA_UNUSED5: + case FRA_FWMARK: + case FRA_FLOW: + case FRA_TABLE: + case FRA_FWMASK: + u32v = *(uint32_t *)RTA_DATA(rta); + zlog_debug(" %u", u32v); + break; + + case FRA_SUPPRESS_IFGROUP: + case FRA_SUPPRESS_PREFIXLEN: + s32v = *(int32_t *)RTA_DATA(rta); + zlog_debug(" %d", s32v); + break; + + case FRA_TUN_ID: + u64v = *(uint64_t *)RTA_DATA(rta); + zlog_debug(" %" PRIu64, u64v); + break; + + case FRA_L3MDEV: + case FRA_PROTOCOL: + case FRA_IP_PROTO: + u8v = *(uint8_t *)RTA_DATA(rta); + zlog_debug(" %u", u8v); + break; + + case FRA_UID_RANGE: + u_range = (struct fib_rule_uid_range *)RTA_DATA(rta); + if (u_range->start == u_range->end) + zlog_debug(" %u", u_range->start); + else + zlog_debug(" %u-%u", u_range->start, u_range->end); + break; + + case FRA_SPORT_RANGE: 
+ case FRA_DPORT_RANGE: + p_range = (struct fib_rule_port_range *)RTA_DATA(rta); + if (p_range->start == p_range->end) + zlog_debug(" %u", p_range->start); + else + zlog_debug(" %u-%u", p_range->start, p_range->end); + break; + + case FRA_PAD: /* fallthrough */ + default: + /* NOTHING: unhandled. */ + break; + } + + /* Get next pointer and start iteration again. */ + rta = RTA_NEXT(rta, msglen); + goto next_rta; +} + +static const char *tcm_nltype2str(int nltype) +{ + switch (nltype) { + case RTM_NEWQDISC: + case RTM_DELQDISC: + return "qdisc"; + case RTM_NEWTCLASS: + case RTM_DELTCLASS: + return "tclass"; + case RTM_NEWTFILTER: + case RTM_DELTFILTER: + return "tfilter"; + default: + /* should never hit */ + return "unknown"; + } +} + +static void nlncm_dump(const struct netconfmsg *ncm, size_t msglen) +{ + const struct rtattr *rta; + size_t plen; + uint32_t ival; + + rta = (void *)((const char *)ncm + + NLMSG_ALIGN(sizeof(struct netconfmsg))); + +next_rta: + /* Check the attr header for valid length. */ + if (RTA_OK(rta, msglen) == 0) + return; + + plen = RTA_PAYLOAD(rta); + + zlog_debug(" rta [len=%d (payload=%zu) type=(%d) %s]", rta->rta_len, + plen, rta->rta_type, ncm_rta2str(rta->rta_type)); + + switch (rta->rta_type) { + case NETCONFA_IFINDEX: + ival = *(uint32_t *)RTA_DATA(rta); + zlog_debug(" %d", (int32_t)ival); + break; + + /* Most attrs are just on/off. */ + case NETCONFA_FORWARDING: + case NETCONFA_RP_FILTER: + case NETCONFA_MC_FORWARDING: + case NETCONFA_PROXY_NEIGH: + case NETCONFA_IGNORE_ROUTES_WITH_LINKDOWN: + case NETCONFA_INPUT: + case NETCONFA_BC_FORWARDING: + ival = *(uint32_t *)RTA_DATA(rta); + dump_on_off(ival, " "); + break; + default: + /* NOTHING: unhandled. */ + break; + } + + /* Get next pointer and start iteration again. 
*/ + rta = RTA_NEXT(rta, msglen); + goto next_rta; +} + +void nl_dump(void *msg, size_t msglen) +{ + struct nlmsghdr *nlmsg = msg; + struct nlmsgerr *nlmsgerr; + struct rtgenmsg *rtgen; + struct ifaddrmsg *ifa; + struct ndmsg *ndm; + struct rtmsg *rtm; + struct nhmsg *nhm; + struct netconfmsg *ncm; + struct ifinfomsg *ifi; + struct tunnel_msg *tnlm; + struct fib_rule_hdr *frh; + struct tcmsg *tcm; + + char fbuf[128]; + char ibuf[128]; + +next_header: + zlog_debug( + "nlmsghdr [len=%u type=(%d) %s flags=(0x%04x) {%s} seq=%u pid=%u]", + nlmsg->nlmsg_len, nlmsg->nlmsg_type, + nlmsg_type2str(nlmsg->nlmsg_type), nlmsg->nlmsg_flags, + nlmsg_flags2str(nlmsg->nlmsg_flags, fbuf, sizeof(fbuf)), + nlmsg->nlmsg_seq, nlmsg->nlmsg_pid); + + switch (nlmsg->nlmsg_type) { + /* Generic. */ + case NLMSG_NOOP: + break; + case NLMSG_ERROR: + nlmsgerr = NLMSG_DATA(nlmsg); + zlog_debug(" nlmsgerr [error=(%d) %s]", nlmsgerr->error, + strerror(-nlmsgerr->error)); + break; + case NLMSG_DONE: + return; + case NLMSG_OVERRUN: + break; + + /* RTM. 
*/ + case RTM_NEWLINK: + case RTM_DELLINK: + case RTM_SETLINK: + ifi = NLMSG_DATA(nlmsg); + zlog_debug( + " ifinfomsg [family=%d type=(%d) %s index=%d flags=0x%04x {%s}]", + ifi->ifi_family, ifi->ifi_type, + ifi_type2str(ifi->ifi_type), ifi->ifi_index, + ifi->ifi_flags, + if_flags2str(ifi->ifi_flags, ibuf, sizeof(ibuf))); + nllink_dump(ifi, nlmsg->nlmsg_len - NLMSG_LENGTH(sizeof(*ifi))); + break; + case RTM_GETLINK: + rtgen = NLMSG_DATA(nlmsg); + zlog_debug(" rtgen [family=(%d) %s]", rtgen->rtgen_family, + af_type2str(rtgen->rtgen_family)); + break; + + case RTM_NEWROUTE: + case RTM_DELROUTE: + case RTM_GETROUTE: + rtm = NLMSG_DATA(nlmsg); + zlog_debug( + " rtmsg [family=(%d) %s dstlen=%d srclen=%d tos=%d table=%d protocol=(%d) %s scope=(%d) %s type=(%d) %s flags=0x%04x {%s}]", + rtm->rtm_family, af_type2str(rtm->rtm_family), + rtm->rtm_dst_len, rtm->rtm_src_len, rtm->rtm_tos, + rtm->rtm_table, rtm->rtm_protocol, + rtm_protocol2str(rtm->rtm_protocol), rtm->rtm_scope, + rtm_scope2str(rtm->rtm_scope), rtm->rtm_type, + rtm_type2str(rtm->rtm_type), rtm->rtm_flags, + rtm_flags2str(rtm->rtm_flags, fbuf, sizeof(fbuf))); + nlroute_dump(rtm, + nlmsg->nlmsg_len - NLMSG_LENGTH(sizeof(*rtm))); + break; + + case RTM_NEWNEIGH: + case RTM_DELNEIGH: + ndm = NLMSG_DATA(nlmsg); + zlog_debug( + " ndm [family=%d (%s) ifindex=%d state=0x%04x {%s} flags=0x%04x {%s} type=%d (%s)]", + ndm->ndm_family, af_type2str(ndm->ndm_family), + ndm->ndm_ifindex, ndm->ndm_state, + neigh_state2str(ndm->ndm_state, ibuf, sizeof(ibuf)), + ndm->ndm_flags, + neigh_flags2str(ndm->ndm_flags, fbuf, sizeof(fbuf)), + ndm->ndm_type, rtm_type2str(ndm->ndm_type)); + nlneigh_dump(ndm, + nlmsg->nlmsg_len - NLMSG_LENGTH(sizeof(*ndm))); + break; + + case RTM_NEWRULE: + case RTM_DELRULE: + frh = NLMSG_DATA(nlmsg); + zlog_debug( + " frh [family=%d (%s) dst_len=%d src_len=%d tos=%d table=%d res1=%d res2=%d action=%d (%s) flags=0x%x]", + frh->family, af_type2str(frh->family), frh->dst_len, + frh->src_len, frh->tos, 
frh->table, frh->res1, + frh->res2, frh->action, frh_action2str(frh->action), + frh->flags); + nlrule_dump(frh, nlmsg->nlmsg_len - NLMSG_LENGTH(sizeof(*frh))); + break; + + + case RTM_NEWADDR: + case RTM_DELADDR: + ifa = NLMSG_DATA(nlmsg); + zlog_debug( + " ifa [family=(%d) %s prefixlen=%d flags=0x%04x {%s} scope=%d index=%u]", + ifa->ifa_family, af_type2str(ifa->ifa_family), + ifa->ifa_prefixlen, ifa->ifa_flags, + if_flags2str(ifa->ifa_flags, fbuf, sizeof(fbuf)), + ifa->ifa_scope, ifa->ifa_index); + nlifa_dump(ifa, nlmsg->nlmsg_len - NLMSG_LENGTH(sizeof(*ifa))); + break; + + case RTM_NEWNEXTHOP: + case RTM_DELNEXTHOP: + case RTM_GETNEXTHOP: + nhm = NLMSG_DATA(nlmsg); + zlog_debug( + " nhm [family=(%d) %s scope=(%d) %s protocol=(%d) %s flags=0x%08x {%s}]", + nhm->nh_family, af_type2str(nhm->nh_family), + nhm->nh_scope, rtm_scope2str(nhm->nh_scope), + nhm->nh_protocol, rtm_protocol2str(nhm->nh_protocol), + nhm->nh_flags, + nh_flags2str(nhm->nh_flags, fbuf, sizeof(fbuf))); + nlnh_dump(nhm, nlmsg->nlmsg_len - NLMSG_LENGTH(sizeof(*nhm))); + break; + + case RTM_NEWTUNNEL: + case RTM_DELTUNNEL: + case RTM_GETTUNNEL: + tnlm = NLMSG_DATA(nlmsg); + zlog_debug(" tnlm [family=(%d) %s ifindex=%d ", tnlm->family, + af_type2str(tnlm->family), tnlm->ifindex); + nltnl_dump(tnlm, + nlmsg->nlmsg_len - + NLMSG_LENGTH(sizeof(struct tunnel_msg))); + break; + + + case RTM_NEWNETCONF: + case RTM_DELNETCONF: + ncm = NLMSG_DATA(nlmsg); + zlog_debug(" ncm [family=%s (%d)]", + af_type2str(ncm->ncm_family), ncm->ncm_family); + nlncm_dump(ncm, nlmsg->nlmsg_len - NLMSG_LENGTH(sizeof(*ncm))); + break; + + case RTM_NEWQDISC: + case RTM_DELQDISC: + case RTM_NEWTCLASS: + case RTM_DELTCLASS: + case RTM_NEWTFILTER: + case RTM_DELTFILTER: + tcm = NLMSG_DATA(nlmsg); + zlog_debug( + " tcm [type=%s family=%s (%d) ifindex=%d handle=%04x:%04x]", + tcm_nltype2str(nlmsg->nlmsg_type), + af_type2str(tcm->tcm_family), tcm->tcm_family, + tcm->tcm_ifindex, tcm->tcm_handle >> 16, + tcm->tcm_handle & 0xffff); + 
break; + + default: + break; + } + + /* + * Try to get the next header. There should only be more + * messages if this header was flagged as MULTI, otherwise just + * end it here. + */ + nlmsg = NLMSG_NEXT(nlmsg, msglen); + if (NLMSG_OK(nlmsg, msglen) == 0) + return; + + goto next_header; +} + +#endif /* NETLINK_DEBUG */ diff --git a/zebra/dpdk/zebra_dplane_dpdk.c b/zebra/dpdk/zebra_dplane_dpdk.c new file mode 100644 index 0000000..11a1af8 --- /dev/null +++ b/zebra/dpdk/zebra_dplane_dpdk.c @@ -0,0 +1,733 @@ +/* + * Zebra dataplane plugin for DPDK based hw offload + * + * Copyright (C) 2021 Nvidia + * Anuradha Karuppiah + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. 
+ * + * You should have received a copy of the GNU General Public License along + * with this program; see the file COPYING; if not, write to the Free Software + * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA + */ + +#ifdef HAVE_CONFIG_H +#include "config.h" /* Include this explicitly */ +#endif + +#include "lib/libfrr.h" + +#include "zebra/debug.h" +#include "zebra/interface.h" +#include "zebra/zebra_dplane.h" +#include "zebra/debug.h" +#include "zebra/zebra_pbr.h" + +#include "zebra/dpdk/zebra_dplane_dpdk_private.h" + +static const char *plugin_name = "zebra_dplane_dpdk"; + +static struct zd_dpdk_ctx dpdk_ctx_buf, *dpdk_ctx = &dpdk_ctx_buf; +#define dpdk_stat (&dpdk_ctx->stats) + +static struct zd_dpdk_port *zd_dpdk_port_find_by_index(int ifindex); + +DEFINE_MTYPE_STATIC(ZEBRA, DPDK_PORTS, "ZD DPDK port database"); + +void zd_dpdk_stat_show(struct vty *vty) +{ + uint32_t tmp_cnt; + + vty_out(vty, "%30s\n%30s\n", "Dataplane DPDK counters", + "======================="); + +#define ZD_DPDK_SHOW_COUNTER(label, counter) \ + do { \ + tmp_cnt = \ + atomic_load_explicit(&counter, memory_order_relaxed); \ + vty_out(vty, "%28s: %u\n", (label), (tmp_cnt)); \ + } while (0) + + ZD_DPDK_SHOW_COUNTER("PBR rule adds", dpdk_stat->rule_adds); + ZD_DPDK_SHOW_COUNTER("PBR rule dels", dpdk_stat->rule_dels); + ZD_DPDK_SHOW_COUNTER("Ignored updates", dpdk_stat->ignored_updates); +} + + +static void zd_dpdk_flow_stat_show(struct vty *vty, int in_ifindex, + intptr_t dp_flow_ptr) +{ + struct rte_flow_action_count count = {.shared = 0, .id = 0}; + const struct rte_flow_action actions[] = { + { + .type = RTE_FLOW_ACTION_TYPE_COUNT, + .conf = &count, + }, + { + .type = RTE_FLOW_ACTION_TYPE_END, + }, + }; + int rc; + struct zd_dpdk_port *in_dport; + struct rte_flow_query_count query; + struct rte_flow_error error; + uint64_t hits, bytes; + + in_dport = zd_dpdk_port_find_by_index(in_ifindex); + if (!in_dport) { + vty_out(vty, "PBR dpdk flow query failed; in_port %d 
missing\n", + in_ifindex); + return; + } + memset(&query, 0, sizeof(query)); + rc = rte_flow_query(in_dport->port_id, (struct rte_flow *)dp_flow_ptr, + actions, &query, &error); + if (rc) { + vty_out(vty, + "PBR dpdk flow query failed; in_ifindex %d rc %d\n", + in_ifindex, error.type); + return; + } + hits = (query.hits_set) ? query.hits : 0; + bytes = (query.bytes_set) ? query.bytes : 0; + vty_out(vty, " DPDK stats: packets %" PRIu64 " bytes %" PRIu64 "\n", + hits, bytes); +} + + +static int zd_dpdk_pbr_show_rules_walkcb(struct hash_bucket *bucket, void *arg) +{ + struct zebra_pbr_rule *rule = (struct zebra_pbr_rule *)bucket->data; + struct vty *vty = (struct vty *)arg; + struct vrf *vrf; + struct interface *ifp = NULL; + struct zebra_pbr_action *zaction = &rule->action; + + zebra_pbr_show_rule_unit(rule, vty); + if (zaction->dp_flow_ptr) { + vrf = vrf_lookup_by_id(rule->vrf_id); + if (vrf) + ifp = if_lookup_by_name_vrf(rule->ifname, vrf); + + if (ifp) + zd_dpdk_flow_stat_show(vty, ifp->ifindex, + zaction->dp_flow_ptr); + } + return HASHWALK_CONTINUE; +} + + +void zd_dpdk_pbr_flows_show(struct vty *vty) +{ + hash_walk(zrouter.rules_hash, zd_dpdk_pbr_show_rules_walkcb, vty); +} + + +static void zd_dpdk_rule_add(struct zebra_dplane_ctx *ctx) +{ + static struct rte_flow_attr attrs = {.ingress = 1, .transfer = 1}; + uint32_t filter_bm = dplane_ctx_rule_get_filter_bm(ctx); + int in_ifindex = dplane_ctx_get_ifindex(ctx); + int out_ifindex = dplane_ctx_rule_get_out_ifindex(ctx); + struct rte_flow_item_eth eth, eth_mask; + struct rte_flow_item_ipv4 ip, ip_mask; + struct rte_flow_item_udp udp, udp_mask; + struct rte_flow_action_count conf_count; + struct rte_flow_action_set_mac conf_smac, conf_dmac; + struct rte_flow_action_port_id conf_port; + struct rte_flow_item items[ZD_PBR_PATTERN_MAX]; + struct rte_flow_action actions[ZD_PBR_ACTION_MAX]; + int item_cnt = 0; + int act_cnt = 0; + struct in_addr tmp_mask; + const struct ethaddr *mac; + struct rte_flow *flow; + struct 
rte_flow_error error; + struct zd_dpdk_port *in_dport; + struct zd_dpdk_port *out_dport; + uint32_t pri = dplane_ctx_rule_get_priority(ctx); + int seq = dplane_ctx_rule_get_seq(ctx); + int unique = dplane_ctx_rule_get_unique(ctx); + + if (IS_ZEBRA_DEBUG_DPLANE_DPDK_DETAIL) + zlog_debug( + "PBR dpdk flow create ifname %s seq %d pri %u unique %d\n", + dplane_ctx_rule_get_ifname(ctx), seq, pri, unique); + in_dport = zd_dpdk_port_find_by_index(in_ifindex); + if (!in_dport) { + if (IS_ZEBRA_DEBUG_DPLANE_DPDK_DETAIL) + zlog_debug( + "PBR dpdk flow create ifname %s seq %d pri %u unique %d failed; in_port %d missing\n", + dplane_ctx_rule_get_ifname(ctx), seq, pri, + unique, in_ifindex); + return; + } + + out_dport = zd_dpdk_port_find_by_index(out_ifindex); + if (!out_dport) { + if (IS_ZEBRA_DEBUG_DPLANE_DPDK_DETAIL) + zlog_debug( + "PBR dpdk flow create ifname %s seq %d pri %u unique %d failed; out_port %d missing\n", + dplane_ctx_rule_get_ifname(ctx), seq, pri, + unique, out_ifindex); + return; + } + + /*********************** match items **************************/ + memset(ð, 0, sizeof(eth)); + memset(ð_mask, 0, sizeof(eth_mask)); + eth.type = eth_mask.type = htons(RTE_ETHER_TYPE_IPV4); + items[item_cnt].type = RTE_FLOW_ITEM_TYPE_ETH; + items[item_cnt].spec = ð + items[item_cnt].mask = ð_mask; + items[item_cnt].last = NULL; + ++item_cnt; + + memset(&ip, 0, sizeof(ip)); + memset(&ip_mask, 0, sizeof(ip_mask)); + if (filter_bm & PBR_FILTER_SRC_IP) { + const struct prefix *src_ip; + + src_ip = dplane_ctx_rule_get_src_ip(ctx); + ip.hdr.src_addr = src_ip->u.prefix4.s_addr; + masklen2ip(src_ip->prefixlen, &tmp_mask); + ip_mask.hdr.src_addr = tmp_mask.s_addr; + } + if (filter_bm & PBR_FILTER_DST_IP) { + const struct prefix *dst_ip; + + dst_ip = dplane_ctx_rule_get_dst_ip(ctx); + ip.hdr.dst_addr = dst_ip->u.prefix4.s_addr; + masklen2ip(dst_ip->prefixlen, &tmp_mask); + ip_mask.hdr.dst_addr = tmp_mask.s_addr; + } + if (filter_bm & PBR_FILTER_IP_PROTOCOL) { + ip.hdr.next_proto_id = 
dplane_ctx_rule_get_ipproto(ctx); + ip_mask.hdr.next_proto_id = UINT8_MAX; + } + items[item_cnt].type = RTE_FLOW_ITEM_TYPE_IPV4; + items[item_cnt].spec = &ip; + items[item_cnt].mask = &ip_mask; + items[item_cnt].last = NULL; + ++item_cnt; + + if ((filter_bm & (PBR_FILTER_SRC_PORT | PBR_FILTER_DST_PORT))) { + memset(&udp, 0, sizeof(udp)); + memset(&udp_mask, 0, sizeof(udp_mask)); + if (filter_bm & PBR_FILTER_SRC_PORT) { + udp.hdr.src_port = + RTE_BE16(dplane_ctx_rule_get_src_port(ctx)); + udp_mask.hdr.src_port = UINT16_MAX; + } + if (filter_bm & PBR_FILTER_DST_PORT) { + udp.hdr.dst_port = + RTE_BE16(dplane_ctx_rule_get_dst_port(ctx)); + udp_mask.hdr.dst_port = UINT16_MAX; + } + items[item_cnt].type = RTE_FLOW_ITEM_TYPE_UDP; + items[item_cnt].spec = &udp; + items[item_cnt].mask = &udp_mask; + items[item_cnt].last = NULL; + ++item_cnt; + } + + items[item_cnt].type = RTE_FLOW_ITEM_TYPE_END; + + /*************************** actions *****************************/ + actions[act_cnt].type = RTE_FLOW_ACTION_TYPE_COUNT; + memset(&conf_count, 0, sizeof(conf_count)); + actions[act_cnt].conf = &conf_count; + ++act_cnt; + + actions[act_cnt].type = RTE_FLOW_ACTION_TYPE_DEC_TTL; + ++act_cnt; + + mac = dplane_ctx_rule_get_smac(ctx); + memcpy(conf_smac.mac_addr, mac, RTE_ETHER_ADDR_LEN); + actions[act_cnt].type = RTE_FLOW_ACTION_TYPE_SET_MAC_SRC; + actions[act_cnt].conf = &conf_smac; + ++act_cnt; + + mac = dplane_ctx_rule_get_dmac(ctx); + memcpy(conf_dmac.mac_addr, mac, RTE_ETHER_ADDR_LEN); + actions[act_cnt].type = RTE_FLOW_ACTION_TYPE_SET_MAC_DST; + actions[act_cnt].conf = &conf_dmac; + ++act_cnt; + + memset(&conf_port, 0, sizeof(conf_port)); + conf_port.id = out_dport->port_id; + actions[act_cnt].type = RTE_FLOW_ACTION_TYPE_PORT_ID; + actions[act_cnt].conf = &conf_port; + ++act_cnt; + + actions[act_cnt].type = RTE_FLOW_ACTION_TYPE_END; + + frr_with_privs (&zserv_privs) { + flow = rte_flow_create(in_dport->port_id, &attrs, items, + actions, &error); + } + + if (flow) { + 
dplane_ctx_rule_set_dp_flow_ptr(ctx, (intptr_t)flow); + if (IS_ZEBRA_DEBUG_DPLANE_DPDK_DETAIL) + zlog_debug( + "PBR dpdk flow 0x%" PRIxPTR + " created ifname %s seq %d pri %u unique %d\n", + (intptr_t)flow, dplane_ctx_rule_get_ifname(ctx), + seq, pri, unique); + } else { + zlog_warn( + "PBR dpdk flow create failed ifname %s seq %d pri %u unique %d; rc %d\n", + dplane_ctx_rule_get_ifname(ctx), seq, pri, unique, + error.type); + } +} + + +static void zd_dpdk_rule_del(struct zebra_dplane_ctx *ctx, const char *ifname, + int in_ifindex, intptr_t dp_flow_ptr) +{ + struct zd_dpdk_port *in_dport; + struct rte_flow_error error; + int rc; + + if (IS_ZEBRA_DEBUG_DPLANE_DPDK_DETAIL) + zlog_debug( + "PBR dpdk flow delete ifname %s ifindex %d dp_flow 0x%" PRIxPTR + "\n", + ifname, in_ifindex, dp_flow_ptr); + + if (!dp_flow_ptr) { + if (IS_ZEBRA_DEBUG_DPLANE_DPDK_DETAIL) + zlog_debug( + "PBR dpdk flow delete failed; ifname %s ifindex %d dp_flow 0x%" PRIxPTR + "; empty dp\n", + ifname, in_ifindex, dp_flow_ptr); + return; + } + + dplane_ctx_rule_set_dp_flow_ptr(ctx, (intptr_t)NULL); + in_dport = zd_dpdk_port_find_by_index(in_ifindex); + if (!in_dport) { + if (IS_ZEBRA_DEBUG_DPLANE_DPDK_DETAIL) + zlog_debug( + "PBR dpdk flow delete failed; ifname %s ifindex %d dp_flow 0x%" PRIxPTR + " in port missing\n", + ifname, in_ifindex, dp_flow_ptr); + return; + } + + frr_with_privs (&zserv_privs) { + rc = rte_flow_destroy(in_dport->port_id, + (struct rte_flow *)dp_flow_ptr, &error); + } + + if (rc) + zlog_warn( + "PBR dpdk flow delete failed; ifname %s ifindex %d dp_flow 0x%" PRIxPTR + "\n", + ifname, in_ifindex, dp_flow_ptr); +} + + +static void zd_dpdk_rule_update(struct zebra_dplane_ctx *ctx) +{ + enum dplane_op_e op; + int in_ifindex; + intptr_t dp_flow_ptr; + + if (IS_ZEBRA_DEBUG_DPLANE_DPDK_DETAIL) + zlog_debug("Dplane %s", dplane_op2str(dplane_ctx_get_op(ctx))); + + + op = dplane_ctx_get_op(ctx); + switch (op) { + case DPLANE_OP_RULE_ADD: + 
atomic_fetch_add_explicit(&dpdk_stat->rule_adds, 1, + memory_order_relaxed); + zd_dpdk_rule_add(ctx); + break; + + case DPLANE_OP_RULE_UPDATE: + /* delete old rule and install new one */ + atomic_fetch_add_explicit(&dpdk_stat->rule_adds, 1, + memory_order_relaxed); + in_ifindex = dplane_ctx_get_ifindex(ctx); + dp_flow_ptr = dplane_ctx_rule_get_old_dp_flow_ptr(ctx); + zd_dpdk_rule_del(ctx, dplane_ctx_rule_get_ifname(ctx), + in_ifindex, dp_flow_ptr); + zd_dpdk_rule_add(ctx); + break; + + case DPLANE_OP_RULE_DELETE: + atomic_fetch_add_explicit(&dpdk_stat->rule_dels, 1, + memory_order_relaxed); + in_ifindex = dplane_ctx_get_ifindex(ctx); + dp_flow_ptr = dplane_ctx_rule_get_dp_flow_ptr(ctx); + zd_dpdk_rule_del(ctx, dplane_ctx_rule_get_ifname(ctx), + in_ifindex, dp_flow_ptr); + break; + + case DPLANE_OP_NONE: + case DPLANE_OP_ROUTE_INSTALL: + case DPLANE_OP_ROUTE_UPDATE: + case DPLANE_OP_ROUTE_DELETE: + case DPLANE_OP_ROUTE_NOTIFY: + case DPLANE_OP_NH_INSTALL: + case DPLANE_OP_NH_UPDATE: + case DPLANE_OP_NH_DELETE: + case DPLANE_OP_LSP_INSTALL: + case DPLANE_OP_LSP_UPDATE: + case DPLANE_OP_LSP_DELETE: + case DPLANE_OP_LSP_NOTIFY: + case DPLANE_OP_PW_INSTALL: + case DPLANE_OP_PW_UNINSTALL: + case DPLANE_OP_SYS_ROUTE_ADD: + case DPLANE_OP_SYS_ROUTE_DELETE: + case DPLANE_OP_ADDR_INSTALL: + case DPLANE_OP_ADDR_UNINSTALL: + case DPLANE_OP_MAC_INSTALL: + case DPLANE_OP_MAC_DELETE: + case DPLANE_OP_NEIGH_INSTALL: + case DPLANE_OP_NEIGH_UPDATE: + case DPLANE_OP_NEIGH_DELETE: + case DPLANE_OP_VTEP_ADD: + case DPLANE_OP_VTEP_DELETE: + case DPLANE_OP_NEIGH_DISCOVER: + case DPLANE_OP_BR_PORT_UPDATE: + case DPLANE_OP_IPTABLE_ADD: + case DPLANE_OP_IPTABLE_DELETE: + case DPLANE_OP_IPSET_ADD: + case DPLANE_OP_IPSET_DELETE: + case DPLANE_OP_IPSET_ENTRY_ADD: + case DPLANE_OP_IPSET_ENTRY_DELETE: + case DPLANE_OP_NEIGH_IP_INSTALL: + case DPLANE_OP_NEIGH_IP_DELETE: + case DPLANE_OP_NEIGH_TABLE_UPDATE: + case DPLANE_OP_GRE_SET: + case DPLANE_OP_INTF_ADDR_ADD: + case DPLANE_OP_INTF_ADDR_DEL: + 
case DPLANE_OP_INTF_NETCONFIG: + case DPLANE_OP_INTF_INSTALL: + case DPLANE_OP_INTF_UPDATE: + case DPLANE_OP_INTF_DELETE: + break; + } +} + + +/* DPDK provider callback. + */ +static void zd_dpdk_process_update(struct zebra_dplane_ctx *ctx) +{ + switch (dplane_ctx_get_op(ctx)) { + + case DPLANE_OP_RULE_ADD: + case DPLANE_OP_RULE_UPDATE: + case DPLANE_OP_RULE_DELETE: + zd_dpdk_rule_update(ctx); + break; + case DPLANE_OP_NONE: + case DPLANE_OP_ROUTE_INSTALL: + case DPLANE_OP_ROUTE_UPDATE: + case DPLANE_OP_ROUTE_DELETE: + case DPLANE_OP_ROUTE_NOTIFY: + case DPLANE_OP_NH_INSTALL: + case DPLANE_OP_NH_UPDATE: + case DPLANE_OP_NH_DELETE: + case DPLANE_OP_LSP_INSTALL: + case DPLANE_OP_LSP_UPDATE: + case DPLANE_OP_LSP_DELETE: + case DPLANE_OP_LSP_NOTIFY: + case DPLANE_OP_PW_INSTALL: + case DPLANE_OP_PW_UNINSTALL: + case DPLANE_OP_SYS_ROUTE_ADD: + case DPLANE_OP_SYS_ROUTE_DELETE: + case DPLANE_OP_ADDR_INSTALL: + case DPLANE_OP_ADDR_UNINSTALL: + case DPLANE_OP_MAC_INSTALL: + case DPLANE_OP_MAC_DELETE: + case DPLANE_OP_NEIGH_INSTALL: + case DPLANE_OP_NEIGH_UPDATE: + case DPLANE_OP_NEIGH_DELETE: + case DPLANE_OP_VTEP_ADD: + case DPLANE_OP_VTEP_DELETE: + case DPLANE_OP_NEIGH_DISCOVER: + case DPLANE_OP_BR_PORT_UPDATE: + case DPLANE_OP_IPTABLE_ADD: + case DPLANE_OP_IPTABLE_DELETE: + case DPLANE_OP_IPSET_ADD: + case DPLANE_OP_IPSET_DELETE: + case DPLANE_OP_IPSET_ENTRY_ADD: + case DPLANE_OP_IPSET_ENTRY_DELETE: + case DPLANE_OP_NEIGH_IP_INSTALL: + case DPLANE_OP_NEIGH_IP_DELETE: + case DPLANE_OP_NEIGH_TABLE_UPDATE: + case DPLANE_OP_GRE_SET: + case DPLANE_OP_INTF_ADDR_ADD: + case DPLANE_OP_INTF_ADDR_DEL: + case DPLANE_OP_INTF_NETCONFIG: + case DPLANE_OP_INTF_INSTALL: + case DPLANE_OP_INTF_UPDATE: + case DPLANE_OP_INTF_DELETE: + atomic_fetch_add_explicit(&dpdk_stat->ignored_updates, 1, + memory_order_relaxed); + + break; + } +} + + +static int zd_dpdk_process(struct zebra_dplane_provider *prov) +{ + struct zebra_dplane_ctx *ctx; + int counter, limit; + + if 
(IS_ZEBRA_DEBUG_DPLANE_DPDK_DETAIL) + zlog_debug("processing %s", dplane_provider_get_name(prov)); + + limit = dplane_provider_get_work_limit(prov); + for (counter = 0; counter < limit; counter++) { + ctx = dplane_provider_dequeue_in_ctx(prov); + if (!ctx) + break; + + zd_dpdk_process_update(ctx); + dplane_ctx_set_status(ctx, ZEBRA_DPLANE_REQUEST_SUCCESS); + dplane_provider_enqueue_out_ctx(prov, ctx); + } + + return 0; +} + +static void zd_dpdk_port_show_entry(struct zd_dpdk_port *dport, struct vty *vty, + int detail) +{ + struct rte_eth_dev_info *dev_info; + + dev_info = &dport->dev_info; + if (detail) { + vty_out(vty, "DPDK port: %u\n", dport->port_id); + vty_out(vty, " Device: %s\n", + dev_info->device ? dev_info->device->name : "-"); + vty_out(vty, " Driver: %s\n", + dev_info->driver_name ? dev_info->driver_name : "-"); + vty_out(vty, " Interface: %s (%d)\n", + ifindex2ifname(dev_info->if_index, VRF_DEFAULT), + dev_info->if_index); + vty_out(vty, " Switch: %s Domain: %u Port: %u\n", + dev_info->switch_info.name, + dev_info->switch_info.domain_id, + dev_info->switch_info.port_id); + vty_out(vty, "\n"); + } else { + vty_out(vty, "%-4u %-16s %-16s %-16d %s,%u,%u\n", + dport->port_id, + dev_info->device ? 
dev_info->device->name : "-", + ifindex2ifname(dev_info->if_index, VRF_DEFAULT), + dev_info->if_index, dev_info->switch_info.name, + dev_info->switch_info.domain_id, + dev_info->switch_info.port_id); + } +} + + +static struct zd_dpdk_port *zd_dpdk_port_find_by_index(int ifindex) +{ + int count; + struct zd_dpdk_port *dport; + struct rte_eth_dev_info *dev_info; + + for (count = 0; count < RTE_MAX_ETHPORTS; ++count) { + dport = &dpdk_ctx->dpdk_ports[count]; + if (!(dport->flags & ZD_DPDK_PORT_FLAG_INITED)) + continue; + dev_info = &dport->dev_info; + if (dev_info->if_index == (uint32_t)ifindex) + return dport; + } + + return NULL; +} + + +void zd_dpdk_port_show(struct vty *vty, uint16_t port_id, bool uj, int detail) +{ + int count; + struct zd_dpdk_port *dport; + + /* XXX - support for json is yet to be added */ + if (uj) + return; + + if (!detail) { + vty_out(vty, "%-4s %-16s %-16s %-16s %s\n", "Port", "Device", + "IfName", "IfIndex", "sw,domain,port"); + } + + for (count = 0; count < RTE_MAX_ETHPORTS; ++count) { + dport = &dpdk_ctx->dpdk_ports[count]; + if (dport->flags & ZD_DPDK_PORT_FLAG_INITED) + zd_dpdk_port_show_entry(dport, vty, detail); + } +} + + +static void zd_dpdk_port_init(void) +{ + struct zd_dpdk_port *dport; + uint16_t port_id; + struct rte_eth_dev_info *dev_info; + int count; + int rc; + struct rte_flow_error error; + + /* allocate a list of ports */ + dpdk_ctx->dpdk_ports = + XCALLOC(MTYPE_DPDK_PORTS, + sizeof(struct zd_dpdk_port) * RTE_MAX_ETHPORTS); + + if (IS_ZEBRA_DEBUG_DPLANE_DPDK) + zlog_debug("dpdk port init"); + count = 0; + RTE_ETH_FOREACH_DEV(port_id) + { + if (IS_ZEBRA_DEBUG_DPLANE_DPDK) + zlog_debug("dpdk port init %d", port_id); + dport = &dpdk_ctx->dpdk_ports[count]; + count++; + dport->port_id = port_id; + dport->flags |= ZD_DPDK_PORT_FLAG_PROBED; + dev_info = &dport->dev_info; + if (rte_eth_dev_info_get(port_id, dev_info) < 0) { + zlog_warn("failed to get dev info for %u, %s", port_id, + rte_strerror(rte_errno)); + continue; + } + 
dport->flags |= ZD_DPDK_PORT_FLAG_INITED; + if (IS_ZEBRA_DEBUG_DPLANE_DPDK) + zlog_debug( + "port %u, dev %s, ifI %d, sw_name %s, sw_domain %u, sw_port %u", + port_id, + dev_info->device ? dev_info->device->name : "-", + dev_info->if_index, dev_info->switch_info.name, + dev_info->switch_info.domain_id, + dev_info->switch_info.port_id); + if (rte_flow_isolate(port_id, 1, &error)) { + if (IS_ZEBRA_DEBUG_DPLANE_DPDK) + zlog_debug( + "Flow isolate on port %u failed %d\n", + port_id, error.type); + } else { + if (IS_ZEBRA_DEBUG_DPLANE_DPDK) + zlog_debug("Flow isolate on port %u\n", + port_id); + } + rc = rte_eth_dev_start(port_id); + if (rc) { + zlog_warn("DPDK port %d start error: %s", port_id, + rte_strerror(-rc)); + continue; + } + if (IS_ZEBRA_DEBUG_DPLANE_DPDK) + zlog_debug("DPDK port %d started in promiscuous mode ", + port_id); + } + + if (!count) { + if (IS_ZEBRA_DEBUG_DPLANE_DPDK) + zlog_debug("no probed ethernet devices"); + } +} + + +static int zd_dpdk_init(void) +{ + int rc; + static const char *argv[] = {(char *)"/usr/lib/frr/zebra", + (char *)"--"}; + + zd_dpdk_vty_init(); + + frr_with_privs (&zserv_privs) { + rc = rte_eal_init(ARRAY_SIZE(argv), argv); + } + if (rc < 0) { + zlog_warn("EAL init failed %s", rte_strerror(rte_errno)); + return -1; + } + + frr_with_privs (&zserv_privs) { + zd_dpdk_port_init(); + } + return 0; +} + + +static int zd_dpdk_start(struct zebra_dplane_provider *prov) +{ + if (IS_ZEBRA_DEBUG_DPLANE_DPDK) + zlog_debug("%s start", dplane_provider_get_name(prov)); + + return zd_dpdk_init(); +} + + +static int zd_dpdk_finish(struct zebra_dplane_provider *prov, bool early) +{ + int rc; + + if (early) { + if (IS_ZEBRA_DEBUG_DPLANE_DPDK) + zlog_debug("%s early finish", + dplane_provider_get_name(prov)); + + return 0; + } + + if (IS_ZEBRA_DEBUG_DPLANE_DPDK) + zlog_debug("%s finish", dplane_provider_get_name(prov)); + + + frr_with_privs (&zserv_privs) { + rc = rte_eal_cleanup(); + } + if (rc < 0) + zlog_warn("EAL cleanup failed %s", 
rte_strerror(rte_errno)); + + return 0; +} + + +static int zd_dpdk_plugin_init(struct thread_master *tm) +{ + int ret; + + ret = dplane_provider_register( + plugin_name, DPLANE_PRIO_KERNEL, DPLANE_PROV_FLAGS_DEFAULT, + zd_dpdk_start, zd_dpdk_process, zd_dpdk_finish, dpdk_ctx, NULL); + + if (IS_ZEBRA_DEBUG_DPLANE_DPDK) + zlog_debug("%s register status %d", plugin_name, ret); + + return 0; +} + + +static int zd_dpdk_module_init(void) +{ + hook_register(frr_late_init, zd_dpdk_plugin_init); + return 0; +} + +FRR_MODULE_SETUP(.name = "dplane_dpdk", .version = "0.0.1", + .description = "Data plane plugin using dpdk for hw offload", + .init = zd_dpdk_module_init); diff --git a/zebra/dpdk/zebra_dplane_dpdk.h b/zebra/dpdk/zebra_dplane_dpdk.h new file mode 100644 index 0000000..40f9263 --- /dev/null +++ b/zebra/dpdk/zebra_dplane_dpdk.h @@ -0,0 +1,36 @@ +/* + * Zebra dataplane plugin for DPDK based hw offload + * + * Copyright (C) 2021 Nvidia + * Anuradha Karuppiah + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. 
+ * + * You should have received a copy of the GNU General Public License along + * with this program; see the file COPYING; if not, write to the Free Software + * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA + */ + +#ifndef _ZEBRA_DPLANE_DPDK_H +#define _ZEBRA_DPLANE_DPDK_H + +#include <zebra.h> + + +#define ZD_DPDK_INVALID_PORT 0xffff + +extern void zd_dpdk_pbr_flows_show(struct vty *vty); +extern void zd_dpdk_port_show(struct vty *vty, uint16_t port_id, bool uj, + int detail); +extern void zd_dpdk_stat_show(struct vty *vty); +extern void zd_dpdk_vty_init(void); + +#endif diff --git a/zebra/dpdk/zebra_dplane_dpdk_private.h b/zebra/dpdk/zebra_dplane_dpdk_private.h new file mode 100644 index 0000000..b8483f4 --- /dev/null +++ b/zebra/dpdk/zebra_dplane_dpdk_private.h @@ -0,0 +1,61 @@ +/* + * Zebra dataplane plugin for DPDK based hw offload + * + * Copyright (C) 2021 Nvidia + * Anuradha Karuppiah + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. 
+ * + * You should have received a copy of the GNU General Public License along + * with this program; see the file COPYING; if not, write to the Free Software + * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA + */ + +#ifndef _ZEBRA_DPLANE_DPDK_PRIVATE_H +#define _ZEBRA_DPLANE_DPDK_PRIVATE_H + +#include <zebra.h> + +#include <rte_ethdev.h> + +#include "zebra_dplane_dpdk.h" + +/* match on eth, sip, dip, udp */ +#define ZD_PBR_PATTERN_MAX 6 +/* dec_ttl, set_smac, set_dmac, * phy_port, count + */ +#define ZD_PBR_ACTION_MAX 6 + +#define ZD_ETH_TYPE_IP 0x800 + +struct zd_dpdk_port { + uint16_t port_id; /* dpdk port_id */ + struct rte_eth_dev_info dev_info; /* PCI info + driver name */ + uint32_t flags; +#define ZD_DPDK_PORT_FLAG_PROBED (1 << 0) +#define ZD_DPDK_PORT_FLAG_INITED (1 << 1) +}; + +struct zd_dpdk_stat { + _Atomic uint32_t ignored_updates; + + _Atomic uint32_t rule_adds; + _Atomic uint32_t rule_dels; +}; + +struct zd_dpdk_ctx { + /* Stats */ + struct zd_dpdk_stat stats; + struct zd_dpdk_port *dpdk_ports; + int dpdk_logtype; +}; + +#endif diff --git a/zebra/dpdk/zebra_dplane_dpdk_vty.c b/zebra/dpdk/zebra_dplane_dpdk_vty.c new file mode 100644 index 0000000..748bce9 --- /dev/null +++ b/zebra/dpdk/zebra_dplane_dpdk_vty.c @@ -0,0 +1,85 @@ +/* + * Zebra dataplane plugin for DPDK based hw offload + * + * Copyright (C) 2021 Nvidia + * Donald Sharp + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. 
+ * + * You should have received a copy of the GNU General Public License along + * with this program; see the file COPYING; if not, write to the Free Software + * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA + */ +#include <zebra.h> + +#include "lib/json.h" +#include "zebra/dpdk/zebra_dplane_dpdk.h" + +#ifndef VTYSH_EXTRACT_PL +#include "zebra/dpdk/zebra_dplane_dpdk_vty_clippy.c" +#endif + +#define ZD_STR "Zebra dataplane information\n" +#define ZD_DPDK_STR "DPDK offload information\n" + +DEFPY(zd_dpdk_show_counters, zd_dpdk_show_counters_cmd, + "show dplane dpdk counters", + SHOW_STR ZD_STR ZD_DPDK_STR "show counters\n") +{ + zd_dpdk_stat_show(vty); + + return CMD_SUCCESS; +} + + +DEFPY (zd_dpdk_show_ports, + zd_dpdk_show_ports_cmd, + "show dplane dpdk port [(1-32)$port_id] [detail$detail] [json$json]", + SHOW_STR + ZD_STR + ZD_DPDK_STR + "show port info\n" + "DPDK port identifier\n" + "Detailed information\n" + JSON_STR) +{ + bool uj = !!json; + bool ud = !!detail; + + if (!port_id) + port_id = ZD_DPDK_INVALID_PORT; + zd_dpdk_port_show(vty, port_id, uj, ud); + + return CMD_SUCCESS; +} + + +DEFPY (zd_dpdk_show_pbr_flows, + zd_dpdk_show_pbr_flows_cmd, + "show dplane dpdk pbr flows", + SHOW_STR + ZD_STR + ZD_DPDK_STR + "show pbr info\n" + "DPDK flows\n") +{ + zd_dpdk_pbr_flows_show(vty); + + return CMD_SUCCESS; +} + + +void zd_dpdk_vty_init(void) +{ + install_element(VIEW_NODE, &zd_dpdk_show_counters_cmd); + install_element(VIEW_NODE, &zd_dpdk_show_ports_cmd); + install_element(VIEW_NODE, &zd_dpdk_show_pbr_flows_cmd); +} diff --git a/zebra/dplane_fpm_nl.c b/zebra/dplane_fpm_nl.c new file mode 100644 index 0000000..c5e1c11 --- /dev/null +++ b/zebra/dplane_fpm_nl.c @@ -0,0 +1,1511 @@ +/* + * Zebra dataplane plugin for Forwarding Plane Manager (FPM) using netlink. + * + * Copyright (C) 2019 Network Device Education Foundation, Inc. 
("NetDEF") + * Rafael Zalamena + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. + * + * You should have received a copy of the GNU General Public License along + * with this program; see the file COPYING; if not, write to the Free Software + * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA + */ + +#ifdef HAVE_CONFIG_H +#include "config.h" /* Include this explicitly */ +#endif + +#include <arpa/inet.h> + +#include <sys/types.h> +#include <sys/socket.h> + +#include <errno.h> +#include <string.h> + +#include "lib/zebra.h" +#include "lib/json.h" +#include "lib/libfrr.h" +#include "lib/frratomic.h" +#include "lib/command.h" +#include "lib/memory.h" +#include "lib/network.h" +#include "lib/ns.h" +#include "lib/frr_pthread.h" +#include "zebra/debug.h" +#include "zebra/interface.h" +#include "zebra/zebra_dplane.h" +#include "zebra/zebra_mpls.h" +#include "zebra/zebra_router.h" +#include "zebra/zebra_evpn.h" +#include "zebra/zebra_evpn_mac.h" +#include "zebra/zebra_vxlan_private.h" +#include "zebra/kernel_netlink.h" +#include "zebra/rt_netlink.h" +#include "zebra/debug.h" + +#define SOUTHBOUND_DEFAULT_ADDR INADDR_LOOPBACK +#define SOUTHBOUND_DEFAULT_PORT 2620 + +/** + * FPM header: + * { + * version: 1 byte (always 1), + * type: 1 byte (1 for netlink, 2 protobuf), + * len: 2 bytes (network order), + * } + * + * This header is used with any format to tell the users how many bytes to + * expect. 
+ */ +#define FPM_HEADER_SIZE 4 + +static const char *prov_name = "dplane_fpm_nl"; + +struct fpm_nl_ctx { + /* data plane connection. */ + int socket; + bool disabled; + bool connecting; + bool use_nhg; + struct sockaddr_storage addr; + + /* data plane buffers. */ + struct stream *ibuf; + struct stream *obuf; + pthread_mutex_t obuf_mutex; + + /* + * data plane context queue: + * When a FPM server connection becomes a bottleneck, we must keep the + * data plane contexts until we get a chance to process them. + */ + struct dplane_ctx_q ctxqueue; + pthread_mutex_t ctxqueue_mutex; + + /* data plane events. */ + struct zebra_dplane_provider *prov; + struct frr_pthread *fthread; + struct thread *t_connect; + struct thread *t_read; + struct thread *t_write; + struct thread *t_event; + struct thread *t_nhg; + struct thread *t_dequeue; + + /* zebra events. */ + struct thread *t_lspreset; + struct thread *t_lspwalk; + struct thread *t_nhgreset; + struct thread *t_nhgwalk; + struct thread *t_ribreset; + struct thread *t_ribwalk; + struct thread *t_rmacreset; + struct thread *t_rmacwalk; + + /* Statistic counters. */ + struct { + /* Amount of bytes read into ibuf. */ + _Atomic uint32_t bytes_read; + /* Amount of bytes written from obuf. */ + _Atomic uint32_t bytes_sent; + /* Output buffer current usage. */ + _Atomic uint32_t obuf_bytes; + /* Output buffer peak usage. */ + _Atomic uint32_t obuf_peak; + + /* Amount of connection closes. */ + _Atomic uint32_t connection_closes; + /* Amount of connection errors. */ + _Atomic uint32_t connection_errors; + + /* Amount of user configurations: FNE_RECONNECT. */ + _Atomic uint32_t user_configures; + /* Amount of user disable requests: FNE_DISABLE. */ + _Atomic uint32_t user_disables; + + /* Amount of data plane context processed. */ + _Atomic uint32_t dplane_contexts; + /* Amount of data plane contexts enqueued. */ + _Atomic uint32_t ctxqueue_len; + /* Peak amount of data plane contexts enqueued. 
*/ + _Atomic uint32_t ctxqueue_len_peak; + + /* Amount of buffer full events. */ + _Atomic uint32_t buffer_full; + } counters; +} *gfnc; + +enum fpm_nl_events { + /* Ask for FPM to reconnect the external server. */ + FNE_RECONNECT, + /* Disable FPM. */ + FNE_DISABLE, + /* Reset counters. */ + FNE_RESET_COUNTERS, + /* Toggle next hop group feature. */ + FNE_TOGGLE_NHG, + /* Reconnect request by our own code to avoid races. */ + FNE_INTERNAL_RECONNECT, + + /* LSP walk finished. */ + FNE_LSP_FINISHED, + /* Next hop groups walk finished. */ + FNE_NHG_FINISHED, + /* RIB walk finished. */ + FNE_RIB_FINISHED, + /* RMAC walk finished. */ + FNE_RMAC_FINISHED, +}; + +#define FPM_RECONNECT(fnc) \ + thread_add_event((fnc)->fthread->master, fpm_process_event, (fnc), \ + FNE_INTERNAL_RECONNECT, &(fnc)->t_event) + +#define WALK_FINISH(fnc, ev) \ + thread_add_event((fnc)->fthread->master, fpm_process_event, (fnc), \ + (ev), NULL) + +/* + * Prototypes. + */ +static void fpm_process_event(struct thread *t); +static int fpm_nl_enqueue(struct fpm_nl_ctx *fnc, struct zebra_dplane_ctx *ctx); +static void fpm_lsp_send(struct thread *t); +static void fpm_lsp_reset(struct thread *t); +static void fpm_nhg_send(struct thread *t); +static void fpm_nhg_reset(struct thread *t); +static void fpm_rib_send(struct thread *t); +static void fpm_rib_reset(struct thread *t); +static void fpm_rmac_send(struct thread *t); +static void fpm_rmac_reset(struct thread *t); + +/* + * CLI. 
+ */ +#define FPM_STR "Forwarding Plane Manager configuration\n" + +DEFUN(fpm_set_address, fpm_set_address_cmd, + "fpm address <A.B.C.D|X:X::X:X> [port (1-65535)]", + FPM_STR + "FPM remote listening server address\n" + "Remote IPv4 FPM server\n" + "Remote IPv6 FPM server\n" + "FPM remote listening server port\n" + "Remote FPM server port\n") +{ + struct sockaddr_in *sin; + struct sockaddr_in6 *sin6; + uint16_t port = 0; + uint8_t naddr[INET6_BUFSIZ]; + + if (argc == 5) + port = strtol(argv[4]->arg, NULL, 10); + + /* Handle IPv4 addresses. */ + if (inet_pton(AF_INET, argv[2]->arg, naddr) == 1) { + sin = (struct sockaddr_in *)&gfnc->addr; + + memset(sin, 0, sizeof(*sin)); + sin->sin_family = AF_INET; + sin->sin_port = + port ? htons(port) : htons(SOUTHBOUND_DEFAULT_PORT); +#ifdef HAVE_STRUCT_SOCKADDR_SA_LEN + sin->sin_len = sizeof(*sin); +#endif /* HAVE_STRUCT_SOCKADDR_SA_LEN */ + memcpy(&sin->sin_addr, naddr, sizeof(sin->sin_addr)); + + goto ask_reconnect; + } + + /* Handle IPv6 addresses. */ + if (inet_pton(AF_INET6, argv[2]->arg, naddr) != 1) { + vty_out(vty, "%% Invalid address: %s\n", argv[2]->arg); + return CMD_WARNING; + } + + sin6 = (struct sockaddr_in6 *)&gfnc->addr; + memset(sin6, 0, sizeof(*sin6)); + sin6->sin6_family = AF_INET6; + sin6->sin6_port = port ? 
htons(port) : htons(SOUTHBOUND_DEFAULT_PORT); +#ifdef HAVE_STRUCT_SOCKADDR_SA_LEN + sin6->sin6_len = sizeof(*sin6); +#endif /* HAVE_STRUCT_SOCKADDR_SA_LEN */ + memcpy(&sin6->sin6_addr, naddr, sizeof(sin6->sin6_addr)); + +ask_reconnect: + thread_add_event(gfnc->fthread->master, fpm_process_event, gfnc, + FNE_RECONNECT, &gfnc->t_event); + return CMD_SUCCESS; +} + +DEFUN(no_fpm_set_address, no_fpm_set_address_cmd, + "no fpm address [<A.B.C.D|X:X::X:X> [port <1-65535>]]", + NO_STR + FPM_STR + "FPM remote listening server address\n" + "Remote IPv4 FPM server\n" + "Remote IPv6 FPM server\n" + "FPM remote listening server port\n" + "Remote FPM server port\n") +{ + thread_add_event(gfnc->fthread->master, fpm_process_event, gfnc, + FNE_DISABLE, &gfnc->t_event); + return CMD_SUCCESS; +} + +DEFUN(fpm_use_nhg, fpm_use_nhg_cmd, + "fpm use-next-hop-groups", + FPM_STR + "Use netlink next hop groups feature.\n") +{ + /* Already enabled. */ + if (gfnc->use_nhg) + return CMD_SUCCESS; + + thread_add_event(gfnc->fthread->master, fpm_process_event, gfnc, + FNE_TOGGLE_NHG, &gfnc->t_nhg); + + return CMD_SUCCESS; +} + +DEFUN(no_fpm_use_nhg, no_fpm_use_nhg_cmd, + "no fpm use-next-hop-groups", + NO_STR + FPM_STR + "Use netlink next hop groups feature.\n") +{ + /* Already disabled. 
*/ + if (!gfnc->use_nhg) + return CMD_SUCCESS; + + thread_add_event(gfnc->fthread->master, fpm_process_event, gfnc, + FNE_TOGGLE_NHG, &gfnc->t_nhg); + + return CMD_SUCCESS; +} + +DEFUN(fpm_reset_counters, fpm_reset_counters_cmd, + "clear fpm counters", + CLEAR_STR + FPM_STR + "FPM statistic counters\n") +{ + thread_add_event(gfnc->fthread->master, fpm_process_event, gfnc, + FNE_RESET_COUNTERS, &gfnc->t_event); + return CMD_SUCCESS; +} + +DEFUN(fpm_show_counters, fpm_show_counters_cmd, + "show fpm counters", + SHOW_STR + FPM_STR + "FPM statistic counters\n") +{ + vty_out(vty, "%30s\n%30s\n", "FPM counters", "============"); + +#define SHOW_COUNTER(label, counter) \ + vty_out(vty, "%28s: %u\n", (label), (counter)) + + SHOW_COUNTER("Input bytes", gfnc->counters.bytes_read); + SHOW_COUNTER("Output bytes", gfnc->counters.bytes_sent); + SHOW_COUNTER("Output buffer current size", gfnc->counters.obuf_bytes); + SHOW_COUNTER("Output buffer peak size", gfnc->counters.obuf_peak); + SHOW_COUNTER("Connection closes", gfnc->counters.connection_closes); + SHOW_COUNTER("Connection errors", gfnc->counters.connection_errors); + SHOW_COUNTER("Data plane items processed", + gfnc->counters.dplane_contexts); + SHOW_COUNTER("Data plane items enqueued", + gfnc->counters.ctxqueue_len); + SHOW_COUNTER("Data plane items queue peak", + gfnc->counters.ctxqueue_len_peak); + SHOW_COUNTER("Buffer full hits", gfnc->counters.buffer_full); + SHOW_COUNTER("User FPM configurations", gfnc->counters.user_configures); + SHOW_COUNTER("User FPM disable requests", gfnc->counters.user_disables); + +#undef SHOW_COUNTER + + return CMD_SUCCESS; +} + +DEFUN(fpm_show_counters_json, fpm_show_counters_json_cmd, + "show fpm counters json", + SHOW_STR + FPM_STR + "FPM statistic counters\n" + JSON_STR) +{ + struct json_object *jo; + + jo = json_object_new_object(); + json_object_int_add(jo, "bytes-read", gfnc->counters.bytes_read); + json_object_int_add(jo, "bytes-sent", gfnc->counters.bytes_sent); + 
json_object_int_add(jo, "obuf-bytes", gfnc->counters.obuf_bytes); + json_object_int_add(jo, "obuf-bytes-peak", gfnc->counters.obuf_peak); + json_object_int_add(jo, "connection-closes", + gfnc->counters.connection_closes); + json_object_int_add(jo, "connection-errors", + gfnc->counters.connection_errors); + json_object_int_add(jo, "data-plane-contexts", + gfnc->counters.dplane_contexts); + json_object_int_add(jo, "data-plane-contexts-queue", + gfnc->counters.ctxqueue_len); + json_object_int_add(jo, "data-plane-contexts-queue-peak", + gfnc->counters.ctxqueue_len_peak); + json_object_int_add(jo, "buffer-full-hits", gfnc->counters.buffer_full); + json_object_int_add(jo, "user-configures", + gfnc->counters.user_configures); + json_object_int_add(jo, "user-disables", gfnc->counters.user_disables); + vty_json(vty, jo); + + return CMD_SUCCESS; +} + +static int fpm_write_config(struct vty *vty) +{ + struct sockaddr_in *sin; + struct sockaddr_in6 *sin6; + int written = 0; + + if (gfnc->disabled) + return written; + + switch (gfnc->addr.ss_family) { + case AF_INET: + written = 1; + sin = (struct sockaddr_in *)&gfnc->addr; + vty_out(vty, "fpm address %pI4", &sin->sin_addr); + if (sin->sin_port != htons(SOUTHBOUND_DEFAULT_PORT)) + vty_out(vty, " port %d", ntohs(sin->sin_port)); + + vty_out(vty, "\n"); + break; + case AF_INET6: + written = 1; + sin6 = (struct sockaddr_in6 *)&gfnc->addr; + vty_out(vty, "fpm address %pI6", &sin6->sin6_addr); + if (sin6->sin6_port != htons(SOUTHBOUND_DEFAULT_PORT)) + vty_out(vty, " port %d", ntohs(sin6->sin6_port)); + + vty_out(vty, "\n"); + break; + + default: + break; + } + + if (!gfnc->use_nhg) { + vty_out(vty, "no fpm use-next-hop-groups\n"); + written = 1; + } + + return written; +} + +static struct cmd_node fpm_node = { + .name = "fpm", + .node = FPM_NODE, + .prompt = "", + .config_write = fpm_write_config, +}; + +/* + * FPM functions. 
+ */ +static void fpm_connect(struct thread *t); + +static void fpm_reconnect(struct fpm_nl_ctx *fnc) +{ + /* Cancel all zebra threads first. */ + thread_cancel_async(zrouter.master, &fnc->t_lspreset, NULL); + thread_cancel_async(zrouter.master, &fnc->t_lspwalk, NULL); + thread_cancel_async(zrouter.master, &fnc->t_nhgreset, NULL); + thread_cancel_async(zrouter.master, &fnc->t_nhgwalk, NULL); + thread_cancel_async(zrouter.master, &fnc->t_ribreset, NULL); + thread_cancel_async(zrouter.master, &fnc->t_ribwalk, NULL); + thread_cancel_async(zrouter.master, &fnc->t_rmacreset, NULL); + thread_cancel_async(zrouter.master, &fnc->t_rmacwalk, NULL); + + /* + * Grab the lock to empty the streams (data plane might try to + * enqueue updates while we are closing). + */ + frr_mutex_lock_autounlock(&fnc->obuf_mutex); + + /* Avoid calling close on `-1`. */ + if (fnc->socket != -1) { + close(fnc->socket); + fnc->socket = -1; + } + + stream_reset(fnc->ibuf); + stream_reset(fnc->obuf); + THREAD_OFF(fnc->t_read); + THREAD_OFF(fnc->t_write); + + /* FPM is disabled, don't attempt to connect. */ + if (fnc->disabled) + return; + + thread_add_timer(fnc->fthread->master, fpm_connect, fnc, 3, + &fnc->t_connect); +} + +static void fpm_read(struct thread *t) +{ + struct fpm_nl_ctx *fnc = THREAD_ARG(t); + ssize_t rv; + + /* Let's ignore the input at the moment. */ + rv = stream_read_try(fnc->ibuf, fnc->socket, + STREAM_WRITEABLE(fnc->ibuf)); + /* We've got an interruption. */ + if (rv == -2) { + /* Schedule next read. 
*/ + thread_add_read(fnc->fthread->master, fpm_read, fnc, + fnc->socket, &fnc->t_read); + return; + } + if (rv == 0) { + atomic_fetch_add_explicit(&fnc->counters.connection_closes, 1, + memory_order_relaxed); + + if (IS_ZEBRA_DEBUG_FPM) + zlog_debug("%s: connection closed", __func__); + + FPM_RECONNECT(fnc); + return; + } + if (rv == -1) { + atomic_fetch_add_explicit(&fnc->counters.connection_errors, 1, + memory_order_relaxed); + zlog_warn("%s: connection failure: %s", __func__, + strerror(errno)); + FPM_RECONNECT(fnc); + return; + } + stream_reset(fnc->ibuf); + + /* Account all bytes read. */ + atomic_fetch_add_explicit(&fnc->counters.bytes_read, rv, + memory_order_relaxed); + + thread_add_read(fnc->fthread->master, fpm_read, fnc, fnc->socket, + &fnc->t_read); +} + +static void fpm_write(struct thread *t) +{ + struct fpm_nl_ctx *fnc = THREAD_ARG(t); + socklen_t statuslen; + ssize_t bwritten; + int rv, status; + size_t btotal; + + if (fnc->connecting == true) { + status = 0; + statuslen = sizeof(status); + + rv = getsockopt(fnc->socket, SOL_SOCKET, SO_ERROR, &status, + &statuslen); + if (rv == -1 || status != 0) { + if (rv != -1) + zlog_warn("%s: connection failed: %s", __func__, + strerror(status)); + else + zlog_warn("%s: SO_ERROR failed: %s", __func__, + strerror(status)); + + atomic_fetch_add_explicit( + &fnc->counters.connection_errors, 1, + memory_order_relaxed); + + FPM_RECONNECT(fnc); + return; + } + + fnc->connecting = false; + + /* + * Starting with LSPs walk all FPM objects, marking them + * as unsent and then replaying them. + */ + thread_add_timer(zrouter.master, fpm_lsp_reset, fnc, 0, + &fnc->t_lspreset); + + /* Permit receiving messages now. */ + thread_add_read(fnc->fthread->master, fpm_read, fnc, + fnc->socket, &fnc->t_read); + } + + frr_mutex_lock_autounlock(&fnc->obuf_mutex); + + while (true) { + /* Stream is empty: reset pointers and return. 
*/ + if (STREAM_READABLE(fnc->obuf) == 0) { + stream_reset(fnc->obuf); + break; + } + + /* Try to write all at once. */ + btotal = stream_get_endp(fnc->obuf) - + stream_get_getp(fnc->obuf); + bwritten = write(fnc->socket, stream_pnt(fnc->obuf), btotal); + if (bwritten == 0) { + atomic_fetch_add_explicit( + &fnc->counters.connection_closes, 1, + memory_order_relaxed); + + if (IS_ZEBRA_DEBUG_FPM) + zlog_debug("%s: connection closed", __func__); + break; + } + if (bwritten == -1) { + /* Attempt to continue if blocked by a signal. */ + if (errno == EINTR) + continue; + /* Receiver is probably slow, lets give it some time. */ + if (errno == EAGAIN || errno == EWOULDBLOCK) + break; + + atomic_fetch_add_explicit( + &fnc->counters.connection_errors, 1, + memory_order_relaxed); + zlog_warn("%s: connection failure: %s", __func__, + strerror(errno)); + + FPM_RECONNECT(fnc); + return; + } + + /* Account all bytes sent. */ + atomic_fetch_add_explicit(&fnc->counters.bytes_sent, bwritten, + memory_order_relaxed); + + /* Account number of bytes free. */ + atomic_fetch_sub_explicit(&fnc->counters.obuf_bytes, bwritten, + memory_order_relaxed); + + stream_forward_getp(fnc->obuf, (size_t)bwritten); + } + + /* Stream is not empty yet, we must schedule more writes. 
*/ + if (STREAM_READABLE(fnc->obuf)) { + stream_pulldown(fnc->obuf); + thread_add_write(fnc->fthread->master, fpm_write, fnc, + fnc->socket, &fnc->t_write); + return; + } +} + +static void fpm_connect(struct thread *t) +{ + struct fpm_nl_ctx *fnc = THREAD_ARG(t); + struct sockaddr_in *sin = (struct sockaddr_in *)&fnc->addr; + struct sockaddr_in6 *sin6 = (struct sockaddr_in6 *)&fnc->addr; + socklen_t slen; + int rv, sock; + char addrstr[INET6_ADDRSTRLEN]; + + sock = socket(fnc->addr.ss_family, SOCK_STREAM, 0); + if (sock == -1) { + zlog_err("%s: fpm socket failed: %s", __func__, + strerror(errno)); + thread_add_timer(fnc->fthread->master, fpm_connect, fnc, 3, + &fnc->t_connect); + return; + } + + set_nonblocking(sock); + + if (fnc->addr.ss_family == AF_INET) { + inet_ntop(AF_INET, &sin->sin_addr, addrstr, sizeof(addrstr)); + slen = sizeof(*sin); + } else { + inet_ntop(AF_INET6, &sin6->sin6_addr, addrstr, sizeof(addrstr)); + slen = sizeof(*sin6); + } + + if (IS_ZEBRA_DEBUG_FPM) + zlog_debug("%s: attempting to connect to %s:%d", __func__, + addrstr, ntohs(sin->sin_port)); + + rv = connect(sock, (struct sockaddr *)&fnc->addr, slen); + if (rv == -1 && errno != EINPROGRESS) { + atomic_fetch_add_explicit(&fnc->counters.connection_errors, 1, + memory_order_relaxed); + close(sock); + zlog_warn("%s: fpm connection failed: %s", __func__, + strerror(errno)); + thread_add_timer(fnc->fthread->master, fpm_connect, fnc, 3, + &fnc->t_connect); + return; + } + + fnc->connecting = (errno == EINPROGRESS); + fnc->socket = sock; + if (!fnc->connecting) + thread_add_read(fnc->fthread->master, fpm_read, fnc, sock, + &fnc->t_read); + thread_add_write(fnc->fthread->master, fpm_write, fnc, sock, + &fnc->t_write); + + /* + * Starting with LSPs walk all FPM objects, marking them + * as unsent and then replaying them. + * + * If we are not connected, then delay the objects reset/send. 
+ */ + if (!fnc->connecting) + thread_add_timer(zrouter.master, fpm_lsp_reset, fnc, 0, + &fnc->t_lspreset); +} + +/** + * Encode data plane operation context into netlink and enqueue it in the FPM + * output buffer. + * + * @param fnc the netlink FPM context. + * @param ctx the data plane operation context data. + * @return 0 on success or -1 on not enough space. + */ +static int fpm_nl_enqueue(struct fpm_nl_ctx *fnc, struct zebra_dplane_ctx *ctx) +{ + uint8_t nl_buf[NL_PKT_BUF_SIZE]; + size_t nl_buf_len; + ssize_t rv; + uint64_t obytes, obytes_peak; + enum dplane_op_e op = dplane_ctx_get_op(ctx); + + /* + * If we were configured to not use next hop groups, then quit as soon + * as possible. + */ + if ((!fnc->use_nhg) + && (op == DPLANE_OP_NH_DELETE || op == DPLANE_OP_NH_INSTALL + || op == DPLANE_OP_NH_UPDATE)) + return 0; + + nl_buf_len = 0; + + frr_mutex_lock_autounlock(&fnc->obuf_mutex); + + switch (op) { + case DPLANE_OP_ROUTE_UPDATE: + case DPLANE_OP_ROUTE_DELETE: + rv = netlink_route_multipath_msg_encode(RTM_DELROUTE, ctx, + nl_buf, sizeof(nl_buf), + true, fnc->use_nhg); + if (rv <= 0) { + zlog_err( + "%s: netlink_route_multipath_msg_encode failed", + __func__); + return 0; + } + + nl_buf_len = (size_t)rv; + + /* UPDATE operations need a INSTALL, otherwise just quit. 
*/ + if (op == DPLANE_OP_ROUTE_DELETE) + break; + + /* FALL THROUGH */ + case DPLANE_OP_ROUTE_INSTALL: + rv = netlink_route_multipath_msg_encode( + RTM_NEWROUTE, ctx, &nl_buf[nl_buf_len], + sizeof(nl_buf) - nl_buf_len, true, fnc->use_nhg); + if (rv <= 0) { + zlog_err( + "%s: netlink_route_multipath_msg_encode failed", + __func__); + return 0; + } + + nl_buf_len += (size_t)rv; + break; + + case DPLANE_OP_MAC_INSTALL: + case DPLANE_OP_MAC_DELETE: + rv = netlink_macfdb_update_ctx(ctx, nl_buf, sizeof(nl_buf)); + if (rv <= 0) { + zlog_err("%s: netlink_macfdb_update_ctx failed", + __func__); + return 0; + } + + nl_buf_len = (size_t)rv; + break; + + case DPLANE_OP_NH_DELETE: + rv = netlink_nexthop_msg_encode(RTM_DELNEXTHOP, ctx, nl_buf, + sizeof(nl_buf)); + if (rv <= 0) { + zlog_err("%s: netlink_nexthop_msg_encode failed", + __func__); + return 0; + } + + nl_buf_len = (size_t)rv; + break; + case DPLANE_OP_NH_INSTALL: + case DPLANE_OP_NH_UPDATE: + rv = netlink_nexthop_msg_encode(RTM_NEWNEXTHOP, ctx, nl_buf, + sizeof(nl_buf)); + if (rv <= 0) { + zlog_err("%s: netlink_nexthop_msg_encode failed", + __func__); + return 0; + } + + nl_buf_len = (size_t)rv; + break; + + case DPLANE_OP_LSP_INSTALL: + case DPLANE_OP_LSP_UPDATE: + case DPLANE_OP_LSP_DELETE: + rv = netlink_lsp_msg_encoder(ctx, nl_buf, sizeof(nl_buf)); + if (rv <= 0) { + zlog_err("%s: netlink_lsp_msg_encoder failed", + __func__); + return 0; + } + + nl_buf_len += (size_t)rv; + break; + + /* Un-handled by FPM at this time. 
*/ + case DPLANE_OP_PW_INSTALL: + case DPLANE_OP_PW_UNINSTALL: + case DPLANE_OP_ADDR_INSTALL: + case DPLANE_OP_ADDR_UNINSTALL: + case DPLANE_OP_NEIGH_INSTALL: + case DPLANE_OP_NEIGH_UPDATE: + case DPLANE_OP_NEIGH_DELETE: + case DPLANE_OP_VTEP_ADD: + case DPLANE_OP_VTEP_DELETE: + case DPLANE_OP_SYS_ROUTE_ADD: + case DPLANE_OP_SYS_ROUTE_DELETE: + case DPLANE_OP_ROUTE_NOTIFY: + case DPLANE_OP_LSP_NOTIFY: + case DPLANE_OP_RULE_ADD: + case DPLANE_OP_RULE_DELETE: + case DPLANE_OP_RULE_UPDATE: + case DPLANE_OP_NEIGH_DISCOVER: + case DPLANE_OP_BR_PORT_UPDATE: + case DPLANE_OP_IPTABLE_ADD: + case DPLANE_OP_IPTABLE_DELETE: + case DPLANE_OP_IPSET_ADD: + case DPLANE_OP_IPSET_DELETE: + case DPLANE_OP_IPSET_ENTRY_ADD: + case DPLANE_OP_IPSET_ENTRY_DELETE: + case DPLANE_OP_NEIGH_IP_INSTALL: + case DPLANE_OP_NEIGH_IP_DELETE: + case DPLANE_OP_NEIGH_TABLE_UPDATE: + case DPLANE_OP_GRE_SET: + case DPLANE_OP_INTF_ADDR_ADD: + case DPLANE_OP_INTF_ADDR_DEL: + case DPLANE_OP_INTF_NETCONFIG: + case DPLANE_OP_INTF_INSTALL: + case DPLANE_OP_INTF_UPDATE: + case DPLANE_OP_INTF_DELETE: + case DPLANE_OP_TC_INSTALL: + case DPLANE_OP_TC_UPDATE: + case DPLANE_OP_TC_DELETE: + case DPLANE_OP_NONE: + break; + + } + + /* Skip empty enqueues. */ + if (nl_buf_len == 0) + return 0; + + /* We must know if someday a message goes beyond 65KiB. */ + assert((nl_buf_len + FPM_HEADER_SIZE) <= UINT16_MAX); + + /* Check if we have enough buffer space. */ + if (STREAM_WRITEABLE(fnc->obuf) < (nl_buf_len + FPM_HEADER_SIZE)) { + atomic_fetch_add_explicit(&fnc->counters.buffer_full, 1, + memory_order_relaxed); + + if (IS_ZEBRA_DEBUG_FPM) + zlog_debug( + "%s: buffer full: wants to write %zu but has %zu", + __func__, nl_buf_len + FPM_HEADER_SIZE, + STREAM_WRITEABLE(fnc->obuf)); + + return -1; + } + + /* + * Fill in the FPM header information. + * + * See FPM_HEADER_SIZE definition for more information. 
+ */ + stream_putc(fnc->obuf, 1); + stream_putc(fnc->obuf, 1); + stream_putw(fnc->obuf, nl_buf_len + FPM_HEADER_SIZE); + + /* Write current data. */ + stream_write(fnc->obuf, nl_buf, (size_t)nl_buf_len); + + /* Account number of bytes waiting to be written. */ + atomic_fetch_add_explicit(&fnc->counters.obuf_bytes, + nl_buf_len + FPM_HEADER_SIZE, + memory_order_relaxed); + obytes = atomic_load_explicit(&fnc->counters.obuf_bytes, + memory_order_relaxed); + obytes_peak = atomic_load_explicit(&fnc->counters.obuf_peak, + memory_order_relaxed); + if (obytes_peak < obytes) + atomic_store_explicit(&fnc->counters.obuf_peak, obytes, + memory_order_relaxed); + + /* Tell the thread to start writing. */ + thread_add_write(fnc->fthread->master, fpm_write, fnc, fnc->socket, + &fnc->t_write); + + return 0; +} + +/* + * LSP walk/send functions + */ +struct fpm_lsp_arg { + struct zebra_dplane_ctx *ctx; + struct fpm_nl_ctx *fnc; + bool complete; +}; + +static int fpm_lsp_send_cb(struct hash_bucket *bucket, void *arg) +{ + struct zebra_lsp *lsp = bucket->data; + struct fpm_lsp_arg *fla = arg; + + /* Skip entries which have already been sent */ + if (CHECK_FLAG(lsp->flags, LSP_FLAG_FPM)) + return HASHWALK_CONTINUE; + + dplane_ctx_reset(fla->ctx); + dplane_ctx_lsp_init(fla->ctx, DPLANE_OP_LSP_INSTALL, lsp); + + if (fpm_nl_enqueue(fla->fnc, fla->ctx) == -1) { + fla->complete = false; + return HASHWALK_ABORT; + } + + /* Mark entry as sent */ + SET_FLAG(lsp->flags, LSP_FLAG_FPM); + return HASHWALK_CONTINUE; +} + +static void fpm_lsp_send(struct thread *t) +{ + struct fpm_nl_ctx *fnc = THREAD_ARG(t); + struct zebra_vrf *zvrf = vrf_info_lookup(VRF_DEFAULT); + struct fpm_lsp_arg fla; + + fla.fnc = fnc; + fla.ctx = dplane_ctx_alloc(); + fla.complete = true; + + hash_walk(zvrf->lsp_table, fpm_lsp_send_cb, &fla); + + dplane_ctx_fini(&fla.ctx); + + if (fla.complete) { + WALK_FINISH(fnc, FNE_LSP_FINISHED); + + /* Now move onto routes */ + thread_add_timer(zrouter.master, fpm_nhg_reset, fnc, 0, + 
&fnc->t_nhgreset); + } else { + /* Didn't finish - reschedule LSP walk */ + thread_add_timer(zrouter.master, fpm_lsp_send, fnc, 0, + &fnc->t_lspwalk); + } +} + +/* + * Next hop walk/send functions. + */ +struct fpm_nhg_arg { + struct zebra_dplane_ctx *ctx; + struct fpm_nl_ctx *fnc; + bool complete; +}; + +static int fpm_nhg_send_cb(struct hash_bucket *bucket, void *arg) +{ + struct nhg_hash_entry *nhe = bucket->data; + struct fpm_nhg_arg *fna = arg; + + /* This entry was already sent, skip it. */ + if (CHECK_FLAG(nhe->flags, NEXTHOP_GROUP_FPM)) + return HASHWALK_CONTINUE; + + /* Reset ctx to reuse allocated memory, take a snapshot and send it. */ + dplane_ctx_reset(fna->ctx); + dplane_ctx_nexthop_init(fna->ctx, DPLANE_OP_NH_INSTALL, nhe); + if (fpm_nl_enqueue(fna->fnc, fna->ctx) == -1) { + /* Our buffers are full, lets give it some cycles. */ + fna->complete = false; + return HASHWALK_ABORT; + } + + /* Mark group as sent, so it doesn't get sent again. */ + SET_FLAG(nhe->flags, NEXTHOP_GROUP_FPM); + + return HASHWALK_CONTINUE; +} + +static void fpm_nhg_send(struct thread *t) +{ + struct fpm_nl_ctx *fnc = THREAD_ARG(t); + struct fpm_nhg_arg fna; + + fna.fnc = fnc; + fna.ctx = dplane_ctx_alloc(); + fna.complete = true; + + /* Send next hops. */ + if (fnc->use_nhg) + hash_walk(zrouter.nhgs_id, fpm_nhg_send_cb, &fna); + + /* `free()` allocated memory. */ + dplane_ctx_fini(&fna.ctx); + + /* We are done sending next hops, lets install the routes now. */ + if (fna.complete) { + WALK_FINISH(fnc, FNE_NHG_FINISHED); + thread_add_timer(zrouter.master, fpm_rib_reset, fnc, 0, + &fnc->t_ribreset); + } else /* Otherwise reschedule next hop group again. */ + thread_add_timer(zrouter.master, fpm_nhg_send, fnc, 0, + &fnc->t_nhgwalk); +} + +/** + * Send all RIB installed routes to the connected data plane. 
+ */ +static void fpm_rib_send(struct thread *t) +{ + struct fpm_nl_ctx *fnc = THREAD_ARG(t); + rib_dest_t *dest; + struct route_node *rn; + struct route_table *rt; + struct zebra_dplane_ctx *ctx; + rib_tables_iter_t rt_iter; + + /* Allocate temporary context for all transactions. */ + ctx = dplane_ctx_alloc(); + + rt_iter.state = RIB_TABLES_ITER_S_INIT; + while ((rt = rib_tables_iter_next(&rt_iter))) { + for (rn = route_top(rt); rn; rn = srcdest_route_next(rn)) { + dest = rib_dest_from_rnode(rn); + /* Skip bad route entries. */ + if (dest == NULL || dest->selected_fib == NULL) + continue; + + /* Check for already sent routes. */ + if (CHECK_FLAG(dest->flags, RIB_DEST_UPDATE_FPM)) + continue; + + /* Enqueue route install. */ + dplane_ctx_reset(ctx); + dplane_ctx_route_init(ctx, DPLANE_OP_ROUTE_INSTALL, rn, + dest->selected_fib); + if (fpm_nl_enqueue(fnc, ctx) == -1) { + /* Free the temporary allocated context. */ + dplane_ctx_fini(&ctx); + + thread_add_timer(zrouter.master, fpm_rib_send, + fnc, 1, &fnc->t_ribwalk); + return; + } + + /* Mark as sent. */ + SET_FLAG(dest->flags, RIB_DEST_UPDATE_FPM); + } + } + + /* Free the temporary allocated context. */ + dplane_ctx_fini(&ctx); + + /* All RIB routes sent! */ + WALK_FINISH(fnc, FNE_RIB_FINISHED); + + /* Schedule next event: RMAC reset. */ + thread_add_event(zrouter.master, fpm_rmac_reset, fnc, 0, + &fnc->t_rmacreset); +} + +/* + * The next three functions will handle RMAC enqueue. + */ +struct fpm_rmac_arg { + struct zebra_dplane_ctx *ctx; + struct fpm_nl_ctx *fnc; + struct zebra_l3vni *zl3vni; + bool complete; +}; + +static void fpm_enqueue_rmac_table(struct hash_bucket *bucket, void *arg) +{ + struct fpm_rmac_arg *fra = arg; + struct zebra_mac *zrmac = bucket->data; + struct zebra_if *zif = fra->zl3vni->vxlan_if->info; + const struct zebra_l2info_vxlan *vxl = &zif->l2info.vxl; + struct zebra_if *br_zif; + vlanid_t vid; + bool sticky; + + /* Entry already sent. 
*/ + if (CHECK_FLAG(zrmac->flags, ZEBRA_MAC_FPM_SENT) || !fra->complete) + return; + + sticky = !!CHECK_FLAG(zrmac->flags, + (ZEBRA_MAC_STICKY | ZEBRA_MAC_REMOTE_DEF_GW)); + br_zif = (struct zebra_if *)(zif->brslave_info.br_if->info); + vid = IS_ZEBRA_IF_BRIDGE_VLAN_AWARE(br_zif) ? vxl->access_vlan : 0; + + dplane_ctx_reset(fra->ctx); + dplane_ctx_set_op(fra->ctx, DPLANE_OP_MAC_INSTALL); + dplane_mac_init(fra->ctx, fra->zl3vni->vxlan_if, + zif->brslave_info.br_if, vid, + &zrmac->macaddr, zrmac->fwd_info.r_vtep_ip, sticky, + 0 /*nhg*/, 0 /*update_flags*/); + if (fpm_nl_enqueue(fra->fnc, fra->ctx) == -1) { + thread_add_timer(zrouter.master, fpm_rmac_send, + fra->fnc, 1, &fra->fnc->t_rmacwalk); + fra->complete = false; + } +} + +static void fpm_enqueue_l3vni_table(struct hash_bucket *bucket, void *arg) +{ + struct fpm_rmac_arg *fra = arg; + struct zebra_l3vni *zl3vni = bucket->data; + + fra->zl3vni = zl3vni; + hash_iterate(zl3vni->rmac_table, fpm_enqueue_rmac_table, zl3vni); +} + +static void fpm_rmac_send(struct thread *t) +{ + struct fpm_rmac_arg fra; + + fra.fnc = THREAD_ARG(t); + fra.ctx = dplane_ctx_alloc(); + fra.complete = true; + hash_iterate(zrouter.l3vni_table, fpm_enqueue_l3vni_table, &fra); + dplane_ctx_fini(&fra.ctx); + + /* RMAC walk completed. */ + if (fra.complete) + WALK_FINISH(fra.fnc, FNE_RMAC_FINISHED); +} + +/* + * Resets the next hop FPM flags so we send all next hops again. + */ +static void fpm_nhg_reset_cb(struct hash_bucket *bucket, void *arg) +{ + struct nhg_hash_entry *nhe = bucket->data; + + /* Unset FPM installation flag so it gets installed again. */ + UNSET_FLAG(nhe->flags, NEXTHOP_GROUP_FPM); +} + +static void fpm_nhg_reset(struct thread *t) +{ + struct fpm_nl_ctx *fnc = THREAD_ARG(t); + + hash_iterate(zrouter.nhgs_id, fpm_nhg_reset_cb, NULL); + + /* Schedule next step: send next hop groups. */ + thread_add_event(zrouter.master, fpm_nhg_send, fnc, 0, &fnc->t_nhgwalk); +} + +/* + * Resets the LSP FPM flag so we send all LSPs again. 
+ */ +static void fpm_lsp_reset_cb(struct hash_bucket *bucket, void *arg) +{ + struct zebra_lsp *lsp = bucket->data; + + UNSET_FLAG(lsp->flags, LSP_FLAG_FPM); +} + +static void fpm_lsp_reset(struct thread *t) +{ + struct fpm_nl_ctx *fnc = THREAD_ARG(t); + struct zebra_vrf *zvrf = vrf_info_lookup(VRF_DEFAULT); + + hash_iterate(zvrf->lsp_table, fpm_lsp_reset_cb, NULL); + + /* Schedule next step: send LSPs */ + thread_add_event(zrouter.master, fpm_lsp_send, fnc, 0, &fnc->t_lspwalk); +} + +/** + * Resets the RIB FPM flags so we send all routes again. + */ +static void fpm_rib_reset(struct thread *t) +{ + struct fpm_nl_ctx *fnc = THREAD_ARG(t); + rib_dest_t *dest; + struct route_node *rn; + struct route_table *rt; + rib_tables_iter_t rt_iter; + + rt_iter.state = RIB_TABLES_ITER_S_INIT; + while ((rt = rib_tables_iter_next(&rt_iter))) { + for (rn = route_top(rt); rn; rn = srcdest_route_next(rn)) { + dest = rib_dest_from_rnode(rn); + /* Skip bad route entries. */ + if (dest == NULL) + continue; + + UNSET_FLAG(dest->flags, RIB_DEST_UPDATE_FPM); + } + } + + /* Schedule next step: send RIB routes. */ + thread_add_event(zrouter.master, fpm_rib_send, fnc, 0, &fnc->t_ribwalk); +} + +/* + * The next three function will handle RMAC table reset. + */ +static void fpm_unset_rmac_table(struct hash_bucket *bucket, void *arg) +{ + struct zebra_mac *zrmac = bucket->data; + + UNSET_FLAG(zrmac->flags, ZEBRA_MAC_FPM_SENT); +} + +static void fpm_unset_l3vni_table(struct hash_bucket *bucket, void *arg) +{ + struct zebra_l3vni *zl3vni = bucket->data; + + hash_iterate(zl3vni->rmac_table, fpm_unset_rmac_table, zl3vni); +} + +static void fpm_rmac_reset(struct thread *t) +{ + struct fpm_nl_ctx *fnc = THREAD_ARG(t); + + hash_iterate(zrouter.l3vni_table, fpm_unset_l3vni_table, NULL); + + /* Schedule next event: send RMAC entries. 
*/ + thread_add_event(zrouter.master, fpm_rmac_send, fnc, 0, + &fnc->t_rmacwalk); +} + +static void fpm_process_queue(struct thread *t) +{ + struct fpm_nl_ctx *fnc = THREAD_ARG(t); + struct zebra_dplane_ctx *ctx; + bool no_bufs = false; + uint64_t processed_contexts = 0; + + while (true) { + /* No space available yet. */ + if (STREAM_WRITEABLE(fnc->obuf) < NL_PKT_BUF_SIZE) { + no_bufs = true; + break; + } + + /* Dequeue next item or quit processing. */ + frr_with_mutex (&fnc->ctxqueue_mutex) { + ctx = dplane_ctx_dequeue(&fnc->ctxqueue); + } + if (ctx == NULL) + break; + + /* + * Intentionally ignoring the return value + * as that we are ensuring that we can write to + * the output data in the STREAM_WRITEABLE + * check above, so we can ignore the return + */ + if (fnc->socket != -1) + (void)fpm_nl_enqueue(fnc, ctx); + + /* Account the processed entries. */ + processed_contexts++; + atomic_fetch_sub_explicit(&fnc->counters.ctxqueue_len, 1, + memory_order_relaxed); + + dplane_ctx_set_status(ctx, ZEBRA_DPLANE_REQUEST_SUCCESS); + dplane_provider_enqueue_out_ctx(fnc->prov, ctx); + } + + /* Update count of processed contexts */ + atomic_fetch_add_explicit(&fnc->counters.dplane_contexts, + processed_contexts, memory_order_relaxed); + + /* Re-schedule if we ran out of buffer space */ + if (no_bufs) + thread_add_timer(fnc->fthread->master, fpm_process_queue, + fnc, 0, &fnc->t_dequeue); + + /* + * Let the dataplane thread know if there are items in the + * output queue to be processed. Otherwise they may sit + * until the dataplane thread gets scheduled for new, + * unrelated work. + */ + if (dplane_provider_out_ctx_queue_len(fnc->prov) > 0) + dplane_provider_work_ready(); +} + +/** + * Handles external (e.g. CLI, data plane or others) events. 
+ */ +static void fpm_process_event(struct thread *t) +{ + struct fpm_nl_ctx *fnc = THREAD_ARG(t); + enum fpm_nl_events event = THREAD_VAL(t); + + switch (event) { + case FNE_DISABLE: + zlog_info("%s: manual FPM disable event", __func__); + fnc->disabled = true; + atomic_fetch_add_explicit(&fnc->counters.user_disables, 1, + memory_order_relaxed); + + /* Call reconnect to disable timers and clean up context. */ + fpm_reconnect(fnc); + break; + + case FNE_RECONNECT: + zlog_info("%s: manual FPM reconnect event", __func__); + fnc->disabled = false; + atomic_fetch_add_explicit(&fnc->counters.user_configures, 1, + memory_order_relaxed); + fpm_reconnect(fnc); + break; + + case FNE_RESET_COUNTERS: + zlog_info("%s: manual FPM counters reset event", __func__); + memset(&fnc->counters, 0, sizeof(fnc->counters)); + break; + + case FNE_TOGGLE_NHG: + zlog_info("%s: toggle next hop groups support", __func__); + fnc->use_nhg = !fnc->use_nhg; + fpm_reconnect(fnc); + break; + + case FNE_INTERNAL_RECONNECT: + fpm_reconnect(fnc); + break; + + case FNE_NHG_FINISHED: + if (IS_ZEBRA_DEBUG_FPM) + zlog_debug("%s: next hop groups walk finished", + __func__); + break; + case FNE_RIB_FINISHED: + if (IS_ZEBRA_DEBUG_FPM) + zlog_debug("%s: RIB walk finished", __func__); + break; + case FNE_RMAC_FINISHED: + if (IS_ZEBRA_DEBUG_FPM) + zlog_debug("%s: RMAC walk finished", __func__); + break; + case FNE_LSP_FINISHED: + if (IS_ZEBRA_DEBUG_FPM) + zlog_debug("%s: LSP walk finished", __func__); + break; + } +} + +/* + * Data plane functions. 
+ */ +static int fpm_nl_start(struct zebra_dplane_provider *prov) +{ + struct fpm_nl_ctx *fnc; + + fnc = dplane_provider_get_data(prov); + fnc->fthread = frr_pthread_new(NULL, prov_name, prov_name); + assert(frr_pthread_run(fnc->fthread, NULL) == 0); + fnc->ibuf = stream_new(NL_PKT_BUF_SIZE); + fnc->obuf = stream_new(NL_PKT_BUF_SIZE * 128); + pthread_mutex_init(&fnc->obuf_mutex, NULL); + fnc->socket = -1; + fnc->disabled = true; + fnc->prov = prov; + TAILQ_INIT(&fnc->ctxqueue); + pthread_mutex_init(&fnc->ctxqueue_mutex, NULL); + + /* Set default values. */ + fnc->use_nhg = true; + + return 0; +} + +static int fpm_nl_finish_early(struct fpm_nl_ctx *fnc) +{ + /* Disable all events and close socket. */ + THREAD_OFF(fnc->t_lspreset); + THREAD_OFF(fnc->t_lspwalk); + THREAD_OFF(fnc->t_nhgreset); + THREAD_OFF(fnc->t_nhgwalk); + THREAD_OFF(fnc->t_ribreset); + THREAD_OFF(fnc->t_ribwalk); + THREAD_OFF(fnc->t_rmacreset); + THREAD_OFF(fnc->t_rmacwalk); + THREAD_OFF(fnc->t_event); + THREAD_OFF(fnc->t_nhg); + thread_cancel_async(fnc->fthread->master, &fnc->t_read, NULL); + thread_cancel_async(fnc->fthread->master, &fnc->t_write, NULL); + thread_cancel_async(fnc->fthread->master, &fnc->t_connect, NULL); + + if (fnc->socket != -1) { + close(fnc->socket); + fnc->socket = -1; + } + + return 0; +} + +static int fpm_nl_finish_late(struct fpm_nl_ctx *fnc) +{ + /* Stop the running thread. */ + frr_pthread_stop(fnc->fthread, NULL); + + /* Free all allocated resources. 
*/ + pthread_mutex_destroy(&fnc->obuf_mutex); + pthread_mutex_destroy(&fnc->ctxqueue_mutex); + stream_free(fnc->ibuf); + stream_free(fnc->obuf); + free(gfnc); + gfnc = NULL; + + return 0; +} + +static int fpm_nl_finish(struct zebra_dplane_provider *prov, bool early) +{ + struct fpm_nl_ctx *fnc; + + fnc = dplane_provider_get_data(prov); + if (early) + return fpm_nl_finish_early(fnc); + + return fpm_nl_finish_late(fnc); +} + +static int fpm_nl_process(struct zebra_dplane_provider *prov) +{ + struct zebra_dplane_ctx *ctx; + struct fpm_nl_ctx *fnc; + int counter, limit; + uint64_t cur_queue, peak_queue = 0, stored_peak_queue; + + fnc = dplane_provider_get_data(prov); + limit = dplane_provider_get_work_limit(prov); + for (counter = 0; counter < limit; counter++) { + ctx = dplane_provider_dequeue_in_ctx(prov); + if (ctx == NULL) + break; + + /* + * Skip all notifications if not connected, we'll walk the RIB + * anyway. + */ + if (fnc->socket != -1 && fnc->connecting == false) { + /* + * Update the number of queued contexts *before* + * enqueueing, to ensure counter consistency. 
+ */ + atomic_fetch_add_explicit(&fnc->counters.ctxqueue_len, + 1, memory_order_relaxed); + + frr_with_mutex (&fnc->ctxqueue_mutex) { + dplane_ctx_enqueue_tail(&fnc->ctxqueue, ctx); + } + + cur_queue = atomic_load_explicit( + &fnc->counters.ctxqueue_len, + memory_order_relaxed); + if (peak_queue < cur_queue) + peak_queue = cur_queue; + continue; + } + + dplane_ctx_set_status(ctx, ZEBRA_DPLANE_REQUEST_SUCCESS); + dplane_provider_enqueue_out_ctx(prov, ctx); + } + + /* Update peak queue length, if we just observed a new peak */ + stored_peak_queue = atomic_load_explicit( + &fnc->counters.ctxqueue_len_peak, memory_order_relaxed); + if (stored_peak_queue < peak_queue) + atomic_store_explicit(&fnc->counters.ctxqueue_len_peak, + peak_queue, memory_order_relaxed); + + if (atomic_load_explicit(&fnc->counters.ctxqueue_len, + memory_order_relaxed) + > 0) + thread_add_timer(fnc->fthread->master, fpm_process_queue, + fnc, 0, &fnc->t_dequeue); + + /* Ensure dataplane thread is rescheduled if we hit the work limit */ + if (counter >= limit) + dplane_provider_work_ready(); + + return 0; +} + +static int fpm_nl_new(struct thread_master *tm) +{ + struct zebra_dplane_provider *prov = NULL; + int rv; + + gfnc = calloc(1, sizeof(*gfnc)); + rv = dplane_provider_register(prov_name, DPLANE_PRIO_POSTPROCESS, + DPLANE_PROV_FLAG_THREADED, fpm_nl_start, + fpm_nl_process, fpm_nl_finish, gfnc, + &prov); + + if (IS_ZEBRA_DEBUG_DPLANE) + zlog_debug("%s register status: %d", prov_name, rv); + + install_node(&fpm_node); + install_element(ENABLE_NODE, &fpm_show_counters_cmd); + install_element(ENABLE_NODE, &fpm_show_counters_json_cmd); + install_element(ENABLE_NODE, &fpm_reset_counters_cmd); + install_element(CONFIG_NODE, &fpm_set_address_cmd); + install_element(CONFIG_NODE, &no_fpm_set_address_cmd); + install_element(CONFIG_NODE, &fpm_use_nhg_cmd); + install_element(CONFIG_NODE, &no_fpm_use_nhg_cmd); + + return 0; +} + +static int fpm_nl_init(void) +{ + hook_register(frr_late_init, fpm_nl_new); + 
return 0; +} + +FRR_MODULE_SETUP( + .name = "dplane_fpm_nl", + .version = "0.0.1", + .description = "Data plane plugin for FPM using netlink.", + .init = fpm_nl_init, +); diff --git a/zebra/if_ioctl.c b/zebra/if_ioctl.c new file mode 100644 index 0000000..e02f3d5 --- /dev/null +++ b/zebra/if_ioctl.c @@ -0,0 +1,315 @@ +/* + * Interface looking up by ioctl (). + * Copyright (C) 1997, 98 Kunihiro Ishiguro + * + * This file is part of GNU Zebra. + * + * GNU Zebra is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License as published by the + * Free Software Foundation; either version 2, or (at your option) any + * later version. + * + * GNU Zebra is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. + * + * You should have received a copy of the GNU General Public License along + * with this program; see the file COPYING; if not, write to the Free Software + * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA + */ + +#include <zebra.h> + +#ifdef OPEN_BSD + +#include "if.h" +#include "sockunion.h" +#include "prefix.h" +#include "ioctl.h" +#include "connected.h" +#include "memory.h" +#include "log.h" +#include "vrf.h" +#include "vty.h" +#include "lib_errors.h" + +#include "zebra/interface.h" +#include "zebra/rib.h" +#include "zebra/rt.h" +#include "zebra/zebra_errors.h" + +#include <ifaddrs.h> + +/* Interface looking up using infamous SIOCGIFCONF. */ +static int interface_list_ioctl(void) +{ + int ret; + int sock; +#define IFNUM_BASE 32 + int ifnum; + struct ifreq *ifreq; + struct ifconf ifconf; + struct interface *ifp; + int n; + int lastlen; + + /* Normally SIOCGIFCONF works with AF_INET socket. 
*/ + sock = socket(AF_INET, SOCK_DGRAM, 0); + if (sock < 0) { + flog_err_sys(EC_LIB_SOCKET, + "Can't make AF_INET socket stream: %s", + safe_strerror(errno)); + return -1; + } + +/* Set initial ifreq count. This will be double when SIOCGIFCONF + fail. Solaris has SIOCGIFNUM. */ +#ifdef SIOCGIFNUM + ret = ioctl(sock, SIOCGIFNUM, &ifnum); + if (ret < 0) + ifnum = IFNUM_BASE; + else + ifnum++; +#else + ifnum = IFNUM_BASE; +#endif /* SIOCGIFNUM */ + + ifconf.ifc_buf = NULL; + + lastlen = 0; + /* Loop until SIOCGIFCONF success. */ + for (;;) { + ifconf.ifc_len = sizeof(struct ifreq) * ifnum; + ifconf.ifc_buf = + XREALLOC(MTYPE_TMP, ifconf.ifc_buf, ifconf.ifc_len); + + ret = ioctl(sock, SIOCGIFCONF, &ifconf); + + if (ret < 0) { + flog_err_sys(EC_LIB_SYSTEM_CALL, "SIOCGIFCONF: %s", + safe_strerror(errno)); + goto end; + } + /* Repeatedly get info til buffer fails to grow. */ + if (ifconf.ifc_len > lastlen) { + lastlen = ifconf.ifc_len; + ifnum += 10; + continue; + } + /* Success. */ + break; + } + + /* Allocate interface. */ + ifreq = ifconf.ifc_req; + +#ifdef OPEN_BSD + for (n = 0; n < ifconf.ifc_len;) { + unsigned int size; + + ifreq = (struct ifreq *)((caddr_t)ifconf.ifc_req + n); + ifp = if_get_by_name(ifreq->ifr_name, VRF_DEFAULT, + VRF_DEFAULT_NAME); + if_add_update(ifp); + size = ifreq->ifr_addr.sa_len; + if (size < sizeof(ifreq->ifr_addr)) + size = sizeof(ifreq->ifr_addr); + size += sizeof(ifreq->ifr_name); + n += size; + } +#else + for (n = 0; n < ifconf.ifc_len; n += sizeof(struct ifreq)) { + ifp = if_get_by_name(ifreq->ifr_name, VRF_DEFAULT, + VRF_DEFAULT_NAME); + if_add_update(ifp); + ifreq++; + } +#endif /* OPEN_BSD */ + +end: + close(sock); + XFREE(MTYPE_TMP, ifconf.ifc_buf); + + return ret; +} + +/* Get interface's index by ioctl. 
*/ +static int if_get_index(struct interface *ifp) +{ + if_set_index(ifp, if_nametoindex(ifp->name)); + return ifp->ifindex; +} + +#ifdef SIOCGIFHWADDR +static int if_get_hwaddr(struct interface *ifp) +{ + int ret; + struct ifreq ifreq; + int i; + + strlcpy(ifreq.ifr_name, ifp->name, sizeof(ifreq.ifr_name)); + ifreq.ifr_addr.sa_family = AF_INET; + + /* Fetch Hardware address if available. */ + ret = vrf_if_ioctl(SIOCGIFHWADDR, (caddr_t)&ifreq, ifp->vrf->vrf_id); + if (ret < 0) + ifp->hw_addr_len = 0; + else { + memcpy(ifp->hw_addr, ifreq.ifr_hwaddr.sa_data, 6); + + for (i = 0; i < 6; i++) + if (ifp->hw_addr[i] != 0) + break; + + if (i == 6) + ifp->hw_addr_len = 0; + else + ifp->hw_addr_len = 6; + } + return 0; +} +#endif /* SIOCGIFHWADDR */ + +static int if_getaddrs(void) +{ + int ret; + struct ifaddrs *ifap; + struct ifaddrs *ifapfree; + struct interface *ifp; + int prefixlen; + + ret = getifaddrs(&ifap); + if (ret != 0) { + flog_err_sys(EC_LIB_SYSTEM_CALL, "getifaddrs(): %s", + safe_strerror(errno)); + return -1; + } + + for (ifapfree = ifap; ifap; ifap = ifap->ifa_next) { + if (ifap->ifa_addr == NULL) { + flog_err( + EC_LIB_INTERFACE, + "%s: nonsensical ifaddr with NULL ifa_addr, ifname %s", + __func__, + (ifap->ifa_name ? 
ifap->ifa_name : "(null)")); + continue; + } + + ifp = if_lookup_by_name(ifap->ifa_name, VRF_DEFAULT); + if (ifp == NULL) { + flog_err(EC_LIB_INTERFACE, + "%s: Can't lookup interface %s", __func__, + ifap->ifa_name); + continue; + } + + if (ifap->ifa_addr->sa_family == AF_INET) { + struct sockaddr_in *addr; + struct sockaddr_in *mask; + struct sockaddr_in *dest; + struct in_addr *dest_pnt; + int flags = 0; + + addr = (struct sockaddr_in *)ifap->ifa_addr; + mask = (struct sockaddr_in *)ifap->ifa_netmask; + prefixlen = ip_masklen(mask->sin_addr); + + dest_pnt = NULL; + + if (if_is_pointopoint(ifp) && ifap->ifa_dstaddr + && !IPV4_ADDR_SAME(&addr->sin_addr, + &((struct sockaddr_in *) + ifap->ifa_dstaddr) + ->sin_addr)) { + dest = (struct sockaddr_in *)ifap->ifa_dstaddr; + dest_pnt = &dest->sin_addr; + flags = ZEBRA_IFA_PEER; + } else if (ifap->ifa_broadaddr + && !IPV4_ADDR_SAME( + &addr->sin_addr, + &((struct sockaddr_in *) + ifap->ifa_broadaddr) + ->sin_addr)) { + dest = (struct sockaddr_in *) + ifap->ifa_broadaddr; + dest_pnt = &dest->sin_addr; + } + + connected_add_ipv4(ifp, flags, &addr->sin_addr, + prefixlen, dest_pnt, NULL, + METRIC_MAX); + } + if (ifap->ifa_addr->sa_family == AF_INET6) { + struct sockaddr_in6 *addr; + struct sockaddr_in6 *mask; + int flags = 0; + + addr = (struct sockaddr_in6 *)ifap->ifa_addr; + mask = (struct sockaddr_in6 *)ifap->ifa_netmask; + prefixlen = ip6_masklen(mask->sin6_addr); + +#if defined(KAME) + if (IN6_IS_ADDR_LINKLOCAL(&addr->sin6_addr)) { + addr->sin6_scope_id = + ntohs(*(uint16_t *)&addr->sin6_addr + .s6_addr[2]); + addr->sin6_addr.s6_addr[2] = + addr->sin6_addr.s6_addr[3] = 0; + } +#endif + + connected_add_ipv6(ifp, flags, &addr->sin6_addr, NULL, + prefixlen, NULL, METRIC_MAX); + } + } + + freeifaddrs(ifapfree); + + return 0; +} + +/* Fetch interface information via ioctl(). 
*/ +static void interface_info_ioctl() +{ + struct vrf *vrf = vrf_lookup_by_id(VRF_DEFAULT); + struct interface *ifp; + + FOR_ALL_INTERFACES (vrf, ifp) { + if_get_index(ifp); +#ifdef SIOCGIFHWADDR + if_get_hwaddr(ifp); +#endif /* SIOCGIFHWADDR */ + if_get_flags(ifp); + if_get_mtu(ifp); + if_get_metric(ifp); + } +} + +/* Lookup all interface information. */ +void interface_list(struct zebra_ns *zns) +{ + + zlog_info("%s: NS %u", __func__, zns->ns_id); + +/* Linux can do both proc & ioctl, ioctl is the only way to get + interface aliases in 2.2 series kernels. */ +#ifdef HAVE_PROC_NET_DEV + interface_list_proc(); +#endif /* HAVE_PROC_NET_DEV */ + interface_list_ioctl(); + + /* After listing is done, get index, address, flags and other + interface's information. */ + interface_info_ioctl(); + + if_getaddrs(); + +#if defined(HAVE_PROC_NET_IF_INET6) + /* Linux provides interface's IPv6 address via + /proc/net/if_inet6. */ + ifaddr_proc_ipv6(); +#endif /* HAVE_PROC_NET_IF_INET6 */ +} + +#endif /* OPEN_BSD */ diff --git a/zebra/if_netlink.c b/zebra/if_netlink.c new file mode 100644 index 0000000..a52bd34 --- /dev/null +++ b/zebra/if_netlink.c @@ -0,0 +1,2373 @@ +/* + * Interface looking up by netlink. + * Copyright (C) 1998 Kunihiro Ishiguro + * + * This file is part of GNU Zebra. + * + * GNU Zebra is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License as published by the + * Free Software Foundation; either version 2, or (at your option) any + * later version. + * + * GNU Zebra is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. 
+ * + * You should have received a copy of the GNU General Public License along + * with this program; see the file COPYING; if not, write to the Free Software + * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA + */ + +#include <zebra.h> + +#ifdef GNU_LINUX + +/* The following definition is to workaround an issue in the Linux kernel + * header files with redefinition of 'struct in6_addr' in both + * netinet/in.h and linux/in6.h. + * Reference - https://sourceware.org/ml/libc-alpha/2013-01/msg00599.html + */ +#define _LINUX_IN6_H +#define _LINUX_IF_H +#define _LINUX_IP_H + +#include <netinet/if_ether.h> +#include <linux/if_bridge.h> +#include <linux/if_link.h> +#include <linux/if_tunnel.h> +#include <net/if_arp.h> +#include <linux/sockios.h> +#include <linux/ethtool.h> + +#include "linklist.h" +#include "if.h" +#include "log.h" +#include "prefix.h" +#include "connected.h" +#include "table.h" +#include "memory.h" +#include "rib.h" +#include "thread.h" +#include "privs.h" +#include "nexthop.h" +#include "vrf.h" +#include "vrf_int.h" +#include "mpls.h" +#include "lib_errors.h" + +#include "vty.h" +#include "zebra/zserv.h" +#include "zebra/zebra_ns.h" +#include "zebra/zebra_vrf.h" +#include "zebra/rt.h" +#include "zebra/redistribute.h" +#include "zebra/interface.h" +#include "zebra/debug.h" +#include "zebra/rtadv.h" +#include "zebra/zebra_ptm.h" +#include "zebra/zebra_mpls.h" +#include "zebra/kernel_netlink.h" +#include "zebra/rt_netlink.h" +#include "zebra/if_netlink.h" +#include "zebra/zebra_errors.h" +#include "zebra/zebra_vxlan.h" +#include "zebra/zebra_evpn_mh.h" +#include "zebra/zebra_l2.h" +#include "zebra/netconf_netlink.h" +#include "zebra/zebra_trace.h" + +extern struct zebra_privs_t zserv_privs; +uint8_t frr_protodown_r_bit = FRR_PROTODOWN_REASON_DEFAULT_BIT; + +/* Note: on netlink systems, there should be a 1-to-1 mapping between interface + names and ifindex values. 
*/ +static void set_ifindex(struct interface *ifp, ifindex_t ifi_index, + struct zebra_ns *zns) +{ + struct interface *oifp; + + if (((oifp = if_lookup_by_index_per_ns(zns, ifi_index)) != NULL) + && (oifp != ifp)) { + if (ifi_index == IFINDEX_INTERNAL) + flog_err( + EC_LIB_INTERFACE, + "Netlink is setting interface %s ifindex to reserved internal value %u", + ifp->name, ifi_index); + else { + if (IS_ZEBRA_DEBUG_KERNEL) + zlog_debug( + "interface index %d was renamed from %s to %s", + ifi_index, oifp->name, ifp->name); + if (if_is_up(oifp)) + flog_err( + EC_LIB_INTERFACE, + "interface rename detected on up interface: index %d was renamed from %s to %s, results are uncertain!", + ifi_index, oifp->name, ifp->name); + if_delete_update(&oifp); + } + } + if_set_index(ifp, ifi_index); +} + +/* Utility function to parse hardware link-layer address and update ifp */ +static void netlink_interface_update_hw_addr(struct rtattr **tb, + struct interface *ifp) +{ + int i; + + if (tb[IFLA_ADDRESS]) { + int hw_addr_len; + + hw_addr_len = RTA_PAYLOAD(tb[IFLA_ADDRESS]); + + if (hw_addr_len > INTERFACE_HWADDR_MAX) + zlog_debug("Hardware address is too large: %d", + hw_addr_len); + else { + ifp->hw_addr_len = hw_addr_len; + memcpy(ifp->hw_addr, RTA_DATA(tb[IFLA_ADDRESS]), + hw_addr_len); + + for (i = 0; i < hw_addr_len; i++) + if (ifp->hw_addr[i] != 0) + break; + + if (i == hw_addr_len) + ifp->hw_addr_len = 0; + else + ifp->hw_addr_len = hw_addr_len; + } + } +} + +static enum zebra_link_type netlink_to_zebra_link_type(unsigned int hwt) +{ + switch (hwt) { + case ARPHRD_ETHER: + return ZEBRA_LLT_ETHER; + case ARPHRD_EETHER: + return ZEBRA_LLT_EETHER; + case ARPHRD_AX25: + return ZEBRA_LLT_AX25; + case ARPHRD_PRONET: + return ZEBRA_LLT_PRONET; + case ARPHRD_IEEE802: + return ZEBRA_LLT_IEEE802; + case ARPHRD_ARCNET: + return ZEBRA_LLT_ARCNET; + case ARPHRD_APPLETLK: + return ZEBRA_LLT_APPLETLK; + case ARPHRD_DLCI: + return ZEBRA_LLT_DLCI; + case ARPHRD_ATM: + return ZEBRA_LLT_ATM; + case 
ARPHRD_METRICOM: + return ZEBRA_LLT_METRICOM; + case ARPHRD_IEEE1394: + return ZEBRA_LLT_IEEE1394; + case ARPHRD_EUI64: + return ZEBRA_LLT_EUI64; + case ARPHRD_INFINIBAND: + return ZEBRA_LLT_INFINIBAND; + case ARPHRD_SLIP: + return ZEBRA_LLT_SLIP; + case ARPHRD_CSLIP: + return ZEBRA_LLT_CSLIP; + case ARPHRD_SLIP6: + return ZEBRA_LLT_SLIP6; + case ARPHRD_CSLIP6: + return ZEBRA_LLT_CSLIP6; + case ARPHRD_RSRVD: + return ZEBRA_LLT_RSRVD; + case ARPHRD_ADAPT: + return ZEBRA_LLT_ADAPT; + case ARPHRD_ROSE: + return ZEBRA_LLT_ROSE; + case ARPHRD_X25: + return ZEBRA_LLT_X25; + case ARPHRD_PPP: + return ZEBRA_LLT_PPP; + case ARPHRD_CISCO: + return ZEBRA_LLT_CHDLC; + case ARPHRD_LAPB: + return ZEBRA_LLT_LAPB; + case ARPHRD_RAWHDLC: + return ZEBRA_LLT_RAWHDLC; + case ARPHRD_TUNNEL: + return ZEBRA_LLT_IPIP; + case ARPHRD_TUNNEL6: + return ZEBRA_LLT_IPIP6; + case ARPHRD_FRAD: + return ZEBRA_LLT_FRAD; + case ARPHRD_SKIP: + return ZEBRA_LLT_SKIP; + case ARPHRD_LOOPBACK: + return ZEBRA_LLT_LOOPBACK; + case ARPHRD_LOCALTLK: + return ZEBRA_LLT_LOCALTLK; + case ARPHRD_FDDI: + return ZEBRA_LLT_FDDI; + case ARPHRD_SIT: + return ZEBRA_LLT_SIT; + case ARPHRD_IPDDP: + return ZEBRA_LLT_IPDDP; + case ARPHRD_IPGRE: + return ZEBRA_LLT_IPGRE; + case ARPHRD_PIMREG: + return ZEBRA_LLT_PIMREG; + case ARPHRD_HIPPI: + return ZEBRA_LLT_HIPPI; + case ARPHRD_ECONET: + return ZEBRA_LLT_ECONET; + case ARPHRD_IRDA: + return ZEBRA_LLT_IRDA; + case ARPHRD_FCPP: + return ZEBRA_LLT_FCPP; + case ARPHRD_FCAL: + return ZEBRA_LLT_FCAL; + case ARPHRD_FCPL: + return ZEBRA_LLT_FCPL; + case ARPHRD_FCFABRIC: + return ZEBRA_LLT_FCFABRIC; + case ARPHRD_IEEE802_TR: + return ZEBRA_LLT_IEEE802_TR; + case ARPHRD_IEEE80211: + return ZEBRA_LLT_IEEE80211; +#ifdef ARPHRD_IEEE802154 + case ARPHRD_IEEE802154: + return ZEBRA_LLT_IEEE802154; +#endif +#ifdef ARPHRD_IP6GRE + case ARPHRD_IP6GRE: + return ZEBRA_LLT_IP6GRE; +#endif +#ifdef ARPHRD_IEEE802154_PHY + case ARPHRD_IEEE802154_PHY: + return ZEBRA_LLT_IEEE802154_PHY; +#endif + + 
default: + return ZEBRA_LLT_UNKNOWN; + } +} + +static inline void zebra_if_set_ziftype(struct interface *ifp, + enum zebra_iftype zif_type, + enum zebra_slave_iftype zif_slave_type) +{ + struct zebra_if *zif; + + zif = (struct zebra_if *)ifp->info; + zif->zif_slave_type = zif_slave_type; + + if (zif->zif_type != zif_type) { + zif->zif_type = zif_type; + /* If the if_type has been set to bond initialize ES info + * against it. XXX - note that we don't handle the case where + * a zif changes from bond to non-bond; it is really + * an unexpected/error condition. + */ + zebra_evpn_if_init(zif); + } +} + +static void netlink_determine_zebra_iftype(const char *kind, + enum zebra_iftype *zif_type) +{ + *zif_type = ZEBRA_IF_OTHER; + + if (!kind) + return; + + if (strcmp(kind, "vrf") == 0) + *zif_type = ZEBRA_IF_VRF; + else if (strcmp(kind, "bridge") == 0) + *zif_type = ZEBRA_IF_BRIDGE; + else if (strcmp(kind, "vlan") == 0) + *zif_type = ZEBRA_IF_VLAN; + else if (strcmp(kind, "vxlan") == 0) + *zif_type = ZEBRA_IF_VXLAN; + else if (strcmp(kind, "macvlan") == 0) + *zif_type = ZEBRA_IF_MACVLAN; + else if (strcmp(kind, "veth") == 0) + *zif_type = ZEBRA_IF_VETH; + else if (strcmp(kind, "bond") == 0) + *zif_type = ZEBRA_IF_BOND; + else if (strcmp(kind, "bond_slave") == 0) + *zif_type = ZEBRA_IF_BOND_SLAVE; + else if (strcmp(kind, "gre") == 0) + *zif_type = ZEBRA_IF_GRE; +} + +static void netlink_vrf_change(struct nlmsghdr *h, struct rtattr *tb, + uint32_t ns_id, const char *name) +{ + struct ifinfomsg *ifi; + struct rtattr *linkinfo[IFLA_INFO_MAX + 1]; + struct rtattr *attr[IFLA_VRF_MAX + 1]; + struct vrf *vrf = NULL; + struct zebra_vrf *zvrf; + uint32_t nl_table_id; + + ifi = NLMSG_DATA(h); + + netlink_parse_rtattr_nested(linkinfo, IFLA_INFO_MAX, tb); + + if (!linkinfo[IFLA_INFO_DATA]) { + if (IS_ZEBRA_DEBUG_KERNEL) + zlog_debug( + "%s: IFLA_INFO_DATA missing from VRF message: %s", + __func__, name); + return; + } + + netlink_parse_rtattr_nested(attr, IFLA_VRF_MAX, + 
linkinfo[IFLA_INFO_DATA]); + if (!attr[IFLA_VRF_TABLE]) { + if (IS_ZEBRA_DEBUG_KERNEL) + zlog_debug( + "%s: IFLA_VRF_TABLE missing from VRF message: %s", + __func__, name); + return; + } + + nl_table_id = *(uint32_t *)RTA_DATA(attr[IFLA_VRF_TABLE]); + + if (h->nlmsg_type == RTM_NEWLINK) { + if (IS_ZEBRA_DEBUG_KERNEL) + zlog_debug("RTM_NEWLINK for VRF %s(%u) table %u", name, + ifi->ifi_index, nl_table_id); + + if (!vrf_lookup_by_id((vrf_id_t)ifi->ifi_index)) { + vrf_id_t exist_id; + + exist_id = vrf_lookup_by_table(nl_table_id, ns_id); + if (exist_id != VRF_DEFAULT) { + vrf = vrf_lookup_by_id(exist_id); + + flog_err( + EC_ZEBRA_VRF_MISCONFIGURED, + "VRF %s id %u table id overlaps existing vrf %s, misconfiguration exiting", + name, ifi->ifi_index, vrf->name); + exit(-1); + } + } + + vrf = vrf_update((vrf_id_t)ifi->ifi_index, name); + if (!vrf) { + flog_err(EC_LIB_INTERFACE, "VRF %s id %u not created", + name, ifi->ifi_index); + return; + } + + /* + * This is the only place that we get the actual kernel table_id + * being used. We need it to set the table_id of the routes + * we are passing to the kernel.... And to throw some totally + * awesome parties. that too. + * + * At this point we *must* have a zvrf because the vrf_create + * callback creates one. We *must* set the table id + * before the vrf_enable because of( at the very least ) + * static routes being delayed for installation until + * during the vrf_enable callbacks. + */ + zvrf = (struct zebra_vrf *)vrf->info; + zvrf->table_id = nl_table_id; + + /* Enable the created VRF. 
*/ + if (!vrf_enable(vrf)) { + flog_err(EC_LIB_INTERFACE, + "Failed to enable VRF %s id %u", name, + ifi->ifi_index); + return; + } + + } else // h->nlmsg_type == RTM_DELLINK + { + if (IS_ZEBRA_DEBUG_KERNEL) + zlog_debug("RTM_DELLINK for VRF %s(%u)", name, + ifi->ifi_index); + + vrf = vrf_lookup_by_id((vrf_id_t)ifi->ifi_index); + + if (!vrf) { + flog_warn(EC_ZEBRA_VRF_NOT_FOUND, "%s: vrf not found", + __func__); + return; + } + + vrf_delete(vrf); + } +} + +static uint32_t get_iflink_speed(struct interface *interface, int *error) +{ + struct ifreq ifdata; + struct ethtool_cmd ecmd; + int sd; + int rc; + const char *ifname = interface->name; + + if (error) + *error = 0; + /* initialize struct */ + memset(&ifdata, 0, sizeof(ifdata)); + + /* set interface name */ + strlcpy(ifdata.ifr_name, ifname, sizeof(ifdata.ifr_name)); + + /* initialize ethtool interface */ + memset(&ecmd, 0, sizeof(ecmd)); + ecmd.cmd = ETHTOOL_GSET; /* ETHTOOL_GLINK */ + ifdata.ifr_data = (caddr_t)&ecmd; + + /* use ioctl to get speed of an interface */ + frr_with_privs(&zserv_privs) { + sd = vrf_socket(PF_INET, SOCK_DGRAM, IPPROTO_IP, + interface->vrf->vrf_id, NULL); + if (sd < 0) { + if (IS_ZEBRA_DEBUG_KERNEL) + zlog_debug("Failure to read interface %s speed: %d %s", + ifname, errno, safe_strerror(errno)); + /* no vrf socket creation may probably mean vrf issue */ + if (error) + *error = -1; + return 0; + } + /* Get the current link state for the interface */ + rc = vrf_ioctl(interface->vrf->vrf_id, sd, SIOCETHTOOL, + (char *)&ifdata); + } + if (rc < 0) { + if (errno != EOPNOTSUPP && IS_ZEBRA_DEBUG_KERNEL) + zlog_debug( + "IOCTL failure to read interface %s speed: %d %s", + ifname, errno, safe_strerror(errno)); + /* no device means interface unreachable */ + if (errno == ENODEV && error) + *error = -1; + ecmd.speed_hi = 0; + ecmd.speed = 0; + } + + close(sd); + + return ((uint32_t)ecmd.speed_hi << 16) | ecmd.speed; +} + +uint32_t kernel_get_speed(struct interface *ifp, int *error) +{ + return 
get_iflink_speed(ifp, error); +} + +static ssize_t +netlink_gre_set_msg_encoder(struct zebra_dplane_ctx *ctx, void *buf, + size_t buflen) +{ + struct { + struct nlmsghdr n; + struct ifinfomsg ifi; + char buf[]; + } *req = buf; + uint32_t link_idx; + unsigned int mtu; + struct rtattr *rta_info, *rta_data; + const struct zebra_l2info_gre *gre_info; + + if (buflen < sizeof(*req)) + return 0; + memset(req, 0, sizeof(*req)); + + req->n.nlmsg_type = RTM_NEWLINK; + req->n.nlmsg_len = NLMSG_LENGTH(sizeof(struct ifinfomsg)); + req->n.nlmsg_flags = NLM_F_REQUEST; + + req->ifi.ifi_index = dplane_ctx_get_ifindex(ctx); + + gre_info = dplane_ctx_gre_get_info(ctx); + if (!gre_info) + return 0; + + req->ifi.ifi_change = 0xFFFFFFFF; + link_idx = dplane_ctx_gre_get_link_ifindex(ctx); + mtu = dplane_ctx_gre_get_mtu(ctx); + + if (mtu && !nl_attr_put32(&req->n, buflen, IFLA_MTU, mtu)) + return 0; + + rta_info = nl_attr_nest(&req->n, buflen, IFLA_LINKINFO); + if (!rta_info) + return 0; + + if (!nl_attr_put(&req->n, buflen, IFLA_INFO_KIND, "gre", 3)) + return 0; + + rta_data = nl_attr_nest(&req->n, buflen, IFLA_INFO_DATA); + if (!rta_data) + return 0; + + if (!nl_attr_put32(&req->n, buflen, IFLA_GRE_LINK, link_idx)) + return 0; + + if (gre_info->vtep_ip.s_addr && + !nl_attr_put32(&req->n, buflen, IFLA_GRE_LOCAL, + gre_info->vtep_ip.s_addr)) + return 0; + + if (gre_info->vtep_ip_remote.s_addr && + !nl_attr_put32(&req->n, buflen, IFLA_GRE_REMOTE, + gre_info->vtep_ip_remote.s_addr)) + return 0; + + if (gre_info->ikey && + !nl_attr_put32(&req->n, buflen, IFLA_GRE_IKEY, + gre_info->ikey)) + return 0; + if (gre_info->okey && + !nl_attr_put32(&req->n, buflen, IFLA_GRE_IKEY, + gre_info->okey)) + return 0; + + nl_attr_nest_end(&req->n, rta_data); + nl_attr_nest_end(&req->n, rta_info); + + return NLMSG_ALIGN(req->n.nlmsg_len); +} + +static int netlink_extract_bridge_info(struct rtattr *link_data, + struct zebra_l2info_bridge *bridge_info) +{ + struct rtattr *attr[IFLA_BR_MAX + 1]; + + 
memset(bridge_info, 0, sizeof(*bridge_info)); + netlink_parse_rtattr_nested(attr, IFLA_BR_MAX, link_data); + if (attr[IFLA_BR_VLAN_FILTERING]) + bridge_info->vlan_aware = + *(uint8_t *)RTA_DATA(attr[IFLA_BR_VLAN_FILTERING]); + return 0; +} + +static int netlink_extract_vlan_info(struct rtattr *link_data, + struct zebra_l2info_vlan *vlan_info) +{ + struct rtattr *attr[IFLA_VLAN_MAX + 1]; + vlanid_t vid_in_msg; + + memset(vlan_info, 0, sizeof(*vlan_info)); + netlink_parse_rtattr_nested(attr, IFLA_VLAN_MAX, link_data); + if (!attr[IFLA_VLAN_ID]) { + if (IS_ZEBRA_DEBUG_KERNEL) + zlog_debug("IFLA_VLAN_ID missing from VLAN IF message"); + return -1; + } + + vid_in_msg = *(vlanid_t *)RTA_DATA(attr[IFLA_VLAN_ID]); + vlan_info->vid = vid_in_msg; + return 0; +} + +static int netlink_extract_gre_info(struct rtattr *link_data, + struct zebra_l2info_gre *gre_info) +{ + struct rtattr *attr[IFLA_GRE_MAX + 1]; + + memset(gre_info, 0, sizeof(*gre_info)); + memset(attr, 0, sizeof(attr)); + netlink_parse_rtattr_nested(attr, IFLA_GRE_MAX, link_data); + + if (!attr[IFLA_GRE_LOCAL]) { + if (IS_ZEBRA_DEBUG_KERNEL) + zlog_debug( + "IFLA_GRE_LOCAL missing from GRE IF message"); + } else + gre_info->vtep_ip = + *(struct in_addr *)RTA_DATA(attr[IFLA_GRE_LOCAL]); + if (!attr[IFLA_GRE_REMOTE]) { + if (IS_ZEBRA_DEBUG_KERNEL) + zlog_debug( + "IFLA_GRE_REMOTE missing from GRE IF message"); + } else + gre_info->vtep_ip_remote = + *(struct in_addr *)RTA_DATA(attr[IFLA_GRE_REMOTE]); + + if (!attr[IFLA_GRE_LINK]) { + if (IS_ZEBRA_DEBUG_KERNEL) + zlog_debug("IFLA_GRE_LINK missing from GRE IF message"); + } else { + gre_info->ifindex_link = + *(ifindex_t *)RTA_DATA(attr[IFLA_GRE_LINK]); + if (IS_ZEBRA_DEBUG_KERNEL) + zlog_debug("IFLA_GRE_LINK obtained is %u", + gre_info->ifindex_link); + } + if (attr[IFLA_GRE_IKEY]) + gre_info->ikey = *(uint32_t *)RTA_DATA(attr[IFLA_GRE_IKEY]); + if (attr[IFLA_GRE_OKEY]) + gre_info->okey = *(uint32_t *)RTA_DATA(attr[IFLA_GRE_OKEY]); + return 0; +} + +static int 
netlink_extract_vxlan_info(struct rtattr *link_data, + struct zebra_l2info_vxlan *vxl_info) +{ + struct rtattr *attr[IFLA_VXLAN_MAX + 1]; + vni_t vni_in_msg; + struct in_addr vtep_ip_in_msg; + ifindex_t ifindex_link; + + memset(vxl_info, 0, sizeof(*vxl_info)); + netlink_parse_rtattr_nested(attr, IFLA_VXLAN_MAX, link_data); + if (!attr[IFLA_VXLAN_ID]) { + if (IS_ZEBRA_DEBUG_KERNEL) + zlog_debug( + "IFLA_VXLAN_ID missing from VXLAN IF message"); + return -1; + } + + vni_in_msg = *(vni_t *)RTA_DATA(attr[IFLA_VXLAN_ID]); + vxl_info->vni = vni_in_msg; + if (!attr[IFLA_VXLAN_LOCAL]) { + if (IS_ZEBRA_DEBUG_KERNEL) + zlog_debug( + "IFLA_VXLAN_LOCAL missing from VXLAN IF message"); + } else { + vtep_ip_in_msg = + *(struct in_addr *)RTA_DATA(attr[IFLA_VXLAN_LOCAL]); + vxl_info->vtep_ip = vtep_ip_in_msg; + } + + if (attr[IFLA_VXLAN_GROUP]) { + vxl_info->mcast_grp = + *(struct in_addr *)RTA_DATA(attr[IFLA_VXLAN_GROUP]); + } + + if (!attr[IFLA_VXLAN_LINK]) { + if (IS_ZEBRA_DEBUG_KERNEL) + zlog_debug("IFLA_VXLAN_LINK missing from VXLAN IF message"); + } else { + ifindex_link = + *(ifindex_t *)RTA_DATA(attr[IFLA_VXLAN_LINK]); + vxl_info->ifindex_link = ifindex_link; + } + return 0; +} + +/* + * Extract and save L2 params (of interest) for an interface. When a + * bridge interface is added or updated, take further actions to map + * its members. Likewise, for VxLAN interface. 
+ */ +static void netlink_interface_update_l2info(struct interface *ifp, + struct rtattr *link_data, int add, + ns_id_t link_nsid) +{ + if (!link_data) + return; + + if (IS_ZEBRA_IF_BRIDGE(ifp)) { + struct zebra_l2info_bridge bridge_info; + + netlink_extract_bridge_info(link_data, &bridge_info); + zebra_l2_bridge_add_update(ifp, &bridge_info, add); + } else if (IS_ZEBRA_IF_VLAN(ifp)) { + struct zebra_l2info_vlan vlan_info; + + netlink_extract_vlan_info(link_data, &vlan_info); + zebra_l2_vlanif_update(ifp, &vlan_info); + zebra_evpn_acc_bd_svi_set(ifp->info, NULL, + !!if_is_operative(ifp)); + } else if (IS_ZEBRA_IF_VXLAN(ifp)) { + struct zebra_l2info_vxlan vxlan_info; + + netlink_extract_vxlan_info(link_data, &vxlan_info); + vxlan_info.link_nsid = link_nsid; + zebra_l2_vxlanif_add_update(ifp, &vxlan_info, add); + if (link_nsid != NS_UNKNOWN && + vxlan_info.ifindex_link) + zebra_if_update_link(ifp, vxlan_info.ifindex_link, + link_nsid); + } else if (IS_ZEBRA_IF_GRE(ifp)) { + struct zebra_l2info_gre gre_info; + + netlink_extract_gre_info(link_data, &gre_info); + gre_info.link_nsid = link_nsid; + zebra_l2_greif_add_update(ifp, &gre_info, add); + if (link_nsid != NS_UNKNOWN && + gre_info.ifindex_link) + zebra_if_update_link(ifp, gre_info.ifindex_link, + link_nsid); + } +} + +static int netlink_bridge_vxlan_update(struct interface *ifp, + struct rtattr *af_spec) +{ + struct rtattr *aftb[IFLA_BRIDGE_MAX + 1]; + struct bridge_vlan_info *vinfo; + vlanid_t access_vlan; + + if (!af_spec) + return 0; + + /* There is a 1-to-1 mapping of VLAN to VxLAN - hence + * only 1 access VLAN is accepted. 
+ */ + netlink_parse_rtattr_nested(aftb, IFLA_BRIDGE_MAX, af_spec); + if (!aftb[IFLA_BRIDGE_VLAN_INFO]) + return 0; + + vinfo = RTA_DATA(aftb[IFLA_BRIDGE_VLAN_INFO]); + if (!(vinfo->flags & BRIDGE_VLAN_INFO_PVID)) + return 0; + + access_vlan = (vlanid_t)vinfo->vid; + if (IS_ZEBRA_DEBUG_KERNEL) + zlog_debug("Access VLAN %u for VxLAN IF %s(%u)", access_vlan, + ifp->name, ifp->ifindex); + zebra_l2_vxlanif_update_access_vlan(ifp, access_vlan); + return 0; +} + +static void netlink_bridge_vlan_update(struct interface *ifp, + struct rtattr *af_spec) +{ + struct rtattr *i; + int rem; + uint16_t vid_range_start = 0; + struct zebra_if *zif; + bitfield_t old_vlan_bitmap; + struct bridge_vlan_info *vinfo; + + zif = (struct zebra_if *)ifp->info; + + /* cache the old bitmap addrs */ + old_vlan_bitmap = zif->vlan_bitmap; + /* create a new bitmap space for re-eval */ + bf_init(zif->vlan_bitmap, IF_VLAN_BITMAP_MAX); + + if (af_spec) { + for (i = RTA_DATA(af_spec), rem = RTA_PAYLOAD(af_spec); + RTA_OK(i, rem); i = RTA_NEXT(i, rem)) { + + if (i->rta_type != IFLA_BRIDGE_VLAN_INFO) + continue; + + vinfo = RTA_DATA(i); + + if (vinfo->flags & BRIDGE_VLAN_INFO_RANGE_BEGIN) { + vid_range_start = vinfo->vid; + continue; + } + + if (!(vinfo->flags & BRIDGE_VLAN_INFO_RANGE_END)) + vid_range_start = vinfo->vid; + + zebra_vlan_bitmap_compute(ifp, vid_range_start, + vinfo->vid); + } + } + + zebra_vlan_mbr_re_eval(ifp, old_vlan_bitmap); + + bf_free(old_vlan_bitmap); +} + +static int netlink_bridge_interface(struct nlmsghdr *h, int len, ns_id_t ns_id, + int startup) +{ + char *name = NULL; + struct ifinfomsg *ifi; + struct rtattr *tb[IFLA_MAX + 1]; + struct interface *ifp; + struct zebra_if *zif; + struct rtattr *af_spec; + + /* Fetch name and ifindex */ + ifi = NLMSG_DATA(h); + netlink_parse_rtattr(tb, IFLA_MAX, IFLA_RTA(ifi), len); + + if (tb[IFLA_IFNAME] == NULL) + return -1; + name = (char *)RTA_DATA(tb[IFLA_IFNAME]); + + /* The interface should already be known, if not discard. 
*/ + ifp = if_lookup_by_index_per_ns(zebra_ns_lookup(ns_id), ifi->ifi_index); + if (!ifp) { + zlog_debug("Cannot find bridge IF %s(%u)", name, + ifi->ifi_index); + return 0; + } + + /* We are only interested in the access VLAN i.e., AF_SPEC */ + af_spec = tb[IFLA_AF_SPEC]; + + if (IS_ZEBRA_IF_VXLAN(ifp)) + return netlink_bridge_vxlan_update(ifp, af_spec); + + /* build vlan bitmap associated with this interface if that + * device type is interested in the vlans + */ + zif = (struct zebra_if *)ifp->info; + if (bf_is_inited(zif->vlan_bitmap)) + netlink_bridge_vlan_update(ifp, af_spec); + + return 0; +} + +static bool is_if_protodown_reason_only_frr(uint32_t rc_bitfield) +{ + /* This shouldn't be possible */ + assert(frr_protodown_r_bit < 32); + return (rc_bitfield == (((uint32_t)1) << frr_protodown_r_bit)); +} + +/* + * Process interface protodown dplane update. + * + * If the interface is an es bond member then it must follow EVPN's + * protodown setting. + */ +static void netlink_proc_dplane_if_protodown(struct zebra_if *zif, + struct rtattr **tb) +{ + bool protodown; + bool old_protodown; + uint32_t rc_bitfield = 0; + struct rtattr *pd_reason_info[IFLA_MAX + 1]; + + protodown = !!*(uint8_t *)RTA_DATA(tb[IFLA_PROTO_DOWN]); + + if (tb[IFLA_PROTO_DOWN_REASON]) { + netlink_parse_rtattr_nested(pd_reason_info, IFLA_INFO_MAX, + tb[IFLA_PROTO_DOWN_REASON]); + + if (pd_reason_info[IFLA_PROTO_DOWN_REASON_VALUE]) + rc_bitfield = *(uint32_t *)RTA_DATA( + pd_reason_info[IFLA_PROTO_DOWN_REASON_VALUE]); + } + + /* + * Set our reason code to note it wasn't us. + * If the reason we got from the kernel is ONLY frr though, don't + * set it. 
+ */ + COND_FLAG(zif->protodown_rc, ZEBRA_PROTODOWN_EXTERNAL, + protodown && rc_bitfield && + !is_if_protodown_reason_only_frr(rc_bitfield)); + + + old_protodown = !!ZEBRA_IF_IS_PROTODOWN(zif); + if (protodown == old_protodown) + return; + + if (IS_ZEBRA_DEBUG_EVPN_MH_ES || IS_ZEBRA_DEBUG_KERNEL) + zlog_debug("interface %s dplane change, protdown %s", + zif->ifp->name, protodown ? "on" : "off"); + + /* Set protodown, respectively */ + COND_FLAG(zif->flags, ZIF_FLAG_PROTODOWN, protodown); + + if (zebra_evpn_is_es_bond_member(zif->ifp)) { + /* Check it's not already being sent to the dplane first */ + if (protodown && + CHECK_FLAG(zif->flags, ZIF_FLAG_SET_PROTODOWN)) { + if (IS_ZEBRA_DEBUG_EVPN_MH_ES || IS_ZEBRA_DEBUG_KERNEL) + zlog_debug( + "bond mbr %s protodown on recv'd but already sent protodown on to the dplane", + zif->ifp->name); + return; + } + + if (!protodown && + CHECK_FLAG(zif->flags, ZIF_FLAG_UNSET_PROTODOWN)) { + if (IS_ZEBRA_DEBUG_EVPN_MH_ES || IS_ZEBRA_DEBUG_KERNEL) + zlog_debug( + "bond mbr %s protodown off recv'd but already sent protodown off to the dplane", + zif->ifp->name); + return; + } + + if (IS_ZEBRA_DEBUG_EVPN_MH_ES || IS_ZEBRA_DEBUG_KERNEL) + zlog_debug( + "bond mbr %s reinstate protodown %s in the dplane", + zif->ifp->name, old_protodown ? "on" : "off"); + + if (old_protodown) + SET_FLAG(zif->flags, ZIF_FLAG_SET_PROTODOWN); + else + SET_FLAG(zif->flags, ZIF_FLAG_UNSET_PROTODOWN); + + dplane_intf_update(zif->ifp); + } +} + +static uint8_t netlink_parse_lacp_bypass(struct rtattr **linkinfo) +{ + uint8_t bypass = 0; + struct rtattr *mbrinfo[IFLA_BOND_SLAVE_MAX + 1]; + + netlink_parse_rtattr_nested(mbrinfo, IFLA_BOND_SLAVE_MAX, + linkinfo[IFLA_INFO_SLAVE_DATA]); + if (mbrinfo[IFLA_BOND_SLAVE_AD_RX_BYPASS]) + bypass = *(uint8_t *)RTA_DATA( + mbrinfo[IFLA_BOND_SLAVE_AD_RX_BYPASS]); + + return bypass; +} + +/* + * Only called at startup to cleanup leftover protodown reasons we may + * have not cleaned up. We leave protodown set though. 
+ */ +static void if_sweep_protodown(struct zebra_if *zif) +{ + bool protodown; + + protodown = !!ZEBRA_IF_IS_PROTODOWN(zif); + + if (!protodown) + return; + + if (IS_ZEBRA_DEBUG_KERNEL) + zlog_debug("interface %s sweeping protodown %s reason 0x%x", + zif->ifp->name, protodown ? "on" : "off", + zif->protodown_rc); + + /* Only clear our reason codes, leave external if it was set */ + UNSET_FLAG(zif->protodown_rc, ZEBRA_PROTODOWN_ALL); + dplane_intf_update(zif->ifp); +} + +/* + * Called from interface_lookup_netlink(). This function is only used + * during bootstrap. + */ +static int netlink_interface(struct nlmsghdr *h, ns_id_t ns_id, int startup) +{ + int len; + struct ifinfomsg *ifi; + struct rtattr *tb[IFLA_MAX + 1]; + struct rtattr *linkinfo[IFLA_MAX + 1]; + struct interface *ifp; + char *name = NULL; + char *kind = NULL; + char *desc = NULL; + char *slave_kind = NULL; + struct zebra_ns *zns = NULL; + vrf_id_t vrf_id = VRF_DEFAULT; + enum zebra_iftype zif_type = ZEBRA_IF_OTHER; + enum zebra_slave_iftype zif_slave_type = ZEBRA_IF_SLAVE_NONE; + ifindex_t bridge_ifindex = IFINDEX_INTERNAL; + ifindex_t link_ifindex = IFINDEX_INTERNAL; + ifindex_t bond_ifindex = IFINDEX_INTERNAL; + struct zebra_if *zif; + ns_id_t link_nsid = ns_id; + uint8_t bypass = 0; + + frrtrace(3, frr_zebra, netlink_interface, h, ns_id, startup); + + zns = zebra_ns_lookup(ns_id); + ifi = NLMSG_DATA(h); + + if (h->nlmsg_type != RTM_NEWLINK) + return 0; + + len = h->nlmsg_len - NLMSG_LENGTH(sizeof(struct ifinfomsg)); + if (len < 0) { + zlog_err( + "%s: Message received from netlink is of a broken size: %d %zu", + __func__, h->nlmsg_len, + (size_t)NLMSG_LENGTH(sizeof(struct ifinfomsg))); + return -1; + } + + /* We are interested in some AF_BRIDGE notifications. */ + if (ifi->ifi_family == AF_BRIDGE) + return netlink_bridge_interface(h, len, ns_id, startup); + + /* Looking up interface name. 
*/ + memset(linkinfo, 0, sizeof(linkinfo)); + netlink_parse_rtattr_flags(tb, IFLA_MAX, IFLA_RTA(ifi), len, + NLA_F_NESTED); + + /* check for wireless messages to ignore */ + if ((tb[IFLA_WIRELESS] != NULL) && (ifi->ifi_change == 0)) { + if (IS_ZEBRA_DEBUG_KERNEL) + zlog_debug("%s: ignoring IFLA_WIRELESS message", + __func__); + return 0; + } + + if (tb[IFLA_IFNAME] == NULL) + return -1; + name = (char *)RTA_DATA(tb[IFLA_IFNAME]); + + if (tb[IFLA_IFALIAS]) + desc = (char *)RTA_DATA(tb[IFLA_IFALIAS]); + + if (tb[IFLA_LINKINFO]) { + netlink_parse_rtattr_nested(linkinfo, IFLA_INFO_MAX, + tb[IFLA_LINKINFO]); + + if (linkinfo[IFLA_INFO_KIND]) + kind = RTA_DATA(linkinfo[IFLA_INFO_KIND]); + + if (linkinfo[IFLA_INFO_SLAVE_KIND]) + slave_kind = RTA_DATA(linkinfo[IFLA_INFO_SLAVE_KIND]); + + if ((slave_kind != NULL) && strcmp(slave_kind, "bond") == 0) + netlink_determine_zebra_iftype("bond_slave", &zif_type); + else + netlink_determine_zebra_iftype(kind, &zif_type); + } + + /* If VRF, create the VRF structure itself. */ + if (zif_type == ZEBRA_IF_VRF && !vrf_is_backend_netns()) { + netlink_vrf_change(h, tb[IFLA_LINKINFO], ns_id, name); + vrf_id = (vrf_id_t)ifi->ifi_index; + } + + if (tb[IFLA_MASTER]) { + if (slave_kind && (strcmp(slave_kind, "vrf") == 0) + && !vrf_is_backend_netns()) { + zif_slave_type = ZEBRA_IF_SLAVE_VRF; + vrf_id = *(uint32_t *)RTA_DATA(tb[IFLA_MASTER]); + } else if (slave_kind && (strcmp(slave_kind, "bridge") == 0)) { + zif_slave_type = ZEBRA_IF_SLAVE_BRIDGE; + bridge_ifindex = + *(ifindex_t *)RTA_DATA(tb[IFLA_MASTER]); + } else if (slave_kind && (strcmp(slave_kind, "bond") == 0)) { + zif_slave_type = ZEBRA_IF_SLAVE_BOND; + bond_ifindex = *(ifindex_t *)RTA_DATA(tb[IFLA_MASTER]); + bypass = netlink_parse_lacp_bypass(linkinfo); + } else + zif_slave_type = ZEBRA_IF_SLAVE_OTHER; + } + if (vrf_is_backend_netns()) + vrf_id = (vrf_id_t)ns_id; + + /* If linking to another interface, note it. 
*/ + if (tb[IFLA_LINK]) + link_ifindex = *(ifindex_t *)RTA_DATA(tb[IFLA_LINK]); + + if (tb[IFLA_LINK_NETNSID]) { + link_nsid = *(ns_id_t *)RTA_DATA(tb[IFLA_LINK_NETNSID]); + link_nsid = ns_id_get_absolute(ns_id, link_nsid); + } + + ifp = if_get_by_name(name, vrf_id, NULL); + set_ifindex(ifp, ifi->ifi_index, zns); /* add it to ns struct */ + + ifp->flags = ifi->ifi_flags & 0x0000fffff; + ifp->mtu6 = ifp->mtu = *(uint32_t *)RTA_DATA(tb[IFLA_MTU]); + ifp->metric = 0; + ifp->speed = get_iflink_speed(ifp, NULL); + ifp->ptm_status = ZEBRA_PTM_STATUS_UNKNOWN; + + /* Set zebra interface type */ + zebra_if_set_ziftype(ifp, zif_type, zif_slave_type); + if (IS_ZEBRA_IF_VRF(ifp)) + SET_FLAG(ifp->status, ZEBRA_INTERFACE_VRF_LOOPBACK); + + /* + * Just set the @link/lower-device ifindex. During nldump interfaces are + * not ordered in any fashion so we may end up getting upper devices + * before lower devices. We will setup the real linkage once the dump + * is complete. + */ + zif = (struct zebra_if *)ifp->info; + zif->link_ifindex = link_ifindex; + + if (desc) { + XFREE(MTYPE_TMP, zif->desc); + zif->desc = XSTRDUP(MTYPE_TMP, desc); + } + + /* Hardware type and address. */ + ifp->ll_type = netlink_to_zebra_link_type(ifi->ifi_type); + + netlink_interface_update_hw_addr(tb, ifp); + + if_add_update(ifp); + + /* Extract and save L2 interface information, take additional actions. 
/* Request for specific interface or address information from the kernel.
 * Builds an RTM_GET* dump request (NLM_F_ROOT|NLM_F_MATCH) on the command
 * socket; an optional IFLA_EXT_MASK filter narrows the dump (e.g. bridge
 * VLAN info). Returns the result of netlink_request() (< 0 on error). */
static int netlink_request_intf_addr(struct nlsock *netlink_cmd, int family,
				     int type, uint32_t filter_mask)
{
	struct {
		struct nlmsghdr n;
		struct ifinfomsg ifm;
		char buf[256];
	} req;

	frrtrace(4, frr_zebra, netlink_request_intf_addr, netlink_cmd, family,
		 type, filter_mask);

	/* Form the request, specifying filter (rtattr) if needed. */
	memset(&req, 0, sizeof(req));
	req.n.nlmsg_type = type;
	req.n.nlmsg_flags = NLM_F_ROOT | NLM_F_MATCH | NLM_F_REQUEST;
	req.n.nlmsg_len = NLMSG_LENGTH(sizeof(struct ifinfomsg));
	req.ifm.ifi_family = family;

	/* Include filter, if specified. */
	if (filter_mask)
		nl_attr_put32(&req.n, sizeof(req), IFLA_EXT_MASK, filter_mask);

	return netlink_request(netlink_cmd, &req);
}

/* Enqueue a GRE-set dataplane context into the current netlink batch.
 * Only DPLANE_OP_GRE_SET is legal here (asserted). */
enum netlink_msg_status
netlink_put_gre_set_msg(struct nl_batch *bth, struct zebra_dplane_ctx *ctx)
{
	enum dplane_op_e op;
	enum netlink_msg_status ret;

	op = dplane_ctx_get_op(ctx);
	assert(op == DPLANE_OP_GRE_SET);

	ret = netlink_batch_add_msg(bth, ctx, netlink_gre_set_msg_encoder, false);

	return ret;
}

/* Interface lookup by netlink socket.
 * Performs the startup interface dump sequence: plain link dump, bridge
 * (AF_BRIDGE) link dump with VLAN info, then per-tunnel dumps, and finally
 * resolves upper/lower device linkages once everything is known.
 * Returns 0 on success, < 0 on the first failing request/parse. */
int interface_lookup_netlink(struct zebra_ns *zns)
{
	int ret;
	struct zebra_dplane_info dp_info;
	struct nlsock *netlink_cmd = &zns->netlink_cmd;

	/* Capture key info from ns struct */
	zebra_dplane_info_from_zns(&dp_info, zns, true /*is_cmd*/);

	/* Get interface information. */
	ret = netlink_request_intf_addr(netlink_cmd, AF_PACKET, RTM_GETLINK, 0);
	if (ret < 0)
		return ret;
	ret = netlink_parse_info(netlink_interface, netlink_cmd, &dp_info, 0,
				 true);
	if (ret < 0)
		return ret;

	/* Get interface information - for bridge interfaces. */
	ret = netlink_request_intf_addr(netlink_cmd, AF_BRIDGE, RTM_GETLINK,
					RTEXT_FILTER_BRVLAN);
	if (ret < 0)
		return ret;
	ret = netlink_parse_info(netlink_interface, netlink_cmd, &dp_info, 0,
				 true);
	if (ret < 0)
		return ret;

	/*
	 * So netlink_tunneldump_read will initiate a request
	 * per tunnel to get data.  If we are on a kernel that
	 * does not support this then we will get X error messages
	 * (one per tunnel request )back which netlink_parse_info will
	 * stop after the first one.  So we need to read equivalent
	 * error messages per tunnel then we can continue.
	 * if we do not gather all the read failures then
	 * later requests will not work right.
	 */
	ret = netlink_tunneldump_read(zns);
	if (ret < 0)
		return ret;

	/* fixup linkages */
	zebra_if_update_all_links(zns);
	return 0;
}

/**
 * interface_addr_lookup_netlink() - Look up interface addresses
 *
 * Dumps IPv4 then IPv6 addresses (RTM_GETADDR) and feeds each reply
 * through netlink_interface_addr().
 *
 * @zns:	Zebra netlink socket
 * Return:	Result status (0 ok, < 0 on error)
 */
static int interface_addr_lookup_netlink(struct zebra_ns *zns)
{
	int ret;
	struct zebra_dplane_info dp_info;
	struct nlsock *netlink_cmd = &zns->netlink_cmd;

	/* Capture key info from ns struct */
	zebra_dplane_info_from_zns(&dp_info, zns, true /*is_cmd*/);

	/* Get IPv4 address of the interfaces. */
	ret = netlink_request_intf_addr(netlink_cmd, AF_INET, RTM_GETADDR, 0);
	if (ret < 0)
		return ret;
	ret = netlink_parse_info(netlink_interface_addr, netlink_cmd, &dp_info,
				 0, true);
	if (ret < 0)
		return ret;

	/* Get IPv6 address of the interfaces. */
	ret = netlink_request_intf_addr(netlink_cmd, AF_INET6, RTM_GETADDR, 0);
	if (ret < 0)
		return ret;
	ret = netlink_parse_info(netlink_interface_addr, netlink_cmd, &dp_info,
				 0, true);
	if (ret < 0)
		return ret;

	return 0;
}

/* Enslave 'slave' to 'master' (e.g. attach an interface to a VRF or
 * bridge device) via an RTM_SETLINK carrying IFLA_MASTER.
 * NOTE(review): always issued in the default NS command socket. */
int kernel_interface_set_master(struct interface *master,
				struct interface *slave)
{
	struct zebra_ns *zns = zebra_ns_lookup(NS_DEFAULT);

	struct {
		struct nlmsghdr n;
		struct ifinfomsg ifa;
		char buf[NL_PKT_BUF_SIZE];
	} req;

	memset(&req, 0, sizeof(req));

	req.n.nlmsg_len = NLMSG_LENGTH(sizeof(struct ifinfomsg));
	req.n.nlmsg_flags = NLM_F_REQUEST;
	req.n.nlmsg_type = RTM_SETLINK;
	req.n.nlmsg_pid = zns->netlink_cmd.snl.nl_pid;

	req.ifa.ifi_index = slave->ifindex;

	nl_attr_put32(&req.n, sizeof(req), IFLA_MASTER, master->ifindex);
	nl_attr_put32(&req.n, sizeof(req), IFLA_LINK, slave->ifindex);

	return netlink_talk(netlink_talk_filter, &req.n, &zns->netlink_cmd, zns,
			    false);
}

/* Interface address modification.
 * Encodes an RTM_NEWADDR/RTM_DELADDR message from a dataplane context
 * into 'buf'.  Returns the aligned message length, or 0 when 'buf' is
 * too small (caller is expected to retry with a bigger buffer). */
static ssize_t netlink_address_msg_encoder(struct zebra_dplane_ctx *ctx,
					   void *buf, size_t buflen)
{
	int bytelen;
	const struct prefix *p;
	int cmd;
	const char *label;

	struct {
		struct nlmsghdr n;
		struct ifaddrmsg ifa;
		char buf[0];
	} *req = buf;

	if (buflen < sizeof(*req))
		return 0;

	p = dplane_ctx_get_intf_addr(ctx);
	memset(req, 0, sizeof(*req));

	/* 4 bytes for an IPv4 address, 16 for IPv6 */
	bytelen = (p->family == AF_INET ? 4 : 16);

	req->n.nlmsg_len = NLMSG_LENGTH(sizeof(struct ifaddrmsg));
	req->n.nlmsg_flags = NLM_F_REQUEST;

	if (dplane_ctx_get_op(ctx) == DPLANE_OP_ADDR_INSTALL)
		cmd = RTM_NEWADDR;
	else
		cmd = RTM_DELADDR;

	req->n.nlmsg_type = cmd;
	req->ifa.ifa_family = p->family;

	req->ifa.ifa_index = dplane_ctx_get_ifindex(ctx);

	if (!nl_attr_put(&req->n, buflen, IFA_LOCAL, &p->u.prefix, bytelen))
		return 0;

	if (p->family == AF_INET) {
		if (dplane_ctx_intf_is_connected(ctx)) {
			/* Point-to-point: IFA_ADDRESS carries the peer */
			p = dplane_ctx_get_intf_dest(ctx);
			if (!nl_attr_put(&req->n, buflen, IFA_ADDRESS,
					 &p->u.prefix, bytelen))
				return 0;
		} else if (cmd == RTM_NEWADDR) {
			/* Derive the subnet broadcast for a plain install */
			struct in_addr broad = {
				.s_addr = ipv4_broadcast_addr(p->u.prefix4.s_addr,
							p->prefixlen)
			};
			if (!nl_attr_put(&req->n, buflen, IFA_BROADCAST, &broad,
					 bytelen))
				return 0;
		}
	}

	/* p is now either address or destination/bcast addr */
	req->ifa.ifa_prefixlen = p->prefixlen;

	if (dplane_ctx_intf_is_secondary(ctx))
		SET_FLAG(req->ifa.ifa_flags, IFA_F_SECONDARY);

	if (dplane_ctx_intf_has_label(ctx)) {
		label = dplane_ctx_get_intf_label(ctx);
		if (!nl_attr_put(&req->n, buflen, IFA_LABEL, label,
				 strlen(label) + 1))
			return 0;
	}

	return NLMSG_ALIGN(req->n.nlmsg_len);
}

/* Batch an address install/uninstall context for the kernel. */
enum netlink_msg_status
netlink_put_address_update_msg(struct nl_batch *bth,
			       struct zebra_dplane_ctx *ctx)
{
	return netlink_batch_add_msg(bth, ctx, netlink_address_msg_encoder,
				     false);
}

/* Map a dplane interface op to the matching RTM_* command and encode it. */
static ssize_t netlink_intf_msg_encoder(struct zebra_dplane_ctx *ctx, void *buf,
					size_t buflen)
{
	enum dplane_op_e op;
	int cmd = 0;

	op = dplane_ctx_get_op(ctx);

	switch (op) {
	case DPLANE_OP_INTF_UPDATE:
		cmd = RTM_SETLINK;
		break;
	case DPLANE_OP_INTF_INSTALL:
		cmd = RTM_NEWLINK;
		break;
	case DPLANE_OP_INTF_DELETE:
		cmd = RTM_DELLINK;
		break;
	default:
		flog_err(
			EC_ZEBRA_NHG_FIB_UPDATE,
			"Context received for kernel interface update with incorrect OP code (%u)",
			op);
		return -1;
	}

	return netlink_intf_msg_encode(cmd, ctx, buf, buflen);
}
/* Batch an interface update context for the kernel. */
enum netlink_msg_status
netlink_put_intf_update_msg(struct nl_batch *bth, struct zebra_dplane_ctx *ctx)
{
	return netlink_batch_add_msg(bth, ctx, netlink_intf_msg_encoder, false);
}

/* Handle an incoming RTM_NEWADDR/RTM_DELADDR message (main pthread path):
 * validate it, resolve the owning interface, and register/unregister the
 * connected address.  Returns 0 on success or benign skip, -1 on error.
 * NOTE(review): attribute fixups below intentionally mirror
 * iproute2/ip/ipaddress.c and are order-sensitive. */
int netlink_interface_addr(struct nlmsghdr *h, ns_id_t ns_id, int startup)
{
	int len;
	struct ifaddrmsg *ifa;
	struct rtattr *tb[IFA_MAX + 1];
	struct interface *ifp;
	void *addr;
	void *broad;
	uint8_t flags = 0;
	char *label = NULL;
	struct zebra_ns *zns;
	uint32_t metric = METRIC_MAX;
	uint32_t kernel_flags = 0;

	frrtrace(3, frr_zebra, netlink_interface_addr, h, ns_id, startup);

	zns = zebra_ns_lookup(ns_id);
	ifa = NLMSG_DATA(h);

	if (ifa->ifa_family != AF_INET && ifa->ifa_family != AF_INET6) {
		flog_warn(
			EC_ZEBRA_UNKNOWN_FAMILY,
			"Invalid address family: %u received from kernel interface addr change: %s",
			ifa->ifa_family, nl_msg_type_to_str(h->nlmsg_type));
		return 0;
	}

	if (h->nlmsg_type != RTM_NEWADDR && h->nlmsg_type != RTM_DELADDR)
		return 0;

	len = h->nlmsg_len - NLMSG_LENGTH(sizeof(struct ifaddrmsg));
	if (len < 0) {
		zlog_err(
			"%s: Message received from netlink is of a broken size: %d %zu",
			__func__, h->nlmsg_len,
			(size_t)NLMSG_LENGTH(sizeof(struct ifaddrmsg)));
		return -1;
	}

	netlink_parse_rtattr(tb, IFA_MAX, IFA_RTA(ifa), len);

	ifp = if_lookup_by_index_per_ns(zns, ifa->ifa_index);
	if (ifp == NULL) {
		if (startup) {
			/* During startup, failure to lookup the referenced
			 * interface should not be an error, so we have
			 * downgraded this condition to warning, and we permit
			 * the startup interface state retrieval to continue.
			 */
			flog_warn(EC_LIB_INTERFACE,
				  "%s: can't find interface by index %d",
				  __func__, ifa->ifa_index);
			return 0;
		} else {
			flog_err(EC_LIB_INTERFACE,
				 "%s: can't find interface by index %d",
				 __func__, ifa->ifa_index);
			return -1;
		}
	}

	/* Flags passed through */
	if (tb[IFA_FLAGS])
		kernel_flags = *(int *)RTA_DATA(tb[IFA_FLAGS]);
	else
		kernel_flags = ifa->ifa_flags;

	if (IS_ZEBRA_DEBUG_KERNEL) /* remove this line to see initial ifcfg */
	{
		char buf[BUFSIZ];
		zlog_debug("%s %s %s flags 0x%x:", __func__,
			   nl_msg_type_to_str(h->nlmsg_type), ifp->name,
			   kernel_flags);
		if (tb[IFA_LOCAL])
			zlog_debug("  IFA_LOCAL     %s/%d",
				   inet_ntop(ifa->ifa_family,
					     RTA_DATA(tb[IFA_LOCAL]), buf,
					     BUFSIZ),
				   ifa->ifa_prefixlen);
		if (tb[IFA_ADDRESS])
			zlog_debug("  IFA_ADDRESS   %s/%d",
				   inet_ntop(ifa->ifa_family,
					     RTA_DATA(tb[IFA_ADDRESS]), buf,
					     BUFSIZ),
				   ifa->ifa_prefixlen);
		if (tb[IFA_BROADCAST])
			zlog_debug("  IFA_BROADCAST %s/%d",
				   inet_ntop(ifa->ifa_family,
					     RTA_DATA(tb[IFA_BROADCAST]), buf,
					     BUFSIZ),
				   ifa->ifa_prefixlen);
		if (tb[IFA_LABEL] && strcmp(ifp->name, RTA_DATA(tb[IFA_LABEL])))
			zlog_debug("  IFA_LABEL     %s",
				   (char *)RTA_DATA(tb[IFA_LABEL]));

		if (tb[IFA_CACHEINFO]) {
			struct ifa_cacheinfo *ci = RTA_DATA(tb[IFA_CACHEINFO]);
			zlog_debug("  IFA_CACHEINFO pref %d, valid %d",
				   ci->ifa_prefered, ci->ifa_valid);
		}
	}

	/* logic copied from iproute2/ip/ipaddress.c:print_addrinfo() */
	if (tb[IFA_LOCAL] == NULL)
		tb[IFA_LOCAL] = tb[IFA_ADDRESS];
	if (tb[IFA_ADDRESS] == NULL)
		tb[IFA_ADDRESS] = tb[IFA_LOCAL];

	/* local interface address */
	addr = (tb[IFA_LOCAL] ? RTA_DATA(tb[IFA_LOCAL]) : NULL);

	/* is there a peer address? */
	if (tb[IFA_ADDRESS]
	    && memcmp(RTA_DATA(tb[IFA_ADDRESS]), RTA_DATA(tb[IFA_LOCAL]),
		      RTA_PAYLOAD(tb[IFA_ADDRESS]))) {
		broad = RTA_DATA(tb[IFA_ADDRESS]);
		SET_FLAG(flags, ZEBRA_IFA_PEER);
	} else
		/* seeking a broadcast address */
		broad = (tb[IFA_BROADCAST] ? RTA_DATA(tb[IFA_BROADCAST])
					   : NULL);

	/* addr is primary key, SOL if we don't have one */
	if (addr == NULL) {
		zlog_debug("%s: Local Interface Address is NULL for %s",
			   __func__, ifp->name);
		return -1;
	}

	/* Flags. */
	if (kernel_flags & IFA_F_SECONDARY)
		SET_FLAG(flags, ZEBRA_IFA_SECONDARY);

	/* Label */
	if (tb[IFA_LABEL])
		label = (char *)RTA_DATA(tb[IFA_LABEL]);

	/* A label equal to the interface name is the implicit default; drop it */
	if (label && strcmp(ifp->name, label) == 0)
		label = NULL;

	if (tb[IFA_RT_PRIORITY])
		metric = *(uint32_t *)RTA_DATA(tb[IFA_RT_PRIORITY]);

	/* Register interface address to the interface. */
	if (ifa->ifa_family == AF_INET) {
		if (ifa->ifa_prefixlen > IPV4_MAX_BITLEN) {
			zlog_err(
				"Invalid prefix length: %u received from kernel interface addr change: %s",
				ifa->ifa_prefixlen,
				nl_msg_type_to_str(h->nlmsg_type));
			return -1;
		}

		if (h->nlmsg_type == RTM_NEWADDR)
			connected_add_ipv4(ifp, flags, (struct in_addr *)addr,
					   ifa->ifa_prefixlen,
					   (struct in_addr *)broad, label,
					   metric);
		else if (CHECK_FLAG(flags, ZEBRA_IFA_PEER)) {
			/* Delete with a peer address */
			connected_delete_ipv4(
				ifp, flags, (struct in_addr *)addr,
				ifa->ifa_prefixlen, broad);
		} else
			connected_delete_ipv4(
				ifp, flags, (struct in_addr *)addr,
				ifa->ifa_prefixlen, NULL);
	}

	if (ifa->ifa_family == AF_INET6) {
		if (ifa->ifa_prefixlen > IPV6_MAX_BITLEN) {
			zlog_err(
				"Invalid prefix length: %u received from kernel interface addr change: %s",
				ifa->ifa_prefixlen,
				nl_msg_type_to_str(h->nlmsg_type));
			return -1;
		}
		if (h->nlmsg_type == RTM_NEWADDR) {
			/* Only consider valid addresses; we'll not get a
			 * notification from
			 * the kernel till IPv6 DAD has completed, but at init
			 * time, Quagga
			 * does query for and will receive all addresses.
			 */
			if (!(kernel_flags
			      & (IFA_F_DADFAILED | IFA_F_TENTATIVE)))
				connected_add_ipv6(ifp, flags,
						   (struct in6_addr *)addr,
						   (struct in6_addr *)broad,
						   ifa->ifa_prefixlen, label,
						   metric);
		} else
			connected_delete_ipv6(ifp, (struct in6_addr *)addr,
					      NULL, ifa->ifa_prefixlen);
	}

	/*
	 * Linux kernel does not send route delete on interface down/addr del
	 * so we have to re-process routes it owns (i.e. kernel routes)
	 */
	if (h->nlmsg_type != RTM_NEWADDR)
		rib_update(RIB_UPDATE_KERNEL);

	return 0;
}
/*
 * Parse and validate an incoming interface address change message,
 * generating a dplane context object.
 * This runs in the dplane pthread; the context is enqueued to the
 * main pthread for processing.
 */
int netlink_interface_addr_dplane(struct nlmsghdr *h, ns_id_t ns_id,
				  int startup /*ignored*/)
{
	int len;
	struct ifaddrmsg *ifa;
	struct rtattr *tb[IFA_MAX + 1];
	void *addr;
	void *broad;
	char *label = NULL;
	uint32_t metric = METRIC_MAX;
	uint32_t kernel_flags = 0;
	struct zebra_dplane_ctx *ctx;
	struct prefix p;

	ifa = NLMSG_DATA(h);

	/* Validate message types */
	if (h->nlmsg_type != RTM_NEWADDR && h->nlmsg_type != RTM_DELADDR)
		return 0;

	if (ifa->ifa_family != AF_INET && ifa->ifa_family != AF_INET6) {
		if (IS_ZEBRA_DEBUG_KERNEL)
			zlog_debug("%s: %s: Invalid address family: %u",
				   __func__, nl_msg_type_to_str(h->nlmsg_type),
				   ifa->ifa_family);
		return 0;
	}

	len = h->nlmsg_len - NLMSG_LENGTH(sizeof(struct ifaddrmsg));
	if (len < 0) {
		if (IS_ZEBRA_DEBUG_KERNEL)
			zlog_debug("%s: %s: netlink msg bad size: %d %zu",
				   __func__, nl_msg_type_to_str(h->nlmsg_type),
				   h->nlmsg_len,
				   (size_t)NLMSG_LENGTH(
					   sizeof(struct ifaddrmsg)));
		return -1;
	}

	netlink_parse_rtattr(tb, IFA_MAX, IFA_RTA(ifa), len);

	/* Flags passed through */
	if (tb[IFA_FLAGS])
		kernel_flags = *(int *)RTA_DATA(tb[IFA_FLAGS]);
	else
		kernel_flags = ifa->ifa_flags;

	if (IS_ZEBRA_DEBUG_KERNEL) { /* remove this line to see initial ifcfg */
		char buf[PREFIX_STRLEN];

		zlog_debug("%s: %s nsid %u ifindex %u flags 0x%x:", __func__,
			   nl_msg_type_to_str(h->nlmsg_type), ns_id,
			   ifa->ifa_index, kernel_flags);
		if (tb[IFA_LOCAL])
			zlog_debug("  IFA_LOCAL     %s/%d",
				   inet_ntop(ifa->ifa_family,
					     RTA_DATA(tb[IFA_LOCAL]), buf,
					     sizeof(buf)),
				   ifa->ifa_prefixlen);
		if (tb[IFA_ADDRESS])
			zlog_debug("  IFA_ADDRESS   %s/%d",
				   inet_ntop(ifa->ifa_family,
					     RTA_DATA(tb[IFA_ADDRESS]), buf,
					     sizeof(buf)),
				   ifa->ifa_prefixlen);
		if (tb[IFA_BROADCAST])
			zlog_debug("  IFA_BROADCAST %s/%d",
				   inet_ntop(ifa->ifa_family,
					     RTA_DATA(tb[IFA_BROADCAST]), buf,
					     sizeof(buf)),
				   ifa->ifa_prefixlen);
		if (tb[IFA_LABEL])
			zlog_debug("  IFA_LABEL     %s",
				   (const char *)RTA_DATA(tb[IFA_LABEL]));

		if (tb[IFA_CACHEINFO]) {
			struct ifa_cacheinfo *ci = RTA_DATA(tb[IFA_CACHEINFO]);

			zlog_debug("  IFA_CACHEINFO pref %d, valid %d",
				   ci->ifa_prefered, ci->ifa_valid);
		}
	}

	/* Validate prefix length */

	if (ifa->ifa_family == AF_INET
	    && ifa->ifa_prefixlen > IPV4_MAX_BITLEN) {
		if (IS_ZEBRA_DEBUG_KERNEL)
			zlog_debug("%s: %s: Invalid prefix length: %u",
				   __func__, nl_msg_type_to_str(h->nlmsg_type),
				   ifa->ifa_prefixlen);
		return -1;
	}

	if (ifa->ifa_family == AF_INET6) {
		if (ifa->ifa_prefixlen > IPV6_MAX_BITLEN) {
			if (IS_ZEBRA_DEBUG_KERNEL)
				zlog_debug("%s: %s: Invalid prefix length: %u",
					   __func__,
					   nl_msg_type_to_str(h->nlmsg_type),
					   ifa->ifa_prefixlen);
			return -1;
		}

		/* Only consider valid addresses; we'll not get a kernel
		 * notification till IPv6 DAD has completed, but at init
		 * time, FRR does query for and will receive all addresses.
		 */
		if (h->nlmsg_type == RTM_NEWADDR
		    && (kernel_flags & (IFA_F_DADFAILED | IFA_F_TENTATIVE))) {
			if (IS_ZEBRA_DEBUG_KERNEL)
				zlog_debug("%s: %s: Invalid/tentative addr",
					   __func__,
					   nl_msg_type_to_str(h->nlmsg_type));
			return 0;
		}
	}

	/* logic copied from iproute2/ip/ipaddress.c:print_addrinfo() */
	if (tb[IFA_LOCAL] == NULL)
		tb[IFA_LOCAL] = tb[IFA_ADDRESS];
	if (tb[IFA_ADDRESS] == NULL)
		tb[IFA_ADDRESS] = tb[IFA_LOCAL];

	/* local interface address */
	addr = (tb[IFA_LOCAL] ? RTA_DATA(tb[IFA_LOCAL]) : NULL);

	/* addr is primary key, SOL if we don't have one */
	if (addr == NULL) {
		if (IS_ZEBRA_DEBUG_KERNEL)
			zlog_debug("%s: %s: No local interface address",
				   __func__, nl_msg_type_to_str(h->nlmsg_type));
		return -1;
	}

	/* Allocate a context object, now that validation is done. */
	ctx = dplane_ctx_alloc();
	if (h->nlmsg_type == RTM_NEWADDR)
		dplane_ctx_set_op(ctx, DPLANE_OP_INTF_ADDR_ADD);
	else
		dplane_ctx_set_op(ctx, DPLANE_OP_INTF_ADDR_DEL);

	dplane_ctx_set_ifindex(ctx, ifa->ifa_index);
	dplane_ctx_set_ns_id(ctx, ns_id);

	/* Convert addr to prefix */
	memset(&p, 0, sizeof(p));
	p.family = ifa->ifa_family;
	p.prefixlen = ifa->ifa_prefixlen;
	if (p.family == AF_INET)
		p.u.prefix4 = *(struct in_addr *)addr;
	else
		p.u.prefix6 = *(struct in6_addr *)addr;

	dplane_ctx_set_intf_addr(ctx, &p);

	/* is there a peer address? */
	if (tb[IFA_ADDRESS]
	    && memcmp(RTA_DATA(tb[IFA_ADDRESS]), RTA_DATA(tb[IFA_LOCAL]),
		      RTA_PAYLOAD(tb[IFA_ADDRESS]))) {
		broad = RTA_DATA(tb[IFA_ADDRESS]);
		dplane_ctx_intf_set_connected(ctx);
	} else if (tb[IFA_BROADCAST]) {
		/* seeking a broadcast address */
		broad = RTA_DATA(tb[IFA_BROADCAST]);
		dplane_ctx_intf_set_broadcast(ctx);
	} else
		broad = NULL;

	if (broad) {
		/* Convert addr to prefix */
		memset(&p, 0, sizeof(p));
		p.family = ifa->ifa_family;
		p.prefixlen = ifa->ifa_prefixlen;
		if (p.family == AF_INET)
			p.u.prefix4 = *(struct in_addr *)broad;
		else
			p.u.prefix6 = *(struct in6_addr *)broad;

		dplane_ctx_set_intf_dest(ctx, &p);
	}

	/* Flags. */
	if (kernel_flags & IFA_F_SECONDARY)
		dplane_ctx_intf_set_secondary(ctx);

	/* Label */
	if (tb[IFA_LABEL]) {
		label = (char *)RTA_DATA(tb[IFA_LABEL]);
		dplane_ctx_set_intf_label(ctx, label);
	}

	if (tb[IFA_RT_PRIORITY])
		metric = *(uint32_t *)RTA_DATA(tb[IFA_RT_PRIORITY]);

	dplane_ctx_set_intf_metric(ctx, metric);

	/* Enqueue ctx for main pthread to process */
	dplane_provider_enqueue_to_zebra(ctx);

	return 0;
}
/* Handle an incoming RTM_NEWLINK/RTM_DELLINK message.
 * RTM_NEWLINK covers three cases: a brand-new (or newly-active) interface,
 * a VRF change for an existing interface, or an update (flags/MTU/master/
 * MAC/protodown) to an existing one.  RTM_DELLINK tears the interface down.
 * Returns 0 on success or benign skip, -1 on malformed input. */
int netlink_link_change(struct nlmsghdr *h, ns_id_t ns_id, int startup)
{
	int len;
	struct ifinfomsg *ifi;
	struct rtattr *tb[IFLA_MAX + 1];
	struct rtattr *linkinfo[IFLA_MAX + 1];
	struct interface *ifp;
	char *name = NULL;
	char *kind = NULL;
	char *desc = NULL;
	char *slave_kind = NULL;
	struct zebra_ns *zns;
	vrf_id_t vrf_id = VRF_DEFAULT;
	enum zebra_iftype zif_type = ZEBRA_IF_OTHER;
	enum zebra_slave_iftype zif_slave_type = ZEBRA_IF_SLAVE_NONE;
	ifindex_t bridge_ifindex = IFINDEX_INTERNAL;
	ifindex_t bond_ifindex = IFINDEX_INTERNAL;
	ifindex_t link_ifindex = IFINDEX_INTERNAL;
	uint8_t old_hw_addr[INTERFACE_HWADDR_MAX];
	struct zebra_if *zif;
	ns_id_t link_nsid = ns_id;
	ifindex_t master_infindex = IFINDEX_INTERNAL;
	uint8_t bypass = 0;

	zns = zebra_ns_lookup(ns_id);
	ifi = NLMSG_DATA(h);

	/* assume if not default zns, then new VRF */
	if (!(h->nlmsg_type == RTM_NEWLINK || h->nlmsg_type == RTM_DELLINK)) {
		/* If this is not link add/delete message so print warning. */
		zlog_debug("%s: wrong kernel message %s", __func__,
			   nl_msg_type_to_str(h->nlmsg_type));
		return 0;
	}

	if (!(ifi->ifi_family == AF_UNSPEC || ifi->ifi_family == AF_BRIDGE
	      || ifi->ifi_family == AF_INET6)) {
		flog_warn(
			EC_ZEBRA_UNKNOWN_FAMILY,
			"Invalid address family: %u received from kernel link change: %s",
			ifi->ifi_family, nl_msg_type_to_str(h->nlmsg_type));
		return 0;
	}

	len = h->nlmsg_len - NLMSG_LENGTH(sizeof(struct ifinfomsg));
	if (len < 0) {
		zlog_err(
			"%s: Message received from netlink is of a broken size %d %zu",
			__func__, h->nlmsg_len,
			(size_t)NLMSG_LENGTH(sizeof(struct ifinfomsg)));
		return -1;
	}

	/* We are interested in some AF_BRIDGE notifications. */
	if (ifi->ifi_family == AF_BRIDGE)
		return netlink_bridge_interface(h, len, ns_id, startup);

	/* Looking up interface name. */
	memset(linkinfo, 0, sizeof(linkinfo));
	netlink_parse_rtattr_flags(tb, IFLA_MAX, IFLA_RTA(ifi), len,
				   NLA_F_NESTED);

	/* check for wireless messages to ignore */
	if ((tb[IFLA_WIRELESS] != NULL) && (ifi->ifi_change == 0)) {
		if (IS_ZEBRA_DEBUG_KERNEL)
			zlog_debug("%s: ignoring IFLA_WIRELESS message",
				   __func__);
		return 0;
	}

	if (tb[IFLA_IFNAME] == NULL)
		return -1;
	name = (char *)RTA_DATA(tb[IFLA_IFNAME]);

	/* Must be valid string. */
	len = RTA_PAYLOAD(tb[IFLA_IFNAME]);
	if (len < 2 || name[len - 1] != '\0') {
		if (IS_ZEBRA_DEBUG_KERNEL)
			zlog_debug("%s: invalid intf name", __func__);
		return -1;
	}

	if (tb[IFLA_LINKINFO]) {
		netlink_parse_rtattr_nested(linkinfo, IFLA_INFO_MAX,
					    tb[IFLA_LINKINFO]);

		if (linkinfo[IFLA_INFO_KIND])
			kind = RTA_DATA(linkinfo[IFLA_INFO_KIND]);

		if (linkinfo[IFLA_INFO_SLAVE_KIND])
			slave_kind = RTA_DATA(linkinfo[IFLA_INFO_SLAVE_KIND]);

		netlink_determine_zebra_iftype(kind, &zif_type);
	}

	/* If linking to another interface, note it. */
	if (tb[IFLA_LINK])
		link_ifindex = *(ifindex_t *)RTA_DATA(tb[IFLA_LINK]);

	if (tb[IFLA_LINK_NETNSID]) {
		link_nsid = *(ns_id_t *)RTA_DATA(tb[IFLA_LINK_NETNSID]);
		link_nsid = ns_id_get_absolute(ns_id, link_nsid);
	}
	if (tb[IFLA_IFALIAS]) {
		desc = (char *)RTA_DATA(tb[IFLA_IFALIAS]);
	}

	/* See if interface is present. */
	ifp = if_lookup_by_name_per_ns(zns, name);

	if (h->nlmsg_type == RTM_NEWLINK) {
		/* If VRF, create or update the VRF structure itself. */
		if (zif_type == ZEBRA_IF_VRF && !vrf_is_backend_netns()) {
			netlink_vrf_change(h, tb[IFLA_LINKINFO], ns_id, name);
			vrf_id = (vrf_id_t)ifi->ifi_index;
		}

		/* Classify by master device kind: vrf, bridge or bond slave */
		if (tb[IFLA_MASTER]) {
			if (slave_kind && (strcmp(slave_kind, "vrf") == 0)
			    && !vrf_is_backend_netns()) {
				zif_slave_type = ZEBRA_IF_SLAVE_VRF;
				master_infindex = vrf_id =
					*(uint32_t *)RTA_DATA(tb[IFLA_MASTER]);
			} else if (slave_kind
				   && (strcmp(slave_kind, "bridge") == 0)) {
				zif_slave_type = ZEBRA_IF_SLAVE_BRIDGE;
				master_infindex = bridge_ifindex =
					*(ifindex_t *)RTA_DATA(tb[IFLA_MASTER]);
			} else if (slave_kind
				   && (strcmp(slave_kind, "bond") == 0)) {
				zif_slave_type = ZEBRA_IF_SLAVE_BOND;
				master_infindex = bond_ifindex =
					*(ifindex_t *)RTA_DATA(tb[IFLA_MASTER]);
				bypass = netlink_parse_lacp_bypass(linkinfo);
			} else
				zif_slave_type = ZEBRA_IF_SLAVE_OTHER;
		}
		if (vrf_is_backend_netns())
			vrf_id = (vrf_id_t)ns_id;
		if (ifp == NULL
		    || !CHECK_FLAG(ifp->status, ZEBRA_INTERFACE_ACTIVE)) {
			/* Add interface notification from kernel */
			if (IS_ZEBRA_DEBUG_KERNEL)
				zlog_debug(
					"RTM_NEWLINK ADD for %s(%u) vrf_id %u type %d sl_type %d master %u flags 0x%x",
					name, ifi->ifi_index, vrf_id, zif_type,
					zif_slave_type, master_infindex,
					ifi->ifi_flags);

			if (ifp == NULL) {
				/* unknown interface */
				ifp = if_get_by_name(name, vrf_id, NULL);
			} else {
				/* pre-configured interface, learnt now */
				if (ifp->vrf->vrf_id != vrf_id)
					if_update_to_new_vrf(ifp, vrf_id);
			}

			/* Update interface information. */
			set_ifindex(ifp, ifi->ifi_index, zns);
			ifp->flags = ifi->ifi_flags & 0x0000fffff;
			if (!tb[IFLA_MTU]) {
				zlog_debug(
					"RTM_NEWLINK for interface %s(%u) without MTU set",
					name, ifi->ifi_index);
				return 0;
			}
			ifp->mtu6 = ifp->mtu = *(int *)RTA_DATA(tb[IFLA_MTU]);
			ifp->metric = 0;
			ifp->ptm_status = ZEBRA_PTM_STATUS_UNKNOWN;

			/* Set interface type */
			zebra_if_set_ziftype(ifp, zif_type, zif_slave_type);
			if (IS_ZEBRA_IF_VRF(ifp))
				SET_FLAG(ifp->status,
					 ZEBRA_INTERFACE_VRF_LOOPBACK);

			/* Update link. */
			zebra_if_update_link(ifp, link_ifindex, link_nsid);

			ifp->ll_type =
				netlink_to_zebra_link_type(ifi->ifi_type);
			netlink_interface_update_hw_addr(tb, ifp);

			/* Inform clients, install any configured addresses. */
			if_add_update(ifp);

			/* Extract and save L2 interface information, take
			 * additional actions. */
			netlink_interface_update_l2info(
				ifp, linkinfo[IFLA_INFO_DATA],
				1, link_nsid);
			if (IS_ZEBRA_IF_BRIDGE_SLAVE(ifp))
				zebra_l2if_update_bridge_slave(
					ifp, bridge_ifindex, ns_id,
					ZEBRA_BRIDGE_NO_ACTION);
			else if (IS_ZEBRA_IF_BOND_SLAVE(ifp))
				zebra_l2if_update_bond_slave(ifp, bond_ifindex,
							     !!bypass);

			if (tb[IFLA_PROTO_DOWN])
				netlink_proc_dplane_if_protodown(ifp->info, tb);
			if (IS_ZEBRA_IF_BRIDGE(ifp)) {
				zif = ifp->info;
				if (IS_ZEBRA_DEBUG_KERNEL)
					zlog_debug(
						"RTM_NEWLINK ADD for %s(%u), vlan-aware %d",
						name, ifp->ifindex,
						IS_ZEBRA_IF_BRIDGE_VLAN_AWARE(
							zif));
			}
		} else if (ifp->vrf->vrf_id != vrf_id) {
			/* VRF change for an interface. */
			if (IS_ZEBRA_DEBUG_KERNEL)
				zlog_debug(
					"RTM_NEWLINK vrf-change for %s(%u) vrf_id %u -> %u flags 0x%x",
					name, ifp->ifindex, ifp->vrf->vrf_id,
					vrf_id, ifi->ifi_flags);

			if_handle_vrf_change(ifp, vrf_id);
		} else {
			bool was_bridge_slave, was_bond_slave;
			uint8_t chgflags = ZEBRA_BRIDGE_NO_ACTION;
			zif = ifp->info;

			/* Interface update. */
			if (IS_ZEBRA_DEBUG_KERNEL)
				zlog_debug(
					"RTM_NEWLINK update for %s(%u) sl_type %d master %u flags 0x%x",
					name, ifp->ifindex, zif_slave_type,
					master_infindex, ifi->ifi_flags);

			set_ifindex(ifp, ifi->ifi_index, zns);
			if (!tb[IFLA_MTU]) {
				zlog_debug(
					"RTM_NEWLINK for interface %s(%u) without MTU set",
					name, ifi->ifi_index);
				return 0;
			}
			ifp->mtu6 = ifp->mtu = *(int *)RTA_DATA(tb[IFLA_MTU]);
			ifp->metric = 0;

			/* Update interface type - NOTE: Only slave_type can
			 * change. */
			was_bridge_slave = IS_ZEBRA_IF_BRIDGE_SLAVE(ifp);
			was_bond_slave = IS_ZEBRA_IF_BOND_SLAVE(ifp);
			zebra_if_set_ziftype(ifp, zif_type, zif_slave_type);

			memcpy(old_hw_addr, ifp->hw_addr, INTERFACE_HWADDR_MAX);

			/* Update link. */
			zebra_if_update_link(ifp, link_ifindex, link_nsid);

			ifp->ll_type =
				netlink_to_zebra_link_type(ifi->ifi_type);
			netlink_interface_update_hw_addr(tb, ifp);

			if (tb[IFLA_PROTO_DOWN])
				netlink_proc_dplane_if_protodown(ifp->info, tb);

			if (if_is_no_ptm_operative(ifp)) {
				bool is_up = if_is_operative(ifp);
				ifp->flags = ifi->ifi_flags & 0x0000fffff;
				if (!if_is_no_ptm_operative(ifp) ||
				    CHECK_FLAG(zif->flags,
					       ZIF_FLAG_PROTODOWN)) {
					if (IS_ZEBRA_DEBUG_KERNEL)
						zlog_debug(
							"Intf %s(%u) has gone DOWN",
							name, ifp->ifindex);
					if_down(ifp);
					rib_update(RIB_UPDATE_KERNEL);
				} else if (if_is_operative(ifp)) {
					bool mac_updated = false;

					/* Must notify client daemons of new
					 * interface status. */
					if (IS_ZEBRA_DEBUG_KERNEL)
						zlog_debug(
							"Intf %s(%u) PTM up, notifying clients",
							name, ifp->ifindex);
					if_up(ifp, !is_up);

					/* Update EVPN VNI when SVI MAC change
					 */
					if (memcmp(old_hw_addr, ifp->hw_addr,
						   INTERFACE_HWADDR_MAX))
						mac_updated = true;
					if (IS_ZEBRA_IF_VLAN(ifp)
					    && mac_updated) {
						struct interface *link_if;

						link_if =
							if_lookup_by_index_per_ns(
								zebra_ns_lookup(NS_DEFAULT),
								link_ifindex);
						if (link_if)
							zebra_vxlan_svi_up(ifp,
									   link_if);
					} else if (mac_updated
						   && IS_ZEBRA_IF_BRIDGE(ifp)) {
						zlog_debug(
							"Intf %s(%u) bridge changed MAC address",
							name, ifp->ifindex);
						chgflags =
							ZEBRA_BRIDGE_MASTER_MAC_CHANGE;
					}
				}
			} else {
				ifp->flags = ifi->ifi_flags & 0x0000fffff;
				if (if_is_operative(ifp) &&
				    !CHECK_FLAG(zif->flags,
						ZIF_FLAG_PROTODOWN)) {
					if (IS_ZEBRA_DEBUG_KERNEL)
						zlog_debug(
							"Intf %s(%u) has come UP",
							name, ifp->ifindex);
					if_up(ifp, true);
					if (IS_ZEBRA_IF_BRIDGE(ifp))
						chgflags =
							ZEBRA_BRIDGE_MASTER_UP;
				} else {
					if (IS_ZEBRA_DEBUG_KERNEL)
						zlog_debug(
							"Intf %s(%u) has gone DOWN",
							name, ifp->ifindex);
					if_down(ifp);
					rib_update(RIB_UPDATE_KERNEL);
				}
			}

			/* Extract and save L2 interface information, take
			 * additional actions. */
			netlink_interface_update_l2info(
				ifp, linkinfo[IFLA_INFO_DATA],
				0, link_nsid);
			if (IS_ZEBRA_IF_BRIDGE(ifp))
				zebra_l2if_update_bridge(ifp, chgflags);
			if (IS_ZEBRA_IF_BOND(ifp))
				zebra_l2if_update_bond(ifp, true);
			if (IS_ZEBRA_IF_BRIDGE_SLAVE(ifp) || was_bridge_slave)
				zebra_l2if_update_bridge_slave(
					ifp, bridge_ifindex, ns_id, chgflags);
			else if (IS_ZEBRA_IF_BOND_SLAVE(ifp) || was_bond_slave)
				zebra_l2if_update_bond_slave(ifp, bond_ifindex,
							     !!bypass);
			if (IS_ZEBRA_IF_BRIDGE(ifp)) {
				if (IS_ZEBRA_DEBUG_KERNEL)
					zlog_debug(
						"RTM_NEWLINK update for %s(%u), vlan-aware %d",
						name, ifp->ifindex,
						IS_ZEBRA_IF_BRIDGE_VLAN_AWARE(
							zif));
			}
		}

		zif = ifp->info;
		if (zif) {
			XFREE(MTYPE_TMP, zif->desc);
			if (desc)
				zif->desc = XSTRDUP(MTYPE_TMP, desc);
		}
	} else {
		/* Delete interface notification from kernel */
		if (ifp == NULL) {
			if (IS_ZEBRA_DEBUG_KERNEL)
				zlog_debug(
					"RTM_DELLINK for unknown interface %s(%u)",
					name, ifi->ifi_index);
			return 0;
		}

		if (IS_ZEBRA_DEBUG_KERNEL)
			zlog_debug("RTM_DELLINK for %s(%u)", name,
				   ifp->ifindex);

		UNSET_FLAG(ifp->status, ZEBRA_INTERFACE_VRF_LOOPBACK);

		if (IS_ZEBRA_IF_BOND(ifp))
			zebra_l2if_update_bond(ifp, false);
		if (IS_ZEBRA_IF_BOND_SLAVE(ifp))
			zebra_l2if_update_bond_slave(ifp, bond_ifindex, false);
		/* Special handling for bridge or VxLAN interfaces. */
		if (IS_ZEBRA_IF_BRIDGE(ifp))
			zebra_l2_bridge_del(ifp);
		else if (IS_ZEBRA_IF_VXLAN(ifp))
			zebra_l2_vxlanif_del(ifp);

		if_delete_update(&ifp);

		/* If VRF, delete the VRF structure itself. */
		if (zif_type == ZEBRA_IF_VRF && !vrf_is_backend_netns())
			netlink_vrf_change(h, tb[IFLA_LINKINFO], ns_id, name);
	}

	return 0;
}
+ */ + +ssize_t netlink_intf_msg_encode(uint16_t cmd, + const struct zebra_dplane_ctx *ctx, void *buf, + size_t buflen) +{ + struct { + struct nlmsghdr n; + struct ifinfomsg ifa; + char buf[]; + } *req = buf; + + struct rtattr *nest_protodown_reason; + ifindex_t ifindex = dplane_ctx_get_ifindex(ctx); + bool down = dplane_ctx_intf_is_protodown(ctx); + bool pd_reason_val = dplane_ctx_get_intf_pd_reason_val(ctx); + struct nlsock *nl = + kernel_netlink_nlsock_lookup(dplane_ctx_get_ns_sock(ctx)); + + if (buflen < sizeof(*req)) + return 0; + + memset(req, 0, sizeof(*req)); + + if (cmd != RTM_SETLINK) + flog_err( + EC_ZEBRA_INTF_UPDATE_FAILURE, + "Only RTM_SETLINK message type currently supported in dplane pthread"); + + req->n.nlmsg_len = NLMSG_LENGTH(sizeof(struct ifinfomsg)); + req->n.nlmsg_flags = NLM_F_REQUEST; + req->n.nlmsg_type = cmd; + req->n.nlmsg_pid = nl->snl.nl_pid; + + req->ifa.ifi_index = ifindex; + + nl_attr_put8(&req->n, buflen, IFLA_PROTO_DOWN, down); + nl_attr_put32(&req->n, buflen, IFLA_LINK, ifindex); + + /* Reason info nest */ + nest_protodown_reason = + nl_attr_nest(&req->n, buflen, IFLA_PROTO_DOWN_REASON); + + if (!nest_protodown_reason) + return -1; + + nl_attr_put32(&req->n, buflen, IFLA_PROTO_DOWN_REASON_MASK, + (1 << frr_protodown_r_bit)); + nl_attr_put32(&req->n, buflen, IFLA_PROTO_DOWN_REASON_VALUE, + ((int)pd_reason_val) << frr_protodown_r_bit); + + nl_attr_nest_end(&req->n, nest_protodown_reason); + + if (IS_ZEBRA_DEBUG_KERNEL) + zlog_debug("%s: %s, protodown=%d reason_val=%d ifindex=%u", + __func__, nl_msg_type_to_str(cmd), down, + pd_reason_val, ifindex); + + return NLMSG_ALIGN(req->n.nlmsg_len); +} + +/* Interface information read by netlink. 
*/ +void interface_list(struct zebra_ns *zns) +{ + interface_lookup_netlink(zns); + /* We add routes for interface address, + * so we need to get the nexthop info + * from the kernel before we can do that + */ + netlink_nexthop_read(zns); + + interface_addr_lookup_netlink(zns); +} + +void if_netlink_set_frr_protodown_r_bit(uint8_t bit) +{ + if (IS_ZEBRA_DEBUG_KERNEL) + zlog_debug( + "Protodown reason bit index changed: bit-index %u -> bit-index %u", + frr_protodown_r_bit, bit); + + frr_protodown_r_bit = bit; +} + +void if_netlink_unset_frr_protodown_r_bit(void) +{ + if (IS_ZEBRA_DEBUG_KERNEL) + zlog_debug( + "Protodown reason bit index changed: bit-index %u -> bit-index %u", + frr_protodown_r_bit, FRR_PROTODOWN_REASON_DEFAULT_BIT); + + frr_protodown_r_bit = FRR_PROTODOWN_REASON_DEFAULT_BIT; +} + + +bool if_netlink_frr_protodown_r_bit_is_set(void) +{ + return (frr_protodown_r_bit != FRR_PROTODOWN_REASON_DEFAULT_BIT); +} + +uint8_t if_netlink_get_frr_protodown_r_bit(void) +{ + return frr_protodown_r_bit; +} + +/** + * netlink_request_tunneldump() - Request all tunnels from the linux kernel + * + * @zns: Zebra namespace + * @family: AF_* netlink family + * @type: RTM_* (RTM_GETTUNNEL) route type + * + * Return: Result status + */ +static int netlink_request_tunneldump(struct zebra_ns *zns, int family, + int ifindex) +{ + struct { + struct nlmsghdr n; + struct tunnel_msg tmsg; + char buf[256]; + } req; + + /* Form the request */ + memset(&req, 0, sizeof(req)); + req.n.nlmsg_len = NLMSG_LENGTH(sizeof(struct tunnel_msg)); + req.n.nlmsg_type = RTM_GETTUNNEL; + req.n.nlmsg_flags = NLM_F_ROOT | NLM_F_MATCH | NLM_F_REQUEST; + req.tmsg.family = family; + req.tmsg.ifindex = ifindex; + + return netlink_request(&zns->netlink_cmd, &req); +} + +/* + * Currently we only ask for vxlan l3svd vni information. + * In the future this can be expanded. 
+ */ +int netlink_tunneldump_read(struct zebra_ns *zns) +{ + int ret = 0; + struct zebra_dplane_info dp_info; + struct route_node *rn; + struct interface *tmp_if = NULL; + struct zebra_if *zif; + struct nlsock *netlink_cmd = &zns->netlink_cmd; + + zebra_dplane_info_from_zns(&dp_info, zns, true /*is_cmd*/); + + for (rn = route_top(zns->if_table); rn; rn = route_next(rn)) { + tmp_if = (struct interface *)rn->info; + if (!tmp_if) + continue; + zif = tmp_if->info; + if (!zif || zif->zif_type != ZEBRA_IF_VXLAN) + continue; + + ret = netlink_request_tunneldump(zns, PF_BRIDGE, + tmp_if->ifindex); + if (ret < 0) + return ret; + + ret = netlink_parse_info(netlink_interface, netlink_cmd, + &dp_info, 0, true); + + if (ret < 0) + return ret; + } + + return 0; +} +#endif /* GNU_LINUX */ diff --git a/zebra/if_netlink.h b/zebra/if_netlink.h new file mode 100644 index 0000000..21ae171 --- /dev/null +++ b/zebra/if_netlink.h @@ -0,0 +1,74 @@ +/* Header file exported by if_netlink.c to zebra. + * Copyright (C) 1997, 98, 99 Kunihiro Ishiguro + * + * This file is part of GNU Zebra. + * + * GNU Zebra is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License as published by the + * Free Software Foundation; either version 2, or (at your option) any + * later version. + * + * GNU Zebra is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. 
+ * + * You should have received a copy of the GNU General Public License along + * with this program; see the file COPYING; if not, write to the Free Software + * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA + */ + +#ifndef _ZEBRA_IF_NETLINK_H +#define _ZEBRA_IF_NETLINK_H + +#ifdef HAVE_NETLINK + +#ifdef __cplusplus +extern "C" { +#endif + +extern int netlink_interface_addr(struct nlmsghdr *h, ns_id_t ns_id, + int startup); + +/* + * Parse an incoming interface address change message, generate a dplane + * context object for processing. + */ +int netlink_interface_addr_dplane(struct nlmsghdr *h, ns_id_t ns_id, + int startup); + +extern int netlink_link_change(struct nlmsghdr *h, ns_id_t ns_id, int startup); +extern int interface_lookup_netlink(struct zebra_ns *zns); + +extern ssize_t netlink_intf_msg_encode(uint16_t cmd, + const struct zebra_dplane_ctx *ctx, + void *buf, size_t buflen); +extern enum netlink_msg_status +netlink_put_gre_set_msg(struct nl_batch *bth, struct zebra_dplane_ctx *ctx); + +extern enum netlink_msg_status +netlink_put_address_update_msg(struct nl_batch *bth, + struct zebra_dplane_ctx *ctx); + +extern int netlink_tunneldump_read(struct zebra_ns *zns); +extern enum netlink_msg_status +netlink_put_intf_update_msg(struct nl_batch *bth, struct zebra_dplane_ctx *ctx); + +#define FRR_PROTODOWN_REASON_DEFAULT_BIT 7 +/* Protodown bit setter/getter + * + * Allow users to change the bit if it conflicts with another + * on their system. 
+ */ +extern void if_netlink_set_frr_protodown_r_bit(uint8_t bit); +extern void if_netlink_unset_frr_protodown_r_bit(void); +extern bool if_netlink_frr_protodown_r_bit_is_set(void); +extern uint8_t if_netlink_get_frr_protodown_r_bit(void); + +#ifdef __cplusplus +} +#endif + +#endif /* HAVE_NETLINK */ + +#endif /* _ZEBRA_IF_NETLINK_H */ diff --git a/zebra/if_socket.c b/zebra/if_socket.c new file mode 100644 index 0000000..da9fadf --- /dev/null +++ b/zebra/if_socket.c @@ -0,0 +1,52 @@ +/* + * Zebra Interface interaction with the kernel using socket. + * Copyright (C) 2022 NVIDIA CORPORATION & AFFILIATES + * Stephen Worley + * + * This file is part of FRR. + * + * FRR is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License as published by the + * Free Software Foundation; either version 2, or (at your option) any + * later version. + * + * FRR is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with FRR; see the file COPYING. If not, write to the Free + * Software Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA + * 02111-1307, USA. 
+ */ + +#include <zebra.h> + +#ifndef HAVE_NETLINK + +#include "lib_errors.h" + +#include "zebra/rt.h" +#include "zebra/zebra_dplane.h" +#include "zebra/zebra_errors.h" + +enum zebra_dplane_result kernel_intf_update(struct zebra_dplane_ctx *ctx) +{ + flog_err(EC_LIB_UNAVAILABLE, "%s not Implemented for this platform", + __func__); + return ZEBRA_DPLANE_REQUEST_FAILURE; +} + +enum zebra_dplane_result +kernel_intf_netconf_update(struct zebra_dplane_ctx *ctx) +{ + const char *ifname = dplane_ctx_get_ifname(ctx); + enum dplane_netconf_status_e mpls_on = dplane_ctx_get_netconf_mpls(ctx); + + zlog_warn("%s: Unable to set kernel mpls state for interface %s(%d)", + __func__, ifname, mpls_on); + + return ZEBRA_DPLANE_REQUEST_SUCCESS; +} +#endif diff --git a/zebra/if_sysctl.c b/zebra/if_sysctl.c new file mode 100644 index 0000000..70d1164 --- /dev/null +++ b/zebra/if_sysctl.c @@ -0,0 +1,146 @@ +/* + * Get interface's address and mask information by sysctl() function. + * Copyright (C) 1997, 98 Kunihiro Ishiguro + * + * This file is part of GNU Zebra. + * + * GNU Zebra is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License as published by the + * Free Software Foundation; either version 2, or (at your option) any + * later version. + * + * GNU Zebra is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. 
+ * + * You should have received a copy of the GNU General Public License along + * with this program; see the file COPYING; if not, write to the Free Software + * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA + */ + +#include <zebra.h> + +#if !defined(GNU_LINUX) && !defined(OPEN_BSD) + +#include "if.h" +#include "sockunion.h" +#include "prefix.h" +#include "connected.h" +#include "memory.h" +#include "ioctl.h" +#include "log.h" +#include "interface.h" +#include "vrf.h" + +#include "zebra/rt.h" +#include "zebra/kernel_socket.h" +#include "zebra/rib.h" +#include "zebra/zebra_errors.h" + +void ifstat_update_sysctl(void) +{ + caddr_t ref, buf, end; + size_t bufsiz; + struct if_msghdr *ifm; + struct interface *ifp; + +#define MIBSIZ 6 + int mib[MIBSIZ] = { + CTL_NET, PF_ROUTE, 0, 0, /* AF_INET & AF_INET6 */ + NET_RT_IFLIST, 0}; + + /* Query buffer size. */ + if (sysctl(mib, MIBSIZ, NULL, &bufsiz, NULL, 0) < 0) { + flog_warn(EC_ZEBRA_SYSCTL_FAILED, "sysctl() error by %s", + safe_strerror(errno)); + return; + } + + /* We free this memory at the end of this function. */ + ref = buf = XMALLOC(MTYPE_TMP, bufsiz); + + /* Fetch interface information into allocated buffer. */ + if (sysctl(mib, MIBSIZ, buf, &bufsiz, NULL, 0) < 0) { + flog_warn(EC_ZEBRA_SYSCTL_FAILED, "sysctl error by %s", + safe_strerror(errno)); + XFREE(MTYPE_TMP, ref); + return; + } + + /* Parse both interfaces and addresses. */ + for (end = buf + bufsiz; buf < end; buf += ifm->ifm_msglen) { + ifm = (struct if_msghdr *)buf; + if (ifm->ifm_type == RTM_IFINFO) { + ifp = if_lookup_by_index(ifm->ifm_index, VRF_DEFAULT); + if (ifp) + ifp->stats = ifm->ifm_data; + } + } + + /* Free sysctl buffer. */ + XFREE(MTYPE_TMP, ref); + + return; +} + +/* Interface listing up function using sysctl(). 
*/ +void interface_list(struct zebra_ns *zns) +{ + caddr_t ref, buf, end; + size_t bufsiz; + struct if_msghdr *ifm; + +#define MIBSIZ 6 + int mib[MIBSIZ] = { + CTL_NET, PF_ROUTE, 0, 0, /* AF_INET & AF_INET6 */ + NET_RT_IFLIST, 0}; + + if (zns->ns_id != NS_DEFAULT) { + zlog_debug("%s: ignore NS %u", __func__, zns->ns_id); + return; + } + + /* Query buffer size. */ + if (sysctl(mib, MIBSIZ, NULL, &bufsiz, NULL, 0) < 0) { + flog_err_sys(EC_ZEBRA_IFLIST_FAILED, + "Could not enumerate interfaces: %s", + safe_strerror(errno)); + return; + } + + /* We free this memory at the end of this function. */ + ref = buf = XMALLOC(MTYPE_TMP, bufsiz); + + /* Fetch interface information into allocated buffer. */ + if (sysctl(mib, MIBSIZ, buf, &bufsiz, NULL, 0) < 0) { + flog_err_sys(EC_ZEBRA_IFLIST_FAILED, + "Could not enumerate interfaces: %s", + safe_strerror(errno)); + return; + } + + /* Parse both interfaces and addresses. */ + for (end = buf + bufsiz; buf < end; buf += ifm->ifm_msglen) { + ifm = (struct if_msghdr *)buf; + + switch (ifm->ifm_type) { + case RTM_IFINFO: + ifm_read(ifm); + break; + case RTM_NEWADDR: + ifam_read((struct ifa_msghdr *)ifm); + break; + default: + zlog_info("%s: unexpected message type", __func__); + XFREE(MTYPE_TMP, ref); + return; + break; + } + } + + /* Free sysctl buffer. */ + XFREE(MTYPE_TMP, ref); +} + +#endif /* !defined(GNU_LINUX) && !defined(OPEN_BSD) */ diff --git a/zebra/interface.c b/zebra/interface.c new file mode 100644 index 0000000..27ac423 --- /dev/null +++ b/zebra/interface.c @@ -0,0 +1,4688 @@ +/* + * Interface function. + * Copyright (C) 1997, 1999 Kunihiro Ishiguro + * + * This file is part of GNU Zebra. + * + * GNU Zebra is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License as published by the + * Free Software Foundation; either version 2, or (at your option) any + * later version. 
+ * + * GNU Zebra is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. + * + * You should have received a copy of the GNU General Public License along + * with this program; see the file COPYING; if not, write to the Free Software + * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA + */ + +#include <zebra.h> + +#include "if.h" +#include "lib_errors.h" +#include "vty.h" +#include "sockunion.h" +#include "prefix.h" +#include "command.h" +#include "memory.h" +#include "ioctl.h" +#include "connected.h" +#include "log.h" +#include "zclient.h" +#include "vrf.h" + +#include "zebra/rtadv.h" +#include "zebra_ns.h" +#include "zebra_vrf.h" +#include "zebra/interface.h" +#include "zebra/rib.h" +#include "zebra/rt.h" +#include "zebra/zebra_router.h" +#include "zebra/redistribute.h" +#include "zebra/debug.h" +#include "zebra/irdp.h" +#include "zebra/zebra_ptm.h" +#include "zebra/rt_netlink.h" +#include "zebra/if_netlink.h" +#include "zebra/interface.h" +#include "zebra/zebra_vxlan.h" +#include "zebra/zebra_errors.h" +#include "zebra/zebra_evpn_mh.h" + +DEFINE_MTYPE_STATIC(ZEBRA, ZINFO, "Zebra Interface Information"); + +#define ZEBRA_PTM_SUPPORT + +DEFINE_HOOK(zebra_if_extra_info, (struct vty * vty, struct interface *ifp), + (vty, ifp)); +DEFINE_HOOK(zebra_if_config_wr, (struct vty * vty, struct interface *ifp), + (vty, ifp)); + + +static void if_down_del_nbr_connected(struct interface *ifp); + +static void if_zebra_speed_update(struct thread *thread) +{ + struct interface *ifp = THREAD_ARG(thread); + struct zebra_if *zif = ifp->info; + uint32_t new_speed; + bool changed = false; + int error = 0; + + new_speed = kernel_get_speed(ifp, &error); + + /* error may indicate vrf not available or + * interfaces not available. 
+ * note that loopback & virtual interfaces can return 0 as speed + */ + if (error < 0) + return; + + if (new_speed != ifp->speed) { + zlog_info("%s: %s old speed: %u new speed: %u", __func__, + ifp->name, ifp->speed, new_speed); + ifp->speed = new_speed; + if_add_update(ifp); + changed = true; + } + + if (changed || new_speed == UINT32_MAX) { +#define SPEED_UPDATE_SLEEP_TIME 5 +#define SPEED_UPDATE_COUNT_MAX (4 * 60 / SPEED_UPDATE_SLEEP_TIME) + /* + * Some interfaces never actually have an associated speed + * with them ( I am looking at you bridges ). + * So instead of iterating forever, let's give the + * system 4 minutes to try to figure out the speed + * if after that it it's probably never going to become + * useful. + * Since I don't know all the wonderful types of interfaces + * that may come into existence in the future I am going + * to not update the system to keep track of that. This + * is far simpler to just stop trying after 4 minutes + */ + if (new_speed == UINT32_MAX && + zif->speed_update_count == SPEED_UPDATE_COUNT_MAX) + return; + + zif->speed_update_count++; + thread_add_timer(zrouter.master, if_zebra_speed_update, ifp, + SPEED_UPDATE_SLEEP_TIME, &zif->speed_update); + thread_ignore_late_timer(zif->speed_update); + } +} + +static void zebra_if_node_destroy(route_table_delegate_t *delegate, + struct route_table *table, + struct route_node *node) +{ + if (node->info) + list_delete((struct list **)&node->info); + route_node_destroy(delegate, table, node); +} + +static void zebra_if_nhg_dependents_free(struct zebra_if *zebra_if) +{ + nhg_connected_tree_free(&zebra_if->nhg_dependents); +} + +static void zebra_if_nhg_dependents_init(struct zebra_if *zebra_if) +{ + nhg_connected_tree_init(&zebra_if->nhg_dependents); +} + + +route_table_delegate_t zebra_if_table_delegate = { + .create_node = route_node_create, + .destroy_node = zebra_if_node_destroy}; + +/* Called when new interface is added. 
*/ +static int if_zebra_new_hook(struct interface *ifp) +{ + struct zebra_if *zebra_if; + + zebra_if = XCALLOC(MTYPE_ZINFO, sizeof(struct zebra_if)); + zebra_if->ifp = ifp; + + zebra_if->multicast = IF_ZEBRA_DATA_UNSPEC; + zebra_if->shutdown = IF_ZEBRA_DATA_OFF; + + zebra_if_nhg_dependents_init(zebra_if); + + zebra_ptm_if_init(zebra_if); + + ifp->ptm_enable = zebra_ptm_get_enable_state(); + + rtadv_if_init(zebra_if); + + memset(&zebra_if->neigh_mac[0], 0, 6); + + /* Initialize installed address chains tree. */ + zebra_if->ipv4_subnets = + route_table_init_with_delegate(&zebra_if_table_delegate); + + ifp->info = zebra_if; + + /* + * Some platforms are telling us that the interface is + * up and ready to go. When we check the speed we + * sometimes get the wrong value. Wait a couple + * of seconds and ask again. Hopefully it's all settled + * down upon startup. + */ + zebra_if->speed_update_count = 0; + thread_add_timer(zrouter.master, if_zebra_speed_update, ifp, 15, + &zebra_if->speed_update); + thread_ignore_late_timer(zebra_if->speed_update); + + return 0; +} + +static void if_nhg_dependents_check_valid(struct nhg_hash_entry *nhe) +{ + zebra_nhg_check_valid(nhe); +} + +static void if_down_nhg_dependents(const struct interface *ifp) +{ + struct nhg_connected *rb_node_dep = NULL; + struct zebra_if *zif = (struct zebra_if *)ifp->info; + + frr_each(nhg_connected_tree, &zif->nhg_dependents, rb_node_dep) + if_nhg_dependents_check_valid(rb_node_dep->nhe); +} + +static void if_nhg_dependents_release(const struct interface *ifp) +{ + struct nhg_connected *rb_node_dep = NULL; + struct zebra_if *zif = (struct zebra_if *)ifp->info; + + frr_each(nhg_connected_tree, &zif->nhg_dependents, rb_node_dep) { + rb_node_dep->nhe->ifp = NULL; /* Null it out */ + if_nhg_dependents_check_valid(rb_node_dep->nhe); + } +} + +/* Called when interface is deleted. 
*/ +static int if_zebra_delete_hook(struct interface *ifp) +{ + struct zebra_if *zebra_if; + + if (ifp->info) { + zebra_if = ifp->info; + + /* If we set protodown, clear our reason now from the kernel */ + if (ZEBRA_IF_IS_PROTODOWN(zebra_if) && zebra_if->protodown_rc && + !ZEBRA_IF_IS_PROTODOWN_ONLY_EXTERNAL(zebra_if)) + zebra_if_update_protodown_rc(ifp, true, + (zebra_if->protodown_rc & + ~ZEBRA_PROTODOWN_ALL)); + + /* Free installed address chains tree. */ + if (zebra_if->ipv4_subnets) + route_table_finish(zebra_if->ipv4_subnets); + + rtadv_if_fini(zebra_if); + + zebra_evpn_if_cleanup(zebra_if); + zebra_evpn_mac_ifp_del(ifp); + + if_nhg_dependents_release(ifp); + zebra_if_nhg_dependents_free(zebra_if); + + XFREE(MTYPE_TMP, zebra_if->desc); + + THREAD_OFF(zebra_if->speed_update); + + XFREE(MTYPE_ZINFO, zebra_if); + } + + return 0; +} + +/* Build the table key */ +static void if_build_key(uint32_t ifindex, struct prefix *p) +{ + p->family = AF_INET; + p->prefixlen = IPV4_MAX_BITLEN; + p->u.prefix4.s_addr = ifindex; +} + +/* Link an interface in a per NS interface tree */ +struct interface *if_link_per_ns(struct zebra_ns *ns, struct interface *ifp) +{ + struct prefix p; + struct route_node *rn; + + if (ifp->ifindex == IFINDEX_INTERNAL) + return NULL; + + if_build_key(ifp->ifindex, &p); + rn = route_node_get(ns->if_table, &p); + if (rn->info) { + ifp = (struct interface *)rn->info; + route_unlock_node(rn); /* get */ + return ifp; + } + + rn->info = ifp; + ifp->node = rn; + + return ifp; +} + +/* Delete a VRF. This is called in vrf_terminate(). 
*/ +void if_unlink_per_ns(struct interface *ifp) +{ + ifp->node->info = NULL; + route_unlock_node(ifp->node); + ifp->node = NULL; +} + +/* Look up an interface by identifier within a NS */ +struct interface *if_lookup_by_index_per_ns(struct zebra_ns *ns, + uint32_t ifindex) +{ + struct prefix p; + struct route_node *rn; + struct interface *ifp = NULL; + + if_build_key(ifindex, &p); + rn = route_node_lookup(ns->if_table, &p); + if (rn) { + ifp = (struct interface *)rn->info; + route_unlock_node(rn); /* lookup */ + } + return ifp; +} + +/* Look up an interface by name within a NS */ +struct interface *if_lookup_by_name_per_ns(struct zebra_ns *ns, + const char *ifname) +{ + struct route_node *rn; + struct interface *ifp; + + for (rn = route_top(ns->if_table); rn; rn = route_next(rn)) { + ifp = (struct interface *)rn->info; + if (ifp && strcmp(ifp->name, ifname) == 0) { + route_unlock_node(rn); + return (ifp); + } + } + + return NULL; +} + +const char *ifindex2ifname_per_ns(struct zebra_ns *zns, unsigned int ifindex) +{ + struct interface *ifp; + + return ((ifp = if_lookup_by_index_per_ns(zns, ifindex)) != NULL) + ? ifp->name + : "unknown"; +} + +/* Tie an interface address to its derived subnet list of addresses. */ +int if_subnet_add(struct interface *ifp, struct connected *ifc) +{ + struct route_node *rn; + struct zebra_if *zebra_if; + struct prefix cp; + struct list *addr_list; + + assert(ifp && ifp->info && ifc); + zebra_if = ifp->info; + + /* Get address derived subnet node and associated address list, while + marking + address secondary attribute appropriately. */ + cp = *CONNECTED_PREFIX(ifc); + apply_mask(&cp); + rn = route_node_get(zebra_if->ipv4_subnets, &cp); + + if ((addr_list = rn->info)) + SET_FLAG(ifc->flags, ZEBRA_IFA_SECONDARY); + else { + UNSET_FLAG(ifc->flags, ZEBRA_IFA_SECONDARY); + rn->info = addr_list = list_new(); + route_lock_node(rn); + } + + /* Tie address at the tail of address list. 
*/ + listnode_add(addr_list, ifc); + + /* Return list element count. */ + return (addr_list->count); +} + +/* Untie an interface address from its derived subnet list of addresses. */ +int if_subnet_delete(struct interface *ifp, struct connected *ifc) +{ + struct route_node *rn; + struct zebra_if *zebra_if; + struct list *addr_list; + struct prefix cp; + + assert(ifp && ifp->info && ifc); + zebra_if = ifp->info; + + cp = *CONNECTED_PREFIX(ifc); + apply_mask(&cp); + + /* Get address derived subnet node. */ + rn = route_node_lookup(zebra_if->ipv4_subnets, &cp); + if (!(rn && rn->info)) { + flog_warn(EC_ZEBRA_REMOVE_ADDR_UNKNOWN_SUBNET, + "Trying to remove an address from an unknown subnet. (please report this bug)"); + return -1; + } + route_unlock_node(rn); + + /* Untie address from subnet's address list. */ + addr_list = rn->info; + + /* Deleting an address that is not registered is a bug. + * In any case, we shouldn't decrement the lock counter if the address + * is unknown. */ + if (!listnode_lookup(addr_list, ifc)) { + flog_warn( + EC_ZEBRA_REMOVE_UNREGISTERED_ADDR, + "Trying to remove an address from a subnet where it is not currently registered. (please report this bug)"); + return -1; + } + + listnode_delete(addr_list, ifc); + route_unlock_node(rn); + + /* Return list element count, if not empty. */ + if (addr_list->count) { + /* If deleted address is primary, mark subsequent one as such + * and distribute. */ + if (!CHECK_FLAG(ifc->flags, ZEBRA_IFA_SECONDARY)) { + ifc = listgetdata( + (struct listnode *)listhead(addr_list)); + zebra_interface_address_delete_update(ifp, ifc); + UNSET_FLAG(ifc->flags, ZEBRA_IFA_SECONDARY); + /* XXX: Linux kernel removes all the secondary addresses + * when the primary + * address is removed. We could try to work around that, + * though this is + * non-trivial. */ + zebra_interface_address_add_update(ifp, ifc); + } + + return addr_list->count; + } + + /* Otherwise, free list and route node. 
*/ + list_delete(&addr_list); + rn->info = NULL; + route_unlock_node(rn); + + return 0; +} + +/* if_flags_mangle: A place for hacks that require mangling + * or tweaking the interface flags. + * + * ******************** Solaris flags hacks ************************** + * + * Solaris IFF_UP flag reflects only the primary interface as the + * routing socket only sends IFINFO for the primary interface. Hence + * ~IFF_UP does not per se imply all the logical interfaces are also + * down - which we only know of as addresses. Instead we must determine + * whether the interface really is up or not according to how many + * addresses are still attached. (Solaris always sends RTM_DELADDR if + * an interface, logical or not, goes ~IFF_UP). + * + * Ie, we mangle IFF_UP to *additionally* reflect whether or not there + * are addresses left in struct connected, not just the actual underlying + * IFF_UP flag. + * + * We must hence remember the real state of IFF_UP, which we do in + * struct zebra_if.primary_state. + * + * Setting IFF_UP within zebra to administratively shutdown the + * interface will affect only the primary interface/address on Solaris. + ************************End Solaris flags hacks *********************** + */ +static void if_flags_mangle(struct interface *ifp, uint64_t *newflags) +{ + return; +} + +/* Update the flags field of the ifp with the new flag set provided. + * Take whatever actions are required for any changes in flags we care + * about. + * + * newflags should be the raw value, as obtained from the OS. + */ +void if_flags_update(struct interface *ifp, uint64_t newflags) +{ + if_flags_mangle(ifp, &newflags); + + if (if_is_no_ptm_operative(ifp)) { + /* operative -> inoperative? */ + ifp->flags = newflags; + if (!if_is_operative(ifp)) + if_down(ifp); + } else { + /* inoperative -> operative? */ + ifp->flags = newflags; + if (if_is_operative(ifp)) + if_up(ifp, true); + } +} + +/* Wake up configured address if it is not in current kernel + address. 
*/ +void if_addr_wakeup(struct interface *ifp) +{ + struct listnode *node, *nnode; + struct connected *ifc; + struct prefix *p; + enum zebra_dplane_result dplane_res; + + for (ALL_LIST_ELEMENTS(ifp->connected, node, nnode, ifc)) { + p = ifc->address; + + if (CHECK_FLAG(ifc->conf, ZEBRA_IFC_CONFIGURED) + && !CHECK_FLAG(ifc->conf, ZEBRA_IFC_QUEUED)) { + /* Address check. */ + if (p->family == AF_INET) { + if (!if_is_up(ifp)) { + /* Assume zebra is configured like + * following: + * + * interface gre0 + * ip addr 192.0.2.1/24 + * ! + * + * As soon as zebra becomes first aware + * that gre0 exists in the + * kernel, it will set gre0 up and + * configure its addresses. + * + * (This may happen at startup when the + * interface already exists + * or during runtime when the interface + * is added to the kernel) + * + * XXX: IRDP code is calling here via + * if_add_update - this seems + * somewhat weird. + * XXX: RUNNING is not a settable flag + * on any system + * I (paulj) am aware of. + */ + if_set_flags(ifp, IFF_UP | IFF_RUNNING); + if_refresh(ifp); + } + + dplane_res = dplane_intf_addr_set(ifp, ifc); + if (dplane_res == + ZEBRA_DPLANE_REQUEST_FAILURE) { + flog_err_sys( + EC_ZEBRA_IFACE_ADDR_ADD_FAILED, + "Can't set interface's address: %s", + dplane_res2str(dplane_res)); + continue; + } + + SET_FLAG(ifc->conf, ZEBRA_IFC_QUEUED); + /* The address will be advertised to zebra + * clients when the notification + * from the kernel has been received. + * It will also be added to the interface's + * subnet list then. 
*/ + } + if (p->family == AF_INET6) { + if (!if_is_up(ifp)) { + /* See long comment above */ + if_set_flags(ifp, IFF_UP | IFF_RUNNING); + if_refresh(ifp); + } + + + dplane_res = dplane_intf_addr_set(ifp, ifc); + if (dplane_res == + ZEBRA_DPLANE_REQUEST_FAILURE) { + flog_err_sys( + EC_ZEBRA_IFACE_ADDR_ADD_FAILED, + "Can't set interface's address: %s", + dplane_res2str(dplane_res)); + continue; + } + + SET_FLAG(ifc->conf, ZEBRA_IFC_QUEUED); + /* The address will be advertised to zebra + * clients when the notification + * from the kernel has been received. */ + } + } + } +} + +/* Handle interface addition */ +void if_add_update(struct interface *ifp) +{ + struct zebra_if *if_data; + struct zebra_ns *zns; + struct zebra_vrf *zvrf = ifp->vrf->info; + + /* case interface populate before vrf enabled */ + if (zvrf->zns) + zns = zvrf->zns; + else + zns = zebra_ns_lookup(NS_DEFAULT); + if_link_per_ns(zns, ifp); + if_data = ifp->info; + assert(if_data); + + if (if_data->multicast == IF_ZEBRA_DATA_ON) + if_set_flags(ifp, IFF_MULTICAST); + else if (if_data->multicast == IF_ZEBRA_DATA_OFF) + if_unset_flags(ifp, IFF_MULTICAST); + + zebra_ptm_if_set_ptm_state(ifp, if_data); + + zebra_interface_add_update(ifp); + + if (!CHECK_FLAG(ifp->status, ZEBRA_INTERFACE_ACTIVE)) { + SET_FLAG(ifp->status, ZEBRA_INTERFACE_ACTIVE); + + if (if_data->shutdown == IF_ZEBRA_DATA_ON) { + if (IS_ZEBRA_DEBUG_KERNEL) { + zlog_debug( + "interface %s vrf %s(%u) index %d is shutdown. Won't wake it up.", + ifp->name, ifp->vrf->name, + ifp->vrf->vrf_id, ifp->ifindex); + } + + return; + } + + if_addr_wakeup(ifp); + + if (IS_ZEBRA_DEBUG_KERNEL) + zlog_debug( + "interface %s vrf %s(%u) index %d becomes active.", + ifp->name, ifp->vrf->name, ifp->vrf->vrf_id, + ifp->ifindex); + + } else { + if (IS_ZEBRA_DEBUG_KERNEL) + zlog_debug("interface %s vrf %s(%u) index %d is added.", + ifp->name, ifp->vrf->name, ifp->vrf->vrf_id, + ifp->ifindex); + } +} + +/* Install connected routes corresponding to an interface. 
*/ +static void if_install_connected(struct interface *ifp) +{ + struct listnode *node; + struct listnode *next; + struct connected *ifc; + + if (ifp->connected) { + for (ALL_LIST_ELEMENTS(ifp->connected, node, next, ifc)) { + if (CHECK_FLAG(ifc->conf, ZEBRA_IFC_REAL)) + zebra_interface_address_add_update(ifp, ifc); + + connected_up(ifp, ifc); + } + } +} + +/* Uninstall connected routes corresponding to an interface. */ +static void if_uninstall_connected(struct interface *ifp) +{ + struct listnode *node; + struct listnode *next; + struct connected *ifc; + + if (ifp->connected) { + for (ALL_LIST_ELEMENTS(ifp->connected, node, next, ifc)) { + zebra_interface_address_delete_update(ifp, ifc); + connected_down(ifp, ifc); + } + } +} + +/* Uninstall and delete connected routes corresponding to an interface. */ +/* TODO - Check why IPv4 handling here is different from install or if_down */ +static void if_delete_connected(struct interface *ifp) +{ + struct connected *ifc; + struct prefix cp; + struct route_node *rn; + struct zebra_if *zebra_if; + struct listnode *node; + struct listnode *last = NULL; + + zebra_if = ifp->info; + + if (!ifp->connected) + return; + + while ((node = (last ? last->next : listhead(ifp->connected)))) { + ifc = listgetdata(node); + + cp = *CONNECTED_PREFIX(ifc); + apply_mask(&cp); + + if (cp.family == AF_INET + && (rn = route_node_lookup(zebra_if->ipv4_subnets, &cp))) { + struct listnode *anode; + struct listnode *next; + struct listnode *first; + struct list *addr_list; + + route_unlock_node(rn); + addr_list = (struct list *)rn->info; + + /* Remove addresses, secondaries first. 
*/ + first = listhead(addr_list); + if (first) + for (anode = first->next; anode || first; + anode = next) { + if (!anode) { + anode = first; + first = NULL; + } + next = anode->next; + + ifc = listgetdata(anode); + connected_down(ifp, ifc); + + /* XXX: We have to send notifications + * here explicitly, because we destroy + * the ifc before receiving the + * notification about the address being + * deleted. + */ + zebra_interface_address_delete_update( + ifp, ifc); + + UNSET_FLAG(ifc->conf, ZEBRA_IFC_REAL); + UNSET_FLAG(ifc->conf, ZEBRA_IFC_QUEUED); + + /* Remove from subnet chain. */ + list_delete_node(addr_list, anode); + route_unlock_node(rn); + + /* Remove from interface address list + * (unconditionally). */ + if (!CHECK_FLAG(ifc->conf, + ZEBRA_IFC_CONFIGURED)) { + listnode_delete(ifp->connected, + ifc); + connected_free(&ifc); + } else + last = node; + } + + /* Free chain list and respective route node. */ + list_delete(&addr_list); + rn->info = NULL; + route_unlock_node(rn); + } else if (cp.family == AF_INET6) { + connected_down(ifp, ifc); + + zebra_interface_address_delete_update(ifp, ifc); + + UNSET_FLAG(ifc->conf, ZEBRA_IFC_REAL); + UNSET_FLAG(ifc->conf, ZEBRA_IFC_QUEUED); + + if (CHECK_FLAG(ifc->conf, ZEBRA_IFC_CONFIGURED)) + last = node; + else { + listnode_delete(ifp->connected, ifc); + connected_free(&ifc); + } + } else { + last = node; + } + } +} + +/* Handle an interface delete event */ +void if_delete_update(struct interface **pifp) +{ + struct zebra_if *zif; + struct interface *ifp = *pifp; + + if (if_is_up(ifp)) { + flog_err( + EC_LIB_INTERFACE, + "interface %s vrf %s(%u) index %d is still up while being deleted.", + ifp->name, ifp->vrf->name, ifp->vrf->vrf_id, + ifp->ifindex); + return; + } + + if (!CHECK_FLAG(ifp->status, ZEBRA_INTERFACE_ACTIVE)) + return; + + /* Mark interface as inactive */ + UNSET_FLAG(ifp->status, ZEBRA_INTERFACE_ACTIVE); + + if (IS_ZEBRA_DEBUG_KERNEL) + zlog_debug("interface %s vrf %s(%u) index %d is now inactive.", + 
ifp->name, ifp->vrf->name, ifp->vrf->vrf_id, + ifp->ifindex); + + /* Delete connected routes from the kernel. */ + if_delete_connected(ifp); + + /* Send out notification on interface delete. */ + zebra_interface_delete_update(ifp); + + if_unlink_per_ns(ifp); + + /* Update ifindex after distributing the delete message. This is in + case any client needs to have the old value of ifindex available + while processing the deletion. Each client daemon is responsible + for setting ifindex to IFINDEX_INTERNAL after processing the + interface deletion message. */ + if_set_index(ifp, IFINDEX_INTERNAL); + ifp->node = NULL; + + /* if the ifp is in a vrf, move it to default so vrf can be deleted if + * desired. This operation is not done for netns implementation to avoid + * collision with interface with the same name in the default vrf (can + * occur with this implementation whereas it is not possible with + * vrf-lite). + */ + if (ifp->vrf->vrf_id && !vrf_is_backend_netns()) + if_handle_vrf_change(ifp, VRF_DEFAULT); + + /* Reset some zebra interface params to default values. */ + zif = ifp->info; + if (zif) { + zif->zif_type = ZEBRA_IF_OTHER; + zif->zif_slave_type = ZEBRA_IF_SLAVE_NONE; + memset(&zif->l2info, 0, sizeof(union zebra_l2if_info)); + memset(&zif->brslave_info, 0, + sizeof(struct zebra_l2info_brslave)); + zebra_evpn_if_cleanup(zif); + zebra_evpn_mac_ifp_del(ifp); + } + + if (!ifp->configured) { + if (IS_ZEBRA_DEBUG_KERNEL) + zlog_debug("interface %s is being deleted from the system", + ifp->name); + if_delete(pifp); + } +} + +/* VRF change for an interface */ +void if_handle_vrf_change(struct interface *ifp, vrf_id_t vrf_id) +{ + vrf_id_t old_vrf_id; + + old_vrf_id = ifp->vrf->vrf_id; + + /* Uninstall connected routes. 
*/ + if_uninstall_connected(ifp); + + /* Delete any IPv4 neighbors created to implement RFC 5549 */ + if_nbr_ipv6ll_to_ipv4ll_neigh_del_all(ifp); + + /* Delete all neighbor addresses learnt through IPv6 RA */ + if_down_del_nbr_connected(ifp); + + /* Send out notification on interface VRF change. */ + /* This is to issue an UPDATE or a DELETE, as appropriate. */ + zebra_interface_vrf_update_del(ifp, vrf_id); + + /* update VRF */ + if_update_to_new_vrf(ifp, vrf_id); + + /* Send out notification on interface VRF change. */ + /* This is to issue an ADD, if needed. */ + zebra_interface_vrf_update_add(ifp, old_vrf_id); +} + +static void ipv6_ll_address_to_mac(struct in6_addr *address, uint8_t *mac) +{ + mac[0] = address->s6_addr[8] ^ 0x02; + mac[1] = address->s6_addr[9]; + mac[2] = address->s6_addr[10]; + mac[3] = address->s6_addr[13]; + mac[4] = address->s6_addr[14]; + mac[5] = address->s6_addr[15]; +} + +void if_nbr_mac_to_ipv4ll_neigh_update(struct interface *ifp, + char mac[6], + struct in6_addr *address, + int add) +{ + struct zebra_vrf *zvrf = ifp->vrf->info; + struct zebra_if *zif = ifp->info; + char buf[16] = "169.254.0.1"; + struct in_addr ipv4_ll; + ns_id_t ns_id; + + inet_pton(AF_INET, buf, &ipv4_ll); + + ns_id = zvrf->zns->ns_id; + + /* + * Remove and re-add any existing neighbor entry for this address, + * since Netlink doesn't currently offer update message types. + */ + kernel_neigh_update(0, ifp->ifindex, (void *)&ipv4_ll.s_addr, mac, 6, + ns_id, AF_INET, true); + + /* Add new neighbor entry. + * + * We force installation even if current neighbor entry is the same. + * Since this function is used to refresh our MAC entries after an + * interface flap, if we don't force in our custom entries with their + * state set to PERMANENT or REACHABLE then the kernel will attempt to + * resolve our leftover entries, fail, mark them unreachable and then + * they'll be useless to us. 
+ */ + if (add) + kernel_neigh_update(add, ifp->ifindex, (void *)&ipv4_ll.s_addr, + mac, 6, ns_id, AF_INET, true); + + memcpy(&zif->neigh_mac[0], &mac[0], 6); + + /* + * We need to note whether or not we originated a v6 + * neighbor entry for this interface. So that when + * someone unwisely accidentally deletes this entry + * we can shove it back in. + */ + zif->v6_2_v4_ll_neigh_entry = !!add; + memcpy(&zif->v6_2_v4_ll_addr6, address, sizeof(*address)); + + zvrf->neigh_updates++; +} + +void if_nbr_ipv6ll_to_ipv4ll_neigh_update(struct interface *ifp, + struct in6_addr *address, int add) +{ + + char mac[6]; + + ipv6_ll_address_to_mac(address, (uint8_t *)mac); + if_nbr_mac_to_ipv4ll_neigh_update(ifp, mac, address, add); +} + +static void if_nbr_ipv6ll_to_ipv4ll_neigh_add_all(struct interface *ifp) +{ + if (listhead(ifp->nbr_connected)) { + struct nbr_connected *nbr_connected; + struct listnode *node; + + for (ALL_LIST_ELEMENTS_RO(ifp->nbr_connected, node, + nbr_connected)) + if_nbr_ipv6ll_to_ipv4ll_neigh_update( + ifp, &nbr_connected->address->u.prefix6, 1); + } +} + +void if_nbr_ipv6ll_to_ipv4ll_neigh_del_all(struct interface *ifp) +{ + if (listhead(ifp->nbr_connected)) { + struct nbr_connected *nbr_connected; + struct listnode *node; + + for (ALL_LIST_ELEMENTS_RO(ifp->nbr_connected, node, + nbr_connected)) + if_nbr_ipv6ll_to_ipv4ll_neigh_update( + ifp, &nbr_connected->address->u.prefix6, 0); + } +} + +static void if_down_del_nbr_connected(struct interface *ifp) +{ + struct nbr_connected *nbr_connected; + struct listnode *node, *nnode; + + for (ALL_LIST_ELEMENTS(ifp->nbr_connected, node, nnode, + nbr_connected)) { + listnode_delete(ifp->nbr_connected, nbr_connected); + nbr_connected_free(nbr_connected); + } +} + +void if_nhg_dependents_add(struct interface *ifp, struct nhg_hash_entry *nhe) +{ + if (ifp->info) { + struct zebra_if *zif = (struct zebra_if *)ifp->info; + + nhg_connected_tree_add_nhe(&zif->nhg_dependents, nhe); + } +} + +void if_nhg_dependents_del(struct 
interface *ifp, struct nhg_hash_entry *nhe) +{ + if (ifp->info) { + struct zebra_if *zif = (struct zebra_if *)ifp->info; + + nhg_connected_tree_del_nhe(&zif->nhg_dependents, nhe); + } +} + +unsigned int if_nhg_dependents_count(const struct interface *ifp) +{ + if (ifp->info) { + struct zebra_if *zif = (struct zebra_if *)ifp->info; + + return nhg_connected_tree_count(&zif->nhg_dependents); + } + + return 0; +} + + +bool if_nhg_dependents_is_empty(const struct interface *ifp) +{ + if (ifp->info) { + struct zebra_if *zif = (struct zebra_if *)ifp->info; + + return nhg_connected_tree_is_empty(&zif->nhg_dependents); + } + + return false; +} + +/* Interface is up. */ +void if_up(struct interface *ifp, bool install_connected) +{ + struct zebra_if *zif; + struct interface *link_if; + struct zebra_vrf *zvrf = ifp->vrf->info; + + zif = ifp->info; + zif->up_count++; + frr_timestamp(2, zif->up_last, sizeof(zif->up_last)); + + /* Notify the protocol daemons. */ + if (ifp->ptm_enable && (ifp->ptm_status == ZEBRA_PTM_STATUS_DOWN)) { + flog_warn(EC_ZEBRA_PTM_NOT_READY, + "%s: interface %s hasn't passed ptm check", + __func__, ifp->name); + return; + } + zebra_interface_up_update(ifp); + + if_nbr_ipv6ll_to_ipv4ll_neigh_add_all(ifp); + + rtadv_if_up(zif); + + /* Install connected routes to the kernel. */ + if (install_connected) + if_install_connected(ifp); + + /* Handle interface up for specific types for EVPN. Non-VxLAN interfaces + * are checked to see if (remote) neighbor entries need to be installed + * on them for ARP suppression. 
+ */ + if (IS_ZEBRA_IF_VXLAN(ifp)) + zebra_vxlan_if_up(ifp); + else if (IS_ZEBRA_IF_BRIDGE(ifp)) { + link_if = ifp; + zebra_vxlan_svi_up(ifp, link_if); + } else if (IS_ZEBRA_IF_VLAN(ifp)) { + link_if = if_lookup_by_index_per_ns(zvrf->zns, + zif->link_ifindex); + if (link_if) + zebra_vxlan_svi_up(ifp, link_if); + } else if (IS_ZEBRA_IF_MACVLAN(ifp)) { + zebra_vxlan_macvlan_up(ifp); + } + + if (zif->es_info.es) + zebra_evpn_es_if_oper_state_change(zif, true /*up*/); + + if (zif->flags & ZIF_FLAG_EVPN_MH_UPLINK) + zebra_evpn_mh_uplink_oper_update(zif); + + thread_add_timer(zrouter.master, if_zebra_speed_update, ifp, 0, + &zif->speed_update); + thread_ignore_late_timer(zif->speed_update); +} + +/* Interface goes down. We have to manage different behavior of based + OS. */ +void if_down(struct interface *ifp) +{ + struct zebra_if *zif; + struct interface *link_if; + struct zebra_vrf *zvrf = ifp->vrf->info; + + zif = ifp->info; + zif->down_count++; + frr_timestamp(2, zif->down_last, sizeof(zif->down_last)); + + if_down_nhg_dependents(ifp); + + /* Handle interface down for specific types for EVPN. Non-VxLAN + * interfaces + * are checked to see if (remote) neighbor entries need to be purged + * for ARP suppression. + */ + if (IS_ZEBRA_IF_VXLAN(ifp)) + zebra_vxlan_if_down(ifp); + else if (IS_ZEBRA_IF_BRIDGE(ifp)) { + link_if = ifp; + zebra_vxlan_svi_down(ifp, link_if); + } else if (IS_ZEBRA_IF_VLAN(ifp)) { + link_if = if_lookup_by_index_per_ns(zvrf->zns, + zif->link_ifindex); + if (link_if) + zebra_vxlan_svi_down(ifp, link_if); + } else if (IS_ZEBRA_IF_MACVLAN(ifp)) { + zebra_vxlan_macvlan_down(ifp); + } + + if (zif->es_info.es) + zebra_evpn_es_if_oper_state_change(zif, false /*up*/); + + if (zif->flags & ZIF_FLAG_EVPN_MH_UPLINK) + zebra_evpn_mh_uplink_oper_update(zif); + + /* Notify to the protocol daemons. */ + zebra_interface_down_update(ifp); + + /* Uninstall connected routes from the kernel. 
*/ + if_uninstall_connected(ifp); + + if_nbr_ipv6ll_to_ipv4ll_neigh_del_all(ifp); + + /* Delete all neighbor addresses learnt through IPv6 RA */ + if_down_del_nbr_connected(ifp); +} + +void if_refresh(struct interface *ifp) +{ +#ifndef GNU_LINUX + if_get_flags(ifp); +#endif +} + +void zebra_if_update_link(struct interface *ifp, ifindex_t link_ifindex, + ns_id_t ns_id) +{ + struct zebra_if *zif; + + if (IS_ZEBRA_IF_VETH(ifp)) + return; + zif = (struct zebra_if *)ifp->info; + zif->link_ifindex = link_ifindex; + zif->link = if_lookup_by_index_per_ns(zebra_ns_lookup(ns_id), + link_ifindex); +} + +/* + * during initial link dump kernel does not order lower devices before + * upper devices so we need to fixup link dependencies at the end of dump + */ +void zebra_if_update_all_links(struct zebra_ns *zns) +{ + struct route_node *rn; + struct interface *ifp; + struct zebra_if *zif; + + if (IS_ZEBRA_DEBUG_KERNEL) + zlog_info("fixup link dependencies"); + + for (rn = route_top(zns->if_table); rn; rn = route_next(rn)) { + ifp = (struct interface *)rn->info; + if (!ifp) + continue; + zif = ifp->info; + /* update bond-member to bond linkages */ + if ((IS_ZEBRA_IF_BOND_SLAVE(ifp)) + && (zif->bondslave_info.bond_ifindex != IFINDEX_INTERNAL) + && !zif->bondslave_info.bond_if) { + if (IS_ZEBRA_DEBUG_EVPN_MH_ES || IS_ZEBRA_DEBUG_KERNEL) + zlog_debug("bond mbr %s map to bond %d", + zif->ifp->name, + zif->bondslave_info.bond_ifindex); + zebra_l2_map_slave_to_bond(zif, ifp->vrf->vrf_id); + } + + /* update SVI linkages */ + if ((zif->link_ifindex != IFINDEX_INTERNAL) && !zif->link) { + zif->link = if_lookup_by_index_per_ns( + zns, zif->link_ifindex); + if (IS_ZEBRA_DEBUG_KERNEL) + zlog_debug("interface %s/%d's lower fixup to %s/%d", + ifp->name, ifp->ifindex, + zif->link?zif->link->name:"unk", + zif->link_ifindex); + } + + /* Update VLAN<=>SVI map */ + if (IS_ZEBRA_IF_VLAN(ifp)) + zebra_evpn_acc_bd_svi_set(zif, NULL, + !!if_is_operative(ifp)); + } +} + +static bool 
if_ignore_set_protodown(const struct interface *ifp, bool new_down, + uint32_t new_protodown_rc) +{ + struct zebra_if *zif; + bool old_down, old_set_down, old_unset_down; + + zif = ifp->info; + + /* Current state as we know it */ + old_down = !!(ZEBRA_IF_IS_PROTODOWN(zif)); + old_set_down = !!CHECK_FLAG(zif->flags, ZIF_FLAG_SET_PROTODOWN); + old_unset_down = !!CHECK_FLAG(zif->flags, ZIF_FLAG_UNSET_PROTODOWN); + + if (new_protodown_rc == zif->protodown_rc) { + /* Early return if already down & reason bitfield matches */ + if (new_down == old_down) { + if (IS_ZEBRA_DEBUG_KERNEL) + zlog_debug( + "Ignoring request to set protodown %s for interface %s (%u): protodown %s is already set (reason bitfield: old 0x%x new 0x%x)", + new_down ? "on" : "off", ifp->name, + ifp->ifindex, new_down ? "on" : "off", + zif->protodown_rc, new_protodown_rc); + + return true; + } + + /* Early return if already set queued & reason bitfield matches + */ + if (new_down && old_set_down) { + if (IS_ZEBRA_DEBUG_KERNEL) + zlog_debug( + "Ignoring request to set protodown %s for interface %s (%u): protodown %s is already queued to dplane (reason bitfield: old 0x%x new 0x%x)", + new_down ? "on" : "off", ifp->name, + ifp->ifindex, new_down ? "on" : "off", + zif->protodown_rc, new_protodown_rc); + + return true; + } + + /* Early return if already unset queued & reason bitfield + * matches */ + if (!new_down && old_unset_down) { + if (IS_ZEBRA_DEBUG_KERNEL) + zlog_debug( + "Ignoring request to set protodown %s for interface %s (%u): protodown %s is already queued to dplane (reason bitfield: old 0x%x new 0x%x)", + new_down ? "on" : "off", ifp->name, + ifp->ifindex, new_down ? 
"on" : "off", + zif->protodown_rc, new_protodown_rc); + + return true; + } + } + + return false; +} + +int zebra_if_update_protodown_rc(struct interface *ifp, bool new_down, + uint32_t new_protodown_rc) +{ + struct zebra_if *zif; + + zif = ifp->info; + + /* Check if we already have this state or it's queued */ + if (if_ignore_set_protodown(ifp, new_down, new_protodown_rc)) + return 1; + + zlog_info( + "Setting protodown %s - interface %s (%u): reason bitfield change from 0x%x --> 0x%x", + new_down ? "on" : "off", ifp->name, ifp->ifindex, + zif->protodown_rc, new_protodown_rc); + + zif->protodown_rc = new_protodown_rc; + + if (new_down) + SET_FLAG(zif->flags, ZIF_FLAG_SET_PROTODOWN); + else + SET_FLAG(zif->flags, ZIF_FLAG_UNSET_PROTODOWN); + +#ifdef HAVE_NETLINK + dplane_intf_update(ifp); +#else + zlog_warn("Protodown is not supported on this platform"); +#endif + return 0; +} + +int zebra_if_set_protodown(struct interface *ifp, bool new_down, + enum protodown_reasons new_reason) +{ + struct zebra_if *zif; + uint32_t new_protodown_rc; + + zif = ifp->info; + + if (new_down) + new_protodown_rc = zif->protodown_rc | new_reason; + else + new_protodown_rc = zif->protodown_rc & ~new_reason; + + return zebra_if_update_protodown_rc(ifp, new_down, new_protodown_rc); +} + +/* + * Handle an interface events based on info in a dplane context object. + * This runs in the main pthread, using the info in the context object to + * modify an interface. + */ +static void zebra_if_addr_update_ctx(struct zebra_dplane_ctx *ctx, + struct interface *ifp) +{ + uint8_t flags = 0; + const char *label = NULL; + uint32_t metric = METRIC_MAX; + const struct prefix *addr, *dest = NULL; + enum dplane_op_e op; + + op = dplane_ctx_get_op(ctx); + addr = dplane_ctx_get_intf_addr(ctx); + + if (IS_ZEBRA_DEBUG_KERNEL) + zlog_debug("%s: %s: ifindex %s(%u), addr %pFX", __func__, + dplane_op2str(dplane_ctx_get_op(ctx)), ifp->name, + ifp->ifindex, addr); + + /* Is there a peer or broadcast address? 
*/ + dest = dplane_ctx_get_intf_dest(ctx); + if (dest->prefixlen == 0) + dest = NULL; + + if (dplane_ctx_intf_is_connected(ctx)) + SET_FLAG(flags, ZEBRA_IFA_PEER); + + /* Flags. */ + if (dplane_ctx_intf_is_secondary(ctx)) + SET_FLAG(flags, ZEBRA_IFA_SECONDARY); + + /* Label? */ + if (dplane_ctx_intf_has_label(ctx)) + label = dplane_ctx_get_intf_label(ctx); + + if (label && strcmp(ifp->name, label) == 0) + label = NULL; + + metric = dplane_ctx_get_intf_metric(ctx); + + /* Register interface address to the interface. */ + if (addr->family == AF_INET) { + if (op == DPLANE_OP_INTF_ADDR_ADD) + connected_add_ipv4( + ifp, flags, &addr->u.prefix4, addr->prefixlen, + dest ? &dest->u.prefix4 : NULL, label, metric); + else if (CHECK_FLAG(flags, ZEBRA_IFA_PEER)) { + /* Delete with a peer address */ + connected_delete_ipv4(ifp, flags, &addr->u.prefix4, + addr->prefixlen, + &dest->u.prefix4); + } else + connected_delete_ipv4(ifp, flags, &addr->u.prefix4, + addr->prefixlen, NULL); + } + + if (addr->family == AF_INET6) { + if (op == DPLANE_OP_INTF_ADDR_ADD) { + connected_add_ipv6(ifp, flags, &addr->u.prefix6, + dest ? &dest->u.prefix6 : NULL, + addr->prefixlen, label, metric); + } else + connected_delete_ipv6(ifp, &addr->u.prefix6, NULL, + addr->prefixlen); + } + + /* + * Linux kernel does not send route delete on interface down/addr del + * so we have to re-process routes it owns (i.e. kernel routes) + */ + if (op != DPLANE_OP_INTF_ADDR_ADD) + rib_update(RIB_UPDATE_KERNEL); +} + +static void zebra_if_update_ctx(struct zebra_dplane_ctx *ctx, + struct interface *ifp) +{ + enum zebra_dplane_result dp_res; + struct zebra_if *zif; + bool pd_reason_val; + bool down; + + dp_res = dplane_ctx_get_status(ctx); + pd_reason_val = dplane_ctx_get_intf_pd_reason_val(ctx); + down = dplane_ctx_intf_is_protodown(ctx); + + if (IS_ZEBRA_DEBUG_KERNEL) + zlog_debug("%s: %s: if %s(%u) ctx-protodown %s ctx-reason %d", + __func__, dplane_op2str(dplane_ctx_get_op(ctx)), + ifp->name, ifp->ifindex, down ? 
"on" : "off", + pd_reason_val); + + zif = ifp->info; + if (!zif) { + if (IS_ZEBRA_DEBUG_KERNEL) + zlog_debug("%s: if %s(%u) zebra info pointer is NULL", + __func__, ifp->name, ifp->ifindex); + return; + } + + if (dp_res != ZEBRA_DPLANE_REQUEST_SUCCESS) { + if (IS_ZEBRA_DEBUG_KERNEL) + zlog_debug("%s: if %s(%u) dplane update failed", + __func__, ifp->name, ifp->ifindex); + goto done; + } + + /* Update our info */ + COND_FLAG(zif->flags, ZIF_FLAG_PROTODOWN, down); + +done: + /* Clear our dplane flags */ + UNSET_FLAG(zif->flags, ZIF_FLAG_SET_PROTODOWN); + UNSET_FLAG(zif->flags, ZIF_FLAG_UNSET_PROTODOWN); +} + +/* + * Handle netconf change from a dplane context object; runs in the main + * pthread so it can update zebra data structs. + */ +static void zebra_if_netconf_update_ctx(struct zebra_dplane_ctx *ctx, + struct interface *ifp, + ifindex_t ifindex) +{ + struct zebra_if *zif = NULL; + afi_t afi; + enum dplane_netconf_status_e mpls, mcast_on, linkdown; + bool *mcast_set, *linkdown_set; + + afi = dplane_ctx_get_afi(ctx); + mpls = dplane_ctx_get_netconf_mpls(ctx); + linkdown = dplane_ctx_get_netconf_linkdown(ctx); + mcast_on = dplane_ctx_get_netconf_mcast(ctx); + + if (ifindex == DPLANE_NETCONF_IFINDEX_ALL) { + if (afi == AFI_IP) { + mcast_set = &zrouter.all_mc_forwardingv4; + linkdown_set = &zrouter.all_linkdownv4; + } else { + mcast_set = &zrouter.all_mc_forwardingv6; + linkdown_set = &zrouter.all_linkdownv6; + } + } else if (ifindex == DPLANE_NETCONF_IFINDEX_DEFAULT) { + if (afi == AFI_IP) { + mcast_set = &zrouter.default_mc_forwardingv4; + linkdown_set = &zrouter.default_linkdownv4; + } else { + mcast_set = &zrouter.default_mc_forwardingv6; + linkdown_set = &zrouter.default_linkdownv6; + } + } else { + zif = ifp ? 
ifp->info : NULL; + if (!zif) { + if (IS_ZEBRA_DEBUG_KERNEL) + zlog_debug( + "%s: if %s(%u) zebra info pointer is NULL", + __func__, ifp->name, ifp->ifindex); + return; + } + if (afi == AFI_IP) { + mcast_set = &zif->v4mcast_on; + linkdown_set = &zif->linkdown; + } else { + mcast_set = &zif->v6mcast_on; + linkdown_set = &zif->linkdownv6; + } + + /* + * mpls netconf data is neither v4 or v6 it's AF_MPLS! + */ + if (mpls == DPLANE_NETCONF_STATUS_ENABLED) { + zif->mpls = true; + zebra_mpls_turned_on(); + } else if (mpls == DPLANE_NETCONF_STATUS_DISABLED) + zif->mpls = false; + } + + if (linkdown == DPLANE_NETCONF_STATUS_ENABLED) + *linkdown_set = true; + else if (linkdown == DPLANE_NETCONF_STATUS_DISABLED) + *linkdown_set = false; + + if (mcast_on == DPLANE_NETCONF_STATUS_ENABLED) + *mcast_set = true; + else if (mcast_on == DPLANE_NETCONF_STATUS_DISABLED) + *mcast_set = false; + + if (IS_ZEBRA_DEBUG_KERNEL) + zlog_debug( + "%s: afi: %d if %s, ifindex %d, mpls %s mc_forwarding: %s linkdown %s", + __func__, afi, ifp ? ifp->name : "Global", + ifp ? ifp->ifindex : ifindex, + (zif ? (zif->mpls ? "ON" : "OFF") : "OFF"), + (*mcast_set ? "ON" : "OFF"), + (*linkdown_set ? "ON" : "OFF")); +} + +void zebra_if_dplane_result(struct zebra_dplane_ctx *ctx) +{ + struct zebra_ns *zns; + struct interface *ifp; + ns_id_t ns_id; + enum dplane_op_e op; + enum zebra_dplane_result dp_res; + ifindex_t ifindex; + + ns_id = dplane_ctx_get_ns_id(ctx); + dp_res = dplane_ctx_get_status(ctx); + op = dplane_ctx_get_op(ctx); + ifindex = dplane_ctx_get_ifindex(ctx); + + if (IS_ZEBRA_DEBUG_DPLANE_DETAIL || IS_ZEBRA_DEBUG_KERNEL) + zlog_debug("Intf dplane ctx %p, op %s, ifindex (%u), result %s", + ctx, dplane_op2str(op), ifindex, + dplane_res2str(dp_res)); + + zns = zebra_ns_lookup(ns_id); + if (zns == NULL) { + /* No ns - deleted maybe? 
*/ + if (IS_ZEBRA_DEBUG_KERNEL) + zlog_debug("%s: can't find zns id %u", __func__, ns_id); + + return; + } + + ifp = if_lookup_by_index_per_ns(zns, ifindex); + if (ifp == NULL) { + if (op != DPLANE_OP_INTF_NETCONFIG || + (ifindex != -1 && ifindex != -2)) { + if (IS_ZEBRA_DEBUG_KERNEL) + zlog_debug( + "%s: can't find ifp at nsid %u index %d", + __func__, ns_id, ifindex); + + return; + } + } + + switch (op) { + case DPLANE_OP_INTF_ADDR_ADD: + case DPLANE_OP_INTF_ADDR_DEL: + zebra_if_addr_update_ctx(ctx, ifp); + break; + + case DPLANE_OP_INTF_INSTALL: + case DPLANE_OP_INTF_UPDATE: + case DPLANE_OP_INTF_DELETE: + zebra_if_update_ctx(ctx, ifp); + break; + + case DPLANE_OP_INTF_NETCONFIG: + zebra_if_netconf_update_ctx(ctx, ifp, ifindex); + break; + + case DPLANE_OP_ROUTE_INSTALL: + case DPLANE_OP_ROUTE_UPDATE: + case DPLANE_OP_ROUTE_DELETE: + case DPLANE_OP_NH_DELETE: + case DPLANE_OP_NH_INSTALL: + case DPLANE_OP_NH_UPDATE: + case DPLANE_OP_ROUTE_NOTIFY: + case DPLANE_OP_LSP_INSTALL: + case DPLANE_OP_LSP_UPDATE: + case DPLANE_OP_LSP_DELETE: + case DPLANE_OP_LSP_NOTIFY: + case DPLANE_OP_PW_INSTALL: + case DPLANE_OP_PW_UNINSTALL: + case DPLANE_OP_SYS_ROUTE_ADD: + case DPLANE_OP_SYS_ROUTE_DELETE: + case DPLANE_OP_ADDR_INSTALL: + case DPLANE_OP_ADDR_UNINSTALL: + case DPLANE_OP_MAC_INSTALL: + case DPLANE_OP_MAC_DELETE: + case DPLANE_OP_NEIGH_INSTALL: + case DPLANE_OP_NEIGH_UPDATE: + case DPLANE_OP_NEIGH_DELETE: + case DPLANE_OP_NEIGH_IP_INSTALL: + case DPLANE_OP_NEIGH_IP_DELETE: + case DPLANE_OP_VTEP_ADD: + case DPLANE_OP_VTEP_DELETE: + case DPLANE_OP_RULE_ADD: + case DPLANE_OP_RULE_DELETE: + case DPLANE_OP_RULE_UPDATE: + case DPLANE_OP_NEIGH_DISCOVER: + case DPLANE_OP_BR_PORT_UPDATE: + case DPLANE_OP_NONE: + case DPLANE_OP_IPTABLE_ADD: + case DPLANE_OP_IPTABLE_DELETE: + case DPLANE_OP_IPSET_ADD: + case DPLANE_OP_IPSET_DELETE: + case DPLANE_OP_IPSET_ENTRY_ADD: + case DPLANE_OP_IPSET_ENTRY_DELETE: + case DPLANE_OP_NEIGH_TABLE_UPDATE: + case DPLANE_OP_GRE_SET: + case 
DPLANE_OP_TC_INSTALL: + case DPLANE_OP_TC_UPDATE: + case DPLANE_OP_TC_DELETE: + break; /* should never hit here */ + } +} + +/* Dump if address information to vty. */ +static void connected_dump_vty(struct vty *vty, json_object *json, + struct connected *connected) +{ + struct prefix *p; + json_object *json_addr = NULL; + + /* Print interface address. */ + p = connected->address; + + if (json) { + json_addr = json_object_new_object(); + json_object_array_add(json, json_addr); + json_object_string_addf(json_addr, "address", "%pFX", p); + } else { + vty_out(vty, " %s %pFX", prefix_family_str(p), p); + } + + /* If there is destination address, print it. */ + if (CONNECTED_PEER(connected) && connected->destination) { + if (json) { + json_object_string_addf(json_addr, "peer", "%pFX", + connected->destination); + } else { + vty_out(vty, " peer %pFX", connected->destination); + } + } + + if (json) + json_object_boolean_add( + json_addr, "secondary", + CHECK_FLAG(connected->flags, ZEBRA_IFA_SECONDARY)); + else if (CHECK_FLAG(connected->flags, ZEBRA_IFA_SECONDARY)) + vty_out(vty, " secondary"); + + if (json) + json_object_boolean_add( + json_addr, "unnumbered", + CHECK_FLAG(connected->flags, ZEBRA_IFA_UNNUMBERED)); + else if (CHECK_FLAG(connected->flags, ZEBRA_IFA_UNNUMBERED)) + vty_out(vty, " unnumbered"); + + if (connected->label) { + if (json) + json_object_string_add(json_addr, "label", + connected->label); + else + vty_out(vty, " %s", connected->label); + } + + if (!json) + vty_out(vty, "\n"); +} + +/* Dump interface neighbor address information to vty. */ +static void nbr_connected_dump_vty(struct vty *vty, json_object *json, + struct nbr_connected *connected) +{ + struct prefix *p; + char buf[PREFIX2STR_BUFFER]; + + /* Print interface address. 
*/ + p = connected->address; + if (json) + json_array_string_add(json, prefix2str(p, buf, sizeof(buf))); + else + vty_out(vty, " %s %pFX\n", prefix_family_str(p), p); +} + +static const char * +zebra_zifslavetype_2str(enum zebra_slave_iftype zif_slave_type) +{ + switch (zif_slave_type) { + case ZEBRA_IF_SLAVE_BRIDGE: + return "Bridge"; + case ZEBRA_IF_SLAVE_VRF: + return "Vrf"; + case ZEBRA_IF_SLAVE_BOND: + return "Bond"; + case ZEBRA_IF_SLAVE_OTHER: + return "Other"; + case ZEBRA_IF_SLAVE_NONE: + return "None"; + } + return "None"; +} + +static const char *zebra_ziftype_2str(enum zebra_iftype zif_type) +{ + switch (zif_type) { + case ZEBRA_IF_OTHER: + return "Other"; + + case ZEBRA_IF_BRIDGE: + return "Bridge"; + + case ZEBRA_IF_VLAN: + return "Vlan"; + + case ZEBRA_IF_VXLAN: + return "Vxlan"; + + case ZEBRA_IF_VRF: + return "VRF"; + + case ZEBRA_IF_VETH: + return "VETH"; + + case ZEBRA_IF_BOND: + return "bond"; + + case ZEBRA_IF_BOND_SLAVE: + return "bond_slave"; + + case ZEBRA_IF_MACVLAN: + return "macvlan"; + + case ZEBRA_IF_GRE: + return "GRE"; + + default: + return "Unknown"; + } +} + +/* Interface's brief information print out to vty interface. 
*/ +static void ifs_dump_brief_vty(struct vty *vty, struct vrf *vrf) +{ + struct connected *connected; + struct listnode *node; + struct route_node *rn; + struct zebra_if *zebra_if; + struct prefix *p; + struct interface *ifp; + bool print_header = true; + + FOR_ALL_INTERFACES (vrf, ifp) { + bool first_pfx_printed = false; + + if (print_header) { + vty_out(vty, "%-16s%-8s%-16s%s\n", "Interface", + "Status", "VRF", "Addresses"); + vty_out(vty, "%-16s%-8s%-16s%s\n", "---------", + "------", "---", "---------"); + print_header = false; /* We have at least 1 iface */ + } + zebra_if = ifp->info; + + vty_out(vty, "%-16s", ifp->name); + + if (if_is_up(ifp)) + vty_out(vty, "%-8s", "up"); + else + vty_out(vty, "%-8s", "down"); + + vty_out(vty, "%-16s", vrf->name); + + for (rn = route_top(zebra_if->ipv4_subnets); rn; + rn = route_next(rn)) { + if (!rn->info) + continue; + uint32_t list_size = listcount((struct list *)rn->info); + + for (ALL_LIST_ELEMENTS_RO((struct list *)rn->info, node, + connected)) { + if (!CHECK_FLAG(connected->flags, + ZEBRA_IFA_SECONDARY)) { + p = connected->address; + if (first_pfx_printed) { + /* padding to prepare row only + * for ip addr */ + vty_out(vty, "%-40s", ""); + if (list_size > 1) + vty_out(vty, "+ "); + vty_out(vty, "%pFX\n", p); + } else { + if (list_size > 1) + vty_out(vty, "+ "); + vty_out(vty, "%pFX\n", p); + } + first_pfx_printed = true; + break; + } + } + } + + uint32_t v6_list_size = 0; + for (ALL_LIST_ELEMENTS_RO(ifp->connected, node, connected)) { + if (CHECK_FLAG(connected->conf, ZEBRA_IFC_REAL) + && (connected->address->family == AF_INET6)) + v6_list_size++; + } + for (ALL_LIST_ELEMENTS_RO(ifp->connected, node, connected)) { + if (CHECK_FLAG(connected->conf, ZEBRA_IFC_REAL) + && !CHECK_FLAG(connected->flags, + ZEBRA_IFA_SECONDARY) + && (connected->address->family == AF_INET6)) { + p = connected->address; + /* Don't print link local pfx */ + if (!IN6_IS_ADDR_LINKLOCAL(&p->u.prefix6)) { + if (first_pfx_printed) { + /* padding to 
prepare row only + * for ip addr */ + vty_out(vty, "%-40s", ""); + if (v6_list_size > 1) + vty_out(vty, "+ "); + vty_out(vty, "%pFX\n", p); + } else { + if (v6_list_size > 1) + vty_out(vty, "+ "); + vty_out(vty, "%pFX\n", p); + } + first_pfx_printed = true; + break; + } + } + } + if (!first_pfx_printed) + vty_out(vty, "\n"); + } + vty_out(vty, "\n"); +} + +static void ifs_dump_brief_vty_json(json_object *json, struct vrf *vrf) +{ + struct connected *connected; + struct listnode *node; + struct interface *ifp; + + FOR_ALL_INTERFACES (vrf, ifp) { + json_object *json_if; + json_object *json_addrs; + + json_if = json_object_new_object(); + json_object_object_add(json, ifp->name, json_if); + + json_object_string_add(json_if, "status", + if_is_up(ifp) ? "up" : "down"); + json_object_string_add(json_if, "vrfName", vrf->name); + + json_addrs = json_object_new_array(); + json_object_object_add(json_if, "addresses", json_addrs); + for (ALL_LIST_ELEMENTS_RO(ifp->connected, node, connected)) { + if (CHECK_FLAG(connected->conf, ZEBRA_IFC_REAL) + && !CHECK_FLAG(connected->flags, + ZEBRA_IFA_SECONDARY) + && !(connected->address->family == AF_INET6 + && IN6_IS_ADDR_LINKLOCAL( + &connected->address->u.prefix6))) { + char buf[PREFIX2STR_BUFFER]; + + json_array_string_add( + json_addrs, + prefix2str(connected->address, buf, + sizeof(buf))); + } + } + } +} + +const char *zebra_protodown_rc_str(uint32_t protodown_rc, char *pd_buf, + uint32_t pd_buf_len) +{ + pd_buf[0] = '\0'; + size_t len; + + strlcat(pd_buf, "(", pd_buf_len); + + if (CHECK_FLAG(protodown_rc, ZEBRA_PROTODOWN_EXTERNAL)) + strlcat(pd_buf, "external,", pd_buf_len); + + if (CHECK_FLAG(protodown_rc, ZEBRA_PROTODOWN_EVPN_STARTUP_DELAY)) + strlcat(pd_buf, "startup-delay,", pd_buf_len); + + if (CHECK_FLAG(protodown_rc, ZEBRA_PROTODOWN_EVPN_UPLINK_DOWN)) + strlcat(pd_buf, "uplinks-down,", pd_buf_len); + + if (CHECK_FLAG(protodown_rc, ZEBRA_PROTODOWN_VRRP)) + strlcat(pd_buf, "vrrp,", pd_buf_len); + + if (CHECK_FLAG(protodown_rc, 
ZEBRA_PROTODOWN_SHARP)) + strlcat(pd_buf, "sharp,", pd_buf_len); + + len = strnlen(pd_buf, pd_buf_len); + + /* Remove trailing comma */ + if (pd_buf[len - 1] == ',') + pd_buf[len - 1] = '\0'; + + strlcat(pd_buf, ")", pd_buf_len); + + return pd_buf; +} + +static inline bool if_is_protodown_applicable(struct interface *ifp) +{ + if (IS_ZEBRA_IF_BOND(ifp)) + return false; + + return true; +} + +/* Interface's information print out to vty interface. */ +static void if_dump_vty(struct vty *vty, struct interface *ifp) +{ + struct connected *connected; + struct nbr_connected *nbr_connected; + struct listnode *node; + struct route_node *rn; + struct zebra_if *zebra_if; + char pd_buf[ZEBRA_PROTODOWN_RC_STR_LEN]; + + zebra_if = ifp->info; + + vty_out(vty, "Interface %s is ", ifp->name); + if (if_is_up(ifp)) { + vty_out(vty, "up, line protocol "); + + if (CHECK_FLAG(ifp->status, ZEBRA_INTERFACE_LINKDETECTION)) { + if (if_is_running(ifp)) + vty_out(vty, "is up\n"); + else + vty_out(vty, "is down\n"); + } else { + vty_out(vty, "detection is disabled\n"); + } + } else { + vty_out(vty, "down\n"); + } + + vty_out(vty, " Link ups: %5u last: %s\n", zebra_if->up_count, + zebra_if->up_last[0] ? zebra_if->up_last : "(never)"); + vty_out(vty, " Link downs: %5u last: %s\n", zebra_if->down_count, + zebra_if->down_last[0] ? 
zebra_if->down_last : "(never)"); + + zebra_ptm_show_status(vty, NULL, ifp); + + vty_out(vty, " vrf: %s\n", ifp->vrf->name); + + if (ifp->desc) + vty_out(vty, " Description: %s\n", ifp->desc); + if (zebra_if->desc) + vty_out(vty, " OS Description: %s\n", zebra_if->desc); + + if (ifp->ifindex == IFINDEX_INTERNAL) { + vty_out(vty, " pseudo interface\n"); + return; + } else if (!CHECK_FLAG(ifp->status, ZEBRA_INTERFACE_ACTIVE)) { + vty_out(vty, " index %d inactive interface\n", ifp->ifindex); + return; + } + + vty_out(vty, " index %d metric %d mtu %d speed %u ", ifp->ifindex, + ifp->metric, ifp->mtu, ifp->speed); + if (ifp->mtu6 != ifp->mtu) + vty_out(vty, "mtu6 %d ", ifp->mtu6); + vty_out(vty, "\n flags: %s\n", if_flag_dump(ifp->flags)); + + if (zebra_if->mpls) + vty_out(vty, " MPLS enabled\n"); + + if (zebra_if->linkdown) + vty_out(vty, " Ignore all v4 routes with linkdown\n"); + if (zebra_if->linkdownv6) + vty_out(vty, " Ignore all v6 routes with linkdown\n"); + + if (zebra_if->v4mcast_on) + vty_out(vty, " v4 Multicast forwarding is on\n"); + if (zebra_if->v6mcast_on) + vty_out(vty, " v6 Multicast forwarding is on\n"); + + /* Hardware address. */ + vty_out(vty, " Type: %s\n", if_link_type_str(ifp->ll_type)); + if (ifp->hw_addr_len != 0) { + int i; + + vty_out(vty, " HWaddr: "); + for (i = 0; i < ifp->hw_addr_len; i++) + vty_out(vty, "%s%02x", i == 0 ? 
"" : ":", + ifp->hw_addr[i]); + vty_out(vty, "\n"); + } + + /* Bandwidth in Mbps */ + if (ifp->bandwidth != 0) { + vty_out(vty, " bandwidth %u Mbps", ifp->bandwidth); + vty_out(vty, "\n"); + } + + for (rn = route_top(zebra_if->ipv4_subnets); rn; rn = route_next(rn)) { + if (!rn->info) + continue; + + for (ALL_LIST_ELEMENTS_RO((struct list *)rn->info, node, + connected)) + connected_dump_vty(vty, NULL, connected); + } + + for (ALL_LIST_ELEMENTS_RO(ifp->connected, node, connected)) { + if (CHECK_FLAG(connected->conf, ZEBRA_IFC_REAL) + && (connected->address->family == AF_INET6)) + connected_dump_vty(vty, NULL, connected); + } + + vty_out(vty, " Interface Type %s\n", + zebra_ziftype_2str(zebra_if->zif_type)); + vty_out(vty, " Interface Slave Type %s\n", + zebra_zifslavetype_2str(zebra_if->zif_slave_type)); + + if (IS_ZEBRA_IF_BRIDGE(ifp)) { + struct zebra_l2info_bridge *bridge_info; + + bridge_info = &zebra_if->l2info.br; + vty_out(vty, " Bridge VLAN-aware: %s\n", + bridge_info->vlan_aware ? "yes" : "no"); + } else if (IS_ZEBRA_IF_VLAN(ifp)) { + struct zebra_l2info_vlan *vlan_info; + + vlan_info = &zebra_if->l2info.vl; + vty_out(vty, " VLAN Id %u\n", vlan_info->vid); + } else if (IS_ZEBRA_IF_VXLAN(ifp)) { + struct zebra_l2info_vxlan *vxlan_info; + + vxlan_info = &zebra_if->l2info.vxl; + vty_out(vty, " VxLAN Id %u", vxlan_info->vni); + if (vxlan_info->vtep_ip.s_addr != INADDR_ANY) + vty_out(vty, " VTEP IP: %pI4", + &vxlan_info->vtep_ip); + if (vxlan_info->access_vlan) + vty_out(vty, " Access VLAN Id %u\n", + vxlan_info->access_vlan); + if (vxlan_info->mcast_grp.s_addr != INADDR_ANY) + vty_out(vty, " Mcast Group %pI4", + &vxlan_info->mcast_grp); + if (vxlan_info->ifindex_link && + (vxlan_info->link_nsid != NS_UNKNOWN)) { + struct interface *ifp; + + ifp = if_lookup_by_index_per_ns( + zebra_ns_lookup(vxlan_info->link_nsid), + vxlan_info->ifindex_link); + vty_out(vty, " Link Interface %s", + ifp == NULL ? 
"Unknown" : + ifp->name); + } + vty_out(vty, "\n"); + } else if (IS_ZEBRA_IF_GRE(ifp)) { + struct zebra_l2info_gre *gre_info; + + gre_info = &zebra_if->l2info.gre; + if (gre_info->vtep_ip.s_addr != INADDR_ANY) { + vty_out(vty, " VTEP IP: %pI4", &gre_info->vtep_ip); + if (gre_info->vtep_ip_remote.s_addr != INADDR_ANY) + vty_out(vty, " , remote %pI4", + &gre_info->vtep_ip_remote); + vty_out(vty, "\n"); + } + if (gre_info->ifindex_link && + (gre_info->link_nsid != NS_UNKNOWN)) { + struct interface *ifp; + + ifp = if_lookup_by_index_per_ns( + zebra_ns_lookup(gre_info->link_nsid), + gre_info->ifindex_link); + vty_out(vty, " Link Interface %s\n", + ifp == NULL ? "Unknown" : + ifp->name); + } + } + + if (IS_ZEBRA_IF_BRIDGE_SLAVE(ifp)) { + struct zebra_l2info_brslave *br_slave; + + br_slave = &zebra_if->brslave_info; + if (br_slave->bridge_ifindex != IFINDEX_INTERNAL) { + if (br_slave->br_if) + vty_out(vty, " Master interface: %s\n", + br_slave->br_if->name); + else + vty_out(vty, " Master ifindex: %u\n", + br_slave->bridge_ifindex); + } + } + + if (IS_ZEBRA_IF_BOND_SLAVE(ifp)) { + struct zebra_l2info_bondslave *bond_slave; + + bond_slave = &zebra_if->bondslave_info; + if (bond_slave->bond_ifindex != IFINDEX_INTERNAL) { + if (bond_slave->bond_if) + vty_out(vty, " Master interface: %s\n", + bond_slave->bond_if->name); + else + vty_out(vty, " Master ifindex: %u\n", + bond_slave->bond_ifindex); + } + } + + if (zebra_if->flags & ZIF_FLAG_LACP_BYPASS) + vty_out(vty, " LACP bypass: on\n"); + + zebra_evpn_if_es_print(vty, NULL, zebra_if); + vty_out(vty, " protodown: %s %s\n", + (ZEBRA_IF_IS_PROTODOWN(zebra_if)) ? "on" : "off", + if_is_protodown_applicable(ifp) ? 
"" : "(n/a)"); + if (zebra_if->protodown_rc) + vty_out(vty, " protodown reasons: %s\n", + zebra_protodown_rc_str(zebra_if->protodown_rc, pd_buf, + sizeof(pd_buf))); + + if (zebra_if->link_ifindex != IFINDEX_INTERNAL) { + if (zebra_if->link) + vty_out(vty, " Parent interface: %s\n", zebra_if->link->name); + else + vty_out(vty, " Parent ifindex: %d\n", zebra_if->link_ifindex); + } + + if (HAS_LINK_PARAMS(ifp)) { + int i; + struct if_link_params *iflp = ifp->link_params; + vty_out(vty, " Traffic Engineering Link Parameters:\n"); + if (IS_PARAM_SET(iflp, LP_TE_METRIC)) + vty_out(vty, " TE metric %u\n", iflp->te_metric); + if (IS_PARAM_SET(iflp, LP_MAX_BW)) + vty_out(vty, " Maximum Bandwidth %g (Byte/s)\n", + iflp->max_bw); + if (IS_PARAM_SET(iflp, LP_MAX_RSV_BW)) + vty_out(vty, + " Maximum Reservable Bandwidth %g (Byte/s)\n", + iflp->max_rsv_bw); + if (IS_PARAM_SET(iflp, LP_UNRSV_BW)) { + vty_out(vty, + " Unreserved Bandwidth per Class Type in Byte/s:\n"); + for (i = 0; i < MAX_CLASS_TYPE; i += 2) + vty_out(vty, + " [%d]: %g (Bytes/sec),\t[%d]: %g (Bytes/sec)\n", + i, iflp->unrsv_bw[i], i + 1, + iflp->unrsv_bw[i + 1]); + } + + if (IS_PARAM_SET(iflp, LP_ADM_GRP)) + vty_out(vty, " Administrative Group:%u\n", + iflp->admin_grp); + if (IS_PARAM_SET(iflp, LP_DELAY)) { + vty_out(vty, " Link Delay Average: %u (micro-sec.)", + iflp->av_delay); + if (IS_PARAM_SET(iflp, LP_MM_DELAY)) { + vty_out(vty, " Min: %u (micro-sec.)", + iflp->min_delay); + vty_out(vty, " Max: %u (micro-sec.)", + iflp->max_delay); + } + vty_out(vty, "\n"); + } + if (IS_PARAM_SET(iflp, LP_DELAY_VAR)) + vty_out(vty, + " Link Delay Variation %u (micro-sec.)\n", + iflp->delay_var); + if (IS_PARAM_SET(iflp, LP_PKT_LOSS)) + vty_out(vty, " Link Packet Loss %g (in %%)\n", + iflp->pkt_loss); + if (IS_PARAM_SET(iflp, LP_AVA_BW)) + vty_out(vty, " Available Bandwidth %g (Byte/s)\n", + iflp->ava_bw); + if (IS_PARAM_SET(iflp, LP_RES_BW)) + vty_out(vty, " Residual Bandwidth %g (Byte/s)\n", + iflp->res_bw); + if 
(IS_PARAM_SET(iflp, LP_USE_BW)) + vty_out(vty, " Utilized Bandwidth %g (Byte/s)\n", + iflp->use_bw); + if (IS_PARAM_SET(iflp, LP_RMT_AS)) + vty_out(vty, " Neighbor ASBR IP: %pI4 AS: %u \n", + &iflp->rmt_ip, iflp->rmt_as); + } + + hook_call(zebra_if_extra_info, vty, ifp); + + if (listhead(ifp->nbr_connected)) + vty_out(vty, " Neighbor address(s):\n"); + for (ALL_LIST_ELEMENTS_RO(ifp->nbr_connected, node, nbr_connected)) + nbr_connected_dump_vty(vty, NULL, nbr_connected); + +#ifdef HAVE_PROC_NET_DEV + /* Statistics print out using proc file system. */ + vty_out(vty, + " %lu input packets (%lu multicast), %lu bytes, %lu dropped\n", + ifp->stats.rx_packets, ifp->stats.rx_multicast, + ifp->stats.rx_bytes, ifp->stats.rx_dropped); + + vty_out(vty, + " %lu input errors, %lu length, %lu overrun, %lu CRC, %lu frame\n", + ifp->stats.rx_errors, ifp->stats.rx_length_errors, + ifp->stats.rx_over_errors, ifp->stats.rx_crc_errors, + ifp->stats.rx_frame_errors); + + vty_out(vty, " %lu fifo, %lu missed\n", ifp->stats.rx_fifo_errors, + ifp->stats.rx_missed_errors); + + vty_out(vty, " %lu output packets, %lu bytes, %lu dropped\n", + ifp->stats.tx_packets, ifp->stats.tx_bytes, + ifp->stats.tx_dropped); + + vty_out(vty, + " %lu output errors, %lu aborted, %lu carrier, %lu fifo, %lu heartbeat\n", + ifp->stats.tx_errors, ifp->stats.tx_aborted_errors, + ifp->stats.tx_carrier_errors, ifp->stats.tx_fifo_errors, + ifp->stats.tx_heartbeat_errors); + + vty_out(vty, " %lu window, %lu collisions\n", + ifp->stats.tx_window_errors, ifp->stats.collisions); +#endif /* HAVE_PROC_NET_DEV */ + +#ifdef HAVE_NET_RT_IFLIST + /* Statistics print out using sysctl (). 
*/ + vty_out(vty, + " input packets %llu, bytes %llu, dropped %llu, multicast packets %llu\n", + (unsigned long long)ifp->stats.ifi_ipackets, + (unsigned long long)ifp->stats.ifi_ibytes, + (unsigned long long)ifp->stats.ifi_iqdrops, + (unsigned long long)ifp->stats.ifi_imcasts); + + vty_out(vty, " input errors %llu\n", + (unsigned long long)ifp->stats.ifi_ierrors); + + vty_out(vty, + " output packets %llu, bytes %llu, multicast packets %llu\n", + (unsigned long long)ifp->stats.ifi_opackets, + (unsigned long long)ifp->stats.ifi_obytes, + (unsigned long long)ifp->stats.ifi_omcasts); + + vty_out(vty, " output errors %llu\n", + (unsigned long long)ifp->stats.ifi_oerrors); + + vty_out(vty, " collisions %llu\n", + (unsigned long long)ifp->stats.ifi_collisions); +#endif /* HAVE_NET_RT_IFLIST */ +} + +static void if_dump_vty_json(struct vty *vty, struct interface *ifp, + json_object *json) +{ + struct connected *connected; + struct nbr_connected *nbr_connected; + struct listnode *node; + struct route_node *rn; + struct zebra_if *zebra_if; + char pd_buf[ZEBRA_PROTODOWN_RC_STR_LEN]; + char buf[BUFSIZ]; + json_object *json_if; + json_object *json_addrs; + + json_if = json_object_new_object(); + json_object_object_add(json, ifp->name, json_if); + + if (if_is_up(ifp)) { + json_object_string_add(json_if, "administrativeStatus", "up"); + + if (CHECK_FLAG(ifp->status, ZEBRA_INTERFACE_LINKDETECTION)) { + json_object_string_add(json_if, "operationalStatus", + if_is_running(ifp) ? 
"up" + : "down"); + json_object_boolean_add(json_if, "linkDetection", true); + } else { + json_object_boolean_add(json_if, "linkDetection", + false); + } + } else { + json_object_string_add(json_if, "administrativeStatus", "down"); + } + + zebra_if = ifp->info; + + json_object_int_add(json_if, "linkUps", zebra_if->up_count); + json_object_int_add(json_if, "linkDowns", zebra_if->down_count); + if (zebra_if->up_last[0]) + json_object_string_add(json_if, "lastLinkUp", + zebra_if->up_last); + if (zebra_if->down_last[0]) + json_object_string_add(json_if, "lastLinkDown", + zebra_if->down_last); + + zebra_ptm_show_status(vty, json, ifp); + + json_object_string_add(json_if, "vrfName", ifp->vrf->name); + + if (ifp->desc) + json_object_string_add(json_if, "description", ifp->desc); + if (zebra_if->desc) + json_object_string_add(json_if, "OsDescription", + zebra_if->desc); + + json_object_boolean_add(json_if, "mplsEnabled", zebra_if->mpls); + json_object_boolean_add(json_if, "linkDown", zebra_if->linkdown); + json_object_boolean_add(json_if, "linkDownV6", zebra_if->linkdownv6); + json_object_boolean_add(json_if, "mcForwardingV4", + zebra_if->v4mcast_on); + json_object_boolean_add(json_if, "mcForwardingV6", + zebra_if->v6mcast_on); + + if (ifp->ifindex == IFINDEX_INTERNAL) { + json_object_boolean_add(json_if, "pseudoInterface", true); + return; + } else if (!CHECK_FLAG(ifp->status, ZEBRA_INTERFACE_ACTIVE)) { + json_object_int_add(json_if, "index", ifp->ifindex); + return; + } + + json_object_boolean_add(json_if, "pseudoInterface", false); + json_object_int_add(json_if, "index", ifp->ifindex); + json_object_int_add(json_if, "metric", ifp->metric); + json_object_int_add(json_if, "mtu", ifp->mtu); + if (ifp->mtu6 != ifp->mtu) + json_object_int_add(json_if, "mtu6", ifp->mtu6); + json_object_int_add(json_if, "speed", ifp->speed); + json_object_string_add(json_if, "flags", if_flag_dump(ifp->flags)); + + /* Hardware address. 
*/ + json_object_string_add(json_if, "type", if_link_type_str(ifp->ll_type)); + if (ifp->hw_addr_len != 0) { + char hwbuf[BUFSIZ]; + + hwbuf[0] = '\0'; + for (int i = 0; i < ifp->hw_addr_len; i++) { + snprintf(buf, sizeof(buf), "%s%02x", i == 0 ? "" : ":", + ifp->hw_addr[i]); + strlcat(hwbuf, buf, sizeof(hwbuf)); + } + json_object_string_add(json_if, "hardwareAddress", hwbuf); + } + + /* Bandwidth in Mbps */ + if (ifp->bandwidth != 0) + json_object_int_add(json_if, "bandwidth", ifp->bandwidth); + + + /* IP addresses. */ + json_addrs = json_object_new_array(); + json_object_object_add(json_if, "ipAddresses", json_addrs); + + for (rn = route_top(zebra_if->ipv4_subnets); rn; rn = route_next(rn)) { + if (!rn->info) + continue; + + for (ALL_LIST_ELEMENTS_RO((struct list *)rn->info, node, + connected)) + connected_dump_vty(vty, json_addrs, connected); + } + + for (ALL_LIST_ELEMENTS_RO(ifp->connected, node, connected)) { + if (CHECK_FLAG(connected->conf, ZEBRA_IFC_REAL) + && (connected->address->family == AF_INET6)) + connected_dump_vty(vty, json_addrs, connected); + } + + json_object_string_add(json_if, "interfaceType", + zebra_ziftype_2str(zebra_if->zif_type)); + json_object_string_add( + json_if, "interfaceSlaveType", + zebra_zifslavetype_2str(zebra_if->zif_slave_type)); + + if (IS_ZEBRA_IF_BRIDGE(ifp)) { + struct zebra_l2info_bridge *bridge_info; + + bridge_info = &zebra_if->l2info.br; + json_object_boolean_add(json_if, "bridgeVlanAware", + bridge_info->vlan_aware); + } else if (IS_ZEBRA_IF_VLAN(ifp)) { + struct zebra_l2info_vlan *vlan_info; + + vlan_info = &zebra_if->l2info.vl; + json_object_int_add(json_if, "vlanId", vlan_info->vid); + } else if (IS_ZEBRA_IF_VXLAN(ifp)) { + struct zebra_l2info_vxlan *vxlan_info; + + vxlan_info = &zebra_if->l2info.vxl; + json_object_int_add(json_if, "vxlanId", vxlan_info->vni); + if (vxlan_info->vtep_ip.s_addr != INADDR_ANY) + json_object_string_addf(json_if, "vtepIp", "%pI4", + &vxlan_info->vtep_ip); + if (vxlan_info->access_vlan) + 
json_object_int_add(json_if, "accessVlanId", + vxlan_info->access_vlan); + if (vxlan_info->mcast_grp.s_addr != INADDR_ANY) + json_object_string_addf(json_if, "mcastGroup", "%pI4", + &vxlan_info->mcast_grp); + if (vxlan_info->ifindex_link + && (vxlan_info->link_nsid != NS_UNKNOWN)) { + struct interface *ifp; + + ifp = if_lookup_by_index_per_ns( + zebra_ns_lookup(vxlan_info->link_nsid), + vxlan_info->ifindex_link); + json_object_string_add(json_if, "linkInterface", + ifp == NULL ? "Unknown" + : ifp->name); + } + } else if (IS_ZEBRA_IF_GRE(ifp)) { + struct zebra_l2info_gre *gre_info; + + gre_info = &zebra_if->l2info.gre; + if (gre_info->vtep_ip.s_addr != INADDR_ANY) { + json_object_string_addf(json_if, "vtepIp", "%pI4", + &gre_info->vtep_ip); + if (gre_info->vtep_ip_remote.s_addr != INADDR_ANY) + json_object_string_addf( + json_if, "vtepRemoteIp", "%pI4", + &gre_info->vtep_ip_remote); + } + if (gre_info->ifindex_link + && (gre_info->link_nsid != NS_UNKNOWN)) { + struct interface *ifp; + + ifp = if_lookup_by_index_per_ns( + zebra_ns_lookup(gre_info->link_nsid), + gre_info->ifindex_link); + json_object_string_add(json_if, "linkInterface", + ifp == NULL ? 
"Unknown" + : ifp->name); + } + } + + if (IS_ZEBRA_IF_BRIDGE_SLAVE(ifp)) { + struct zebra_l2info_brslave *br_slave; + + br_slave = &zebra_if->brslave_info; + if (br_slave->bridge_ifindex != IFINDEX_INTERNAL) { + if (br_slave->br_if) + json_object_string_add(json_if, + "masterInterface", + br_slave->br_if->name); + else + json_object_int_add(json_if, "masterIfindex", + br_slave->bridge_ifindex); + } + } + + if (IS_ZEBRA_IF_BOND_SLAVE(ifp)) { + struct zebra_l2info_bondslave *bond_slave; + + bond_slave = &zebra_if->bondslave_info; + if (bond_slave->bond_ifindex != IFINDEX_INTERNAL) { + if (bond_slave->bond_if) + json_object_string_add( + json_if, "masterInterface", + bond_slave->bond_if->name); + else + json_object_int_add(json_if, "masterIfindex", + bond_slave->bond_ifindex); + } + } + + json_object_boolean_add( + json_if, "lacpBypass", + CHECK_FLAG(zebra_if->flags, ZIF_FLAG_LACP_BYPASS)); + + zebra_evpn_if_es_print(vty, json_if, zebra_if); + + if (if_is_protodown_applicable(ifp)) { + json_object_string_add( + json_if, "protodown", + (ZEBRA_IF_IS_PROTODOWN(zebra_if)) ? 
"on" : "off"); + if (zebra_if->protodown_rc) + json_object_string_add( + json_if, "protodownReason", + zebra_protodown_rc_str(zebra_if->protodown_rc, + pd_buf, sizeof(pd_buf))); + } + + if (zebra_if->link_ifindex != IFINDEX_INTERNAL) { + if (zebra_if->link) + json_object_string_add(json_if, "parentInterface", + zebra_if->link->name); + else + json_object_int_add(json_if, "parentIfindex", + zebra_if->link_ifindex); + } + + if (HAS_LINK_PARAMS(ifp)) { + struct if_link_params *iflp = ifp->link_params; + json_object *json_te; + + json_te = json_object_new_object(); + json_object_object_add( + json_if, "trafficEngineeringLinkParameters", json_te); + + if (IS_PARAM_SET(iflp, LP_TE_METRIC)) + json_object_int_add(json_te, "teMetric", + iflp->te_metric); + if (IS_PARAM_SET(iflp, LP_MAX_BW)) + json_object_double_add(json_te, "maximumBandwidth", + iflp->max_bw); + if (IS_PARAM_SET(iflp, LP_MAX_RSV_BW)) + json_object_double_add(json_te, + "maximumReservableBandwidth", + iflp->max_rsv_bw); + if (IS_PARAM_SET(iflp, LP_UNRSV_BW)) { + json_object *json_bws; + + json_bws = json_object_new_object(); + json_object_object_add(json_te, "unreservedBandwidth", + json_bws); + for (unsigned int i = 0; i < MAX_CLASS_TYPE; ++i) { + char buf_ct[64]; + + snprintf(buf_ct, sizeof(buf_ct), "classType%u", + i); + json_object_double_add(json_bws, buf_ct, + iflp->unrsv_bw[i]); + } + } + + if (IS_PARAM_SET(iflp, LP_ADM_GRP)) + json_object_int_add(json_te, "administrativeGroup", + iflp->admin_grp); + if (IS_PARAM_SET(iflp, LP_DELAY)) { + json_object_int_add(json_te, "linkDelayAverage", + iflp->av_delay); + if (IS_PARAM_SET(iflp, LP_MM_DELAY)) { + json_object_int_add(json_te, "linkDelayMinimum", + iflp->min_delay); + json_object_int_add(json_te, "linkDelayMaximum", + iflp->max_delay); + } + } + if (IS_PARAM_SET(iflp, LP_DELAY_VAR)) + json_object_int_add(json_te, "linkDelayVariation", + iflp->delay_var); + if (IS_PARAM_SET(iflp, LP_PKT_LOSS)) + json_object_double_add(json_te, "linkPacketLoss", + 
iflp->pkt_loss); + if (IS_PARAM_SET(iflp, LP_AVA_BW)) + json_object_double_add(json_te, "availableBandwidth", + iflp->ava_bw); + if (IS_PARAM_SET(iflp, LP_RES_BW)) + json_object_double_add(json_te, "residualBandwidth", + iflp->res_bw); + if (IS_PARAM_SET(iflp, LP_USE_BW)) + json_object_double_add(json_te, "utilizedBandwidth", + iflp->use_bw); + if (IS_PARAM_SET(iflp, LP_RMT_AS)) + json_object_string_addf(json_te, "neighborAsbrIp", + "%pI4", &iflp->rmt_ip); + json_object_int_add(json_te, "neighborAsbrAs", iflp->rmt_as); + } + + if (listhead(ifp->nbr_connected)) { + json_object *json_nbr_addrs; + + json_nbr_addrs = json_object_new_array(); + json_object_object_add(json_if, "neighborIpAddresses", + json_nbr_addrs); + + for (ALL_LIST_ELEMENTS_RO(ifp->nbr_connected, node, + nbr_connected)) + nbr_connected_dump_vty(vty, json_nbr_addrs, + nbr_connected); + } + +#ifdef HAVE_PROC_NET_DEV + json_object_int_add(json_if, "inputPackets", stats.rx_packets); + json_object_int_add(json_if, "inputBytes", ifp->stats.rx_bytes); + json_object_int_add(json_if, "inputDropped", ifp->stats.rx_dropped); + json_object_int_add(json_if, "inputMulticastPackets", + ifp->stats.rx_multicast); + json_object_int_add(json_if, "inputErrors", ifp->stats.rx_errors); + json_object_int_add(json_if, "inputLengthErrors", + ifp->stats.rx_length_errors); + json_object_int_add(json_if, "inputOverrunErrors", + ifp->stats.rx_over_errors); + json_object_int_add(json_if, "inputCrcErrors", + ifp->stats.rx_crc_errors); + json_object_int_add(json_if, "inputFrameErrors", + ifp->stats.rx_frame_errors); + json_object_int_add(json_if, "inputFifoErrors", + ifp->stats.rx_fifo_errors); + json_object_int_add(json_if, "inputMissedErrors", + ifp->stats.rx_missed_errors); + json_object_int_add(json_if, "outputPackets", ifp->stats.tx_packets); + json_object_int_add(json_if, "outputBytes", ifp->stats.tx_bytes); + json_object_int_add(json_if, "outputDroppedPackets", + ifp->stats.tx_dropped); + json_object_int_add(json_if, 
"outputErrors", ifp->stats.tx_errors); + json_object_int_add(json_if, "outputAbortedErrors", + ifp->stats.tx_aborted_errors); + json_object_int_add(json_if, "outputCarrierErrors", + ifp->stats.tx_carrier_errors); + json_object_int_add(json_if, "outputFifoErrors", + ifp->stats.tx_fifo_errors); + json_object_int_add(json_if, "outputHeartbeatErrors", + ifp->stats.tx_heartbeat_errors); + json_object_int_add(json_if, "outputWindowErrors", + ifp->stats.tx_window_errors); + json_object_int_add(json_if, "collisions", ifp->stats.collisions); +#endif /* HAVE_PROC_NET_DEV */ + +#ifdef HAVE_NET_RT_IFLIST + json_object_int_add(json_if, "inputPackets", ifp->stats.ifi_ipackets); + json_object_int_add(json_if, "inputBytes", ifp->stats.ifi_ibytes); + json_object_int_add(json_if, "inputDropd", ifp->stats.ifi_iqdrops); + json_object_int_add(json_if, "inputMulticastPackets", + ifp->stats.ifi_imcasts); + json_object_int_add(json_if, "inputErrors", ifp->stats.ifi_ierrors); + json_object_int_add(json_if, "outputPackets", ifp->stats.ifi_opackets); + json_object_int_add(json_if, "outputBytes", ifp->stats.ifi_obytes); + json_object_int_add(json_if, "outputMulticastPackets", + ifp->stats.ifi_omcasts); + json_object_int_add(json_if, "outputErrors", ifp->stats.ifi_oerrors); + json_object_int_add(json_if, "collisions", ifp->stats.ifi_collisions); +#endif /* HAVE_NET_RT_IFLIST */ +} + +static void interface_update_stats(void) +{ +#ifdef HAVE_PROC_NET_DEV + /* If system has interface statistics via proc file system, update + statistics. */ + ifstat_update_proc(); +#endif /* HAVE_PROC_NET_DEV */ +#ifdef HAVE_NET_RT_IFLIST + ifstat_update_sysctl(); +#endif /* HAVE_NET_RT_IFLIST */ +} + +#ifndef VTYSH_EXTRACT_PL +#include "zebra/interface_clippy.c" +#endif +/* Show all interfaces to vty. 
*/ +DEFPY(show_interface, show_interface_cmd, + "show interface vrf NAME$vrf_name [brief$brief] [json$uj]", + SHOW_STR + "Interface status and configuration\n" + VRF_CMD_HELP_STR + "Interface status and configuration summary\n" + JSON_STR) +{ + struct vrf *vrf; + struct interface *ifp; + json_object *json = NULL; + + interface_update_stats(); + + vrf = vrf_lookup_by_name(vrf_name); + if (!vrf) { + if (uj) + vty_out(vty, "{}\n"); + else + vty_out(vty, "%% VRF %s not found\n", vrf_name); + return CMD_WARNING; + } + + if (uj) + json = json_object_new_object(); + + if (brief) { + if (json) + ifs_dump_brief_vty_json(json, vrf); + else + ifs_dump_brief_vty(vty, vrf); + } else { + FOR_ALL_INTERFACES (vrf, ifp) { + if (json) + if_dump_vty_json(vty, ifp, json); + else + if_dump_vty(vty, ifp); + } + } + + if (json) + vty_json(vty, json); + + return CMD_SUCCESS; +} + + +/* Show all interfaces to vty. */ +DEFPY (show_interface_vrf_all, + show_interface_vrf_all_cmd, + "show interface [vrf all] [brief$brief] [json$uj]", + SHOW_STR + "Interface status and configuration\n" + VRF_ALL_CMD_HELP_STR + "Interface status and configuration summary\n" + JSON_STR) +{ + struct vrf *vrf; + struct interface *ifp; + json_object *json = NULL; + + interface_update_stats(); + + if (uj) + json = json_object_new_object(); + + /* All interface print. */ + RB_FOREACH (vrf, vrf_name_head, &vrfs_by_name) { + if (brief) { + if (json) + ifs_dump_brief_vty_json(json, vrf); + else + ifs_dump_brief_vty(vty, vrf); + } else { + FOR_ALL_INTERFACES (vrf, ifp) { + if (json) + if_dump_vty_json(vty, ifp, json); + else + if_dump_vty(vty, ifp); + } + } + } + + if (json) + vty_json(vty, json); + + return CMD_SUCCESS; +} + +/* Show specified interface to vty. 
*/ + +DEFPY (show_interface_name_vrf, + show_interface_name_vrf_cmd, + "show interface IFNAME$ifname vrf NAME$vrf_name [json$uj]", + SHOW_STR + "Interface status and configuration\n" + "Interface name\n" + VRF_CMD_HELP_STR + JSON_STR) +{ + struct interface *ifp; + struct vrf *vrf; + json_object *json = NULL; + + interface_update_stats(); + + vrf = vrf_lookup_by_name(vrf_name); + if (!vrf) { + if (uj) + vty_out(vty, "{}\n"); + else + vty_out(vty, "%% VRF %s not found\n", vrf_name); + return CMD_WARNING; + } + + ifp = if_lookup_by_name_vrf(ifname, vrf); + if (ifp == NULL) { + if (uj) + vty_out(vty, "{}\n"); + else + vty_out(vty, "%% Can't find interface %s\n", ifname); + return CMD_WARNING; + } + + if (uj) + json = json_object_new_object(); + + if (json) + if_dump_vty_json(vty, ifp, json); + else + if_dump_vty(vty, ifp); + + if (json) + vty_json(vty, json); + + return CMD_SUCCESS; +} + +/* Show specified interface to vty. */ +DEFPY (show_interface_name_vrf_all, + show_interface_name_vrf_all_cmd, + "show interface IFNAME$ifname [vrf all] [json$uj]", + SHOW_STR + "Interface status and configuration\n" + "Interface name\n" + VRF_ALL_CMD_HELP_STR + JSON_STR) +{ + struct interface *ifp = NULL; + struct interface *ifptmp; + struct vrf *vrf; + json_object *json = NULL; + int count = 0; + + interface_update_stats(); + + RB_FOREACH (vrf, vrf_name_head, &vrfs_by_name) { + ifptmp = if_lookup_by_name_vrf(ifname, vrf); + if (ifptmp) { + ifp = ifptmp; + count++; + if (!vrf_is_backend_netns()) + break; + } + } + + if (ifp == NULL) { + if (uj) + vty_out(vty, "{}\n"); + else + vty_out(vty, "%% Can't find interface %s\n", ifname); + return CMD_WARNING; + } + if (count > 1) { + if (uj) { + vty_out(vty, "{}\n"); + } else { + vty_out(vty, + "%% There are multiple interfaces with name %s\n", + ifname); + vty_out(vty, "%% You must specify the VRF name\n"); + } + return CMD_WARNING; + } + + if (uj) + json = json_object_new_object(); + + if (json) + if_dump_vty_json(vty, ifp, json); + else + 
if_dump_vty(vty, ifp); + + if (json) + vty_json(vty, json); + + return CMD_SUCCESS; +} + +static void if_show_description(struct vty *vty, struct vrf *vrf) +{ + struct interface *ifp; + + vty_out(vty, "Interface Status Protocol Description\n"); + FOR_ALL_INTERFACES (vrf, ifp) { + int len; + struct zebra_if *zif; + bool intf_desc; + + intf_desc = false; + + len = vty_out(vty, "%s", ifp->name); + vty_out(vty, "%*s", (16 - len), " "); + + if (if_is_up(ifp)) { + vty_out(vty, "up "); + if (CHECK_FLAG(ifp->status, + ZEBRA_INTERFACE_LINKDETECTION)) { + if (if_is_running(ifp)) + vty_out(vty, "up "); + else + vty_out(vty, "down "); + } else { + vty_out(vty, "unknown "); + } + } else { + vty_out(vty, "down down "); + } + + if (ifp->desc) { + intf_desc = true; + vty_out(vty, "%s", ifp->desc); + } + zif = ifp->info; + if (zif && zif->desc) { + vty_out(vty, "%s%s", + intf_desc + ? "\n " + : "", + zif->desc); + } + + vty_out(vty, "\n"); + } +} + +DEFUN (show_interface_desc, + show_interface_desc_cmd, + "show interface description vrf NAME", + SHOW_STR + "Interface status and configuration\n" + "Interface description\n" + VRF_CMD_HELP_STR) +{ + struct vrf *vrf; + + vrf = vrf_lookup_by_name(argv[4]->arg); + if (!vrf) { + vty_out(vty, "%% VRF %s not found\n", argv[4]->arg); + return CMD_WARNING; + } + + if_show_description(vty, vrf); + + return CMD_SUCCESS; +} + + +DEFUN (show_interface_desc_vrf_all, + show_interface_desc_vrf_all_cmd, + "show interface description [vrf all]", + SHOW_STR + "Interface status and configuration\n" + "Interface description\n" + VRF_ALL_CMD_HELP_STR) +{ + struct vrf *vrf; + + RB_FOREACH (vrf, vrf_name_head, &vrfs_by_name) + if (!RB_EMPTY(if_name_head, &vrf->ifaces_by_name)) { + vty_out(vty, "\n\tVRF %s(%u)\n\n", VRF_LOGNAME(vrf), + vrf->vrf_id); + if_show_description(vty, vrf); + } + + return CMD_SUCCESS; +} + +int if_multicast_set(struct interface *ifp) +{ + struct zebra_if *if_data; + + if (CHECK_FLAG(ifp->status, ZEBRA_INTERFACE_ACTIVE)) { + if 
(if_set_flags(ifp, IFF_MULTICAST) < 0) { + zlog_debug("Can't set multicast flag on interface %s", + ifp->name); + return -1; + } + if_refresh(ifp); + } + if_data = ifp->info; + if_data->multicast = IF_ZEBRA_DATA_ON; + + return 0; +} + +DEFUN (multicast, + multicast_cmd, + "multicast", + "Set multicast flag to interface\n") +{ + VTY_DECLVAR_CONTEXT(interface, ifp); + int ret; + struct zebra_if *if_data; + + if (CHECK_FLAG(ifp->status, ZEBRA_INTERFACE_ACTIVE)) { + ret = if_set_flags(ifp, IFF_MULTICAST); + if (ret < 0) { + vty_out(vty, "Can't set multicast flag\n"); + return CMD_WARNING_CONFIG_FAILED; + } + if_refresh(ifp); + } + if_data = ifp->info; + if_data->multicast = IF_ZEBRA_DATA_ON; + + return CMD_SUCCESS; +} + +DEFPY (mpls, + mpls_cmd, + "[no] mpls enable", + NO_STR + MPLS_STR + "Set mpls to be on for the interface\n") +{ + VTY_DECLVAR_CONTEXT(interface, ifp); + struct zebra_if *if_data = ifp->info; + + if (no) { + dplane_intf_mpls_modify_state(ifp, false); + if_data->mpls = IF_ZEBRA_DATA_UNSPEC; + } else { + dplane_intf_mpls_modify_state(ifp, true); + if_data->mpls = IF_ZEBRA_DATA_ON; + } + + return CMD_SUCCESS; +} + +int if_multicast_unset(struct interface *ifp) +{ + struct zebra_if *if_data; + + if (CHECK_FLAG(ifp->status, ZEBRA_INTERFACE_ACTIVE)) { + if (if_unset_flags(ifp, IFF_MULTICAST) < 0) { + zlog_debug("Can't unset multicast flag on interface %s", + ifp->name); + return -1; + } + if_refresh(ifp); + } + if_data = ifp->info; + if_data->multicast = IF_ZEBRA_DATA_OFF; + + return 0; +} + +DEFUN (no_multicast, + no_multicast_cmd, + "no multicast", + NO_STR + "Unset multicast flag to interface\n") +{ + VTY_DECLVAR_CONTEXT(interface, ifp); + int ret; + struct zebra_if *if_data; + + if (CHECK_FLAG(ifp->status, ZEBRA_INTERFACE_ACTIVE)) { + ret = if_unset_flags(ifp, IFF_MULTICAST); + if (ret < 0) { + vty_out(vty, "Can't unset multicast flag\n"); + return CMD_WARNING_CONFIG_FAILED; + } + if_refresh(ifp); + } + if_data = ifp->info; + if_data->multicast = 
IF_ZEBRA_DATA_OFF; + + return CMD_SUCCESS; +} + +int if_linkdetect(struct interface *ifp, bool detect) +{ + int if_was_operative; + + if_was_operative = if_is_no_ptm_operative(ifp); + if (detect) { + SET_FLAG(ifp->status, ZEBRA_INTERFACE_LINKDETECTION); + + /* When linkdetection is enabled, if might come down */ + if (!if_is_no_ptm_operative(ifp) && if_was_operative) + if_down(ifp); + } else { + UNSET_FLAG(ifp->status, ZEBRA_INTERFACE_LINKDETECTION); + + /* Interface may come up after disabling link detection */ + if (if_is_operative(ifp) && !if_was_operative) + if_up(ifp, true); + } + /* FIXME: Will defer status change forwarding if interface + does not come down! */ + return 0; +} + +DEFUN(linkdetect, linkdetect_cmd, "link-detect", + "Enable link detection on interface\n") +{ + VTY_DECLVAR_CONTEXT(interface, ifp); + + if_linkdetect(ifp, true); + + return CMD_SUCCESS; +} + + +DEFUN (no_linkdetect, + no_linkdetect_cmd, + "no link-detect", + NO_STR + "Disable link detection on interface\n") +{ + VTY_DECLVAR_CONTEXT(interface, ifp); + + if_linkdetect(ifp, false); + + return CMD_SUCCESS; +} + +int if_shutdown(struct interface *ifp) +{ + struct zebra_if *if_data; + + if (ifp->ifindex != IFINDEX_INTERNAL) { + /* send RA lifetime of 0 before stopping. rfc4861/6.2.5 */ + rtadv_stop_ra(ifp); + if (if_unset_flags(ifp, IFF_UP) < 0) { + zlog_debug("Can't shutdown interface %s", ifp->name); + return -1; + } + if_refresh(ifp); + } + if_data = ifp->info; + if_data->shutdown = IF_ZEBRA_DATA_ON; + + return 0; +} + +DEFUN (shutdown_if, + shutdown_if_cmd, + "shutdown", + "Shutdown the selected interface\n") +{ + VTY_DECLVAR_CONTEXT(interface, ifp); + int ret; + struct zebra_if *if_data; + + if (ifp->ifindex != IFINDEX_INTERNAL) { + /* send RA lifetime of 0 before stopping. 
rfc4861/6.2.5 */ + rtadv_stop_ra(ifp); + ret = if_unset_flags(ifp, IFF_UP); + if (ret < 0) { + vty_out(vty, "Can't shutdown interface\n"); + return CMD_WARNING_CONFIG_FAILED; + } + if_refresh(ifp); + } + if_data = ifp->info; + if_data->shutdown = IF_ZEBRA_DATA_ON; + + return CMD_SUCCESS; +} + +int if_no_shutdown(struct interface *ifp) +{ + struct zebra_if *if_data; + + if (ifp->ifindex != IFINDEX_INTERNAL) { + if (if_set_flags(ifp, IFF_UP | IFF_RUNNING) < 0) { + zlog_debug("Can't up interface %s", ifp->name); + return -1; + } + if_refresh(ifp); + + /* Some addresses (in particular, IPv6 addresses on Linux) get + * removed when the interface goes down. They need to be + * readded. + */ + if_addr_wakeup(ifp); + } + + if_data = ifp->info; + if_data->shutdown = IF_ZEBRA_DATA_OFF; + + return 0; +} + +DEFUN (no_shutdown_if, + no_shutdown_if_cmd, + "no shutdown", + NO_STR + "Shutdown the selected interface\n") +{ + VTY_DECLVAR_CONTEXT(interface, ifp); + int ret; + struct zebra_if *if_data; + + if (ifp->ifindex != IFINDEX_INTERNAL) { + ret = if_set_flags(ifp, IFF_UP | IFF_RUNNING); + if (ret < 0) { + vty_out(vty, "Can't up interface\n"); + return CMD_WARNING_CONFIG_FAILED; + } + if_refresh(ifp); + + /* Some addresses (in particular, IPv6 addresses on Linux) get + * removed when the interface goes down. They need to be + * readded. 
+ */ + if_addr_wakeup(ifp); + } + + if_data = ifp->info; + if_data->shutdown = IF_ZEBRA_DATA_OFF; + + return CMD_SUCCESS; +} + +DEFUN (bandwidth_if, + bandwidth_if_cmd, + "bandwidth (1-100000)", + "Set bandwidth informational parameter\n" + "Bandwidth in megabits\n") +{ + int idx_number = 1; + VTY_DECLVAR_CONTEXT(interface, ifp); + unsigned int bandwidth; + + bandwidth = strtol(argv[idx_number]->arg, NULL, 10); + + /* bandwidth range is <1-100000> */ + if (bandwidth < 1 || bandwidth > 100000) { + vty_out(vty, "Bandwidth is invalid\n"); + return CMD_WARNING_CONFIG_FAILED; + } + + ifp->bandwidth = bandwidth; + + /* force protocols to recalculate routes due to cost change */ + if (if_is_operative(ifp)) + zebra_interface_up_update(ifp); + + return CMD_SUCCESS; +} + +DEFUN (no_bandwidth_if, + no_bandwidth_if_cmd, + "no bandwidth [(1-100000)]", + NO_STR + "Set bandwidth informational parameter\n" + "Bandwidth in megabits\n") +{ + VTY_DECLVAR_CONTEXT(interface, ifp); + + ifp->bandwidth = 0; + + /* force protocols to recalculate routes due to cost change */ + if (if_is_operative(ifp)) + zebra_interface_up_update(ifp); + + return CMD_SUCCESS; +} + + +struct cmd_node link_params_node = { + .name = "link-params", + .node = LINK_PARAMS_NODE, + .parent_node = INTERFACE_NODE, + .prompt = "%s(config-link-params)# ", + .no_xpath = true, +}; + +static void link_param_cmd_set_uint32(struct interface *ifp, uint32_t *field, + uint32_t type, uint32_t value) +{ + /* Update field as needed */ + if (IS_PARAM_UNSET(ifp->link_params, type) || *field != value) { + *field = value; + SET_PARAM(ifp->link_params, type); + + /* force protocols to update LINK STATE due to parameters change + */ + if (if_is_operative(ifp)) + zebra_interface_parameters_update(ifp); + } +} +static void link_param_cmd_set_float(struct interface *ifp, float *field, + uint32_t type, float value) +{ + + /* Update field as needed */ + if (IS_PARAM_UNSET(ifp->link_params, type) || *field != value) { + *field = value; + 
SET_PARAM(ifp->link_params, type); + + /* force protocols to update LINK STATE due to parameters change + */ + if (if_is_operative(ifp)) + zebra_interface_parameters_update(ifp); + } +} + +static void link_param_cmd_unset(struct interface *ifp, uint32_t type) +{ + if (ifp->link_params == NULL) + return; + + /* Unset field */ + UNSET_PARAM(ifp->link_params, type); + + /* force protocols to update LINK STATE due to parameters change */ + if (if_is_operative(ifp)) + zebra_interface_parameters_update(ifp); +} + +DEFUN_NOSH (link_params, + link_params_cmd, + "link-params", + LINK_PARAMS_STR) +{ + /* vty->qobj_index stays the same @ interface pointer */ + vty->node = LINK_PARAMS_NODE; + + return CMD_SUCCESS; +} + +DEFUN_NOSH (exit_link_params, + exit_link_params_cmd, + "exit-link-params", + "Exit from Link Params configuration mode\n") +{ + if (vty->node == LINK_PARAMS_NODE) + vty->node = INTERFACE_NODE; + return CMD_SUCCESS; +} + +/* Specific Traffic Engineering parameters commands */ +DEFUN (link_params_enable, + link_params_enable_cmd, + "enable", + "Activate link parameters on this interface\n") +{ + VTY_DECLVAR_CONTEXT(interface, ifp); + + /* This command could be issue at startup, when activate MPLS TE */ + /* on a new interface or after a ON / OFF / ON toggle */ + /* In all case, TE parameters are reset to their default factory */ + if (IS_ZEBRA_DEBUG_EVENT || IS_ZEBRA_DEBUG_MPLS) + zlog_debug( + "Link-params: enable TE link parameters on interface %s", + ifp->name); + + if (!if_link_params_get(ifp)) { + if (IS_ZEBRA_DEBUG_EVENT || IS_ZEBRA_DEBUG_MPLS) + zlog_debug( + "Link-params: failed to init TE link parameters %s", + ifp->name); + + return CMD_WARNING_CONFIG_FAILED; + } + + /* force protocols to update LINK STATE due to parameters change */ + if (if_is_operative(ifp)) + zebra_interface_parameters_update(ifp); + + return CMD_SUCCESS; +} + +DEFUN (no_link_params_enable, + no_link_params_enable_cmd, + "no enable", + NO_STR + "Disable link parameters on this 
interface\n") +{ + VTY_DECLVAR_CONTEXT(interface, ifp); + + if (IS_ZEBRA_DEBUG_EVENT || IS_ZEBRA_DEBUG_MPLS) + zlog_debug("MPLS-TE: disable TE link parameters on interface %s", + ifp->name); + + if_link_params_free(ifp); + + /* force protocols to update LINK STATE due to parameters change */ + if (if_is_operative(ifp)) + zebra_interface_parameters_update(ifp); + + return CMD_SUCCESS; +} + +/* STANDARD TE metrics */ +DEFUN (link_params_metric, + link_params_metric_cmd, + "metric (0-4294967295)", + "Link metric for MPLS-TE purpose\n" + "Metric value in decimal\n") +{ + int idx_number = 1; + VTY_DECLVAR_CONTEXT(interface, ifp); + struct if_link_params *iflp = if_link_params_get(ifp); + uint32_t metric; + + metric = strtoul(argv[idx_number]->arg, NULL, 10); + + /* Update TE metric if needed */ + link_param_cmd_set_uint32(ifp, &iflp->te_metric, LP_TE_METRIC, metric); + + return CMD_SUCCESS; +} + +DEFUN (no_link_params_metric, + no_link_params_metric_cmd, + "no metric", + NO_STR + "Disable Link Metric on this interface\n") +{ + VTY_DECLVAR_CONTEXT(interface, ifp); + + /* Unset TE Metric */ + link_param_cmd_unset(ifp, LP_TE_METRIC); + + return CMD_SUCCESS; +} + +DEFUN (link_params_maxbw, + link_params_maxbw_cmd, + "max-bw BANDWIDTH", + "Maximum bandwidth that can be used\n" + "Bytes/second (IEEE floating point format)\n") +{ + int idx_bandwidth = 1; + VTY_DECLVAR_CONTEXT(interface, ifp); + struct if_link_params *iflp = if_link_params_get(ifp); + + float bw; + + if (sscanf(argv[idx_bandwidth]->arg, "%g", &bw) != 1) { + vty_out(vty, "link_params_maxbw: fscanf: %s\n", + safe_strerror(errno)); + return CMD_WARNING_CONFIG_FAILED; + } + + /* Check that Maximum bandwidth is not lower than other bandwidth + * parameters */ + if ((bw <= iflp->max_rsv_bw) || (bw <= iflp->unrsv_bw[0]) + || (bw <= iflp->unrsv_bw[1]) || (bw <= iflp->unrsv_bw[2]) + || (bw <= iflp->unrsv_bw[3]) || (bw <= iflp->unrsv_bw[4]) + || (bw <= iflp->unrsv_bw[5]) || (bw <= iflp->unrsv_bw[6]) + || (bw <= 
iflp->unrsv_bw[7]) || (bw <= iflp->ava_bw) + || (bw <= iflp->res_bw) || (bw <= iflp->use_bw)) { + vty_out(vty, + "Maximum Bandwidth could not be lower than others bandwidth\n"); + return CMD_WARNING_CONFIG_FAILED; + } + + /* Update Maximum Bandwidth if needed */ + link_param_cmd_set_float(ifp, &iflp->max_bw, LP_MAX_BW, bw); + + return CMD_SUCCESS; +} + +DEFUN (link_params_max_rsv_bw, + link_params_max_rsv_bw_cmd, + "max-rsv-bw BANDWIDTH", + "Maximum bandwidth that may be reserved\n" + "Bytes/second (IEEE floating point format)\n") +{ + int idx_bandwidth = 1; + VTY_DECLVAR_CONTEXT(interface, ifp); + struct if_link_params *iflp = if_link_params_get(ifp); + float bw; + + if (sscanf(argv[idx_bandwidth]->arg, "%g", &bw) != 1) { + vty_out(vty, "link_params_max_rsv_bw: fscanf: %s\n", + safe_strerror(errno)); + return CMD_WARNING_CONFIG_FAILED; + } + + /* Check that bandwidth is not greater than maximum bandwidth parameter + */ + if (bw > iflp->max_bw) { + vty_out(vty, + "Maximum Reservable Bandwidth could not be greater than Maximum Bandwidth (%g)\n", + iflp->max_bw); + return CMD_WARNING_CONFIG_FAILED; + } + + /* Update Maximum Reservable Bandwidth if needed */ + link_param_cmd_set_float(ifp, &iflp->max_rsv_bw, LP_MAX_RSV_BW, bw); + + return CMD_SUCCESS; +} + +DEFUN (link_params_unrsv_bw, + link_params_unrsv_bw_cmd, + "unrsv-bw (0-7) BANDWIDTH", + "Unreserved bandwidth at each priority level\n" + "Priority\n" + "Bytes/second (IEEE floating point format)\n") +{ + int idx_number = 1; + int idx_bandwidth = 2; + VTY_DECLVAR_CONTEXT(interface, ifp); + struct if_link_params *iflp = if_link_params_get(ifp); + int priority; + float bw; + + /* We don't have to consider about range check here. 
*/ + if (sscanf(argv[idx_number]->arg, "%d", &priority) != 1) { + vty_out(vty, "link_params_unrsv_bw: fscanf: %s\n", + safe_strerror(errno)); + return CMD_WARNING_CONFIG_FAILED; + } + + if (sscanf(argv[idx_bandwidth]->arg, "%g", &bw) != 1) { + vty_out(vty, "link_params_unrsv_bw: fscanf: %s\n", + safe_strerror(errno)); + return CMD_WARNING_CONFIG_FAILED; + } + + /* Check that bandwidth is not greater than maximum bandwidth parameter + */ + if (bw > iflp->max_bw) { + vty_out(vty, + "UnReserved Bandwidth could not be greater than Maximum Bandwidth (%g)\n", + iflp->max_bw); + return CMD_WARNING_CONFIG_FAILED; + } + + /* Update Unreserved Bandwidth if needed */ + link_param_cmd_set_float(ifp, &iflp->unrsv_bw[priority], LP_UNRSV_BW, + bw); + + return CMD_SUCCESS; +} + +DEFUN (link_params_admin_grp, + link_params_admin_grp_cmd, + "admin-grp BITPATTERN", + "Administrative group membership\n" + "32-bit Hexadecimal value (e.g. 0xa1)\n") +{ + int idx_bitpattern = 1; + VTY_DECLVAR_CONTEXT(interface, ifp); + struct if_link_params *iflp = if_link_params_get(ifp); + unsigned long value; + + if (sscanf(argv[idx_bitpattern]->arg, "0x%lx", &value) != 1) { + vty_out(vty, "link_params_admin_grp: fscanf: %s\n", + safe_strerror(errno)); + return CMD_WARNING_CONFIG_FAILED; + } + + /* Update Administrative Group if needed */ + link_param_cmd_set_uint32(ifp, &iflp->admin_grp, LP_ADM_GRP, value); + + return CMD_SUCCESS; +} + +DEFUN (no_link_params_admin_grp, + no_link_params_admin_grp_cmd, + "no admin-grp", + NO_STR + "Disable Administrative group membership on this interface\n") +{ + VTY_DECLVAR_CONTEXT(interface, ifp); + + /* Unset Admin Group */ + link_param_cmd_unset(ifp, LP_ADM_GRP); + + return CMD_SUCCESS; +} + +/* RFC5392 & RFC5316: INTER-AS */ +DEFUN (link_params_inter_as, + link_params_inter_as_cmd, + "neighbor A.B.C.D as (1-4294967295)", + "Configure remote ASBR information (Neighbor IP address and AS number)\n" + "Remote IP address in dot decimal A.B.C.D\n" + "Remote AS number\n" 
+ "AS number in the range <1-4294967295>\n") +{ + int idx_ipv4 = 1; + int idx_number = 3; + + VTY_DECLVAR_CONTEXT(interface, ifp); + struct if_link_params *iflp = if_link_params_get(ifp); + struct in_addr addr; + uint32_t as; + + if (!inet_aton(argv[idx_ipv4]->arg, &addr)) { + vty_out(vty, "Please specify Router-Addr by A.B.C.D\n"); + return CMD_WARNING_CONFIG_FAILED; + } + + as = strtoul(argv[idx_number]->arg, NULL, 10); + + /* Update Remote IP and Remote AS fields if needed */ + if (IS_PARAM_UNSET(iflp, LP_RMT_AS) || iflp->rmt_as != as + || iflp->rmt_ip.s_addr != addr.s_addr) { + + iflp->rmt_as = as; + iflp->rmt_ip.s_addr = addr.s_addr; + SET_PARAM(iflp, LP_RMT_AS); + + /* force protocols to update LINK STATE due to parameters change + */ + if (if_is_operative(ifp)) + zebra_interface_parameters_update(ifp); + } + return CMD_SUCCESS; +} + +DEFUN (no_link_params_inter_as, + no_link_params_inter_as_cmd, + "no neighbor", + NO_STR + "Remove Neighbor IP address and AS number for Inter-AS TE\n") +{ + VTY_DECLVAR_CONTEXT(interface, ifp); + struct if_link_params *iflp = if_link_params_get(ifp); + + /* Reset Remote IP and AS neighbor */ + iflp->rmt_as = 0; + iflp->rmt_ip.s_addr = 0; + UNSET_PARAM(iflp, LP_RMT_AS); + + /* force protocols to update LINK STATE due to parameters change */ + if (if_is_operative(ifp)) + zebra_interface_parameters_update(ifp); + + return CMD_SUCCESS; +} + +/* RFC7471: OSPF Traffic Engineering (TE) Metric extensions & + * draft-ietf-isis-metric-extensions-07.txt */ +DEFUN (link_params_delay, + link_params_delay_cmd, + "delay (0-16777215) [min (0-16777215) max (0-16777215)]", + "Unidirectional Average Link Delay\n" + "Average delay in micro-second as decimal (0...16777215)\n" + "Minimum delay\n" + "Minimum delay in micro-second as decimal (0...16777215)\n" + "Maximum delay\n" + "Maximum delay in micro-second as decimal (0...16777215)\n") +{ + /* Get and Check new delay values */ + uint32_t delay = 0, low = 0, high = 0; + delay = 
strtoul(argv[1]->arg, NULL, 10); + if (argc == 6) { + low = strtoul(argv[3]->arg, NULL, 10); + high = strtoul(argv[5]->arg, NULL, 10); + } + + VTY_DECLVAR_CONTEXT(interface, ifp); + struct if_link_params *iflp = if_link_params_get(ifp); + uint8_t update = 0; + + if (argc == 2) { + /* + * Check new delay value against old Min and Max delays if set + * + * RFC 7471 Section 4.2.7: + * It is possible for min delay and max delay to be + * the same value. + * + * Therefore, it is also allowed that the average + * delay be equal to the min delay or max delay. + */ + if (IS_PARAM_SET(iflp, LP_MM_DELAY) + && (delay < iflp->min_delay || delay > iflp->max_delay)) { + vty_out(vty, + "Average delay should be in range Min (%d) - Max (%d) delay\n", + iflp->min_delay, iflp->max_delay); + return CMD_WARNING_CONFIG_FAILED; + } + /* Update delay if value is not set or change */ + if (IS_PARAM_UNSET(iflp, LP_DELAY) || iflp->av_delay != delay) { + iflp->av_delay = delay; + SET_PARAM(iflp, LP_DELAY); + update = 1; + } + /* Unset Min and Max delays if already set */ + if (IS_PARAM_SET(iflp, LP_MM_DELAY)) { + iflp->min_delay = 0; + iflp->max_delay = 0; + UNSET_PARAM(iflp, LP_MM_DELAY); + update = 1; + } + } else { + /* + * Check new delays value coherency. 
See above note + * regarding average delay equal to min/max allowed + */ + if (delay < low || delay > high) { + vty_out(vty, + "Average delay should be in range Min (%d) - Max (%d) delay\n", + low, high); + return CMD_WARNING_CONFIG_FAILED; + } + /* Update Delays if needed */ + if (IS_PARAM_UNSET(iflp, LP_DELAY) + || IS_PARAM_UNSET(iflp, LP_MM_DELAY) + || iflp->av_delay != delay || iflp->min_delay != low + || iflp->max_delay != high) { + iflp->av_delay = delay; + SET_PARAM(iflp, LP_DELAY); + iflp->min_delay = low; + iflp->max_delay = high; + SET_PARAM(iflp, LP_MM_DELAY); + update = 1; + } + } + + /* force protocols to update LINK STATE due to parameters change */ + if (update == 1 && if_is_operative(ifp)) + zebra_interface_parameters_update(ifp); + + return CMD_SUCCESS; +} + +DEFUN (no_link_params_delay, + no_link_params_delay_cmd, + "no delay", + NO_STR + "Disable Unidirectional Average, Min & Max Link Delay on this interface\n") +{ + VTY_DECLVAR_CONTEXT(interface, ifp); + struct if_link_params *iflp = if_link_params_get(ifp); + + /* Unset Delays */ + iflp->av_delay = 0; + UNSET_PARAM(iflp, LP_DELAY); + iflp->min_delay = 0; + iflp->max_delay = 0; + UNSET_PARAM(iflp, LP_MM_DELAY); + + /* force protocols to update LINK STATE due to parameters change */ + if (if_is_operative(ifp)) + zebra_interface_parameters_update(ifp); + + return CMD_SUCCESS; +} + +DEFUN (link_params_delay_var, + link_params_delay_var_cmd, + "delay-variation (0-16777215)", + "Unidirectional Link Delay Variation\n" + "delay variation in micro-second as decimal (0...16777215)\n") +{ + int idx_number = 1; + VTY_DECLVAR_CONTEXT(interface, ifp); + struct if_link_params *iflp = if_link_params_get(ifp); + uint32_t value; + + value = strtoul(argv[idx_number]->arg, NULL, 10); + + /* Update Delay Variation if needed */ + link_param_cmd_set_uint32(ifp, &iflp->delay_var, LP_DELAY_VAR, value); + + return CMD_SUCCESS; +} + +DEFUN (no_link_params_delay_var, + no_link_params_delay_var_cmd, + "no delay-variation", 
+ NO_STR + "Disable Unidirectional Delay Variation on this interface\n") +{ + VTY_DECLVAR_CONTEXT(interface, ifp); + + /* Unset Delay Variation */ + link_param_cmd_unset(ifp, LP_DELAY_VAR); + + return CMD_SUCCESS; +} + +DEFUN (link_params_pkt_loss, + link_params_pkt_loss_cmd, + "packet-loss PERCENTAGE", + "Unidirectional Link Packet Loss\n" + "percentage of total traffic by 0.000003% step and less than 50.331642%\n") +{ + int idx_percentage = 1; + VTY_DECLVAR_CONTEXT(interface, ifp); + struct if_link_params *iflp = if_link_params_get(ifp); + float fval; + + if (sscanf(argv[idx_percentage]->arg, "%g", &fval) != 1) { + vty_out(vty, "link_params_pkt_loss: fscanf: %s\n", + safe_strerror(errno)); + return CMD_WARNING_CONFIG_FAILED; + } + + if (fval > MAX_PKT_LOSS) + fval = MAX_PKT_LOSS; + + /* Update Packet Loss if needed */ + link_param_cmd_set_float(ifp, &iflp->pkt_loss, LP_PKT_LOSS, fval); + + return CMD_SUCCESS; +} + +DEFUN (no_link_params_pkt_loss, + no_link_params_pkt_loss_cmd, + "no packet-loss", + NO_STR + "Disable Unidirectional Link Packet Loss on this interface\n") +{ + VTY_DECLVAR_CONTEXT(interface, ifp); + + /* Unset Packet Loss */ + link_param_cmd_unset(ifp, LP_PKT_LOSS); + + return CMD_SUCCESS; +} + +DEFUN (link_params_res_bw, + link_params_res_bw_cmd, + "res-bw BANDWIDTH", + "Unidirectional Residual Bandwidth\n" + "Bytes/second (IEEE floating point format)\n") +{ + int idx_bandwidth = 1; + VTY_DECLVAR_CONTEXT(interface, ifp); + struct if_link_params *iflp = if_link_params_get(ifp); + float bw; + + if (sscanf(argv[idx_bandwidth]->arg, "%g", &bw) != 1) { + vty_out(vty, "link_params_res_bw: fscanf: %s\n", + safe_strerror(errno)); + return CMD_WARNING_CONFIG_FAILED; + } + + /* Check that bandwidth is not greater than maximum bandwidth parameter + */ + if (bw > iflp->max_bw) { + vty_out(vty, + "Residual Bandwidth could not be greater than Maximum Bandwidth (%g)\n", + iflp->max_bw); + return CMD_WARNING_CONFIG_FAILED; + } + + /* Update Residual Bandwidth if 
needed */ + link_param_cmd_set_float(ifp, &iflp->res_bw, LP_RES_BW, bw); + + return CMD_SUCCESS; +} + +DEFUN (no_link_params_res_bw, + no_link_params_res_bw_cmd, + "no res-bw", + NO_STR + "Disable Unidirectional Residual Bandwidth on this interface\n") +{ + VTY_DECLVAR_CONTEXT(interface, ifp); + + /* Unset Residual Bandwidth */ + link_param_cmd_unset(ifp, LP_RES_BW); + + return CMD_SUCCESS; +} + +DEFUN (link_params_ava_bw, + link_params_ava_bw_cmd, + "ava-bw BANDWIDTH", + "Unidirectional Available Bandwidth\n" + "Bytes/second (IEEE floating point format)\n") +{ + int idx_bandwidth = 1; + VTY_DECLVAR_CONTEXT(interface, ifp); + struct if_link_params *iflp = if_link_params_get(ifp); + float bw; + + if (sscanf(argv[idx_bandwidth]->arg, "%g", &bw) != 1) { + vty_out(vty, "link_params_ava_bw: fscanf: %s\n", + safe_strerror(errno)); + return CMD_WARNING_CONFIG_FAILED; + } + + /* Check that bandwidth is not greater than maximum bandwidth parameter + */ + if (bw > iflp->max_bw) { + vty_out(vty, + "Available Bandwidth could not be greater than Maximum Bandwidth (%g)\n", + iflp->max_bw); + return CMD_WARNING_CONFIG_FAILED; + } + + /* Update Residual Bandwidth if needed */ + link_param_cmd_set_float(ifp, &iflp->ava_bw, LP_AVA_BW, bw); + + return CMD_SUCCESS; +} + +DEFUN (no_link_params_ava_bw, + no_link_params_ava_bw_cmd, + "no ava-bw", + NO_STR + "Disable Unidirectional Available Bandwidth on this interface\n") +{ + VTY_DECLVAR_CONTEXT(interface, ifp); + + /* Unset Available Bandwidth */ + link_param_cmd_unset(ifp, LP_AVA_BW); + + return CMD_SUCCESS; +} + +DEFUN (link_params_use_bw, + link_params_use_bw_cmd, + "use-bw BANDWIDTH", + "Unidirectional Utilised Bandwidth\n" + "Bytes/second (IEEE floating point format)\n") +{ + int idx_bandwidth = 1; + VTY_DECLVAR_CONTEXT(interface, ifp); + struct if_link_params *iflp = if_link_params_get(ifp); + float bw; + + if (sscanf(argv[idx_bandwidth]->arg, "%g", &bw) != 1) { + vty_out(vty, "link_params_use_bw: fscanf: %s\n", + 
safe_strerror(errno)); + return CMD_WARNING_CONFIG_FAILED; + } + + /* Check that bandwidth is not greater than maximum bandwidth parameter + */ + if (bw > iflp->max_bw) { + vty_out(vty, + "Utilised Bandwidth could not be greater than Maximum Bandwidth (%g)\n", + iflp->max_bw); + return CMD_WARNING_CONFIG_FAILED; + } + + /* Update Utilized Bandwidth if needed */ + link_param_cmd_set_float(ifp, &iflp->use_bw, LP_USE_BW, bw); + + return CMD_SUCCESS; +} + +DEFUN (no_link_params_use_bw, + no_link_params_use_bw_cmd, + "no use-bw", + NO_STR + "Disable Unidirectional Utilised Bandwidth on this interface\n") +{ + VTY_DECLVAR_CONTEXT(interface, ifp); + + /* Unset Utilised Bandwidth */ + link_param_cmd_unset(ifp, LP_USE_BW); + + return CMD_SUCCESS; +} + +int if_ip_address_install(struct interface *ifp, struct prefix *prefix, + const char *label, struct prefix *pp) +{ + struct zebra_if *if_data; + struct prefix_ipv4 lp; + struct prefix_ipv4 *p; + struct connected *ifc; + enum zebra_dplane_result dplane_res; + + if_data = ifp->info; + + lp.family = prefix->family; + lp.prefix = prefix->u.prefix4; + lp.prefixlen = prefix->prefixlen; + apply_mask_ipv4(&lp); + + ifc = connected_check_ptp(ifp, &lp, pp ? pp : NULL); + if (!ifc) { + ifc = connected_new(); + ifc->ifp = ifp; + + /* Address. */ + p = prefix_ipv4_new(); + *p = lp; + ifc->address = (struct prefix *)p; + + if (pp) { + SET_FLAG(ifc->flags, ZEBRA_IFA_PEER); + p = prefix_ipv4_new(); + *p = *(struct prefix_ipv4 *)pp; + ifc->destination = (struct prefix *)p; + } + + /* Label. */ + if (label) + ifc->label = XSTRDUP(MTYPE_CONNECTED_LABEL, label); + + /* Add to linked list. */ + listnode_add(ifp->connected, ifc); + } + + /* This address is configured from zebra. */ + if (!CHECK_FLAG(ifc->conf, ZEBRA_IFC_CONFIGURED)) + SET_FLAG(ifc->conf, ZEBRA_IFC_CONFIGURED); + + /* In case of this route need to install kernel. 
*/ + if (!CHECK_FLAG(ifc->conf, ZEBRA_IFC_QUEUED) && + CHECK_FLAG(ifp->status, ZEBRA_INTERFACE_ACTIVE) && + !(if_data && if_data->shutdown == IF_ZEBRA_DATA_ON)) { + /* Some system need to up the interface to set IP address. */ + if (!if_is_up(ifp)) { + if_set_flags(ifp, IFF_UP | IFF_RUNNING); + if_refresh(ifp); + } + + dplane_res = dplane_intf_addr_set(ifp, ifc); + if (dplane_res == ZEBRA_DPLANE_REQUEST_FAILURE) { + zlog_debug( + "dplane can't set interface IP address: %s.", + dplane_res2str(dplane_res)); + return NB_ERR; + } + + SET_FLAG(ifc->conf, ZEBRA_IFC_QUEUED); + /* The address will be advertised to zebra clients when the + * notification + * from the kernel has been received. + * It will also be added to the subnet chain list, then. */ + } + + return 0; +} + +static int ip_address_install(struct vty *vty, struct interface *ifp, + const char *addr_str, const char *peer_str, + const char *label) +{ + struct zebra_if *if_data; + struct prefix_ipv4 lp, pp; + struct connected *ifc; + struct prefix_ipv4 *p; + int ret; + enum zebra_dplane_result dplane_res; + + if_data = ifp->info; + + ret = str2prefix_ipv4(addr_str, &lp); + if (ret <= 0) { + vty_out(vty, "%% Malformed address \n"); + return CMD_WARNING_CONFIG_FAILED; + } + + if (ipv4_martian(&lp.prefix)) { + vty_out(vty, "%% Invalid address\n"); + return CMD_WARNING_CONFIG_FAILED; + } + + if (peer_str) { + if (lp.prefixlen != IPV4_MAX_BITLEN) { + vty_out(vty, + "%% Local prefix length for P-t-P address must be /32\n"); + return CMD_WARNING_CONFIG_FAILED; + } + + ret = str2prefix_ipv4(peer_str, &pp); + if (ret <= 0) { + vty_out(vty, "%% Malformed peer address\n"); + return CMD_WARNING_CONFIG_FAILED; + } + } + + ifc = connected_check_ptp(ifp, &lp, peer_str ? &pp : NULL); + if (!ifc) { + ifc = connected_new(); + ifc->ifp = ifp; + + /* Address. 
*/ + p = prefix_ipv4_new(); + *p = lp; + ifc->address = (struct prefix *)p; + + if (peer_str) { + SET_FLAG(ifc->flags, ZEBRA_IFA_PEER); + p = prefix_ipv4_new(); + *p = pp; + ifc->destination = (struct prefix *)p; + } + + /* Label. */ + if (label) + ifc->label = XSTRDUP(MTYPE_CONNECTED_LABEL, label); + + /* Add to linked list. */ + listnode_add(ifp->connected, ifc); + } + + /* This address is configured from zebra. */ + if (!CHECK_FLAG(ifc->conf, ZEBRA_IFC_CONFIGURED)) + SET_FLAG(ifc->conf, ZEBRA_IFC_CONFIGURED); + + /* In case of this route need to install kernel. */ + if (!CHECK_FLAG(ifc->conf, ZEBRA_IFC_QUEUED) && + CHECK_FLAG(ifp->status, ZEBRA_INTERFACE_ACTIVE) && + !(if_data && if_data->shutdown == IF_ZEBRA_DATA_ON)) { + /* Some system need to up the interface to set IP address. */ + if (!if_is_up(ifp)) { + if_set_flags(ifp, IFF_UP | IFF_RUNNING); + if_refresh(ifp); + } + + dplane_res = dplane_intf_addr_set(ifp, ifc); + if (dplane_res == ZEBRA_DPLANE_REQUEST_FAILURE) { + vty_out(vty, "%% Can't set interface IP address: %s.\n", + dplane_res2str(dplane_res)); + return CMD_WARNING_CONFIG_FAILED; + } + + SET_FLAG(ifc->conf, ZEBRA_IFC_QUEUED); + /* The address will be advertised to zebra clients when the + * notification + * from the kernel has been received. + * It will also be added to the subnet chain list, then. */ + } + + return CMD_SUCCESS; +} + +int if_ip_address_uinstall(struct interface *ifp, struct prefix *prefix) +{ + struct connected *ifc = NULL; + enum zebra_dplane_result dplane_res; + + if (prefix->family == AF_INET) { + /* Check current interface address. */ + ifc = connected_check_ptp(ifp, prefix, NULL); + if (!ifc) { + zlog_debug("interface %s Can't find address", + ifp->name); + return -1; + } + + } else if (prefix->family == AF_INET6) { + /* Check current interface address. 
*/ + ifc = connected_check(ifp, prefix); + } + + if (!ifc) { + zlog_debug("interface %s Can't find address", ifp->name); + return -1; + } + UNSET_FLAG(ifc->conf, ZEBRA_IFC_CONFIGURED); + + /* This is not real address or interface is not active. */ + if (!CHECK_FLAG(ifc->conf, ZEBRA_IFC_QUEUED) + || !CHECK_FLAG(ifp->status, ZEBRA_INTERFACE_ACTIVE)) { + listnode_delete(ifp->connected, ifc); + connected_free(&ifc); + return CMD_WARNING_CONFIG_FAILED; + } + + /* This is real route. */ + dplane_res = dplane_intf_addr_unset(ifp, ifc); + if (dplane_res == ZEBRA_DPLANE_REQUEST_FAILURE) { + zlog_debug("Can't unset interface IP address: %s.", + dplane_res2str(dplane_res)); + return -1; + } + UNSET_FLAG(ifc->conf, ZEBRA_IFC_QUEUED); + + return 0; +} + +static int ip_address_uninstall(struct vty *vty, struct interface *ifp, + const char *addr_str, const char *peer_str, + const char *label) +{ + struct prefix_ipv4 lp, pp; + struct connected *ifc; + int ret; + enum zebra_dplane_result dplane_res; + + /* Convert to prefix structure. */ + ret = str2prefix_ipv4(addr_str, &lp); + if (ret <= 0) { + vty_out(vty, "%% Malformed address \n"); + return CMD_WARNING_CONFIG_FAILED; + } + + if (peer_str) { + if (lp.prefixlen != IPV4_MAX_BITLEN) { + vty_out(vty, + "%% Local prefix length for P-t-P address must be /32\n"); + return CMD_WARNING_CONFIG_FAILED; + } + + ret = str2prefix_ipv4(peer_str, &pp); + if (ret <= 0) { + vty_out(vty, "%% Malformed peer address\n"); + return CMD_WARNING_CONFIG_FAILED; + } + } + + /* Check current interface address. */ + ifc = connected_check_ptp(ifp, &lp, peer_str ? &pp : NULL); + if (!ifc) { + vty_out(vty, "%% Can't find address\n"); + return CMD_WARNING_CONFIG_FAILED; + } + + /* This is not configured address. */ + if (!CHECK_FLAG(ifc->conf, ZEBRA_IFC_CONFIGURED)) + return CMD_WARNING_CONFIG_FAILED; + + UNSET_FLAG(ifc->conf, ZEBRA_IFC_CONFIGURED); + + /* This is not real address or interface is not active. 
*/ + if (!CHECK_FLAG(ifc->conf, ZEBRA_IFC_QUEUED) + || !CHECK_FLAG(ifp->status, ZEBRA_INTERFACE_ACTIVE)) { + listnode_delete(ifp->connected, ifc); + connected_free(&ifc); + return CMD_WARNING_CONFIG_FAILED; + } + + /* This is real route. */ + dplane_res = dplane_intf_addr_unset(ifp, ifc); + if (dplane_res == ZEBRA_DPLANE_REQUEST_FAILURE) { + vty_out(vty, "%% Can't unset interface IP address: %s.\n", + dplane_res2str(dplane_res)); + return CMD_WARNING_CONFIG_FAILED; + } + UNSET_FLAG(ifc->conf, ZEBRA_IFC_QUEUED); + /* we will receive a kernel notification about this route being removed. + * this will trigger its removal from the connected list. */ + return CMD_SUCCESS; +} + +DEFUN (ip_address, + ip_address_cmd, + "ip address A.B.C.D/M", + "Interface Internet Protocol config commands\n" + "Set the IP address of an interface\n" + "IP address (e.g. 10.0.0.1/8)\n") +{ + int idx_ipv4_prefixlen = 2; + VTY_DECLVAR_CONTEXT(interface, ifp); + return ip_address_install(vty, ifp, argv[idx_ipv4_prefixlen]->arg, NULL, + NULL); +} + +DEFUN (no_ip_address, + no_ip_address_cmd, + "no ip address A.B.C.D/M", + NO_STR + "Interface Internet Protocol config commands\n" + "Set the IP address of an interface\n" + "IP Address (e.g. 10.0.0.1/8)\n") +{ + int idx_ipv4_prefixlen = 3; + VTY_DECLVAR_CONTEXT(interface, ifp); + return ip_address_uninstall(vty, ifp, argv[idx_ipv4_prefixlen]->arg, + NULL, NULL); +} + +DEFUN(ip_address_peer, + ip_address_peer_cmd, + "ip address A.B.C.D peer A.B.C.D/M", + "Interface Internet Protocol config commands\n" + "Set the IP address of an interface\n" + "Local IP (e.g. 10.0.0.1) for P-t-P address\n" + "Specify P-t-P address\n" + "Peer IP address (e.g. 
10.0.0.1/8)\n") +{ + VTY_DECLVAR_CONTEXT(interface, ifp); + return ip_address_install(vty, ifp, argv[2]->arg, argv[4]->arg, NULL); +} + +DEFUN(no_ip_address_peer, + no_ip_address_peer_cmd, + "no ip address A.B.C.D peer A.B.C.D/M", + NO_STR + "Interface Internet Protocol config commands\n" + "Set the IP address of an interface\n" + "Local IP (e.g. 10.0.0.1) for P-t-P address\n" + "Specify P-t-P address\n" + "Peer IP address (e.g. 10.0.0.1/8)\n") +{ + VTY_DECLVAR_CONTEXT(interface, ifp); + return ip_address_uninstall(vty, ifp, argv[3]->arg, argv[5]->arg, NULL); +} + +#ifdef HAVE_NETLINK +DEFUN (ip_address_label, + ip_address_label_cmd, + "ip address A.B.C.D/M label LINE", + "Interface Internet Protocol config commands\n" + "Set the IP address of an interface\n" + "IP address (e.g. 10.0.0.1/8)\n" + "Label of this address\n" + "Label\n") +{ + int idx_ipv4_prefixlen = 2; + int idx_line = 4; + VTY_DECLVAR_CONTEXT(interface, ifp); + return ip_address_install(vty, ifp, argv[idx_ipv4_prefixlen]->arg, NULL, + argv[idx_line]->arg); +} + +DEFUN (no_ip_address_label, + no_ip_address_label_cmd, + "no ip address A.B.C.D/M label LINE", + NO_STR + "Interface Internet Protocol config commands\n" + "Set the IP address of an interface\n" + "IP address (e.g. 
10.0.0.1/8)\n" + "Label of this address\n" + "Label\n") +{ + int idx_ipv4_prefixlen = 3; + int idx_line = 5; + VTY_DECLVAR_CONTEXT(interface, ifp); + return ip_address_uninstall(vty, ifp, argv[idx_ipv4_prefixlen]->arg, + NULL, argv[idx_line]->arg); +} +#endif /* HAVE_NETLINK */ + +int if_ipv6_address_install(struct interface *ifp, struct prefix *prefix, + const char *label) +{ + struct zebra_if *if_data; + struct prefix_ipv6 cp; + struct connected *ifc; + struct prefix_ipv6 *p; + enum zebra_dplane_result dplane_res; + + if_data = ifp->info; + + cp.family = prefix->family; + cp.prefixlen = prefix->prefixlen; + cp.prefix = prefix->u.prefix6; + apply_mask_ipv6(&cp); + + ifc = connected_check(ifp, (struct prefix *)&cp); + if (!ifc) { + ifc = connected_new(); + ifc->ifp = ifp; + + /* Address. */ + p = prefix_ipv6_new(); + *p = cp; + ifc->address = (struct prefix *)p; + + /* Label. */ + if (label) + ifc->label = XSTRDUP(MTYPE_CONNECTED_LABEL, label); + + /* Add to linked list. */ + listnode_add(ifp->connected, ifc); + } + + /* This address is configured from zebra. */ + if (!CHECK_FLAG(ifc->conf, ZEBRA_IFC_CONFIGURED)) + SET_FLAG(ifc->conf, ZEBRA_IFC_CONFIGURED); + + /* In case of this route need to install kernel. */ + if (!CHECK_FLAG(ifc->conf, ZEBRA_IFC_QUEUED) && + CHECK_FLAG(ifp->status, ZEBRA_INTERFACE_ACTIVE) && + !(if_data && if_data->shutdown == IF_ZEBRA_DATA_ON)) { + /* Some system need to up the interface to set IP address. */ + if (!if_is_up(ifp)) { + if_set_flags(ifp, IFF_UP | IFF_RUNNING); + if_refresh(ifp); + } + + dplane_res = dplane_intf_addr_set(ifp, ifc); + if (dplane_res == ZEBRA_DPLANE_REQUEST_FAILURE) { + zlog_debug( + "dplane can't set interface IP address: %s.", + dplane_res2str(dplane_res)); + return NB_ERR; + } + + SET_FLAG(ifc->conf, ZEBRA_IFC_QUEUED); + /* The address will be advertised to zebra clients when the + * notification + * from the kernel has been received. 
*/ + } + + return 0; +} + +static int ipv6_address_install(struct vty *vty, struct interface *ifp, + const char *addr_str, const char *peer_str, + const char *label) +{ + struct zebra_if *if_data; + struct prefix_ipv6 cp; + struct connected *ifc; + struct prefix_ipv6 *p; + int ret; + enum zebra_dplane_result dplane_res; + + if_data = ifp->info; + + ret = str2prefix_ipv6(addr_str, &cp); + if (ret <= 0) { + vty_out(vty, "%% Malformed address \n"); + return CMD_WARNING_CONFIG_FAILED; + } + + if (ipv6_martian(&cp.prefix)) { + vty_out(vty, "%% Invalid address\n"); + return CMD_WARNING_CONFIG_FAILED; + } + + ifc = connected_check(ifp, (struct prefix *)&cp); + if (!ifc) { + ifc = connected_new(); + ifc->ifp = ifp; + + /* Address. */ + p = prefix_ipv6_new(); + *p = cp; + ifc->address = (struct prefix *)p; + + /* Label. */ + if (label) + ifc->label = XSTRDUP(MTYPE_CONNECTED_LABEL, label); + + /* Add to linked list. */ + listnode_add(ifp->connected, ifc); + } + + /* This address is configured from zebra. */ + if (!CHECK_FLAG(ifc->conf, ZEBRA_IFC_CONFIGURED)) + SET_FLAG(ifc->conf, ZEBRA_IFC_CONFIGURED); + + /* In case of this route need to install kernel. */ + if (!CHECK_FLAG(ifc->conf, ZEBRA_IFC_QUEUED) && + CHECK_FLAG(ifp->status, ZEBRA_INTERFACE_ACTIVE) && + !(if_data && if_data->shutdown == IF_ZEBRA_DATA_ON)) { + /* Some system need to up the interface to set IP address. */ + if (!if_is_up(ifp)) { + if_set_flags(ifp, IFF_UP | IFF_RUNNING); + if_refresh(ifp); + } + + dplane_res = dplane_intf_addr_set(ifp, ifc); + if (dplane_res == ZEBRA_DPLANE_REQUEST_FAILURE) { + vty_out(vty, "%% Can't set interface IP address: %s.\n", + dplane_res2str(dplane_res)); + return CMD_WARNING_CONFIG_FAILED; + } + + SET_FLAG(ifc->conf, ZEBRA_IFC_QUEUED); + /* The address will be advertised to zebra clients when the + * notification + * from the kernel has been received. 
*/ + } + + return CMD_SUCCESS; +} + +/* Return true if an ipv6 address is configured on ifp */ +int ipv6_address_configured(struct interface *ifp) +{ + struct connected *connected; + struct listnode *node; + + for (ALL_LIST_ELEMENTS_RO(ifp->connected, node, connected)) + if (CHECK_FLAG(connected->conf, ZEBRA_IFC_REAL) + && (connected->address->family == AF_INET6)) + return 1; + + return 0; +} + +static int ipv6_address_uninstall(struct vty *vty, struct interface *ifp, + const char *addr_str, const char *peer_str, + const char *label) +{ + struct prefix_ipv6 cp; + struct connected *ifc; + int ret; + enum zebra_dplane_result dplane_res; + + /* Convert to prefix structure. */ + ret = str2prefix_ipv6(addr_str, &cp); + if (ret <= 0) { + vty_out(vty, "%% Malformed address \n"); + return CMD_WARNING_CONFIG_FAILED; + } + + /* Check current interface address. */ + ifc = connected_check(ifp, (struct prefix *)&cp); + if (!ifc) { + vty_out(vty, "%% Can't find address\n"); + return CMD_WARNING_CONFIG_FAILED; + } + + /* This is not configured address. */ + if (!CHECK_FLAG(ifc->conf, ZEBRA_IFC_CONFIGURED)) + return CMD_WARNING_CONFIG_FAILED; + + UNSET_FLAG(ifc->conf, ZEBRA_IFC_CONFIGURED); + + /* This is not real address or interface is not active. */ + if (!CHECK_FLAG(ifc->conf, ZEBRA_IFC_QUEUED) + || !CHECK_FLAG(ifp->status, ZEBRA_INTERFACE_ACTIVE)) { + listnode_delete(ifp->connected, ifc); + connected_free(&ifc); + return CMD_WARNING_CONFIG_FAILED; + } + + /* This is real route. */ + dplane_res = dplane_intf_addr_unset(ifp, ifc); + if (dplane_res == ZEBRA_DPLANE_REQUEST_FAILURE) { + vty_out(vty, "%% Can't unset interface IP address: %s.\n", + dplane_res2str(dplane_res)); + return CMD_WARNING_CONFIG_FAILED; + } + + UNSET_FLAG(ifc->conf, ZEBRA_IFC_QUEUED); + /* This information will be propagated to the zclients when the + * kernel notification is received. 
*/ + return CMD_SUCCESS; +} + +DEFUN (ipv6_address, + ipv6_address_cmd, + "ipv6 address X:X::X:X/M", + "Interface IPv6 config commands\n" + "Set the IP address of an interface\n" + "IPv6 address (e.g. 3ffe:506::1/48)\n") +{ + int idx_ipv6_prefixlen = 2; + VTY_DECLVAR_CONTEXT(interface, ifp); + return ipv6_address_install(vty, ifp, argv[idx_ipv6_prefixlen]->arg, + NULL, NULL); +} + +DEFUN (no_ipv6_address, + no_ipv6_address_cmd, + "no ipv6 address X:X::X:X/M", + NO_STR + "Interface IPv6 config commands\n" + "Set the IP address of an interface\n" + "IPv6 address (e.g. 3ffe:506::1/48)\n") +{ + int idx_ipv6_prefixlen = 3; + VTY_DECLVAR_CONTEXT(interface, ifp); + return ipv6_address_uninstall(vty, ifp, argv[idx_ipv6_prefixlen]->arg, + NULL, NULL); +} + +static int link_params_config_write(struct vty *vty, struct interface *ifp) +{ + int i; + + if ((ifp == NULL) || !HAS_LINK_PARAMS(ifp)) + return -1; + + struct if_link_params *iflp = ifp->link_params; + + vty_out(vty, " link-params\n"); + vty_out(vty, " enable\n"); + if (IS_PARAM_SET(iflp, LP_TE_METRIC) && iflp->te_metric != ifp->metric) + vty_out(vty, " metric %u\n", iflp->te_metric); + if (IS_PARAM_SET(iflp, LP_MAX_BW) && iflp->max_bw != iflp->default_bw) + vty_out(vty, " max-bw %g\n", iflp->max_bw); + if (IS_PARAM_SET(iflp, LP_MAX_RSV_BW) + && iflp->max_rsv_bw != iflp->default_bw) + vty_out(vty, " max-rsv-bw %g\n", iflp->max_rsv_bw); + if (IS_PARAM_SET(iflp, LP_UNRSV_BW)) { + for (i = 0; i < 8; i++) + if (iflp->unrsv_bw[i] != iflp->default_bw) + vty_out(vty, " unrsv-bw %d %g\n", i, + iflp->unrsv_bw[i]); + } + if (IS_PARAM_SET(iflp, LP_ADM_GRP)) + vty_out(vty, " admin-grp 0x%x\n", iflp->admin_grp); + if (IS_PARAM_SET(iflp, LP_DELAY)) { + vty_out(vty, " delay %u", iflp->av_delay); + if (IS_PARAM_SET(iflp, LP_MM_DELAY)) { + vty_out(vty, " min %u", iflp->min_delay); + vty_out(vty, " max %u", iflp->max_delay); + } + vty_out(vty, "\n"); + } + if (IS_PARAM_SET(iflp, LP_DELAY_VAR)) + vty_out(vty, " delay-variation %u\n", 
iflp->delay_var); + if (IS_PARAM_SET(iflp, LP_PKT_LOSS)) + vty_out(vty, " packet-loss %g\n", iflp->pkt_loss); + if (IS_PARAM_SET(iflp, LP_AVA_BW)) + vty_out(vty, " ava-bw %g\n", iflp->ava_bw); + if (IS_PARAM_SET(iflp, LP_RES_BW)) + vty_out(vty, " res-bw %g\n", iflp->res_bw); + if (IS_PARAM_SET(iflp, LP_USE_BW)) + vty_out(vty, " use-bw %g\n", iflp->use_bw); + if (IS_PARAM_SET(iflp, LP_RMT_AS)) + vty_out(vty, " neighbor %pI4 as %u\n", &iflp->rmt_ip, + iflp->rmt_as); + vty_out(vty, " exit-link-params\n"); + return 0; +} + +static int if_config_write(struct vty *vty) +{ + struct vrf *vrf; + struct interface *ifp; + + zebra_ptm_write(vty); + + RB_FOREACH (vrf, vrf_name_head, &vrfs_by_name) + FOR_ALL_INTERFACES (vrf, ifp) { + struct zebra_if *if_data; + struct listnode *addrnode; + struct connected *ifc; + struct prefix *p; + + if_data = ifp->info; + + if_vty_config_start(vty, ifp); + + if (if_data) { + if (if_data->shutdown == IF_ZEBRA_DATA_ON) + vty_out(vty, " shutdown\n"); + + zebra_ptm_if_write(vty, if_data); + } + + if (ifp->desc) + vty_out(vty, " description %s\n", ifp->desc); + + /* Assign bandwidth here to avoid unnecessary interface + flap + while processing config script */ + if (ifp->bandwidth != 0) + vty_out(vty, " bandwidth %u\n", ifp->bandwidth); + + if (!CHECK_FLAG(ifp->status, + ZEBRA_INTERFACE_LINKDETECTION)) + vty_out(vty, " no link-detect\n"); + + for (ALL_LIST_ELEMENTS_RO(ifp->connected, addrnode, + ifc)) { + if (CHECK_FLAG(ifc->conf, + ZEBRA_IFC_CONFIGURED)) { + char buf[INET6_ADDRSTRLEN]; + p = ifc->address; + vty_out(vty, " ip%s address %s", + p->family == AF_INET ? 
"" + : "v6", + inet_ntop(p->family, + &p->u.prefix, buf, + sizeof(buf))); + if (CONNECTED_PEER(ifc)) { + p = ifc->destination; + vty_out(vty, " peer %s", + inet_ntop(p->family, + &p->u.prefix, + buf, + sizeof(buf))); + } + vty_out(vty, "/%d", p->prefixlen); + + if (ifc->label) + vty_out(vty, " label %s", + ifc->label); + + vty_out(vty, "\n"); + } + } + + if (if_data) { + if (if_data->multicast != IF_ZEBRA_DATA_UNSPEC) + vty_out(vty, " %smulticast\n", + if_data->multicast == + IF_ZEBRA_DATA_ON + ? "" + : "no "); + if (if_data->mpls == IF_ZEBRA_DATA_ON) + vty_out(vty, " mpls enable\n"); + } + + hook_call(zebra_if_config_wr, vty, ifp); + zebra_evpn_mh_if_write(vty, ifp); + link_params_config_write(vty, ifp); + + if_vty_config_end(vty); + } + return 0; +} + +/* Allocate and initialize interface vector. */ +void zebra_if_init(void) +{ + /* Initialize interface and new hook. */ + hook_register_prio(if_add, 0, if_zebra_new_hook); + hook_register_prio(if_del, 0, if_zebra_delete_hook); + + /* Install configuration write function. 
*/ + if_cmd_init(if_config_write); + install_node(&link_params_node); + /* + * This is *intentionally* setting this to NULL, signaling + * that interface creation for zebra acts differently + */ + if_zapi_callbacks(NULL, NULL, NULL, NULL); + + install_element(VIEW_NODE, &show_interface_cmd); + install_element(VIEW_NODE, &show_interface_vrf_all_cmd); + install_element(VIEW_NODE, &show_interface_name_vrf_cmd); + install_element(VIEW_NODE, &show_interface_name_vrf_all_cmd); + + install_element(ENABLE_NODE, &show_interface_desc_cmd); + install_element(ENABLE_NODE, &show_interface_desc_vrf_all_cmd); + install_element(INTERFACE_NODE, &multicast_cmd); + install_element(INTERFACE_NODE, &no_multicast_cmd); + install_element(INTERFACE_NODE, &mpls_cmd); + install_element(INTERFACE_NODE, &linkdetect_cmd); + install_element(INTERFACE_NODE, &no_linkdetect_cmd); + install_element(INTERFACE_NODE, &shutdown_if_cmd); + install_element(INTERFACE_NODE, &no_shutdown_if_cmd); + install_element(INTERFACE_NODE, &bandwidth_if_cmd); + install_element(INTERFACE_NODE, &no_bandwidth_if_cmd); + install_element(INTERFACE_NODE, &ip_address_cmd); + install_element(INTERFACE_NODE, &no_ip_address_cmd); + install_element(INTERFACE_NODE, &ip_address_peer_cmd); + install_element(INTERFACE_NODE, &no_ip_address_peer_cmd); + install_element(INTERFACE_NODE, &ipv6_address_cmd); + install_element(INTERFACE_NODE, &no_ipv6_address_cmd); +#ifdef HAVE_NETLINK + install_element(INTERFACE_NODE, &ip_address_label_cmd); + install_element(INTERFACE_NODE, &no_ip_address_label_cmd); +#endif /* HAVE_NETLINK */ + install_element(INTERFACE_NODE, &link_params_cmd); + install_default(LINK_PARAMS_NODE); + install_element(LINK_PARAMS_NODE, &link_params_enable_cmd); + install_element(LINK_PARAMS_NODE, &no_link_params_enable_cmd); + install_element(LINK_PARAMS_NODE, &link_params_metric_cmd); + install_element(LINK_PARAMS_NODE, &no_link_params_metric_cmd); + install_element(LINK_PARAMS_NODE, &link_params_maxbw_cmd); + 
install_element(LINK_PARAMS_NODE, &link_params_max_rsv_bw_cmd); + install_element(LINK_PARAMS_NODE, &link_params_unrsv_bw_cmd); + install_element(LINK_PARAMS_NODE, &link_params_admin_grp_cmd); + install_element(LINK_PARAMS_NODE, &no_link_params_admin_grp_cmd); + install_element(LINK_PARAMS_NODE, &link_params_inter_as_cmd); + install_element(LINK_PARAMS_NODE, &no_link_params_inter_as_cmd); + install_element(LINK_PARAMS_NODE, &link_params_delay_cmd); + install_element(LINK_PARAMS_NODE, &no_link_params_delay_cmd); + install_element(LINK_PARAMS_NODE, &link_params_delay_var_cmd); + install_element(LINK_PARAMS_NODE, &no_link_params_delay_var_cmd); + install_element(LINK_PARAMS_NODE, &link_params_pkt_loss_cmd); + install_element(LINK_PARAMS_NODE, &no_link_params_pkt_loss_cmd); + install_element(LINK_PARAMS_NODE, &link_params_ava_bw_cmd); + install_element(LINK_PARAMS_NODE, &no_link_params_ava_bw_cmd); + install_element(LINK_PARAMS_NODE, &link_params_res_bw_cmd); + install_element(LINK_PARAMS_NODE, &no_link_params_res_bw_cmd); + install_element(LINK_PARAMS_NODE, &link_params_use_bw_cmd); + install_element(LINK_PARAMS_NODE, &no_link_params_use_bw_cmd); + install_element(LINK_PARAMS_NODE, &exit_link_params_cmd); + + /* setup EVPN MH elements */ + zebra_evpn_interface_init(); +} diff --git a/zebra/interface.h b/zebra/interface.h new file mode 100644 index 0000000..801078e --- /dev/null +++ b/zebra/interface.h @@ -0,0 +1,352 @@ + +/* Interface function header. + * Copyright (C) 1999 Kunihiro Ishiguro + * + * This file is part of GNU Zebra. + * + * GNU Zebra is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License as published by the + * Free Software Foundation; either version 2, or (at your option) any + * later version. + * + * GNU Zebra is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. 
See the GNU + * General Public License for more details. + * + * You should have received a copy of the GNU General Public License along + * with this program; see the file COPYING; if not, write to the Free Software + * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA + */ + +#ifndef _ZEBRA_INTERFACE_H +#define _ZEBRA_INTERFACE_H + +#include "redistribute.h" +#include "vrf.h" +#include "hook.h" +#include "bitfield.h" + +#include "zebra/zebra_l2.h" +#include "zebra/zebra_nhg_private.h" +#include "zebra/zebra_router.h" +#include "zebra/rtadv.h" + +#ifdef __cplusplus +extern "C" { +#endif + +/* For interface configuration. */ +#define IF_ZEBRA_DATA_UNSPEC 0 +#define IF_ZEBRA_DATA_ON 1 +#define IF_ZEBRA_DATA_OFF 2 + +#define IF_VLAN_BITMAP_MAX 4096 + +/* Zebra interface type - ones of interest. */ +enum zebra_iftype { + ZEBRA_IF_OTHER = 0, /* Anything else */ + ZEBRA_IF_VXLAN, /* VxLAN interface */ + ZEBRA_IF_VRF, /* VRF device */ + ZEBRA_IF_BRIDGE, /* bridge device */ + ZEBRA_IF_VLAN, /* VLAN sub-interface */ + ZEBRA_IF_MACVLAN, /* MAC VLAN interface*/ + ZEBRA_IF_VETH, /* VETH interface*/ + ZEBRA_IF_BOND, /* Bond */ + ZEBRA_IF_BOND_SLAVE, /* Bond */ + ZEBRA_IF_GRE, /* GRE interface */ +}; + +/* Zebra "slave" interface type */ +enum zebra_slave_iftype { + ZEBRA_IF_SLAVE_NONE, /* Not a slave */ + ZEBRA_IF_SLAVE_VRF, /* Member of a VRF */ + ZEBRA_IF_SLAVE_BRIDGE, /* Member of a bridge */ + ZEBRA_IF_SLAVE_BOND, /* Bond member */ + ZEBRA_IF_SLAVE_OTHER, /* Something else - e.g., bond slave */ +}; + +struct irdp_interface; + +/* Ethernet segment info used for setting up EVPN multihoming */ +struct zebra_evpn_es; +struct zebra_es_if_info { + /* type-3 esi config */ + struct ethaddr sysmac; + uint32_t lid; /* local-id; has to be unique per-ES-sysmac */ + + esi_t esi; + + uint16_t df_pref; + uint8_t flags; +#define ZIF_CFG_ES_FLAG_BYPASS (1 << 0) + + struct zebra_evpn_es *es; /* local ES */ +}; + +enum zebra_if_flags { + /* device has been configured 
as an uplink for + * EVPN multihoming + */ + ZIF_FLAG_EVPN_MH_UPLINK = (1 << 0), + ZIF_FLAG_EVPN_MH_UPLINK_OPER_UP = (1 << 1), + + /* Dataplane protodown-on */ + ZIF_FLAG_PROTODOWN = (1 << 2), + /* Dataplane protodown-on Queued to the dplane */ + ZIF_FLAG_SET_PROTODOWN = (1 << 3), + /* Dataplane protodown-off Queued to the dplane */ + ZIF_FLAG_UNSET_PROTODOWN = (1 << 4), + + /* LACP bypass state is set by the dataplane on a bond member + * and inherited by the bond (if one or more bond members are in + * a bypass state the bond is placed in a bypass state) + */ + ZIF_FLAG_LACP_BYPASS = (1 << 5) +}; + +#define ZEBRA_IF_IS_PROTODOWN(zif) ((zif)->flags & ZIF_FLAG_PROTODOWN) +#define ZEBRA_IF_IS_PROTODOWN_ONLY_EXTERNAL(zif) \ + ((zif)->protodown_rc == ZEBRA_PROTODOWN_EXTERNAL) + +/* `zebra' daemon local interface structure. */ +struct zebra_if { + /* back pointer to the interface */ + struct interface *ifp; + + enum zebra_if_flags flags; + + /* Shutdown configuration. */ + uint8_t shutdown; + + /* Multicast configuration. */ + uint8_t multicast; + + /* MPLS status. */ + bool mpls; + + /* Linkdown status */ + bool linkdown, linkdownv6; + + /* Is Multicast Forwarding on? */ + bool v4mcast_on, v6mcast_on; + + /* Router advertise configuration. */ + uint8_t rtadv_enable; + + /* Installed addresses chains tree. */ + struct route_table *ipv4_subnets; + + /* Nexthops pointing to this interface */ + /** + * Any nexthop that we get should have an + * interface. When an interface goes down, + * we will use this list to update the nexthops + * pointing to it with that info. 
+ */ + struct nhg_connected_tree_head nhg_dependents; + + /* Information about up/down changes */ + unsigned int up_count; + char up_last[FRR_TIMESTAMP_LEN]; + unsigned int down_count; + char down_last[FRR_TIMESTAMP_LEN]; + + struct rtadvconf rtadv; + unsigned int ra_sent, ra_rcvd; + + struct irdp_interface *irdp; + +#ifdef HAVE_STRUCT_SOCKADDR_DL + union { + /* note that sdl_storage is never accessed, it only exists to + * make space. + * all actual uses refer to sdl - but use sizeof(sdl_storage)! + * this fits + * best with C aliasing rules. */ + struct sockaddr_dl sdl; + struct sockaddr_storage sdl_storage; + }; +#endif + + /* ptm enable configuration */ + uint8_t ptm_enable; + + /* Zebra interface and "slave" interface type */ + enum zebra_iftype zif_type; + enum zebra_slave_iftype zif_slave_type; + + /* Additional L2 info, depends on zif_type */ + union zebra_l2if_info l2info; + + /* For members of a bridge, link to bridge. */ + /* Note: If additional fields become necessary, this can be modified to + * be a pointer to a dynamically allocd struct. + */ + struct zebra_l2info_brslave brslave_info; + + struct zebra_l2info_bondslave bondslave_info; + struct zebra_l2info_bond bond_info; + + /* ethernet segment */ + struct zebra_es_if_info es_info; + + /* bitmap of vlans associated with this interface */ + bitfield_t vlan_bitmap; + + /* An interface can be error-disabled if a protocol (such as EVPN or + * VRRP) detects a problem with keeping it operationally-up. + * If any of the protodown bits are set protodown-on is programmed + * in the dataplane. This results in a carrier/L1 down on the + * physical device. + */ + uint32_t protodown_rc; + + /* list of zebra_mac entries using this interface as destination */ + struct list *mac_list; + + /* Link fields - for sub-interfaces. 
*/ + ifindex_t link_ifindex; + struct interface *link; + + uint8_t speed_update_count; + struct thread *speed_update; + + /* + * Does this interface have a v6 to v4 ll neighbor entry + * for bgp unnumbered? + */ + bool v6_2_v4_ll_neigh_entry; + char neigh_mac[6]; + struct in6_addr v6_2_v4_ll_addr6; + + /* The description of the interface */ + char *desc; +}; + +DECLARE_HOOK(zebra_if_extra_info, (struct vty * vty, struct interface *ifp), + (vty, ifp)); +DECLARE_HOOK(zebra_if_config_wr, (struct vty * vty, struct interface *ifp), + (vty, ifp)); + +#define IS_ZEBRA_IF_VRF(ifp) \ + (((struct zebra_if *)(ifp->info))->zif_type == ZEBRA_IF_VRF) + +#define IS_ZEBRA_IF_BRIDGE(ifp) \ + (((struct zebra_if *)(ifp->info))->zif_type == ZEBRA_IF_BRIDGE) + +#define IS_ZEBRA_IF_VLAN(ifp) \ + (((struct zebra_if *)(ifp->info))->zif_type == ZEBRA_IF_VLAN) + +#define IS_ZEBRA_IF_VXLAN(ifp) \ + (((struct zebra_if *)(ifp->info))->zif_type == ZEBRA_IF_VXLAN) + +#define IS_ZEBRA_IF_MACVLAN(ifp) \ + (((struct zebra_if *)(ifp->info))->zif_type == ZEBRA_IF_MACVLAN) + +#define IS_ZEBRA_IF_VETH(ifp) \ + (((struct zebra_if *)(ifp->info))->zif_type == ZEBRA_IF_VETH) + +#define IS_ZEBRA_IF_BOND(ifp) \ + (((struct zebra_if *)(ifp->info))->zif_type == ZEBRA_IF_BOND) + +#define IS_ZEBRA_IF_GRE(ifp) \ + (((struct zebra_if *)(ifp->info))->zif_type == ZEBRA_IF_GRE) + +#define IS_ZEBRA_IF_BRIDGE_SLAVE(ifp) \ + (((struct zebra_if *)(ifp->info))->zif_slave_type \ + == ZEBRA_IF_SLAVE_BRIDGE) + +#define IS_ZEBRA_IF_VRF_SLAVE(ifp) \ + (((struct zebra_if *)(ifp->info))->zif_slave_type == ZEBRA_IF_SLAVE_VRF) + +#define IS_ZEBRA_IF_BOND_SLAVE(ifp) \ + (((struct zebra_if *)(ifp->info))->zif_slave_type \ + == ZEBRA_IF_SLAVE_BOND) + +extern void zebra_if_init(void); + +extern struct interface *if_lookup_by_index_per_ns(struct zebra_ns *, uint32_t); +extern struct interface *if_lookup_by_name_per_ns(struct zebra_ns *, + const char *); +extern struct interface *if_link_per_ns(struct zebra_ns *, struct interface *); 
+extern const char *ifindex2ifname_per_ns(struct zebra_ns *, unsigned int); + +extern void if_unlink_per_ns(struct interface *); +extern void if_nbr_mac_to_ipv4ll_neigh_update(struct interface *fip, + char mac[6], + struct in6_addr *address, + int add); +extern void if_nbr_ipv6ll_to_ipv4ll_neigh_update(struct interface *ifp, + struct in6_addr *address, + int add); +extern void if_nbr_ipv6ll_to_ipv4ll_neigh_del_all(struct interface *ifp); +extern void if_delete_update(struct interface **ifp); +extern void if_add_update(struct interface *ifp); +extern void if_up(struct interface *ifp, bool install_connected); +extern void if_down(struct interface *); +extern void if_refresh(struct interface *); +extern void if_flags_update(struct interface *, uint64_t); +extern int if_subnet_add(struct interface *, struct connected *); +extern int if_subnet_delete(struct interface *, struct connected *); +extern int ipv6_address_configured(struct interface *ifp); +extern void if_handle_vrf_change(struct interface *ifp, vrf_id_t vrf_id); +extern void zebra_if_update_link(struct interface *ifp, ifindex_t link_ifindex, + ns_id_t ns_id); +extern void zebra_if_update_all_links(struct zebra_ns *zns); +/** + * Directly update entire protodown & reason code bitfield. + */ +extern int zebra_if_update_protodown_rc(struct interface *ifp, bool new_down, + uint32_t new_protodown_rc); +/** + * Set protodown with single reason. 
+ */ +extern int zebra_if_set_protodown(struct interface *ifp, bool down, + enum protodown_reasons new_reason); +extern int if_ip_address_install(struct interface *ifp, struct prefix *prefix, + const char *label, struct prefix *pp); +extern int if_ipv6_address_install(struct interface *ifp, struct prefix *prefix, + const char *label); +extern int if_ip_address_uinstall(struct interface *ifp, struct prefix *prefix); +extern int if_shutdown(struct interface *ifp); +extern int if_no_shutdown(struct interface *ifp); +extern int if_multicast_set(struct interface *ifp); +extern int if_multicast_unset(struct interface *ifp); +extern int if_linkdetect(struct interface *ifp, bool detect); +extern void if_addr_wakeup(struct interface *ifp); + +/* Nexthop group connected functions */ +extern void if_nhg_dependents_add(struct interface *ifp, + struct nhg_hash_entry *nhe); +extern void if_nhg_dependents_del(struct interface *ifp, + struct nhg_hash_entry *nhe); +extern unsigned int if_nhg_dependents_count(const struct interface *ifp); +extern bool if_nhg_dependents_is_empty(const struct interface *ifp); + +extern void vrf_add_update(struct vrf *vrfp); +extern void zebra_l2_map_slave_to_bond(struct zebra_if *zif, vrf_id_t vrf); +extern void zebra_l2_unmap_slave_from_bond(struct zebra_if *zif); +extern const char *zebra_protodown_rc_str(uint32_t protodown_rc, char *pd_buf, + uint32_t pd_buf_len); +void zebra_if_dplane_result(struct zebra_dplane_ctx *ctx); + +#ifdef HAVE_PROC_NET_DEV +extern void ifstat_update_proc(void); +#endif /* HAVE_PROC_NET_DEV */ +#ifdef HAVE_NET_RT_IFLIST +extern void ifstat_update_sysctl(void); + +#endif /* HAVE_NET_RT_IFLIST */ +#ifdef HAVE_PROC_NET_DEV +extern int interface_list_proc(void); +#endif /* HAVE_PROC_NET_DEV */ +#ifdef HAVE_PROC_NET_IF_INET6 +extern int ifaddr_proc_ipv6(void); +#endif /* HAVE_PROC_NET_IF_INET6 */ + +#ifdef __cplusplus +} +#endif + +#endif /* _ZEBRA_INTERFACE_H */ diff --git a/zebra/ioctl.c b/zebra/ioctl.c new file mode 100644 
index 0000000..6311400 --- /dev/null +++ b/zebra/ioctl.c @@ -0,0 +1,658 @@ +/* + * Common ioctl functions. + * Copyright (C) 1997, 98 Kunihiro Ishiguro + * + * This file is part of GNU Zebra. + * + * GNU Zebra is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License as published by the + * Free Software Foundation; either version 2, or (at your option) any + * later version. + * + * GNU Zebra is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. + * + * You should have received a copy of the GNU General Public License along + * with this program; see the file COPYING; if not, write to the Free Software + * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA + */ + +#include <zebra.h> + +#include "linklist.h" +#include "if.h" +#include "prefix.h" +#include "ioctl.h" +#include "log.h" +#include "privs.h" +#include "lib_errors.h" + +#include "vty.h" +#include "zebra/rib.h" +#include "zebra/rt.h" +#include "zebra/interface.h" +#include "zebra/zebra_errors.h" +#include "zebra/debug.h" + +#ifdef HAVE_BSD_LINK_DETECT +#include <net/if_media.h> +#endif /* HAVE_BSD_LINK_DETECT*/ + +extern struct zebra_privs_t zserv_privs; + +/* clear and set interface name string */ +void ifreq_set_name(struct ifreq *ifreq, struct interface *ifp) +{ + strlcpy(ifreq->ifr_name, ifp->name, sizeof(ifreq->ifr_name)); +} + +#ifndef HAVE_NETLINK +/* call ioctl system call */ +int if_ioctl(unsigned long request, caddr_t buffer) +{ + int sock; + int ret; + int err = 0; + + frr_with_privs(&zserv_privs) { + sock = socket(AF_INET, SOCK_DGRAM, 0); + if (sock < 0) { + zlog_err("Cannot create UDP socket: %s", + safe_strerror(errno)); + exit(1); + } + if ((ret = ioctl(sock, request, buffer)) < 0) + err = errno; + } + close(sock); + + if (ret < 0) { + errno 
= err; + return ret; + } + return 0; +} +#endif + +/* call ioctl system call */ +int vrf_if_ioctl(unsigned long request, caddr_t buffer, vrf_id_t vrf_id) +{ + int sock; + int ret; + int err = 0; + + frr_with_privs(&zserv_privs) { + sock = vrf_socket(AF_INET, SOCK_DGRAM, 0, vrf_id, NULL); + if (sock < 0) { + zlog_err("Cannot create UDP socket: %s", + safe_strerror(errno)); + exit(1); + } + ret = vrf_ioctl(vrf_id, sock, request, buffer); + if (ret < 0) + err = errno; + } + close(sock); + + if (ret < 0) { + errno = err; + return ret; + } + return 0; +} + +#ifndef HAVE_NETLINK +static int if_ioctl_ipv6(unsigned long request, caddr_t buffer) +{ + int sock; + int ret; + int err = 0; + + frr_with_privs(&zserv_privs) { + sock = socket(AF_INET6, SOCK_DGRAM, 0); + if (sock < 0) { + zlog_err("Cannot create IPv6 datagram socket: %s", + safe_strerror(errno)); + exit(1); + } + + if ((ret = ioctl(sock, request, buffer)) < 0) + err = errno; + } + close(sock); + + if (ret < 0) { + errno = err; + return ret; + } + return 0; +} + +/* + * get interface metric + * -- if value is not avaliable set -1 + */ +void if_get_metric(struct interface *ifp) +{ +#ifdef SIOCGIFMETRIC + struct ifreq ifreq = {}; + + ifreq_set_name(&ifreq, ifp); + + if (vrf_if_ioctl(SIOCGIFMETRIC, (caddr_t)&ifreq, ifp->vrf->vrf_id) < 0) + return; + ifp->metric = ifreq.ifr_metric; + if (ifp->metric == 0) + ifp->metric = 1; +#else /* SIOCGIFMETRIC */ + ifp->metric = -1; +#endif /* SIOCGIFMETRIC */ +} + +/* get interface MTU */ +void if_get_mtu(struct interface *ifp) +{ + struct ifreq ifreq = {}; + + ifreq_set_name(&ifreq, ifp); + +#if defined(SIOCGIFMTU) + if (vrf_if_ioctl(SIOCGIFMTU, (caddr_t)&ifreq, ifp->vrf->vrf_id) < 0) { + zlog_info("Can't lookup mtu by ioctl(SIOCGIFMTU) for %s(%u)", + ifp->name, ifp->vrf->vrf_id); + ifp->mtu6 = ifp->mtu = -1; + return; + } + + ifp->mtu6 = ifp->mtu = ifreq.ifr_mtu; + + /* propogate */ + zebra_interface_up_update(ifp); + +#else + zlog_info("Can't lookup mtu on this system for 
%s(%u)", ifp->name, + ifp->vrf->vrf_id); + ifp->mtu6 = ifp->mtu = -1; +#endif +} +#endif /* ! HAVE_NETLINK */ + +/* + * Handler for interface address programming via the zebra dplane, + * for non-netlink platforms. This handler dispatches to per-platform + * helpers, based on the operation requested. + */ +#ifndef HAVE_NETLINK + +/* Prototypes: these are placed in this block so that they're only seen + * on non-netlink platforms. + */ +static int if_set_prefix_ctx(const struct zebra_dplane_ctx *ctx); +static int if_unset_prefix_ctx(const struct zebra_dplane_ctx *ctx); +static int if_set_prefix6_ctx(const struct zebra_dplane_ctx *ctx); +static int if_unset_prefix6_ctx(const struct zebra_dplane_ctx *ctx); + +enum zebra_dplane_result kernel_address_update_ctx( + struct zebra_dplane_ctx *ctx) +{ + int ret = -1; + const struct prefix *p; + + p = dplane_ctx_get_intf_addr(ctx); + + if (dplane_ctx_get_op(ctx) == DPLANE_OP_ADDR_INSTALL) { + if (p->family == AF_INET) + ret = if_set_prefix_ctx(ctx); + else + ret = if_set_prefix6_ctx(ctx); + } else if (dplane_ctx_get_op(ctx) == DPLANE_OP_ADDR_UNINSTALL) { + if (p->family == AF_INET) + ret = if_unset_prefix_ctx(ctx); + else + ret = if_unset_prefix6_ctx(ctx); + } else { + if (IS_ZEBRA_DEBUG_DPLANE) + zlog_debug("Invalid op in interface-addr install"); + } + + return (ret == 0 ? 
+ ZEBRA_DPLANE_REQUEST_SUCCESS : ZEBRA_DPLANE_REQUEST_FAILURE); +} + +#ifdef HAVE_STRUCT_IFALIASREQ + +/* + * Helper for interface-addr install, non-netlink + */ +static int if_set_prefix_ctx(const struct zebra_dplane_ctx *ctx) +{ + int ret; + struct ifaliasreq addreq; + struct sockaddr_in addr, mask, peer; + struct prefix_ipv4 *p; + + p = (struct prefix_ipv4 *)dplane_ctx_get_intf_addr(ctx); + + memset(&addreq, 0, sizeof(addreq)); + strlcpy((char *)&addreq.ifra_name, dplane_ctx_get_ifname(ctx), + sizeof(addreq.ifra_name)); + + memset(&addr, 0, sizeof(addr)); + addr.sin_addr = p->prefix; + addr.sin_family = p->family; +#ifdef HAVE_STRUCT_SOCKADDR_IN_SIN_LEN + addr.sin_len = sizeof(struct sockaddr_in); +#endif + memcpy(&addreq.ifra_addr, &addr, sizeof(struct sockaddr_in)); + + if (dplane_ctx_intf_is_connected(ctx)) { + p = (struct prefix_ipv4 *)dplane_ctx_get_intf_dest(ctx); + memset(&mask, 0, sizeof(mask)); + peer.sin_addr = p->prefix; + peer.sin_family = p->family; +#ifdef HAVE_STRUCT_SOCKADDR_IN_SIN_LEN + peer.sin_len = sizeof(struct sockaddr_in); +#endif + memcpy(&addreq.ifra_broadaddr, &peer, + sizeof(struct sockaddr_in)); + } + + memset(&mask, 0, sizeof(mask)); + masklen2ip(p->prefixlen, &mask.sin_addr); + mask.sin_family = p->family; +#ifdef HAVE_STRUCT_SOCKADDR_IN_SIN_LEN + mask.sin_len = sizeof(struct sockaddr_in); +#endif + memcpy(&addreq.ifra_mask, &mask, sizeof(struct sockaddr_in)); + + ret = if_ioctl(SIOCAIFADDR, (caddr_t)&addreq); + if (ret < 0) + return ret; + return 0; + +} + +/* + * Helper for interface-addr un-install, non-netlink + */ +static int if_unset_prefix_ctx(const struct zebra_dplane_ctx *ctx) +{ + int ret; + struct ifaliasreq addreq; + struct sockaddr_in addr, mask, peer; + struct prefix_ipv4 *p; + + p = (struct prefix_ipv4 *)dplane_ctx_get_intf_addr(ctx); + + memset(&addreq, 0, sizeof(addreq)); + strlcpy((char *)&addreq.ifra_name, dplane_ctx_get_ifname(ctx), + sizeof(addreq.ifra_name)); + + memset(&addr, 0, sizeof(addr)); + addr.sin_addr 
= p->prefix; + addr.sin_family = p->family; +#ifdef HAVE_STRUCT_SOCKADDR_IN_SIN_LEN + addr.sin_len = sizeof(struct sockaddr_in); +#endif + memcpy(&addreq.ifra_addr, &addr, sizeof(struct sockaddr_in)); + + if (dplane_ctx_intf_is_connected(ctx)) { + p = (struct prefix_ipv4 *)dplane_ctx_get_intf_dest(ctx); + memset(&mask, 0, sizeof(mask)); + peer.sin_addr = p->prefix; + peer.sin_family = p->family; +#ifdef HAVE_STRUCT_SOCKADDR_IN_SIN_LEN + peer.sin_len = sizeof(struct sockaddr_in); +#endif + memcpy(&addreq.ifra_broadaddr, &peer, + sizeof(struct sockaddr_in)); + } + + memset(&mask, 0, sizeof(mask)); + masklen2ip(p->prefixlen, &mask.sin_addr); + mask.sin_family = p->family; +#ifdef HAVE_STRUCT_SOCKADDR_IN_SIN_LEN + mask.sin_len = sizeof(struct sockaddr_in); +#endif + memcpy(&addreq.ifra_mask, &mask, sizeof(struct sockaddr_in)); + + ret = if_ioctl(SIOCDIFADDR, (caddr_t)&addreq); + if (ret < 0) + return ret; + return 0; +} +#else +/* Set up interface's address, netmask (and broadcas? ). Linux or + Solaris uses ifname:number semantics to set IP address aliases. */ +int if_set_prefix_ctx(const struct zebra_dplane_ctx *ctx) +{ + int ret; + struct ifreq ifreq; + struct sockaddr_in addr; + struct sockaddr_in broad; + struct sockaddr_in mask; + struct prefix_ipv4 ifaddr; + struct prefix_ipv4 *p; + + p = (struct prefix_ipv4 *)dplane_ctx_get_intf_addr(ctx); + + ifaddr = *p; + + strlcpy(ifreq.ifr_name, dplane_ctx_get_ifname(ctx), + sizeof(ifreq.ifr_name)); + + addr.sin_addr = p->prefix; + addr.sin_family = p->family; + memcpy(&ifreq.ifr_addr, &addr, sizeof(struct sockaddr_in)); + ret = if_ioctl(SIOCSIFADDR, (caddr_t)&ifreq); + if (ret < 0) + return ret; + + /* We need mask for make broadcast addr. 
*/ + masklen2ip(p->prefixlen, &mask.sin_addr); + + if (dplane_ctx_intf_is_broadcast(ctx)) { + apply_mask_ipv4(&ifaddr); + addr.sin_addr = ifaddr.prefix; + + broad.sin_addr.s_addr = + (addr.sin_addr.s_addr | ~mask.sin_addr.s_addr); + broad.sin_family = p->family; + + memcpy(&ifreq.ifr_broadaddr, &broad, + sizeof(struct sockaddr_in)); + ret = if_ioctl(SIOCSIFBRDADDR, (caddr_t)&ifreq); + if (ret < 0) + return ret; + } + + mask.sin_family = p->family; + memcpy(&ifreq.ifr_addr, &mask, sizeof(struct sockaddr_in)); + ret = if_ioctl(SIOCSIFNETMASK, (caddr_t)&ifreq); + if (ret < 0) + return ret; + + return 0; +} + +/* Set up interface's address, netmask (and broadcas? ). Linux or + Solaris uses ifname:number semantics to set IP address aliases. */ +int if_unset_prefix_ctx(const struct zebra_dplane_ctx *ctx) +{ + int ret; + struct ifreq ifreq; + struct sockaddr_in addr; + struct prefix_ipv4 *p; + + p = (struct prefix_ipv4 *)dplane_ctx_get_intf_addr(ctx); + + strlcpy(ifreq.ifr_name, dplane_ctx_get_ifname(ctx), + sizeof(ifreq.ifr_name)); + + memset(&addr, 0, sizeof(addr)); + addr.sin_family = p->family; + memcpy(&ifreq.ifr_addr, &addr, sizeof(struct sockaddr_in)); + ret = if_ioctl(SIOCSIFADDR, (caddr_t)&ifreq); + if (ret < 0) + return ret; + + return 0; +} +#endif /* HAVE_STRUCT_IFALIASREQ */ +#endif /* HAVE_NETLINK */ + +/* get interface flags */ +void if_get_flags(struct interface *ifp) +{ + int ret; + struct ifreq ifreqflags = {}; + struct ifreq ifreqdata = {}; + + ifreq_set_name(&ifreqflags, ifp); + ifreq_set_name(&ifreqdata, ifp); + + ret = vrf_if_ioctl(SIOCGIFFLAGS, (caddr_t)&ifreqflags, + ifp->vrf->vrf_id); + if (ret < 0) { + flog_err_sys(EC_LIB_SYSTEM_CALL, + "vrf_if_ioctl(SIOCGIFFLAGS %s) failed: %s", + ifp->name, safe_strerror(errno)); + return; + } + + if (!CHECK_FLAG(ifp->status, ZEBRA_INTERFACE_LINKDETECTION)) + goto out; + + /* Per-default, IFF_RUNNING is held high, unless link-detect + * says otherwise - we abuse IFF_RUNNING inside zebra as a + * link-state 
flag, following practice on Linux and Solaris + * kernels + */ + +#ifdef SIOCGIFDATA + /* + * BSD gets link state from ifi_link_link in struct if_data. + * All BSD's have this in getifaddrs(3) ifa_data for AF_LINK + * addresses. We can also access it via SIOCGIFDATA. + */ + +#ifdef __NetBSD__ + struct ifdatareq ifdr = {.ifdr_data.ifi_link_state = 0}; + struct if_data *ifdata = &ifdr.ifdr_data; + + strlcpy(ifdr.ifdr_name, ifp->name, sizeof(ifdr.ifdr_name)); + ret = vrf_if_ioctl(SIOCGIFDATA, (caddr_t)&ifdr, ifp->vrf->vrf_id); +#else + struct if_data ifd = {.ifi_link_state = 0}; + struct if_data *ifdata = &ifd; + + ifreqdata.ifr_data = (caddr_t)ifdata; + ret = vrf_if_ioctl(SIOCGIFDATA, (caddr_t)&ifreqdata, ifp->vrf->vrf_id); +#endif + + if (ret == -1) + /* Very unlikely. Did the interface disappear? */ + flog_err_sys(EC_LIB_SYSTEM_CALL, + "if_ioctl(SIOCGIFDATA %s) failed: %s", ifp->name, + safe_strerror(errno)); + else { + if (ifdata->ifi_link_state >= LINK_STATE_UP) + SET_FLAG(ifreqflags.ifr_flags, IFF_RUNNING); + else if (ifdata->ifi_link_state == LINK_STATE_UNKNOWN) + /* BSD traditionally treats UNKNOWN as UP */ + SET_FLAG(ifreqflags.ifr_flags, IFF_RUNNING); + else + UNSET_FLAG(ifreqflags.ifr_flags, IFF_RUNNING); + } + +#elif defined(HAVE_BSD_LINK_DETECT) + /* + * This is only needed for FreeBSD older than FreeBSD-13. + * Valid and active media generally means the link state is + * up, but this is not always the case. + * For example, some BSD's with a net80211 interface in MONITOR + * mode will treat the media as valid and active but the + * link state is down - because we cannot send anything. + * Also, virtual interfaces such as PPP, VLAN, etc generally + * don't support media at all, so the ioctl will just fail. 
+ */ + struct ifmediareq ifmr = {.ifm_status = 0}; + + strlcpy(ifmr.ifm_name, ifp->name, sizeof(ifmr.ifm_name)); + + if (if_ioctl(SIOCGIFMEDIA, (caddr_t)&ifmr) == -1) { + if (errno != EINVAL) + flog_err_sys(EC_LIB_SYSTEM_CALL, + "if_ioctl(SIOCGIFMEDIA %s) failed: %s", + ifp->name, safe_strerror(errno)); + } else if (ifmr.ifm_status & IFM_AVALID) { /* media state is valid */ + if (ifmr.ifm_status & IFM_ACTIVE) /* media is active */ + SET_FLAG(ifreqflags.ifr_flags, IFF_RUNNING); + else + UNSET_FLAG(ifreqflags.ifr_flags, IFF_RUNNING); + } +#endif /* HAVE_BSD_LINK_DETECT */ + +out: + if_flags_update(ifp, (ifreqflags.ifr_flags & 0x0000ffff)); +} + +/* Set interface flags */ +int if_set_flags(struct interface *ifp, uint64_t flags) +{ + int ret; + struct ifreq ifreq; + + memset(&ifreq, 0, sizeof(ifreq)); + ifreq_set_name(&ifreq, ifp); + + ifreq.ifr_flags = ifp->flags; + ifreq.ifr_flags |= flags; + + ret = vrf_if_ioctl(SIOCSIFFLAGS, (caddr_t)&ifreq, ifp->vrf->vrf_id); + + if (ret < 0) { + zlog_info("can't set interface %s(%u) flags %" PRIu64, + ifp->name, ifp->vrf->vrf_id, flags); + return ret; + } + return 0; +} + +/* Unset interface's flag. 
*/ +int if_unset_flags(struct interface *ifp, uint64_t flags) +{ + int ret; + struct ifreq ifreq; + + memset(&ifreq, 0, sizeof(ifreq)); + ifreq_set_name(&ifreq, ifp); + + ifreq.ifr_flags = ifp->flags; + ifreq.ifr_flags &= ~flags; + + ret = vrf_if_ioctl(SIOCSIFFLAGS, (caddr_t)&ifreq, ifp->vrf->vrf_id); + + if (ret < 0) { + zlog_warn("can't unset interface %s(%u) flags %" PRIu64, + ifp->name, ifp->vrf->vrf_id, flags); + return ret; + } + return 0; +} + +#ifndef LINUX_IPV6 /* Netlink has its own code */ + +#ifdef HAVE_STRUCT_IN6_ALIASREQ +#ifndef ND6_INFINITE_LIFETIME +#define ND6_INFINITE_LIFETIME 0xffffffffL +#endif /* ND6_INFINITE_LIFETIME */ + +/* + * Helper for interface-addr install, non-netlink + */ +static int if_set_prefix6_ctx(const struct zebra_dplane_ctx *ctx) +{ + int ret; + struct in6_aliasreq addreq; + struct sockaddr_in6 addr; + struct sockaddr_in6 mask; + struct prefix_ipv6 *p; + + p = (struct prefix_ipv6 *)dplane_ctx_get_intf_addr(ctx); + + memset(&addreq, 0, sizeof(addreq)); + strlcpy((char *)&addreq.ifra_name, + dplane_ctx_get_ifname(ctx), sizeof(addreq.ifra_name)); + + memset(&addr, 0, sizeof(addr)); + addr.sin6_addr = p->prefix; + addr.sin6_family = p->family; +#ifdef HAVE_STRUCT_SOCKADDR_IN_SIN_LEN + addr.sin6_len = sizeof(struct sockaddr_in6); +#endif + memcpy(&addreq.ifra_addr, &addr, sizeof(struct sockaddr_in6)); + + memset(&mask, 0, sizeof(mask)); + masklen2ip6(p->prefixlen, &mask.sin6_addr); + mask.sin6_family = p->family; +#ifdef HAVE_STRUCT_SOCKADDR_IN_SIN_LEN + mask.sin6_len = sizeof(struct sockaddr_in6); +#endif + memcpy(&addreq.ifra_prefixmask, &mask, sizeof(struct sockaddr_in6)); + + addreq.ifra_lifetime.ia6t_vltime = 0xffffffff; + addreq.ifra_lifetime.ia6t_pltime = 0xffffffff; + +#ifdef HAVE_STRUCT_IF6_ALIASREQ_IFRA_LIFETIME + addreq.ifra_lifetime.ia6t_pltime = ND6_INFINITE_LIFETIME; + addreq.ifra_lifetime.ia6t_vltime = ND6_INFINITE_LIFETIME; +#endif + + ret = if_ioctl_ipv6(SIOCAIFADDR_IN6, (caddr_t)&addreq); + if (ret < 0) + return 
ret; + return 0; +} + +/* + * Helper for interface-addr un-install, non-netlink + */ +static int if_unset_prefix6_ctx(const struct zebra_dplane_ctx *ctx) +{ + int ret; + struct in6_aliasreq addreq; + struct sockaddr_in6 addr; + struct sockaddr_in6 mask; + struct prefix_ipv6 *p; + + p = (struct prefix_ipv6 *)dplane_ctx_get_intf_addr(ctx); + + memset(&addreq, 0, sizeof(addreq)); + strlcpy((char *)&addreq.ifra_name, + dplane_ctx_get_ifname(ctx), sizeof(addreq.ifra_name)); + + memset(&addr, 0, sizeof(addr)); + addr.sin6_addr = p->prefix; + addr.sin6_family = p->family; +#ifdef HAVE_STRUCT_SOCKADDR_IN_SIN_LEN + addr.sin6_len = sizeof(struct sockaddr_in6); +#endif + memcpy(&addreq.ifra_addr, &addr, sizeof(struct sockaddr_in6)); + + memset(&mask, 0, sizeof(mask)); + masklen2ip6(p->prefixlen, &mask.sin6_addr); + mask.sin6_family = p->family; +#ifdef HAVE_STRUCT_SOCKADDR_IN_SIN_LEN + mask.sin6_len = sizeof(struct sockaddr_in6); +#endif + memcpy(&addreq.ifra_prefixmask, &mask, sizeof(struct sockaddr_in6)); + +#ifdef HAVE_STRUCT_IF6_ALIASREQ_IFRA_LIFETIME + addreq.ifra_lifetime.ia6t_pltime = ND6_INFINITE_LIFETIME; + addreq.ifra_lifetime.ia6t_vltime = ND6_INFINITE_LIFETIME; +#endif + + ret = if_ioctl_ipv6(SIOCDIFADDR_IN6, (caddr_t)&addreq); + if (ret < 0) + return ret; + return 0; +} +#else +/* The old, pre-dataplane code here just returned, so we're retaining that + * choice. + */ +static int if_set_prefix6_ctx(const struct zebra_dplane_ctx *ctx) +{ + return 0; +} + +static int if_unset_prefix6_ctx(const struct zebra_dplane_ctx *ctx) +{ + return 0; +} +#endif /* HAVE_STRUCT_IN6_ALIASREQ */ + +#endif /* LINUX_IPV6 */ diff --git a/zebra/ioctl.h b/zebra/ioctl.h new file mode 100644 index 0000000..debe5ba --- /dev/null +++ b/zebra/ioctl.h @@ -0,0 +1,47 @@ +/* + * Common ioctl functions. + * Copyright (C) 1998 Kunihiro Ishiguro + * + * This file is part of GNU Zebra. 
+ * + * GNU Zebra is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License as published by the + * Free Software Foundation; either version 2, or (at your option) any + * later version. + * + * GNU Zebra is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. + * + * You should have received a copy of the GNU General Public License along + * with this program; see the file COPYING; if not, write to the Free Software + * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA + */ + +#ifndef _ZEBRA_IOCTL_H +#define _ZEBRA_IOCTL_H + +#ifdef __cplusplus +extern "C" { +#endif + +/* Prototypes. */ +extern void ifreq_set_name(struct ifreq *, struct interface *); +extern int if_ioctl(unsigned long, caddr_t); +extern int vrf_if_ioctl(unsigned long request, caddr_t buffer, vrf_id_t vrf_id); + +extern int if_set_flags(struct interface *, uint64_t); +extern int if_unset_flags(struct interface *, uint64_t); +extern void if_get_flags(struct interface *); + +extern void if_get_metric(struct interface *); +extern void if_get_mtu(struct interface *); + +#define AF_IOCTL(af, request, buffer) if_ioctl(request, buffer) + +#ifdef __cplusplus +} +#endif + +#endif /* _ZEBRA_IOCTL_H */ diff --git a/zebra/ipforward.h b/zebra/ipforward.h new file mode 100644 index 0000000..9884678 --- /dev/null +++ b/zebra/ipforward.h @@ -0,0 +1,40 @@ +/* IP forward settings. + * Copyright (C) 1999 Kunihiro Ishiguro + * + * This file is part of GNU Zebra. + * + * GNU Zebra is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License as published by the + * Free Software Foundation; either version 2, or (at your option) any + * later version. 
+ * + * GNU Zebra is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. + * + * You should have received a copy of the GNU General Public License along + * with this program; see the file COPYING; if not, write to the Free Software + * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA + */ + +#ifndef _ZEBRA_IPFORWARD_H +#define _ZEBRA_IPFORWARD_H + +#ifdef __cplusplus +extern "C" { +#endif + +extern int ipforward(void); +extern int ipforward_on(void); +extern int ipforward_off(void); + +extern int ipforward_ipv6(void); +extern int ipforward_ipv6_on(void); +extern int ipforward_ipv6_off(void); + +#ifdef __cplusplus +} +#endif + +#endif /* _ZEBRA_IPFORWARD_H */ diff --git a/zebra/ipforward_proc.c b/zebra/ipforward_proc.c new file mode 100644 index 0000000..4bd160d --- /dev/null +++ b/zebra/ipforward_proc.c @@ -0,0 +1,184 @@ +/* + * Fetch ipforward value by reading /proc filesystem. + * Copyright (C) 1997 Kunihiro Ishiguro + * + * This file is part of GNU Zebra. + * + * GNU Zebra is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License as published by the + * Free Software Foundation; either version 2, or (at your option) any + * later version. + * + * GNU Zebra is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. 
+ * + * You should have received a copy of the GNU General Public License along + * with this program; see the file COPYING; if not, write to the Free Software + * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA + */ + +#include <zebra.h> + +#ifdef GNU_LINUX + +#include "log.h" +#include "privs.h" + +#include "zebra/ipforward.h" + +extern struct zebra_privs_t zserv_privs; + +static const char proc_net_snmp[] = "/proc/net/snmp"; + +static void dropline(FILE *fp) +{ + while (getc(fp) != '\n') + ; +} + +int ipforward(void) +{ + int ret = 0; + FILE *fp; + int ipforwarding = 0; + char buf[10]; + + fp = fopen(proc_net_snmp, "r"); + + if (fp == NULL) + return -1; + + /* We don't care about the first line. */ + dropline(fp); + + /* Get ip_statistics.IpForwarding : + 1 => ip forwarding enabled + 2 => ip forwarding off. */ + if (fgets(buf, 6, fp)) + ret = sscanf(buf, "Ip: %d", &ipforwarding); + + fclose(fp); + + if (ret == 1 && ipforwarding == 1) + return 1; + + return 0; +} + +/* char proc_ipv4_forwarding[] = "/proc/sys/net/ipv4/conf/all/forwarding"; */ +static const char proc_ipv4_forwarding[] = "/proc/sys/net/ipv4/ip_forward"; + +int ipforward_on(void) +{ + FILE *fp; + + frr_with_privs(&zserv_privs) { + + fp = fopen(proc_ipv4_forwarding, "w"); + + if (fp == NULL) { + return -1; + } + + fprintf(fp, "1\n"); + + fclose(fp); + + } + + return ipforward(); +} + +int ipforward_off(void) +{ + FILE *fp; + + frr_with_privs(&zserv_privs) { + + fp = fopen(proc_ipv4_forwarding, "w"); + + if (fp == NULL) { + return -1; + } + + fprintf(fp, "0\n"); + + fclose(fp); + + } + + return ipforward(); +} + +static const char proc_ipv6_forwarding[] = + "/proc/sys/net/ipv6/conf/all/forwarding"; + +int ipforward_ipv6(void) +{ + int ret = 0; + FILE *fp; + char buf[5]; + int ipforwarding = 0; + + fp = fopen(proc_ipv6_forwarding, "r"); + + if (fp == NULL) + return -1; + + if (fgets(buf, 2, fp)) + ret = sscanf(buf, "%d", &ipforwarding); + + fclose(fp); + + if (ret != 1) + 
return 0; + + return ipforwarding; +} + +int ipforward_ipv6_on(void) +{ + FILE *fp; + + frr_with_privs(&zserv_privs) { + + fp = fopen(proc_ipv6_forwarding, "w"); + + if (fp == NULL) { + return -1; + } + + fprintf(fp, "1\n"); + + fclose(fp); + + } + + return ipforward_ipv6(); +} + + +int ipforward_ipv6_off(void) +{ + FILE *fp; + + frr_with_privs(&zserv_privs) { + + fp = fopen(proc_ipv6_forwarding, "w"); + + if (fp == NULL) { + return -1; + } + + fprintf(fp, "0\n"); + + fclose(fp); + + } + + return ipforward_ipv6(); +} + +#endif /* GNU_LINUX */ diff --git a/zebra/ipforward_sysctl.c b/zebra/ipforward_sysctl.c new file mode 100644 index 0000000..0015018 --- /dev/null +++ b/zebra/ipforward_sysctl.c @@ -0,0 +1,144 @@ +/* IP forward control by sysctl function. + * Copyright (C) 1997, 1999 Kunihiro Ishiguro + * + * This file is part of GNU Zebra. + * + * GNU Zebra is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License as published by the + * Free Software Foundation; either version 2, or (at your option) any + * later version. + * + * GNU Zebra is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. + * + * You should have received a copy of the GNU General Public License along + * with this program; see the file COPYING; if not, write to the Free Software + * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA + */ + +#include <zebra.h> + +#if !defined(GNU_LINUX) + +#include "privs.h" +#include "zebra/ipforward.h" +#include "zebra/zebra_errors.h" + +#include "log.h" +#include "lib_errors.h" + +#define MIB_SIZ 4 + +extern struct zebra_privs_t zserv_privs; + +/* IPv4 forwarding control MIB. 
*/ +int mib[MIB_SIZ] = {CTL_NET, PF_INET, IPPROTO_IP, IPCTL_FORWARDING}; + +int ipforward(void) +{ + size_t len; + int ipforwarding = 0; + + len = sizeof(ipforwarding); + if (sysctl(mib, MIB_SIZ, &ipforwarding, &len, 0, 0) < 0) { + flog_err_sys(EC_LIB_SYSTEM_CALL, + "Can't get ipforwarding value"); + return -1; + } + return ipforwarding; +} + +int ipforward_on(void) +{ + size_t len; + int ipforwarding = 1; + + len = sizeof(ipforwarding); + frr_with_privs(&zserv_privs) { + if (sysctl(mib, MIB_SIZ, NULL, NULL, &ipforwarding, len) < 0) { + flog_err_sys(EC_LIB_SYSTEM_CALL, + "Can't set ipforwarding on"); + return -1; + } + } + return ipforwarding; +} + +int ipforward_off(void) +{ + size_t len; + int ipforwarding = 0; + + len = sizeof(ipforwarding); + frr_with_privs(&zserv_privs) { + if (sysctl(mib, MIB_SIZ, NULL, NULL, &ipforwarding, len) < 0) { + flog_err_sys(EC_LIB_SYSTEM_CALL, + "Can't set ipforwarding on"); + return -1; + } + } + return ipforwarding; +} + +/* IPv6 forwarding control MIB. 
*/ +int mib_ipv6[MIB_SIZ] = {CTL_NET, PF_INET6, +#if defined(BSD_V6_SYSCTL) + IPPROTO_IPV6, IPV6CTL_FORWARDING +#else /* NOT BSD_V6_SYSCTL */ + IPPROTO_IP, IP6CTL_FORWARDING +#endif /* BSD_V6_SYSCTL */ +}; + +int ipforward_ipv6(void) +{ + size_t len; + int ip6forwarding = 0; + + len = sizeof(ip6forwarding); + frr_with_privs(&zserv_privs) { + if (sysctl(mib_ipv6, MIB_SIZ, &ip6forwarding, &len, 0, 0) < 0) { + flog_err_sys(EC_LIB_SYSTEM_CALL, + "can't get ip6forwarding value"); + return -1; + } + } + return ip6forwarding; +} + +int ipforward_ipv6_on(void) +{ + size_t len; + int ip6forwarding = 1; + + len = sizeof(ip6forwarding); + frr_with_privs(&zserv_privs) { + if (sysctl(mib_ipv6, MIB_SIZ, NULL, NULL, &ip6forwarding, len) + < 0) { + flog_err_sys(EC_LIB_SYSTEM_CALL, + "can't get ip6forwarding value"); + return -1; + } + } + return ip6forwarding; +} + +int ipforward_ipv6_off(void) +{ + size_t len; + int ip6forwarding = 0; + + len = sizeof(ip6forwarding); + frr_with_privs(&zserv_privs) { + if (sysctl(mib_ipv6, MIB_SIZ, NULL, NULL, &ip6forwarding, len) + < 0) { + flog_err_sys(EC_LIB_SYSTEM_CALL, + "can't get ip6forwarding value"); + return -1; + } + } + return ip6forwarding; +} + +#endif /* !defined(GNU_LINUX) */ diff --git a/zebra/irdp.h b/zebra/irdp.h new file mode 100644 index 0000000..19f549c --- /dev/null +++ b/zebra/irdp.h @@ -0,0 +1,158 @@ +/* ICMP Router Discovery Messages + * Copyright (C) 1997, 2000 Kunihiro Ishiguro + * + * This file is part of GNU Zebra. + * + * GNU Zebra is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License as published by the + * Free Software Foundation; either version 2, or (at your option) any + * later version. + * + * GNU Zebra is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. 
+ * + * You should have received a copy of the GNU General Public License along + * with this program; see the file COPYING; if not, write to the Free Software + * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA + */ + +/* + * This file is modified and completed for the Zebra IRDP implementation + * by Robert Olsson, Swedish University of Agricultural Sciences + */ + +#ifndef _IRDP_H +#define _IRDP_H + +#include "lib/vty.h" + +#ifdef __cplusplus +extern "C" { +#endif + +/* ICMP Messages */ +#ifndef ICMP_ROUTERADVERT +#define ICMP_ROUTERADVERT 9 +#endif /* ICMP_ROUTERADVERT */ + +#ifndef ICMP_ROUTERSOLICIT +#define ICMP_ROUTERSOLICIT 10 +#endif /* ICMP_ROUTERSOLICT */ + +/* Multicast groups */ +#ifndef INADDR_ALLHOSTS_GROUP +#define INADDR_ALLHOSTS_GROUP 0xe0000001U /* 224.0.0.1 */ +#endif /* INADDR_ALLHOSTS_GROUP */ + +#ifndef INADDR_ALLRTRS_GROUP +#define INADDR_ALLRTRS_GROUP 0xe0000002U /* 224.0.0.2 */ +#endif /* INADDR_ALLRTRS_GROUP */ + +/* Default irdp packet interval */ +#define IRDP_DEFAULT_INTERVAL 300 + +/* Router constants from RFC1256 */ +#define MAX_INITIAL_ADVERT_INTERVAL 16 +#define MAX_INITIAL_ADVERTISEMENTS 3 +#define MAX_RESPONSE_DELAY 2 + +#define IRDP_MAXADVERTINTERVAL 600 +#define IRDP_MINADVERTINTERVAL 450 /* 0.75*600 */ +#define IRDP_LIFETIME 1350 /* 3*450 */ +#define IRDP_PREFERENCE 0 + +#define ICMP_MINLEN 8 + +#define IRDP_LAST_ADVERT_MESSAGES 2 /* The last adverts with Holdtime 0 */ + +#define IRDP_RX_BUF 1500 + +/* + Comments comes from RFC1256 ICMP Router Discovery Messages. + + The IP destination address to be used for multicast Router + Advertisements sent from the interface. The only permissible + values are the all-systems multicast address, 224.0.0.1, or the + limited-broadcast address, 255.255.255.255. (The all-systems + address is preferred wherever possible, i.e., on any link where + all listening hosts support IP multicast.) 
+ + Default: 224.0.0.1 if the router supports IP multicast on the + interface, else 255.255.255.255 + + The maximum time allowed between sending multicast Router + Advertisements from the interface, in seconds. Must be no less + than 4 seconds and no greater than 1800 seconds. + + Default: 600 seconds + + The minimum time allowed between sending unsolicited multicast + Router Advertisements from the interface, in seconds. Must be no + less than 3 seconds and no greater than MaxAdvertisementInterval. + + Default: 0.75 * MaxAdvertisementInterval + + The value to be placed in the Lifetime field of Router + Advertisements sent from the interface, in seconds. Must be no + less than MaxAdvertisementInterval and no greater than 9000 + seconds. + + Default: 3 * MaxAdvertisementInterval + + The preferability of the address as a default router address, + relative to other router addresses on the same subnet. A 32-bit, + signed, twos-complement integer, with higher values meaning more + preferable. The minimum value (hex 80000000) is used to indicate + that the address, even though it may be advertised, is not to be + used by neighboring hosts as a default router address. 
+ + Default: 0 +*/ + +struct irdp_interface { + bool started; + + unsigned long MaxAdvertInterval; + unsigned long MinAdvertInterval; + unsigned long Preference; + + uint32_t flags; + +#define IF_ACTIVE (1<<0) /* ICMP Active */ +#define IF_BROADCAST (1<<1) /* 255.255.255.255 */ +#define IF_SOLICIT (1<<2) /* Solicit active */ +#define IF_DEBUG_MESSAGES (1<<3) +#define IF_DEBUG_PACKET (1<<4) +#define IF_DEBUG_MISC (1<<5) +#define IF_SHUTDOWN (1<<6) + + struct interface *ifp; + struct thread *t_advertise; + unsigned long irdp_sent; + uint16_t Lifetime; + + struct list *AdvPrefList; +}; + +struct Adv { + struct in_addr ip; + int pref; +}; + +extern void irdp_if_init(void); +extern int irdp_sock_init(void); +extern int irdp_config_write(struct vty *, struct interface *); +extern void irdp_send_thread(struct thread *t_advert); +extern void irdp_advert_off(struct interface *ifp); +extern void process_solicit(struct interface *ifp); +extern void irdp_read_raw(struct thread *r); +extern void send_packet(struct interface *ifp, struct stream *s, uint32_t dst, + struct prefix *p, uint32_t ttl); + +#ifdef __cplusplus +} +#endif + +#endif /* _IRDP_H */ diff --git a/zebra/irdp_interface.c b/zebra/irdp_interface.c new file mode 100644 index 0000000..28db2ad --- /dev/null +++ b/zebra/irdp_interface.c @@ -0,0 +1,731 @@ +/* + * + * Copyright (C) 1997, 2000 + * Portions: + * Swedish University of Agricultural Sciences + * Robert Olsson + * Kunihiro Ishiguro + * + * Thanks to Jens Laas at Swedish University of Agricultural Sciences + * for reviewing and tests. + * + * This file is part of GNU Zebra. + * + * GNU Zebra is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License as published by the + * Free Software Foundation; either version 2, or (at your option) any + * later version. 
+ * + * GNU Zebra is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. + * + * You should have received a copy of the GNU General Public License along + * with this program; see the file COPYING; if not, write to the Free Software + * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA + */ + +#include <zebra.h> + +#include "if.h" +#include "vty.h" +#include "sockunion.h" +#include "prefix.h" +#include "command.h" +#include "memory.h" +#include "stream.h" +#include "ioctl.h" +#include "connected.h" +#include "log.h" +#include "zclient.h" +#include "thread.h" +#include "lib_errors.h" +#include "zebra/interface.h" +#include "zebra/rtadv.h" +#include "zebra/rib.h" +#include "zebra/zebra_router.h" +#include "zebra/redistribute.h" +#include "zebra/irdp.h" +#include "zebra/zebra_errors.h" +#include <netinet/ip_icmp.h> +#include "if.h" +#include "sockunion.h" +#include "log.h" +#include "network.h" + +extern int irdp_sock; + +DEFINE_MTYPE_STATIC(ZEBRA, IRDP_IF, "IRDP interface data"); + +#define IRDP_CONFIGED \ + do { \ + if (!irdp) { \ + vty_out(vty, \ + "Please Configure IRDP before using this command\n"); \ + return CMD_WARNING_CONFIG_FAILED; \ + } \ + } while (0) + +static struct irdp_interface *irdp_if_get(struct interface *ifp) +{ + struct zebra_if *zi = ifp->info; + + if (!zi) + return NULL; + + if (!zi->irdp) + zi->irdp = XCALLOC(MTYPE_IRDP_IF, sizeof(*zi->irdp)); + + if (!zi->irdp->started) + return NULL; + + return zi->irdp; +} + +static int irdp_if_delete(struct interface *ifp) +{ + struct zebra_if *zi = ifp->info; + if (!zi) + return 0; + XFREE(MTYPE_IRDP_IF, zi->irdp); + return 0; +} + +static const char *inet_2a(uint32_t a, char *b, size_t b_len) +{ + snprintf(b, b_len, "%u.%u.%u.%u", (a)&0xFF, (a >> 8) & 0xFF, + (a >> 16) & 0xFF, (a >> 24) & 0xFF); + return b; 
+} + + +static struct prefix *irdp_get_prefix(struct interface *ifp) +{ + struct listnode *node; + struct connected *ifc; + + if (ifp->connected) + for (ALL_LIST_ELEMENTS_RO(ifp->connected, node, ifc)) + return ifc->address; + + return NULL; +} + +/* Join to the add/leave multicast group. */ +static int if_group(struct interface *ifp, int sock, uint32_t group, + int add_leave) +{ + struct ip_mreq m; + struct prefix *p; + int ret; + char b1[INET_ADDRSTRLEN]; + + memset(&m, 0, sizeof(m)); + m.imr_multiaddr.s_addr = htonl(group); + p = irdp_get_prefix(ifp); + + if (!p) { + flog_warn(EC_ZEBRA_NO_IFACE_ADDR, + "IRDP: can't get address for %s", ifp->name); + return 1; + } + + m.imr_interface = p->u.prefix4; + + ret = setsockopt(sock, IPPROTO_IP, add_leave, (char *)&m, + sizeof(struct ip_mreq)); + if (ret < 0) + flog_err_sys(EC_LIB_SOCKET, "IRDP: %s can't setsockopt %s: %s", + add_leave == IP_ADD_MEMBERSHIP ? "join group" + : "leave group", + inet_2a(group, b1, sizeof(b1)), + safe_strerror(errno)); + + return ret; +} + +static int if_add_group(struct interface *ifp) +{ + struct zebra_if *zi = ifp->info; + struct irdp_interface *irdp = zi->irdp; + int ret; + char b1[INET_ADDRSTRLEN]; + + if (!irdp) + return -1; + + ret = if_group(ifp, irdp_sock, INADDR_ALLRTRS_GROUP, IP_ADD_MEMBERSHIP); + if (ret < 0) { + return ret; + } + + if (irdp->flags & IF_DEBUG_MISC) + zlog_debug("IRDP: Adding group %s for %s", + inet_2a(htonl(INADDR_ALLRTRS_GROUP), b1, sizeof(b1)), + ifp->name); + return 0; +} + +static int if_drop_group(struct interface *ifp) +{ + struct zebra_if *zi = ifp->info; + struct irdp_interface *irdp = zi->irdp; + int ret; + char b1[INET_ADDRSTRLEN]; + + if (!irdp) + return -1; + + ret = if_group(ifp, irdp_sock, INADDR_ALLRTRS_GROUP, + IP_DROP_MEMBERSHIP); + if (ret < 0) + return ret; + + if (irdp->flags & IF_DEBUG_MISC) + zlog_debug("IRDP: Leaving group %s for %s", + inet_2a(htonl(INADDR_ALLRTRS_GROUP), b1, sizeof(b1)), + ifp->name); + return 0; +} + +static void 
if_set_defaults(struct irdp_interface *irdp) +{ + irdp->MaxAdvertInterval = IRDP_MAXADVERTINTERVAL; + irdp->MinAdvertInterval = IRDP_MINADVERTINTERVAL; + irdp->Preference = IRDP_PREFERENCE; + irdp->Lifetime = IRDP_LIFETIME; +} + + +static struct Adv *Adv_new(void) +{ + return XCALLOC(MTYPE_TMP, sizeof(struct Adv)); +} + +static void Adv_free(struct Adv *adv) +{ + XFREE(MTYPE_TMP, adv); +} + +static void irdp_if_start(struct interface *ifp, int multicast, + int set_defaults) +{ + struct zebra_if *zi = ifp->info; + struct irdp_interface *irdp = zi->irdp; + struct listnode *node; + struct connected *ifc; + uint32_t timer, seed; + + assert(irdp); + + irdp->started = true; + if (irdp->flags & IF_ACTIVE) { + zlog_debug("IRDP: Interface is already active %s", ifp->name); + return; + } + if ((irdp_sock < 0) && ((irdp_sock = irdp_sock_init()) < 0)) { + flog_warn(EC_ZEBRA_IRDP_CANNOT_ACTIVATE_IFACE, + "IRDP: Cannot activate interface %s (cannot create IRDP socket)", + ifp->name); + return; + } + irdp->flags |= IF_ACTIVE; + + if (!multicast) + irdp->flags |= IF_BROADCAST; + + if_add_update(ifp); + + if (!(ifp->flags & IFF_UP)) { + flog_warn(EC_ZEBRA_IRDP_IFACE_DOWN, + "IRDP: Interface is down %s", ifp->name); + } + + /* Shall we cancel if_start if if_add_group fails? */ + + if (multicast) { + if_add_group(ifp); + + if (!(ifp->flags & (IFF_MULTICAST | IFF_ALLMULTI))) { + flog_warn(EC_ZEBRA_IRDP_IFACE_MCAST_DISABLED, + "IRDP: Interface not multicast enabled %s", + ifp->name); + } + } + + if (set_defaults) + if_set_defaults(irdp); + + irdp->irdp_sent = 0; + + /* The spec suggests this for randomness */ + + seed = 0; + if (ifp->connected) + for (ALL_LIST_ELEMENTS_RO(ifp->connected, node, ifc)) { + seed = ifc->address->u.prefix4.s_addr; + break; + } + + srandom(seed); + timer = (frr_weak_random() % IRDP_DEFAULT_INTERVAL) + 1; + + irdp->AdvPrefList = list_new(); + irdp->AdvPrefList->del = (void (*)(void *))Adv_free; /* Destructor */ + + + /* And this for startup. 
Speed limit from 1991 :-). But it's OK*/ + + if (irdp->irdp_sent < MAX_INITIAL_ADVERTISEMENTS + && timer > MAX_INITIAL_ADVERT_INTERVAL) + timer = MAX_INITIAL_ADVERT_INTERVAL; + + + if (irdp->flags & IF_DEBUG_MISC) + zlog_debug("IRDP: Init timer for %s set to %u", ifp->name, + timer); + + irdp->t_advertise = NULL; + thread_add_timer(zrouter.master, irdp_send_thread, ifp, timer, + &irdp->t_advertise); +} + +static void irdp_if_stop(struct interface *ifp) +{ + struct zebra_if *zi = ifp->info; + struct irdp_interface *irdp = zi->irdp; + + if (irdp == NULL) { + zlog_debug("Interface %s structure is NULL", ifp->name); + return; + } + + if (!(irdp->flags & IF_ACTIVE)) { + zlog_debug("Interface is not active %s", ifp->name); + return; + } + + if (!(irdp->flags & IF_BROADCAST)) + if_drop_group(ifp); + + irdp_advert_off(ifp); + + list_delete(&irdp->AdvPrefList); + + irdp->flags = 0; +} + + +static void irdp_if_shutdown(struct interface *ifp) +{ + struct zebra_if *zi = ifp->info; + struct irdp_interface *irdp = zi->irdp; + + if (!irdp) + return; + + if (irdp->flags & IF_SHUTDOWN) { + zlog_debug("IRDP: Interface is already shutdown %s", ifp->name); + return; + } + + irdp->flags |= IF_SHUTDOWN; + irdp->flags &= ~IF_ACTIVE; + + if (!(irdp->flags & IF_BROADCAST)) + if_drop_group(ifp); + + /* Tell the hosts we are out of service */ + irdp_advert_off(ifp); +} + +static void irdp_if_no_shutdown(struct interface *ifp) +{ + struct irdp_interface *irdp = irdp_if_get(ifp); + + if (!irdp) + return; + + if (!(irdp->flags & IF_SHUTDOWN)) { + zlog_debug("IRDP: Interface is not shutdown %s", ifp->name); + return; + } + + irdp->flags &= ~IF_SHUTDOWN; + + irdp_if_start(ifp, irdp->flags & IF_BROADCAST ? 
false : true, false); +} + + +/* Write configuration to user */ + +int irdp_config_write(struct vty *vty, struct interface *ifp) +{ + struct zebra_if *zi = ifp->info; + struct irdp_interface *irdp = zi->irdp; + struct Adv *adv; + struct listnode *node; + char b1[INET_ADDRSTRLEN]; + + if (!irdp) + return 0; + + if (irdp->flags & IF_ACTIVE || irdp->flags & IF_SHUTDOWN) { + + if (irdp->flags & IF_SHUTDOWN) + vty_out(vty, " ip irdp shutdown \n"); + + if (irdp->flags & IF_BROADCAST) + vty_out(vty, " ip irdp broadcast\n"); + else + vty_out(vty, " ip irdp multicast\n"); + + vty_out(vty, " ip irdp preference %ld\n", irdp->Preference); + + for (ALL_LIST_ELEMENTS_RO(irdp->AdvPrefList, node, adv)) + vty_out(vty, " ip irdp address %s preference %d\n", + inet_2a(adv->ip.s_addr, b1, sizeof(b1)), + adv->pref); + + vty_out(vty, " ip irdp holdtime %d\n", irdp->Lifetime); + + vty_out(vty, " ip irdp minadvertinterval %ld\n", + irdp->MinAdvertInterval); + + vty_out(vty, " ip irdp maxadvertinterval %ld\n", + irdp->MaxAdvertInterval); + } + return 0; +} + + +DEFUN (ip_irdp_multicast, + ip_irdp_multicast_cmd, + "ip irdp multicast", + IP_STR + "ICMP Router discovery on this interface\n" + "Use multicast mode\n") +{ + VTY_DECLVAR_CONTEXT(interface, ifp); + irdp_if_get(ifp); + + irdp_if_start(ifp, true, true); + return CMD_SUCCESS; +} + +DEFUN (ip_irdp_broadcast, + ip_irdp_broadcast_cmd, + "ip irdp broadcast", + IP_STR + "ICMP Router discovery on this interface\n" + "Use broadcast mode\n") +{ + VTY_DECLVAR_CONTEXT(interface, ifp); + irdp_if_get(ifp); + + irdp_if_start(ifp, false, true); + return CMD_SUCCESS; +} + +DEFUN (no_ip_irdp, + no_ip_irdp_cmd, + "no ip irdp", + NO_STR + IP_STR + "Disable ICMP Router discovery on this interface\n") +{ + VTY_DECLVAR_CONTEXT(interface, ifp); + + irdp_if_stop(ifp); + return CMD_SUCCESS; +} + +DEFUN (ip_irdp_shutdown, + ip_irdp_shutdown_cmd, + "ip irdp shutdown", + IP_STR + "ICMP Router discovery on this interface\n" + "ICMP Router discovery shutdown on 
this interface\n") +{ + VTY_DECLVAR_CONTEXT(interface, ifp); + + irdp_if_shutdown(ifp); + return CMD_SUCCESS; +} + +DEFUN (no_ip_irdp_shutdown, + no_ip_irdp_shutdown_cmd, + "no ip irdp shutdown", + NO_STR + IP_STR + "ICMP Router discovery on this interface\n" + "ICMP Router discovery no shutdown on this interface\n") +{ + VTY_DECLVAR_CONTEXT(interface, ifp); + + irdp_if_no_shutdown(ifp); + return CMD_SUCCESS; +} + +DEFUN (ip_irdp_holdtime, + ip_irdp_holdtime_cmd, + "ip irdp holdtime (0-9000)", + IP_STR + "ICMP Router discovery on this interface\n" + "Set holdtime value\n" + "Holdtime value in seconds. Default is 1800 seconds\n") +{ + int idx_number = 3; + VTY_DECLVAR_CONTEXT(interface, ifp); + struct irdp_interface *irdp = irdp_if_get(ifp); + + IRDP_CONFIGED; + + irdp->Lifetime = atoi(argv[idx_number]->arg); + return CMD_SUCCESS; +} + +DEFUN (ip_irdp_minadvertinterval, + ip_irdp_minadvertinterval_cmd, + "ip irdp minadvertinterval (3-1800)", + IP_STR + "ICMP Router discovery on this interface\n" + "Set minimum time between advertisement\n" + "Minimum advertisement interval in seconds\n") +{ + int idx_number = 3; + VTY_DECLVAR_CONTEXT(interface, ifp); + struct irdp_interface *irdp = irdp_if_get(ifp); + + IRDP_CONFIGED; + + if ((unsigned)atoi(argv[idx_number]->arg) <= irdp->MaxAdvertInterval) { + irdp->MinAdvertInterval = atoi(argv[idx_number]->arg); + return CMD_SUCCESS; + } else { + vty_out(vty, + "%% MinAdvertInterval must be less than or equal to MaxAdvertInterval\n"); + return CMD_WARNING_CONFIG_FAILED; + } +} + +DEFUN (ip_irdp_maxadvertinterval, + ip_irdp_maxadvertinterval_cmd, + "ip irdp maxadvertinterval (4-1800)", + IP_STR + "ICMP Router discovery on this interface\n" + "Set maximum time between advertisement\n" + "Maximum advertisement interval in seconds\n") +{ + int idx_number = 3; + VTY_DECLVAR_CONTEXT(interface, ifp); + struct irdp_interface *irdp = irdp_if_get(ifp); + + IRDP_CONFIGED; + + if (irdp->MinAdvertInterval <= 
(unsigned)atoi(argv[idx_number]->arg)) { + irdp->MaxAdvertInterval = atoi(argv[idx_number]->arg); + return CMD_SUCCESS; + } else { + vty_out(vty, + "%% MaxAdvertInterval must be greater than or equal to MinAdvertInterval\n"); + return CMD_WARNING_CONFIG_FAILED; + } +} + +/* DEFUN needs to be fixed for negative ranages... + * "ip irdp preference <-2147483648-2147483647>", + * Be positive for now. :-) + */ + +DEFUN (ip_irdp_preference, + ip_irdp_preference_cmd, + "ip irdp preference (0-2147483647)", + IP_STR + "ICMP Router discovery on this interface\n" + "Set default preference level for this interface\n" + "Preference level\n") +{ + int idx_number = 3; + VTY_DECLVAR_CONTEXT(interface, ifp); + struct irdp_interface *irdp = irdp_if_get(ifp); + + IRDP_CONFIGED; + + irdp->Preference = atoi(argv[idx_number]->arg); + return CMD_SUCCESS; +} + +DEFUN (ip_irdp_address_preference, + ip_irdp_address_preference_cmd, + "ip irdp address A.B.C.D preference (0-2147483647)", + IP_STR + "Alter ICMP Router discovery preference on this interface\n" + "Set IRDP address for advertise\n" + "IPv4 address\n" + "Specify IRDP non-default preference to advertise\n" + "Preference level\n") +{ + int idx_ipv4 = 3; + int idx_number = 5; + VTY_DECLVAR_CONTEXT(interface, ifp); + struct irdp_interface *irdp = irdp_if_get(ifp); + struct listnode *node; + struct in_addr ip; + int pref; + int ret; + struct Adv *adv; + + IRDP_CONFIGED; + + ret = inet_aton(argv[idx_ipv4]->arg, &ip); + if (!ret) + return CMD_WARNING_CONFIG_FAILED; + + pref = atoi(argv[idx_number]->arg); + + for (ALL_LIST_ELEMENTS_RO(irdp->AdvPrefList, node, adv)) + if (adv->ip.s_addr == ip.s_addr) + return CMD_SUCCESS; + + adv = Adv_new(); + adv->ip = ip; + adv->pref = pref; + listnode_add(irdp->AdvPrefList, adv); + + return CMD_SUCCESS; +} + +DEFUN (no_ip_irdp_address_preference, + no_ip_irdp_address_preference_cmd, + "no ip irdp address A.B.C.D preference (0-2147483647)", + NO_STR + IP_STR + "Alter ICMP Router discovery preference on 
this interface\n" + "Select IRDP address\n" + "IPv4 address\n" + "Reset ICMP Router discovery preference on this interface\n" + "Old preference level\n") +{ + int idx_ipv4 = 4; + VTY_DECLVAR_CONTEXT(interface, ifp); + struct irdp_interface *irdp = irdp_if_get(ifp); + struct listnode *node, *nnode; + struct in_addr ip; + int ret; + struct Adv *adv; + + IRDP_CONFIGED; + + ret = inet_aton(argv[idx_ipv4]->arg, &ip); + if (!ret) + return CMD_WARNING_CONFIG_FAILED; + + for (ALL_LIST_ELEMENTS(irdp->AdvPrefList, node, nnode, adv)) { + if (adv->ip.s_addr == ip.s_addr) { + listnode_delete(irdp->AdvPrefList, adv); + break; + } + } + + return CMD_SUCCESS; +} + +DEFUN (ip_irdp_debug_messages, + ip_irdp_debug_messages_cmd, + "ip irdp debug messages", + IP_STR + "ICMP Router discovery debug Averts. and Solicits (short)\n" + "IRDP debugging options\n" + "Enable debugging for IRDP messages\n") +{ + VTY_DECLVAR_CONTEXT(interface, ifp); + struct irdp_interface *irdp = irdp_if_get(ifp); + + IRDP_CONFIGED; + + irdp->flags |= IF_DEBUG_MESSAGES; + + return CMD_SUCCESS; +} + +DEFUN (ip_irdp_debug_misc, + ip_irdp_debug_misc_cmd, + "ip irdp debug misc", + IP_STR + "ICMP Router discovery debug Averts. and Solicits (short)\n" + "IRDP debugging options\n" + "Enable debugging for miscellaneous IRDP events\n") +{ + VTY_DECLVAR_CONTEXT(interface, ifp); + struct irdp_interface *irdp = irdp_if_get(ifp); + + IRDP_CONFIGED; + + irdp->flags |= IF_DEBUG_MISC; + + return CMD_SUCCESS; +} + +DEFUN (ip_irdp_debug_packet, + ip_irdp_debug_packet_cmd, + "ip irdp debug packet", + IP_STR + "ICMP Router discovery debug Averts. 
and Solicits (short)\n" + "IRDP debugging options\n" + "Enable debugging for IRDP packets\n") +{ + VTY_DECLVAR_CONTEXT(interface, ifp); + struct irdp_interface *irdp = irdp_if_get(ifp); + + IRDP_CONFIGED; + + irdp->flags |= IF_DEBUG_PACKET; + + return CMD_SUCCESS; +} + + +DEFUN (ip_irdp_debug_disable, + ip_irdp_debug_disable_cmd, + "ip irdp debug disable", + IP_STR + "ICMP Router discovery debug Averts. and Solicits (short)\n" + "IRDP debugging options\n" + "Disable debugging for all IRDP events\n") +{ + VTY_DECLVAR_CONTEXT(interface, ifp); + struct irdp_interface *irdp = irdp_if_get(ifp); + + IRDP_CONFIGED; + + irdp->flags &= ~IF_DEBUG_PACKET; + irdp->flags &= ~IF_DEBUG_MESSAGES; + irdp->flags &= ~IF_DEBUG_MISC; + + return CMD_SUCCESS; +} + +void irdp_if_init(void) +{ + hook_register(zebra_if_config_wr, irdp_config_write); + hook_register(if_del, irdp_if_delete); + + install_element(INTERFACE_NODE, &ip_irdp_broadcast_cmd); + install_element(INTERFACE_NODE, &ip_irdp_multicast_cmd); + install_element(INTERFACE_NODE, &no_ip_irdp_cmd); + install_element(INTERFACE_NODE, &ip_irdp_shutdown_cmd); + install_element(INTERFACE_NODE, &no_ip_irdp_shutdown_cmd); + install_element(INTERFACE_NODE, &ip_irdp_holdtime_cmd); + install_element(INTERFACE_NODE, &ip_irdp_maxadvertinterval_cmd); + install_element(INTERFACE_NODE, &ip_irdp_minadvertinterval_cmd); + install_element(INTERFACE_NODE, &ip_irdp_preference_cmd); + install_element(INTERFACE_NODE, &ip_irdp_address_preference_cmd); + install_element(INTERFACE_NODE, &no_ip_irdp_address_preference_cmd); + + install_element(INTERFACE_NODE, &ip_irdp_debug_messages_cmd); + install_element(INTERFACE_NODE, &ip_irdp_debug_misc_cmd); + install_element(INTERFACE_NODE, &ip_irdp_debug_packet_cmd); + install_element(INTERFACE_NODE, &ip_irdp_debug_disable_cmd); +} diff --git a/zebra/irdp_main.c b/zebra/irdp_main.c new file mode 100644 index 0000000..65aad49 --- /dev/null +++ b/zebra/irdp_main.c @@ -0,0 +1,350 @@ +/* + * + * Copyright (C) 2000 
Robert Olsson. + * Swedish University of Agricultural Sciences + * + * This file is part of GNU Zebra. + * + * GNU Zebra is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License as published by the + * Free Software Foundation; either version 2, or (at your option) any + * later version. + * + * GNU Zebra is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. + * + * You should have received a copy of the GNU General Public License along + * with this program; see the file COPYING; if not, write to the Free Software + * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA + */ + +/* + * This work includes work with the following copywrite: + * + * Copyright (C) 1997, 2000 Kunihiro Ishiguro + * + */ + +/* + * Thanks to Jens Laas at Swedish University of Agricultural Sciences + * for reviewing and tests. + */ + + +#include <zebra.h> + +#include "if.h" +#include "vty.h" +#include "sockunion.h" +#include "sockopt.h" +#include "prefix.h" +#include "command.h" +#include "memory.h" +#include "stream.h" +#include "ioctl.h" +#include "connected.h" +#include "log.h" +#include "zclient.h" +#include "thread.h" +#include "privs.h" +#include "libfrr.h" +#include "lib_errors.h" +#include "lib/version.h" +#include "zebra/interface.h" +#include "zebra/rtadv.h" +#include "zebra/rib.h" +#include "zebra/zebra_router.h" +#include "zebra/redistribute.h" +#include "zebra/irdp.h" +#include "zebra/zebra_errors.h" +#include <netinet/ip_icmp.h> + +#include "checksum.h" +#include "if.h" +#include "sockunion.h" +#include "log.h" +#include "network.h" + +/* GLOBAL VARS */ + +extern struct zebra_privs_t zserv_privs; + +struct thread *t_irdp_raw; + +/* Timer interval of irdp. 
 */
int irdp_timer_interval = IRDP_DEFAULT_INTERVAL;

/* Create the single raw ICMP socket used for IRDP TX and RX (needs
 * elevated privileges), force TTL 1 as RFC 1256 requires link-local
 * scope, enable ifindex delivery, and register the read handler.
 * Returns the fd, or a negative value on failure. */
int irdp_sock_init(void)
{
	int ret, i;
	int save_errno;
	int sock;

	frr_with_privs(&zserv_privs) {

		sock = socket(AF_INET, SOCK_RAW, IPPROTO_ICMP);
		/* errno may be clobbered when privileges are dropped;
		 * capture it inside the privileged section. */
		save_errno = errno;

	}

	if (sock < 0) {
		flog_err_sys(EC_LIB_SOCKET, "IRDP: can't create irdp socket %s",
			     safe_strerror(save_errno));
		return sock;
	};

	i = 1;
	ret = setsockopt(sock, IPPROTO_IP, IP_TTL, (void *)&i, sizeof(i));
	if (ret < 0) {
		flog_err_sys(EC_LIB_SOCKET, "IRDP: can't do irdp sockopt %s",
			     safe_strerror(errno));
		close(sock);
		return ret;
	};

	ret = setsockopt_ifindex(AF_INET, sock, 1);
	if (ret < 0) {
		flog_err_sys(EC_LIB_SOCKET, "IRDP: can't do irdp sockopt %s",
			     safe_strerror(errno));
		close(sock);
		return ret;
	};

	thread_add_read(zrouter.master, irdp_read_raw, NULL, sock, &t_irdp_raw);

	return sock;
}


/* Return the preference to advertise for prefix p: a per-address
 * override from AdvPrefList when one matches, else the interface
 * default Preference. */
static int get_pref(struct irdp_interface *irdp, struct prefix *p)
{
	struct listnode *node;
	struct Adv *adv;

	/* Use default preference or use the override pref */

	if (irdp->AdvPrefList == NULL)
		return irdp->Preference;

	for (ALL_LIST_ELEMENTS_RO(irdp->AdvPrefList, node, adv))
		if (p->u.prefix4.s_addr == adv->ip.s_addr)
			return adv->pref;

	return irdp->Preference;
}

/* Make ICMP Router Advertisement Message.  Builds a one-address
 * advertisement for p into stream s; the lifetime is forced to 0 when
 * the interface is administratively shut down.  Returns the message
 * length in bytes (always 16: 8-byte ICMP header + address + pref). */
static int make_advertisement_packet(struct interface *ifp, struct prefix *p,
				     struct stream *s)
{
	struct zebra_if *zi = ifp->info;
	struct irdp_interface *irdp = zi->irdp;
	int size;
	int pref;
	uint16_t checksum;

	pref = get_pref(irdp, p);

	stream_putc(s, ICMP_ROUTERADVERT); /* Type. */
	stream_putc(s, 0);		   /* Code. */
	stream_putw(s, 0);		   /* Checksum. */
	stream_putc(s, 1);		   /* Num address. */
	stream_putc(s, 2);		   /* Address Entry Size. */

	if (irdp->flags & IF_SHUTDOWN)
		stream_putw(s, 0);
	else
		stream_putw(s, irdp->Lifetime);

	/* stream_putl converts to network order itself; s_addr is already
	 * network order, so the htonl here compensates for that swap. */
	stream_putl(s, htonl(p->u.prefix4.s_addr)); /* Router address. */
	stream_putl(s, pref);

	/* in_cksum return network byte order value */
	size = 16;
	checksum = in_cksum(s->data, size);
	stream_putw_at(s, 2, htons(checksum));

	return size;
}

/* Transmit the prepared advertisement in s for address p, to the
 * all-hosts multicast group or the broadcast address depending on the
 * interface's IF_BROADCAST flag.  TTL is fixed at 1. */
static void irdp_send(struct interface *ifp, struct prefix *p, struct stream *s)
{
	struct zebra_if *zi = ifp->info;
	struct irdp_interface *irdp = zi->irdp;
	uint32_t dst;
	uint32_t ttl = 1;

	if (!irdp)
		return;
	if (!(ifp->flags & IFF_UP))
		return;

	if (irdp->flags & IF_BROADCAST)
		dst = INADDR_BROADCAST;
	else
		dst = htonl(INADDR_ALLHOSTS_GROUP);

	if (irdp->flags & IF_DEBUG_MESSAGES)
		zlog_debug(
			"IRDP: TX Advert on %s %pFX Holdtime=%d Preference=%d",
			ifp->name, p,
			irdp->flags & IF_SHUTDOWN ? 0 : irdp->Lifetime,
			get_pref(irdp, p));

	send_packet(ifp, s, dst, p, ttl);
}

/* Build and send one advertisement for prefix p. */
static void irdp_advertisement(struct interface *ifp, struct prefix *p)
{
	struct stream *s;
	s = stream_new(128);
	make_advertisement_packet(ifp, p, s);
	irdp_send(ifp, p, s);
	stream_free(s);
}

/* Advertisement timer handler: advertise every IPv4 connected address
 * on the interface, then re-arm itself with a randomized interval in
 * [MinAdvertInterval, MaxAdvertInterval], clamped to
 * MAX_INITIAL_ADVERT_INTERVAL during the initial burst. */
void irdp_send_thread(struct thread *t_advert)
{
	uint32_t timer, tmp;
	struct interface *ifp = THREAD_ARG(t_advert);
	struct zebra_if *zi = ifp->info;
	struct irdp_interface *irdp = zi->irdp;
	struct prefix *p;
	struct listnode *node, *nnode;
	struct connected *ifc;

	if (!irdp)
		return;

	irdp->flags &= ~IF_SOLICIT;

	if (ifp->connected)
		for (ALL_LIST_ELEMENTS(ifp->connected, node, nnode, ifc)) {
			p = ifc->address;

			if (p->family != AF_INET)
				continue;

			irdp_advertisement(ifp, p);
			irdp->irdp_sent++;
		}

	tmp = irdp->MaxAdvertInterval - irdp->MinAdvertInterval;
	timer = frr_weak_random() % (tmp + 1);
	timer = irdp->MinAdvertInterval + timer;

	if (irdp->irdp_sent < MAX_INITIAL_ADVERTISEMENTS
	    && timer > MAX_INITIAL_ADVERT_INTERVAL)
		timer = MAX_INITIAL_ADVERT_INTERVAL;

	if (irdp->flags & IF_DEBUG_MISC)
		zlog_debug("IRDP: New timer for %s set to %u", ifp->name,
			   timer);

	irdp->t_advertise = NULL;
	thread_add_timer(zrouter.master, irdp_send_thread, ifp, timer,
			 &irdp->t_advertise);
}

/* Stop advertising on ifp: cancel the timer and send a burst of
 * zero-lifetime advertisements so neighbors drop us promptly. */
void irdp_advert_off(struct interface *ifp)
{
	struct zebra_if *zi = ifp->info;
	struct irdp_interface *irdp = zi->irdp;
	struct listnode *node, *nnode;
	int i;
	struct connected *ifc;
	struct prefix *p;

	if (!irdp)
		return;

	THREAD_OFF(irdp->t_advertise);

	if (ifp->connected)
		for (ALL_LIST_ELEMENTS(ifp->connected, node, nnode, ifc)) {
			p = ifc->address;

			/* Output some packets with Lifetime 0
			   we should add a wait...
			*/

			for (i = 0; i < IRDP_LAST_ADVERT_MESSAGES; i++) {
				irdp->irdp_sent++;
				irdp_advertisement(ifp, p);
			}
		}
}


/* React to a received Router Solicitation by scheduling a single
 * randomly delayed advertisement.  While IF_SOLICIT is set further
 * solicits are ignored, which rate-limits responses. */
void process_solicit(struct interface *ifp)
{
	struct zebra_if *zi = ifp->info;
	struct irdp_interface *irdp = zi->irdp;
	uint32_t timer;

	if (!irdp)
		return;

	/* When SOLICIT is active we reject further incoming solicits
	   this keeps down the answering rate so we don't have think
	   about DoS attacks here. */

	if (irdp->flags & IF_SOLICIT)
		return;

	irdp->flags |= IF_SOLICIT;
	THREAD_OFF(irdp->t_advertise);

	timer = (frr_weak_random() % MAX_RESPONSE_DELAY) + 1;

	irdp->t_advertise = NULL;
	thread_add_timer(zrouter.master, irdp_send_thread, ifp, timer,
			 &irdp->t_advertise);
}

/* Shutdown hook: mark every active IRDP interface as shut down and
 * emit the final zero-lifetime advertisements. */
static int irdp_finish(void)
{
	struct vrf *vrf;
	struct interface *ifp;
	struct zebra_if *zi;
	struct irdp_interface *irdp;

	zlog_info("IRDP: Received shutdown notification.");

	RB_FOREACH (vrf, vrf_id_head, &vrfs_by_id)
		FOR_ALL_INTERFACES (vrf, ifp) {
			zi = ifp->info;

			if (!zi)
				continue;
			irdp = zi->irdp;
			if (!irdp)
				continue;

			if (irdp->flags & IF_ACTIVE) {
				irdp->flags |= IF_SHUTDOWN;
				irdp_advert_off(ifp);
			}
		}
	return 0;
}

/* Late-init hook: install the CLI and register the shutdown hook. */
static int irdp_init(struct thread_master *master)
{
	irdp_if_init();

	hook_register(frr_early_fini, irdp_finish);
	return 0;
}

/* Module load entry point: defer real initialization to frr_late_init. */
static int irdp_module_init(void)
{
	hook_register(frr_late_init, irdp_init);
	return 0;
}

+FRR_MODULE_SETUP(.name = "zebra_irdp", .version = FRR_VERSION, + .description = "zebra IRDP module", .init = irdp_module_init, +); diff --git a/zebra/irdp_packet.c b/zebra/irdp_packet.c new file mode 100644 index 0000000..c27d97b --- /dev/null +++ b/zebra/irdp_packet.c @@ -0,0 +1,367 @@ +/* + * + * Copyright (C) 2000 Robert Olsson. + * Swedish University of Agricultural Sciences + * + * This file is part of GNU Zebra. + * + * GNU Zebra is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License as published by the + * Free Software Foundation; either version 2, or (at your option) any + * later version. + * + * GNU Zebra is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. + * + * You should have received a copy of the GNU General Public License along + * with this program; see the file COPYING; if not, write to the Free Software + * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA + */ + +/* + * This work includes work with the following copywrite: + * + * Copyright (C) 1997, 2000 Kunihiro Ishiguro + * + */ + +/* + * Thanks to Jens Laas at Swedish University of Agricultural Sciences + * for reviewing and tests. 
+ */ + + +#include <zebra.h> +#include <netinet/ip_icmp.h> + +#include "checksum.h" +#include "command.h" +#include "connected.h" +#include "if.h" +#include "ioctl.h" +#include "log.h" +#include "log.h" +#include "memory.h" +#include "prefix.h" +#include "sockopt.h" +#include "sockunion.h" +#include "sockunion.h" +#include "stream.h" +#include "thread.h" +#include "vty.h" +#include "zclient.h" +#include "lib_errors.h" + +#include "zebra/interface.h" +#include "zebra/rtadv.h" +#include "zebra/rib.h" +#include "zebra/zebra_router.h" +#include "zebra/redistribute.h" +#include "zebra/irdp.h" +#include "zebra/zebra_errors.h" + + +/* GLOBAL VARS */ + +int irdp_sock = -1; + +extern struct thread *t_irdp_raw; + +static void parse_irdp_packet(char *p, int len, struct interface *ifp) +{ + struct ip *ip = (struct ip *)p; + struct icmphdr *icmp; + struct in_addr src; + int ip_hlen, iplen, datalen; + struct zebra_if *zi; + struct irdp_interface *irdp; + uint16_t saved_chksum; + char buf[PREFIX_STRLEN]; + + zi = ifp->info; + if (!zi) + return; + + irdp = zi->irdp; + if (!irdp) + return; + + ip_hlen = ip->ip_hl << 2; + + sockopt_iphdrincl_swab_systoh(ip); + + iplen = ip->ip_len; + datalen = len - ip_hlen; + src = ip->ip_src; + + if (len != iplen) { + flog_err(EC_ZEBRA_IRDP_LEN_MISMATCH, + "IRDP: RX length doesn't match IP length"); + return; + } + + if (iplen < ICMP_MINLEN) { + flog_err(EC_ZEBRA_IRDP_LEN_MISMATCH, + "IRDP: RX ICMP packet too short from %pI4", + &src); + return; + } + + /* XXX: RAW doesn't receive link-layer, surely? ??? 
*/ + /* Check so we don't checksum packets longer than oure RX_BUF - (ethlen + + + len of IP-header) 14+20 */ + if (iplen > IRDP_RX_BUF - 34) { + flog_err(EC_ZEBRA_IRDP_LEN_MISMATCH, + "IRDP: RX ICMP packet too long from %pI4", + &src); + return; + } + + icmp = (struct icmphdr *)(p + ip_hlen); + + saved_chksum = icmp->checksum; + icmp->checksum = 0; + /* check icmp checksum */ + if (in_cksum(icmp, datalen) != saved_chksum) { + flog_warn( + EC_ZEBRA_IRDP_BAD_CHECKSUM, + "IRDP: RX ICMP packet from %pI4 Bad checksum, silently ignored", + &src); + return; + } + + /* Handle just only IRDP */ + if (!(icmp->type == ICMP_ROUTERADVERT + || icmp->type == ICMP_ROUTERSOLICIT)) + return; + + if (icmp->code != 0) { + flog_warn( + EC_ZEBRA_IRDP_BAD_TYPE_CODE, + "IRDP: RX packet type %d from %pI4 Bad ICMP type code, silently ignored", + icmp->type, &src); + return; + } + + if (!((ntohl(ip->ip_dst.s_addr) == INADDR_BROADCAST) + && (irdp->flags & IF_BROADCAST)) + || (ntohl(ip->ip_dst.s_addr) == INADDR_ALLRTRS_GROUP + && !(irdp->flags & IF_BROADCAST))) { + flog_warn( + EC_ZEBRA_IRDP_BAD_RX_FLAGS, + "IRDP: RX illegal from %pI4 to %s while %s operates in %s; Please correct settings", + &src, + ntohl(ip->ip_dst.s_addr) == INADDR_ALLRTRS_GROUP + ? "multicast" + : inet_ntop(AF_INET, &ip->ip_dst, + buf, sizeof(buf)), + ifp->name, + irdp->flags & IF_BROADCAST ? 
"broadcast" : "multicast"); + return; + } + + switch (icmp->type) { + case ICMP_ROUTERADVERT: + break; + + case ICMP_ROUTERSOLICIT: + + if (irdp->flags & IF_DEBUG_MESSAGES) + zlog_debug("IRDP: RX Solicit on %s from %pI4", + ifp->name, &src); + + process_solicit(ifp); + break; + + default: + flog_warn( + EC_ZEBRA_IRDP_BAD_TYPE_CODE, + "IRDP: RX packet type %d from %pI4 Bad ICMP type code, silently ignored", + icmp->type, &src); + } +} + +static int irdp_recvmsg(int sock, uint8_t *buf, int size, int *ifindex) +{ + struct msghdr msg; + struct iovec iov; + char adata[CMSG_SPACE(SOPT_SIZE_CMSG_PKTINFO_IPV4())]; + int ret; + + memset(&msg, 0, sizeof(msg)); + msg.msg_name = (void *)0; + msg.msg_namelen = 0; + msg.msg_iov = &iov; + msg.msg_iovlen = 1; + msg.msg_control = (void *)adata; + msg.msg_controllen = sizeof(adata); + + iov.iov_base = buf; + iov.iov_len = size; + + ret = recvmsg(sock, &msg, 0); + if (ret < 0) { + flog_warn(EC_LIB_SOCKET, "IRDP: recvmsg: read error %s", + safe_strerror(errno)); + return ret; + } + + if (msg.msg_flags & MSG_TRUNC) { + flog_warn(EC_LIB_SOCKET, "IRDP: recvmsg: truncated message"); + return ret; + } + if (msg.msg_flags & MSG_CTRUNC) { + flog_warn(EC_LIB_SOCKET, + "IRDP: recvmsg: truncated control message"); + return ret; + } + + *ifindex = getsockopt_ifindex(AF_INET, &msg); + + return ret; +} + +void irdp_read_raw(struct thread *r) +{ + struct interface *ifp; + struct zebra_if *zi; + struct irdp_interface *irdp; + char buf[IRDP_RX_BUF]; + int ret, ifindex = 0; + + int irdp_sock = THREAD_FD(r); + thread_add_read(zrouter.master, irdp_read_raw, NULL, irdp_sock, + &t_irdp_raw); + + ret = irdp_recvmsg(irdp_sock, (uint8_t *)buf, IRDP_RX_BUF, &ifindex); + + if (ret < 0) + flog_warn(EC_LIB_SOCKET, "IRDP: RX Error length = %d", ret); + + ifp = if_lookup_by_index(ifindex, VRF_DEFAULT); + if (!ifp) + return; + + zi = ifp->info; + if (!zi) + return; + + irdp = zi->irdp; + if (!irdp) + return; + + if (!(irdp->flags & IF_ACTIVE)) { + + if (irdp->flags 
& IF_DEBUG_MISC) + zlog_debug("IRDP: RX ICMP for disabled interface %s", + ifp->name); + return; + } + + if (irdp->flags & IF_DEBUG_PACKET) { + int i; + zlog_debug("IRDP: RX (idx %d) ", ifindex); + for (i = 0; i < ret; i++) + zlog_debug("IRDP: RX %x ", buf[i] & 0xFF); + } + + parse_irdp_packet(buf, ret, ifp); +} + +void send_packet(struct interface *ifp, struct stream *s, uint32_t dst, + struct prefix *p, uint32_t ttl) +{ + static struct sockaddr_in sockdst = {AF_INET}; + struct ip *ip; + struct icmphdr *icmp; + struct msghdr *msg; + struct cmsghdr *cmsg; + struct iovec iovector; + char msgbuf[256]; + char buf[256]; + struct in_pktinfo *pktinfo; + unsigned long src; + uint8_t on; + + if (!(ifp->flags & IFF_UP)) + return; + + if (p) + src = ntohl(p->u.prefix4.s_addr); + else + src = 0; /* Is filled in */ + + ip = (struct ip *)buf; + ip->ip_hl = sizeof(struct ip) >> 2; + ip->ip_v = IPVERSION; + ip->ip_tos = 0xC0; + ip->ip_off = 0L; + ip->ip_p = 1; /* IP_ICMP */ + ip->ip_ttl = ttl; + ip->ip_src.s_addr = src; + ip->ip_dst.s_addr = dst; + icmp = (struct icmphdr *)(buf + sizeof(struct ip)); + + /* Merge IP header with icmp packet */ + assert(stream_get_endp(s) < (sizeof(buf) - sizeof(struct ip))); + stream_get(icmp, s, stream_get_endp(s)); + + /* icmp->checksum is already calculated */ + ip->ip_len = sizeof(struct ip) + stream_get_endp(s); + + on = 1; + if (setsockopt(irdp_sock, IPPROTO_IP, IP_HDRINCL, (char *)&on, + sizeof(on)) + < 0) + flog_err(EC_LIB_SOCKET, + "IRDP: Cannot set IP_HDRINCLU %s(%d) on %s", + safe_strerror(errno), errno, ifp->name); + + + if (dst == INADDR_BROADCAST) { + uint32_t bon = 1; + + if (setsockopt(irdp_sock, SOL_SOCKET, SO_BROADCAST, &bon, + sizeof(bon)) + < 0) + flog_err(EC_LIB_SOCKET, + "IRDP: Cannot set SO_BROADCAST %s(%d) on %s", + safe_strerror(errno), errno, ifp->name); + } + + if (dst != INADDR_BROADCAST) + setsockopt_ipv4_multicast_loop(irdp_sock, 0); + + memset(&sockdst, 0, sizeof(sockdst)); + sockdst.sin_family = AF_INET; + 
sockdst.sin_addr.s_addr = dst; + + cmsg = (struct cmsghdr *)(msgbuf + sizeof(struct msghdr)); + cmsg->cmsg_len = sizeof(struct cmsghdr) + sizeof(struct in_pktinfo); + cmsg->cmsg_level = SOL_IP; + cmsg->cmsg_type = IP_PKTINFO; + pktinfo = (struct in_pktinfo *)CMSG_DATA(cmsg); + pktinfo->ipi_ifindex = ifp->ifindex; + pktinfo->ipi_spec_dst.s_addr = src; + pktinfo->ipi_addr.s_addr = src; + + iovector.iov_base = (void *)buf; + iovector.iov_len = ip->ip_len; + msg = (struct msghdr *)msgbuf; + msg->msg_name = &sockdst; + msg->msg_namelen = sizeof(sockdst); + msg->msg_iov = &iovector; + msg->msg_iovlen = 1; + msg->msg_control = cmsg; + msg->msg_controllen = cmsg->cmsg_len; + + sockopt_iphdrincl_swab_htosys(ip); + + if (sendmsg(irdp_sock, msg, 0) < 0) + flog_err(EC_LIB_SOCKET, + "IRDP: sendmsg send failure %s(%d) on %s", + safe_strerror(errno), errno, ifp->name); +} diff --git a/zebra/kernel_netlink.c b/zebra/kernel_netlink.c new file mode 100644 index 0000000..a8b56bb --- /dev/null +++ b/zebra/kernel_netlink.c @@ -0,0 +1,1966 @@ +/* Kernel communication using netlink interface. + * Copyright (C) 1999 Kunihiro Ishiguro + * + * This file is part of GNU Zebra. + * + * GNU Zebra is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License as published by the + * Free Software Foundation; either version 2, or (at your option) any + * later version. + * + * GNU Zebra is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. 
+ * + * You should have received a copy of the GNU General Public License along + * with this program; see the file COPYING; if not, write to the Free Software + * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA + */ + +#include <zebra.h> + +#ifdef HAVE_NETLINK + +#include "linklist.h" +#include "if.h" +#include "log.h" +#include "prefix.h" +#include "connected.h" +#include "table.h" +#include "memory.h" +#include "rib.h" +#include "thread.h" +#include "privs.h" +#include "nexthop.h" +#include "vrf.h" +#include "mpls.h" +#include "lib_errors.h" +#include "hash.h" + +#include "zebra/zebra_router.h" +#include "zebra/zebra_ns.h" +#include "zebra/zebra_vrf.h" +#include "zebra/rt.h" +#include "zebra/debug.h" +#include "zebra/kernel_netlink.h" +#include "zebra/rt_netlink.h" +#include "zebra/if_netlink.h" +#include "zebra/rule_netlink.h" +#include "zebra/tc_netlink.h" +#include "zebra/netconf_netlink.h" +#include "zebra/zebra_errors.h" + +#ifndef SO_RCVBUFFORCE +#define SO_RCVBUFFORCE (33) +#endif + +/* Hack for GNU libc version 2. */ +#ifndef MSG_TRUNC +#define MSG_TRUNC 0x20 +#endif /* MSG_TRUNC */ + +#ifndef NLMSG_TAIL +#define NLMSG_TAIL(nmsg) \ + ((struct rtattr *)(((uint8_t *)(nmsg)) \ + + NLMSG_ALIGN((nmsg)->nlmsg_len))) +#endif + +#ifndef RTA_TAIL +#define RTA_TAIL(rta) \ + ((struct rtattr *)(((uint8_t *)(rta)) + RTA_ALIGN((rta)->rta_len))) +#endif + +#ifndef RTNL_FAMILY_IP6MR +#define RTNL_FAMILY_IP6MR 129 +#endif + +#ifndef RTPROT_MROUTED +#define RTPROT_MROUTED 17 +#endif + +#define NL_DEFAULT_BATCH_BUFSIZE (16 * NL_PKT_BUF_SIZE) + +/* + * We limit the batch's size to a number smaller than the length of the + * underlying buffer since the last message that wouldn't fit the batch would go + * over the upper boundary and then it would have to be encoded again into a new + * buffer. 
If the difference between the limit and the length of the buffer is + * big enough (bigger than the biggest Netlink message) then this situation + * won't occur. + */ +#define NL_DEFAULT_BATCH_SEND_THRESHOLD (15 * NL_PKT_BUF_SIZE) + +static const struct message nlmsg_str[] = {{RTM_NEWROUTE, "RTM_NEWROUTE"}, + {RTM_DELROUTE, "RTM_DELROUTE"}, + {RTM_GETROUTE, "RTM_GETROUTE"}, + {RTM_NEWLINK, "RTM_NEWLINK"}, + {RTM_SETLINK, "RTM_SETLINK"}, + {RTM_DELLINK, "RTM_DELLINK"}, + {RTM_GETLINK, "RTM_GETLINK"}, + {RTM_NEWADDR, "RTM_NEWADDR"}, + {RTM_DELADDR, "RTM_DELADDR"}, + {RTM_GETADDR, "RTM_GETADDR"}, + {RTM_NEWNEIGH, "RTM_NEWNEIGH"}, + {RTM_DELNEIGH, "RTM_DELNEIGH"}, + {RTM_GETNEIGH, "RTM_GETNEIGH"}, + {RTM_NEWRULE, "RTM_NEWRULE"}, + {RTM_DELRULE, "RTM_DELRULE"}, + {RTM_GETRULE, "RTM_GETRULE"}, + {RTM_NEWNEXTHOP, "RTM_NEWNEXTHOP"}, + {RTM_DELNEXTHOP, "RTM_DELNEXTHOP"}, + {RTM_GETNEXTHOP, "RTM_GETNEXTHOP"}, + {RTM_NEWNETCONF, "RTM_NEWNETCONF"}, + {RTM_DELNETCONF, "RTM_DELNETCONF"}, + {RTM_NEWTUNNEL, "RTM_NEWTUNNEL"}, + {RTM_DELTUNNEL, "RTM_DELTUNNEL"}, + {RTM_GETTUNNEL, "RTM_GETTUNNEL"}, + {RTM_NEWQDISC, "RTM_NEWQDISC"}, + {RTM_DELQDISC, "RTM_DELQDISC"}, + {RTM_GETQDISC, "RTM_GETQDISC"}, + {RTM_NEWTCLASS, "RTM_NEWTCLASS"}, + {RTM_DELTCLASS, "RTM_DELTCLASS"}, + {RTM_GETTCLASS, "RTM_GETTCLASS"}, + {RTM_NEWTFILTER, "RTM_NEWTFILTER"}, + {RTM_DELTFILTER, "RTM_DELTFILTER"}, + {RTM_GETTFILTER, "RTM_GETTFILTER"}, + {0}}; + +static const struct message rtproto_str[] = { + {RTPROT_REDIRECT, "redirect"}, + {RTPROT_KERNEL, "kernel"}, + {RTPROT_BOOT, "boot"}, + {RTPROT_STATIC, "static"}, + {RTPROT_GATED, "GateD"}, + {RTPROT_RA, "router advertisement"}, + {RTPROT_MRT, "MRT"}, + {RTPROT_ZEBRA, "Zebra"}, +#ifdef RTPROT_BIRD + {RTPROT_BIRD, "BIRD"}, +#endif /* RTPROT_BIRD */ + {RTPROT_MROUTED, "mroute"}, + {RTPROT_BGP, "BGP"}, + {RTPROT_OSPF, "OSPF"}, + {RTPROT_ISIS, "IS-IS"}, + {RTPROT_RIP, "RIP"}, + {RTPROT_RIPNG, "RIPNG"}, + {RTPROT_ZSTATIC, "static"}, + {0}}; + +static const struct 
message family_str[] = {{AF_INET, "ipv4"}, + {AF_INET6, "ipv6"}, + {AF_BRIDGE, "bridge"}, + {RTNL_FAMILY_IPMR, "ipv4MR"}, + {RTNL_FAMILY_IP6MR, "ipv6MR"}, + {0}}; + +static const struct message rttype_str[] = {{RTN_UNSPEC, "none"}, + {RTN_UNICAST, "unicast"}, + {RTN_LOCAL, "local"}, + {RTN_BROADCAST, "broadcast"}, + {RTN_ANYCAST, "anycast"}, + {RTN_MULTICAST, "multicast"}, + {RTN_BLACKHOLE, "blackhole"}, + {RTN_UNREACHABLE, "unreachable"}, + {RTN_PROHIBIT, "prohibited"}, + {RTN_THROW, "throw"}, + {RTN_NAT, "nat"}, + {RTN_XRESOLVE, "resolver"}, + {0}}; + +extern struct thread_master *master; + +extern struct zebra_privs_t zserv_privs; + +DEFINE_MTYPE_STATIC(ZEBRA, NL_BUF, "Zebra Netlink buffers"); + +/* Hashtable and mutex to allow lookup of nlsock structs by socket/fd value. + * We have both the main and dplane pthreads using these structs, so we have + * to protect the hash with a lock. + */ +static struct hash *nlsock_hash; +pthread_mutex_t nlsock_mutex; + +/* Lock and unlock wrappers for nlsock hash */ +#define NLSOCK_LOCK() pthread_mutex_lock(&nlsock_mutex) +#define NLSOCK_UNLOCK() pthread_mutex_unlock(&nlsock_mutex) + +size_t nl_batch_tx_bufsize; +char *nl_batch_tx_buf; + +_Atomic uint32_t nl_batch_bufsize = NL_DEFAULT_BATCH_BUFSIZE; +_Atomic uint32_t nl_batch_send_threshold = NL_DEFAULT_BATCH_SEND_THRESHOLD; + +struct nl_batch { + void *buf; + size_t bufsiz; + size_t limit; + + void *buf_head; + size_t curlen; + size_t msgcnt; + + const struct zebra_dplane_info *zns; + + struct dplane_ctx_q ctx_list; + + /* + * Pointer to the queue of completed contexts outbound back + * towards the dataplane module. 
 */
	struct dplane_ctx_q *ctx_out_q;
};

/* Write the netlink batch-buffer and protodown reason-bit settings to
 * the running config, but only when they differ from the defaults. */
int netlink_config_write_helper(struct vty *vty)
{
	uint32_t size =
		atomic_load_explicit(&nl_batch_bufsize, memory_order_relaxed);
	uint32_t threshold = atomic_load_explicit(&nl_batch_send_threshold,
						  memory_order_relaxed);

	if (size != NL_DEFAULT_BATCH_BUFSIZE
	    || threshold != NL_DEFAULT_BATCH_SEND_THRESHOLD)
		vty_out(vty, "zebra kernel netlink batch-tx-buf %u %u\n", size,
			threshold);

	if (if_netlink_frr_protodown_r_bit_is_set())
		vty_out(vty, "zebra protodown reason-bit %u\n",
			if_netlink_get_frr_protodown_r_bit());

	return 0;
}

/* Set (or, when !set, reset to defaults) the batch buffer size and
 * send threshold.  Stored with relaxed atomics; readers tolerate
 * momentary inconsistency between the two values. */
void netlink_set_batch_buffer_size(uint32_t size, uint32_t threshold, bool set)
{
	if (!set) {
		size = NL_DEFAULT_BATCH_BUFSIZE;
		threshold = NL_DEFAULT_BATCH_SEND_THRESHOLD;
	}

	atomic_store_explicit(&nl_batch_bufsize, size, memory_order_relaxed);
	atomic_store_explicit(&nl_batch_send_threshold, threshold,
			      memory_order_relaxed);
}

int netlink_talk_filter(struct nlmsghdr *h, ns_id_t ns_id, int startup)
{
	/*
	 * This is an error condition that must be handled during
	 * development.
	 *
	 * The netlink_talk_filter function is used for communication
	 * down the netlink_cmd pipe and we are expecting
	 * an ack being received.  So if we get here
	 * then we did not receive the ack and instead
	 * received some other message in an unexpected
	 * way.
	 */
	zlog_debug("%s: ignoring message type 0x%04x(%s) NS %u", __func__,
		   h->nlmsg_type, nl_msg_type_to_str(h->nlmsg_type), ns_id);
	return 0;
}

/* Grow the socket's kernel receive buffer, preferring SO_RCVBUFFORCE
 * (needs privileges, ignores rmem_max) with SO_RCVBUF as fallback.
 * NOTE(review): the set uses the file-scope 'rcvbufsize' value rather
 * than the 'newsize' parameter (newsize only sizes the getsockopt
 * out-args) — confirm that is intentional. */
static int netlink_recvbuf(struct nlsock *nl, uint32_t newsize)
{
	uint32_t oldsize;
	socklen_t newlen = sizeof(newsize);
	socklen_t oldlen = sizeof(oldsize);
	int ret;

	ret = getsockopt(nl->sock, SOL_SOCKET, SO_RCVBUF, &oldsize, &oldlen);
	if (ret < 0) {
		flog_err_sys(EC_LIB_SOCKET,
			     "Can't get %s receive buffer size: %s", nl->name,
			     safe_strerror(errno));
		return -1;
	}

	/* Try force option (linux >= 2.6.14) and fall back to normal set */
	frr_with_privs(&zserv_privs) {
		ret = setsockopt(nl->sock, SOL_SOCKET, SO_RCVBUFFORCE,
				 &rcvbufsize, sizeof(rcvbufsize));
	}
	if (ret < 0)
		ret = setsockopt(nl->sock, SOL_SOCKET, SO_RCVBUF, &rcvbufsize,
				 sizeof(rcvbufsize));
	if (ret < 0) {
		flog_err_sys(EC_LIB_SOCKET,
			     "Can't set %s receive buffer size: %s", nl->name,
			     safe_strerror(errno));
		return -1;
	}

	ret = getsockopt(nl->sock, SOL_SOCKET, SO_RCVBUF, &newsize, &newlen);
	if (ret < 0) {
		flog_err_sys(EC_LIB_SOCKET,
			     "Can't get %s receive buffer size: %s", nl->name,
			     safe_strerror(errno));
		return -1;
	}
	return 0;
}

/* Human-readable name for the extra netlink groups we may join. */
static const char *group2str(uint32_t group)
{
	switch (group) {
	case RTNLGRP_TUNNEL:
		return "RTNLGRP_TUNNEL";
	default:
		return "UNKNOWN";
	}
}

/* Make socket for Linux netlink interface.
 */
static int netlink_socket(struct nlsock *nl, unsigned long groups,
			  uint32_t ext_groups[], uint8_t ext_group_size,
			  ns_id_t ns_id)
{
	int ret;
	struct sockaddr_nl snl;
	int sock;
	int namelen;

	/* Socket creation, group membership and bind all require
	 * elevated privileges and run inside one privileged section. */
	frr_with_privs(&zserv_privs) {
		sock = ns_socket(AF_NETLINK, SOCK_RAW, NETLINK_ROUTE, ns_id);
		if (sock < 0) {
			zlog_err("Can't open %s socket: %s", nl->name,
				 safe_strerror(errno));
			return -1;
		}

		memset(&snl, 0, sizeof(snl));
		snl.nl_family = AF_NETLINK;
		snl.nl_groups = groups;

		/* Groups above 31 can't be expressed in nl_groups; join
		 * them via NETLINK_ADD_MEMBERSHIP where available. */
		if (ext_group_size) {
			uint8_t i;

			for (i = 0; i < ext_group_size; i++) {
#if defined SOL_NETLINK
				ret = setsockopt(sock, SOL_NETLINK,
						 NETLINK_ADD_MEMBERSHIP,
						 &ext_groups[i],
						 sizeof(ext_groups[i]));
				if (ret < 0) {
					zlog_notice(
						"can't setsockopt NETLINK_ADD_MEMBERSHIP for group %s(%u), this linux kernel does not support it: %s(%d)",
						group2str(ext_groups[i]),
						ext_groups[i],
						safe_strerror(errno), errno);
				}
#else
				zlog_notice(
					"Unable to use NETLINK_ADD_MEMBERSHIP via SOL_NETLINK for %s(%u) since the linux kernel does not support the socket option",
					group2str(ext_groups[i]),
					ext_groups[i]);
#endif
			}
		}

		/* Bind the socket to the netlink structure for anything. */
		ret = bind(sock, (struct sockaddr *)&snl, sizeof(snl));
	}

	if (ret < 0) {
		zlog_err("Can't bind %s socket to group 0x%x: %s", nl->name,
			 snl.nl_groups, safe_strerror(errno));
		close(sock);
		return -1;
	}

	/* multiple netlink sockets will have different nl_pid */
	namelen = sizeof(snl);
	ret = getsockname(sock, (struct sockaddr *)&snl, (socklen_t *)&namelen);
	if (ret < 0 || namelen != sizeof(snl)) {
		flog_err_sys(EC_LIB_SOCKET, "Can't get %s socket name: %s",
			     nl->name, safe_strerror(errno));
		close(sock);
		return -1;
	}

	nl->snl = snl;
	nl->sock = sock;
	nl->buflen = NL_RCV_PKT_BUF_SIZE;
	nl->buf = XMALLOC(MTYPE_NL_BUF, nl->buflen);

	return ret;
}

/*
 * Dispatch an incoming netlink message; used by the zebra main pthread's
 * netlink event reader.
 */
static int netlink_information_fetch(struct nlmsghdr *h, ns_id_t ns_id,
				     int startup)
{
	/*
	 * When we handle new message types here
	 * because we are starting to install them
	 * then lets check the netlink_install_filter
	 * and see if we should add the corresponding
	 * allow through entry there.
	 * Probably not needed to do but please
	 * think about it.
	 */
	switch (h->nlmsg_type) {
	case RTM_NEWROUTE:
		return netlink_route_change(h, ns_id, startup);
	case RTM_DELROUTE:
		return netlink_route_change(h, ns_id, startup);
	case RTM_NEWLINK:
		return netlink_link_change(h, ns_id, startup);
	case RTM_DELLINK:
		return netlink_link_change(h, ns_id, startup);
	case RTM_NEWNEIGH:
	case RTM_DELNEIGH:
	case RTM_GETNEIGH:
		return netlink_neigh_change(h, ns_id);
	case RTM_NEWRULE:
		return netlink_rule_change(h, ns_id, startup);
	case RTM_DELRULE:
		return netlink_rule_change(h, ns_id, startup);
	case RTM_NEWNEXTHOP:
		return netlink_nexthop_change(h, ns_id, startup);
	case RTM_DELNEXTHOP:
		return netlink_nexthop_change(h, ns_id, startup);

	/* Messages handled in the dplane thread */
	case RTM_NEWADDR:
	case RTM_DELADDR:
	case RTM_NEWNETCONF:
	case RTM_DELNETCONF:
	case RTM_NEWTUNNEL:
	case RTM_DELTUNNEL:
	case RTM_GETTUNNEL:
		return 0;
	default:
		/*
		 * If we have received this message then
		 * we have made a mistake during development
		 * and we need to write some code to handle
		 * this message type or not ask for
		 * it to be sent up to us
		 */
		flog_err(EC_ZEBRA_UNKNOWN_NLMSG,
			 "Unknown netlink nlmsg_type %s(%d) vrf %u",
			 nl_msg_type_to_str(h->nlmsg_type), h->nlmsg_type,
			 ns_id);
		break;
	}
	return 0;
}

/*
 * Dispatch an incoming netlink message; used by the dataplane pthread's
 * netlink event reader code.
 */
static int dplane_netlink_information_fetch(struct nlmsghdr *h, ns_id_t ns_id,
					    int startup)
{
	/*
	 * Dispatch the incoming messages that the dplane pthread handles
	 */
	switch (h->nlmsg_type) {
	case RTM_NEWADDR:
	case RTM_DELADDR:
		return netlink_interface_addr_dplane(h, ns_id, startup);

	case RTM_NEWNETCONF:
	case RTM_DELNETCONF:
		return netlink_netconf_change(h, ns_id, startup);

	/* TODO -- other messages for the dplane socket and pthread */

	case RTM_NEWLINK:
	case RTM_DELLINK:

	default:
		break;
	}

	return 0;
}

/* Main-pthread read handler for the listen socket: parse up to 5
 * message batches, then re-arm itself. */
static void kernel_read(struct thread *thread)
{
	struct zebra_ns *zns = (struct zebra_ns *)THREAD_ARG(thread);
	struct zebra_dplane_info dp_info;

	/* Capture key info from ns struct */
	zebra_dplane_info_from_zns(&dp_info, zns, false);

	netlink_parse_info(netlink_information_fetch, &zns->netlink, &dp_info,
			   5, false);

	thread_add_read(zrouter.master, kernel_read, zns, zns->netlink.sock,
			&zns->t_netlink);
}

/*
 * Called by the dplane pthread to read incoming OS messages and dispatch them.
 */
int kernel_dplane_read(struct zebra_dplane_info *info)
{
	struct nlsock *nl = kernel_netlink_nlsock_lookup(info->sock);

	netlink_parse_info(dplane_netlink_information_fetch, nl, info, 5,
			   false);

	return 0;
}

/*
 * Filter out messages from self that occur on listener socket,
 * caused by our actions on the command socket(s)
 *
 * When we add new Netlink message types we probably
 * do not need to add them here as that we are filtering
 * on the routes we actually care to receive( which is rarer
 * then the normal course of operations). We are intentionally
 * allowing some messages from ourselves through
 * ( I'm looking at you Interface based netlink messages )
 * so that we only have to write one way to handle incoming
 * address add/delete and xxxNETCONF changes.
 */
static void netlink_install_filter(int sock, uint32_t pid, uint32_t dplane_pid)
{
	/*
	 * BPF_JUMP instructions and where you jump to are based upon
	 * 0 as being the next statement.  So count from 0.  Writing
	 * this down because every time I look at this I have to
	 * re-remember it.
	 */
	struct sock_filter filter[] = {
		/*
		 * Logic:
		 *   if (nlmsg_pid == pid ||
		 *       nlmsg_pid == dplane_pid) {
		 *       if (the incoming nlmsg_type ==
		 *           RTM_NEWADDR || RTM_DELADDR || RTM_NEWNETCONF ||
		 *           RTM_DELNETCONF)
		 *           keep this message
		 *       else
		 *           skip this message
		 *   } else
		 *       keep this netlink message
		 */
		/*
		 * 0: Load the nlmsg_pid into the BPF register
		 */
		BPF_STMT(BPF_LD | BPF_ABS | BPF_W,
			 offsetof(struct nlmsghdr, nlmsg_pid)),
		/*
		 * 1: Compare to pid
		 */
		BPF_JUMP(BPF_JMP | BPF_JEQ | BPF_K, htonl(pid), 1, 0),
		/*
		 * 2: Compare to dplane pid
		 */
		BPF_JUMP(BPF_JMP | BPF_JEQ | BPF_K, htonl(dplane_pid), 0, 6),
		/*
		 * 3: Load the nlmsg_type into BPF register
		 */
		BPF_STMT(BPF_LD | BPF_ABS | BPF_H,
			 offsetof(struct nlmsghdr, nlmsg_type)),
		/*
		 * 4: Compare to RTM_NEWADDR
		 */
		BPF_JUMP(BPF_JMP | BPF_JEQ | BPF_K, htons(RTM_NEWADDR), 4, 0),
		/*
		 * 5: Compare to RTM_DELADDR
		 */
		BPF_JUMP(BPF_JMP | BPF_JEQ | BPF_K, htons(RTM_DELADDR), 3, 0),
		/*
		 * 6: Compare to RTM_NEWNETCONF
		 */
		BPF_JUMP(BPF_JMP | BPF_JEQ | BPF_K, htons(RTM_NEWNETCONF), 2,
			 0),
		/*
		 * 7: Compare to RTM_DELNETCONF
		 */
		BPF_JUMP(BPF_JMP | BPF_JEQ | BPF_K, htons(RTM_DELNETCONF), 1,
			 0),
		/*
		 * 8: This is the end state of we want to skip the
		 *    message
		 */
		BPF_STMT(BPF_RET | BPF_K, 0),
		/* 9: This is the end state of we want to keep
		 *     the message
		 */
		BPF_STMT(BPF_RET | BPF_K, 0xffff),
	};

	struct sock_fprog prog = {
		.len = array_size(filter), .filter = filter,
	};

	if (setsockopt(sock, SOL_SOCKET, SO_ATTACH_FILTER, &prog, sizeof(prog))
	    < 0)
		flog_err_sys(EC_LIB_SOCKET, "Can't install socket filter: %s",
			     safe_strerror(errno));
}

void
netlink_parse_rtattr_flags(struct rtattr **tb, int max, struct rtattr *rta,
			   int len, unsigned short flags)
{
	unsigned short type;

	/* Like netlink_parse_rtattr but masks 'flags' (e.g. NLA_F_NESTED)
	 * off each attribute type before indexing; first occurrence of a
	 * type wins. */
	memset(tb, 0, sizeof(struct rtattr *) * (max + 1));
	while (RTA_OK(rta, len)) {
		type = rta->rta_type & ~flags;
		if ((type <= max) && (!tb[type]))
			tb[type] = rta;
		rta = RTA_NEXT(rta, len);
	}
}

/* Index the attributes in [rta, rta+len) by type into tb[0..max];
 * later duplicates overwrite earlier ones. */
void netlink_parse_rtattr(struct rtattr **tb, int max, struct rtattr *rta,
			  int len)
{
	memset(tb, 0, sizeof(struct rtattr *) * (max + 1));
	while (RTA_OK(rta, len)) {
		if (rta->rta_type <= max)
			tb[rta->rta_type] = rta;
		rta = RTA_NEXT(rta, len);
	}
}

/**
 * netlink_parse_rtattr_nested() - Parses a nested route attribute
 * @tb:		Pointer to array for storing rtattr in.
 * @max:	Max number to store.
 * @rta:	Pointer to rtattr to look for nested items in.
 */
void netlink_parse_rtattr_nested(struct rtattr **tb, int max,
				 struct rtattr *rta)
{
	netlink_parse_rtattr(tb, max, RTA_DATA(rta), RTA_PAYLOAD(rta));
}

/* Append 'len' raw bytes (plus alignment padding) to netlink message n,
 * failing (false) if that would exceed maxlen. */
bool nl_addraw_l(struct nlmsghdr *n, unsigned int maxlen, const void *data,
		 unsigned int len)
{
	if (NLMSG_ALIGN(n->nlmsg_len) + NLMSG_ALIGN(len) > maxlen) {
		zlog_err("ERROR message exceeded bound of %d", maxlen);
		return false;
	}

	memcpy(NLMSG_TAIL(n), data, len);
	memset((uint8_t *)NLMSG_TAIL(n) + len, 0, NLMSG_ALIGN(len) - len);
	n->nlmsg_len = NLMSG_ALIGN(n->nlmsg_len) + NLMSG_ALIGN(len);

	return true;
}

/* Append one rtattr of 'type' with 'alen' bytes of payload to n;
 * returns false (message unchanged) if it would exceed maxlen.
 * data may be NULL only when alen == 0 (used to open nests). */
bool nl_attr_put(struct nlmsghdr *n, unsigned int maxlen, int type,
		 const void *data, unsigned int alen)
{
	int len;
	struct rtattr *rta;

	len = RTA_LENGTH(alen);

	if (NLMSG_ALIGN(n->nlmsg_len) + RTA_ALIGN(len) > maxlen)
		return false;

	rta = (struct rtattr *)(((char *)n) + NLMSG_ALIGN(n->nlmsg_len));
	rta->rta_type = type;
	rta->rta_len = len;

	if (data)
		memcpy(RTA_DATA(rta), data, alen);
	else
		assert(alen == 0);

	n->nlmsg_len = NLMSG_ALIGN(n->nlmsg_len) + RTA_ALIGN(len);

	return true;
}

/* Fixed-width convenience wrappers around nl_attr_put. */
bool nl_attr_put8(struct nlmsghdr *n, unsigned int maxlen, int type,
		  uint8_t data)
{
	return nl_attr_put(n, maxlen, type, &data, sizeof(uint8_t));
}

bool nl_attr_put16(struct nlmsghdr *n, unsigned int maxlen, int type,
		   uint16_t data)
{
	return nl_attr_put(n, maxlen, type, &data, sizeof(uint16_t));
}

bool nl_attr_put32(struct nlmsghdr *n, unsigned int maxlen, int type,
		   uint32_t data)
{
	return nl_attr_put(n, maxlen, type, &data, sizeof(uint32_t));
}

/* Open a nested attribute (NLA_F_NESTED) and return its header so the
 * caller can close it with nl_attr_nest_end; NULL if out of room. */
struct rtattr *nl_attr_nest(struct nlmsghdr *n, unsigned int maxlen, int type)
{
	struct rtattr *nest = NLMSG_TAIL(n);

	if (!nl_attr_put(n, maxlen, type, NULL, 0))
		return NULL;

	nest->rta_type |= NLA_F_NESTED;
	return nest;
}

/* Close a nest opened by nl_attr_nest by fixing up its length. */
int nl_attr_nest_end(struct nlmsghdr *n, struct rtattr *nest)
{
	nest->rta_len = (uint8_t *)NLMSG_TAIL(n) - (uint8_t *)nest;
	return n->nlmsg_len;
}

/* Reserve a zeroed rtnexthop at the tail of n; NULL if out of room.
 * NOTE(review): bound check uses RTNH_ALIGN but the length increment
 * uses RTA_ALIGN — both are 4-byte alignment on Linux, confirm the
 * asymmetry is intentional. */
struct rtnexthop *nl_attr_rtnh(struct nlmsghdr *n, unsigned int maxlen)
{
	struct rtnexthop *rtnh = (struct rtnexthop *)NLMSG_TAIL(n);

	if (NLMSG_ALIGN(n->nlmsg_len) + RTNH_ALIGN(sizeof(struct rtnexthop))
	    > maxlen)
		return NULL;

	memset(rtnh, 0, sizeof(struct rtnexthop));
	n->nlmsg_len =
		NLMSG_ALIGN(n->nlmsg_len) + RTA_ALIGN(sizeof(struct rtnexthop));

	return rtnh;
}

/* Close an rtnexthop opened by nl_attr_rtnh by fixing up its length. */
void nl_attr_rtnh_end(struct nlmsghdr *n, struct rtnexthop *rtnh)
{
	rtnh->rtnh_len = (uint8_t *)NLMSG_TAIL(n) - (uint8_t *)rtnh;
}

/* Append a sub-attribute inside an existing rtattr (used when building
 * nested payloads outside the main message buffer). */
bool nl_rta_put(struct rtattr *rta, unsigned int maxlen, int type,
		const void *data, int alen)
{
	struct rtattr *subrta;
	int len = RTA_LENGTH(alen);

	if (RTA_ALIGN(rta->rta_len) + RTA_ALIGN(len) > maxlen) {
		zlog_err("ERROR max allowed bound %d exceeded for rtattr",
			 maxlen);
		return false;
	}
	subrta = (struct rtattr *)(((char *)rta) + RTA_ALIGN(rta->rta_len));
	subrta->rta_type = type;
	subrta->rta_len = len;
	if (alen)
		memcpy(RTA_DATA(subrta), data, alen);
	rta->rta_len = NLMSG_ALIGN(rta->rta_len) + RTA_ALIGN(len);

	return true;
}

bool nl_rta_put16(struct rtattr *rta, unsigned int maxlen, int type,
		  uint16_t
data) +{ + return nl_rta_put(rta, maxlen, type, &data, sizeof(uint16_t)); +} + +bool nl_rta_put64(struct rtattr *rta, unsigned int maxlen, int type, + uint64_t data) +{ + return nl_rta_put(rta, maxlen, type, &data, sizeof(uint64_t)); +} + +struct rtattr *nl_rta_nest(struct rtattr *rta, unsigned int maxlen, int type) +{ + struct rtattr *nest = RTA_TAIL(rta); + + if (nl_rta_put(rta, maxlen, type, NULL, 0)) + return NULL; + + nest->rta_type |= NLA_F_NESTED; + + return nest; +} + +int nl_rta_nest_end(struct rtattr *rta, struct rtattr *nest) +{ + nest->rta_len = (uint8_t *)RTA_TAIL(rta) - (uint8_t *)nest; + + return rta->rta_len; +} + +const char *nl_msg_type_to_str(uint16_t msg_type) +{ + return lookup_msg(nlmsg_str, msg_type, ""); +} + +const char *nl_rtproto_to_str(uint8_t rtproto) +{ + return lookup_msg(rtproto_str, rtproto, ""); +} + +const char *nl_family_to_str(uint8_t family) +{ + return lookup_msg(family_str, family, ""); +} + +const char *nl_rttype_to_str(uint8_t rttype) +{ + return lookup_msg(rttype_str, rttype, ""); +} + +#define NLA_OK(nla, len) \ + ((len) >= (int)sizeof(struct nlattr) \ + && (nla)->nla_len >= sizeof(struct nlattr) \ + && (nla)->nla_len <= (len)) +#define NLA_NEXT(nla, attrlen) \ + ((attrlen) -= NLA_ALIGN((nla)->nla_len), \ + (struct nlattr *)(((char *)(nla)) + NLA_ALIGN((nla)->nla_len))) +#define NLA_LENGTH(len) (NLA_ALIGN(sizeof(struct nlattr)) + (len)) +#define NLA_DATA(nla) ((struct nlattr *)(((char *)(nla)) + NLA_LENGTH(0))) + +#define ERR_NLA(err, inner_len) \ + ((struct nlattr *)(((char *)(err)) \ + + NLMSG_ALIGN(sizeof(struct nlmsgerr)) \ + + NLMSG_ALIGN((inner_len)))) + +static void netlink_parse_nlattr(struct nlattr **tb, int max, + struct nlattr *nla, int len) +{ + while (NLA_OK(nla, len)) { + if (nla->nla_type <= max) + tb[nla->nla_type] = nla; + nla = NLA_NEXT(nla, len); + } +} + +static void netlink_parse_extended_ack(struct nlmsghdr *h) +{ + struct nlattr *tb[NLMSGERR_ATTR_MAX + 1] = {}; + const struct nlmsgerr *err = (const 
struct nlmsgerr *)NLMSG_DATA(h); + const struct nlmsghdr *err_nlh = NULL; + /* Length not including nlmsghdr */ + uint32_t len = 0; + /* Inner error netlink message length */ + uint32_t inner_len = 0; + const char *msg = NULL; + uint32_t off = 0; + + if (!(h->nlmsg_flags & NLM_F_CAPPED)) + inner_len = (uint32_t)NLMSG_PAYLOAD(&err->msg, 0); + + len = (uint32_t)(NLMSG_PAYLOAD(h, sizeof(struct nlmsgerr)) - inner_len); + + netlink_parse_nlattr(tb, NLMSGERR_ATTR_MAX, ERR_NLA(err, inner_len), + len); + + if (tb[NLMSGERR_ATTR_MSG]) + msg = (const char *)NLA_DATA(tb[NLMSGERR_ATTR_MSG]); + + if (tb[NLMSGERR_ATTR_OFFS]) { + off = *(uint32_t *)NLA_DATA(tb[NLMSGERR_ATTR_OFFS]); + + if (off > h->nlmsg_len) { + zlog_err("Invalid offset for NLMSGERR_ATTR_OFFS"); + } else if (!(h->nlmsg_flags & NLM_F_CAPPED)) { + /* + * Header of failed message + * we are not doing anything currently with it + * but noticing it for later. + */ + err_nlh = &err->msg; + zlog_debug("%s: Received %s extended Ack", __func__, + nl_msg_type_to_str(err_nlh->nlmsg_type)); + } + } + + if (msg && *msg != '\0') { + bool is_err = !!err->error; + + if (is_err) + zlog_err("Extended Error: %s", msg); + else + flog_warn(EC_ZEBRA_NETLINK_EXTENDED_WARNING, + "Extended Warning: %s", msg); + } +} + +/* + * netlink_send_msg - send a netlink message of a certain size. + * + * Returns -1 on error. Otherwise, it returns the number of bytes sent. + */ +static ssize_t netlink_send_msg(const struct nlsock *nl, void *buf, + size_t buflen) +{ + struct sockaddr_nl snl = {}; + struct iovec iov = {}; + struct msghdr msg = {}; + ssize_t status; + int save_errno = 0; + + iov.iov_base = buf; + iov.iov_len = buflen; + msg.msg_name = &snl; + msg.msg_namelen = sizeof(snl); + msg.msg_iov = &iov; + msg.msg_iovlen = 1; + + snl.nl_family = AF_NETLINK; + + /* Send message to netlink interface. 
*/ + frr_with_privs(&zserv_privs) { + status = sendmsg(nl->sock, &msg, 0); + save_errno = errno; + } + + if (IS_ZEBRA_DEBUG_KERNEL_MSGDUMP_SEND) { + zlog_debug("%s: >> netlink message dump [sent]", __func__); +#ifdef NETLINK_DEBUG + nl_dump(buf, buflen); +#else + zlog_hexdump(buf, buflen); +#endif /* NETLINK_DEBUG */ + } + + if (status == -1) { + flog_err_sys(EC_LIB_SOCKET, "%s error: %s", __func__, + safe_strerror(save_errno)); + return -1; + } + + return status; +} + +/* + * netlink_recv_msg - receive a netlink message. + * + * Returns -1 on error, 0 if read would block or the number of bytes received. + */ +static int netlink_recv_msg(struct nlsock *nl, struct msghdr *msg) +{ + struct iovec iov; + int status; + + iov.iov_base = nl->buf; + iov.iov_len = nl->buflen; + msg->msg_iov = &iov; + msg->msg_iovlen = 1; + + do { + int bytes; + + bytes = recv(nl->sock, NULL, 0, MSG_PEEK | MSG_TRUNC); + + if (bytes >= 0 && (size_t)bytes > nl->buflen) { + nl->buf = XREALLOC(MTYPE_NL_BUF, nl->buf, bytes); + nl->buflen = bytes; + iov.iov_base = nl->buf; + iov.iov_len = nl->buflen; + } + + status = recvmsg(nl->sock, msg, 0); + } while (status == -1 && errno == EINTR); + + if (status == -1) { + if (errno == EWOULDBLOCK || errno == EAGAIN) + return 0; + flog_err(EC_ZEBRA_RECVMSG_OVERRUN, "%s recvmsg overrun: %s", + nl->name, safe_strerror(errno)); + /* + * In this case we are screwed. There is no good way to recover + * zebra at this point. 
+ */ + exit(-1); + } + + if (status == 0) { + flog_err_sys(EC_LIB_SOCKET, "%s EOF", nl->name); + return -1; + } + + if (msg->msg_namelen != sizeof(struct sockaddr_nl)) { + flog_err(EC_ZEBRA_NETLINK_LENGTH_ERROR, + "%s sender address length error: length %d", nl->name, + msg->msg_namelen); + return -1; + } + + if (IS_ZEBRA_DEBUG_KERNEL_MSGDUMP_RECV) { + zlog_debug("%s: << netlink message dump [recv]", __func__); +#ifdef NETLINK_DEBUG + nl_dump(nl->buf, status); +#else + zlog_hexdump(nl->buf, status); +#endif /* NETLINK_DEBUG */ + } + + return status; +} + +/* + * netlink_parse_error - parse a netlink error message + * + * Returns 1 if this message is acknowledgement, 0 if this error should be + * ignored, -1 otherwise. + */ +static int netlink_parse_error(const struct nlsock *nl, struct nlmsghdr *h, + bool is_cmd, bool startup) +{ + struct nlmsgerr *err = (struct nlmsgerr *)NLMSG_DATA(h); + int errnum = err->error; + int msg_type = err->msg.nlmsg_type; + + if (h->nlmsg_len < NLMSG_LENGTH(sizeof(struct nlmsgerr))) { + flog_err(EC_ZEBRA_NETLINK_LENGTH_ERROR, + "%s error: message truncated", nl->name); + return -1; + } + + /* + * Parse the extended information before we actually handle it. At this + * point in time we do not do anything other than report the issue. + */ + if (h->nlmsg_flags & NLM_F_ACK_TLVS) + netlink_parse_extended_ack(h); + + /* If the error field is zero, then this is an ACK. */ + if (err->error == 0) { + if (IS_ZEBRA_DEBUG_KERNEL) { + zlog_debug("%s: %s ACK: type=%s(%u), seq=%u, pid=%u", + __func__, nl->name, + nl_msg_type_to_str(err->msg.nlmsg_type), + err->msg.nlmsg_type, err->msg.nlmsg_seq, + err->msg.nlmsg_pid); + } + + return 1; + } + + /* + * Deal with errors that occur because of races in link handling + * or types are not supported in kernel. 
+ */ + if (is_cmd && + ((msg_type == RTM_DELROUTE && + (-errnum == ENODEV || -errnum == ESRCH)) || + (msg_type == RTM_NEWROUTE && + (-errnum == ENETDOWN || -errnum == EEXIST)) || + ((msg_type == RTM_NEWTUNNEL || msg_type == RTM_DELTUNNEL || + msg_type == RTM_GETTUNNEL) && + (-errnum == EOPNOTSUPP)))) { + if (IS_ZEBRA_DEBUG_KERNEL) + zlog_debug("%s: error: %s type=%s(%u), seq=%u, pid=%u", + nl->name, safe_strerror(-errnum), + nl_msg_type_to_str(msg_type), msg_type, + err->msg.nlmsg_seq, err->msg.nlmsg_pid); + return 0; + } + + /* + * We see RTM_DELNEIGH when shutting down an interface with an IPv4 + * link-local. The kernel should have already deleted the neighbor so + * do not log these as an error. + */ + if (msg_type == RTM_DELNEIGH + || (is_cmd && msg_type == RTM_NEWROUTE + && (-errnum == ESRCH || -errnum == ENETUNREACH))) { + /* + * This is known to happen in some situations, don't log as + * error. + */ + if (IS_ZEBRA_DEBUG_KERNEL) + zlog_debug("%s error: %s, type=%s(%u), seq=%u, pid=%u", + nl->name, safe_strerror(-errnum), + nl_msg_type_to_str(msg_type), msg_type, + err->msg.nlmsg_seq, err->msg.nlmsg_pid); + } else { + if ((msg_type != RTM_GETNEXTHOP) || !startup) + flog_err(EC_ZEBRA_UNEXPECTED_MESSAGE, + "%s error: %s, type=%s(%u), seq=%u, pid=%u", + nl->name, safe_strerror(-errnum), + nl_msg_type_to_str(msg_type), msg_type, + err->msg.nlmsg_seq, err->msg.nlmsg_pid); + } + + return -1; +} + +/* + * netlink_parse_info + * + * Receive message from netlink interface and pass those information + * to the given function. + * + * filter -> Function to call to read the results + * nl -> netlink socket information + * zns -> The zebra namespace data + * count -> How many we should read in, 0 means as much as possible + * startup -> Are we reading in under startup conditions? passed to + * the filter. 
+ */ +int netlink_parse_info(int (*filter)(struct nlmsghdr *, ns_id_t, int), + struct nlsock *nl, const struct zebra_dplane_info *zns, + int count, bool startup) +{ + int status; + int ret = 0; + int error; + int read_in = 0; + + while (1) { + struct sockaddr_nl snl; + struct msghdr msg = {.msg_name = (void *)&snl, + .msg_namelen = sizeof(snl)}; + struct nlmsghdr *h; + + if (count && read_in >= count) + return 0; + + status = netlink_recv_msg(nl, &msg); + if (status == -1) + return -1; + else if (status == 0) + break; + + read_in++; + for (h = (struct nlmsghdr *)nl->buf; + (status >= 0 && NLMSG_OK(h, (unsigned int)status)); + h = NLMSG_NEXT(h, status)) { + /* Finish of reading. */ + if (h->nlmsg_type == NLMSG_DONE) + return ret; + + /* Error handling. */ + if (h->nlmsg_type == NLMSG_ERROR) { + int err = netlink_parse_error( + nl, h, zns->is_cmd, startup); + + if (err == 1) { + if (!(h->nlmsg_flags & NLM_F_MULTI)) + return 0; + continue; + } else + return err; + } + + /* + * What is the right thing to do? The kernel + * is telling us that the dump request was interrupted + * and we more than likely are out of luck and have + * missed data from the kernel. At this point in time + * lets just note that this is happening. + */ + if (h->nlmsg_flags & NLM_F_DUMP_INTR) + flog_err( + EC_ZEBRA_NETLINK_BAD_SEQUENCE, + "netlink recvmsg: The Dump request was interrupted"); + + /* OK we got netlink message. */ + if (IS_ZEBRA_DEBUG_KERNEL) + zlog_debug( + "%s: %s type %s(%u), len=%d, seq=%u, pid=%u", + __func__, nl->name, + nl_msg_type_to_str(h->nlmsg_type), + h->nlmsg_type, h->nlmsg_len, + h->nlmsg_seq, h->nlmsg_pid); + + + /* + * Ignore messages that maybe sent from + * other actors besides the kernel + */ + if (snl.nl_pid != 0) { + zlog_debug("Ignoring message from pid %u", + snl.nl_pid); + continue; + } + + error = (*filter)(h, zns->ns_id, startup); + if (error < 0) { + zlog_debug("%s filter function error", + nl->name); + ret = error; + } + } + + /* After error care. 
*/ + if (msg.msg_flags & MSG_TRUNC) { + flog_err(EC_ZEBRA_NETLINK_LENGTH_ERROR, + "%s error: message truncated", nl->name); + continue; + } + if (status) { + flog_err(EC_ZEBRA_NETLINK_LENGTH_ERROR, + "%s error: data remnant size %d", nl->name, + status); + return -1; + } + } + return ret; +} + +/* + * netlink_talk_info + * + * sendmsg() to netlink socket then recvmsg(). + * Calls netlink_parse_info to parse returned data + * + * filter -> The filter to read final results from kernel + * nlmsghdr -> The data to send to the kernel + * dp_info -> The dataplane and netlink socket information + * startup -> Are we reading in under startup conditions + * This is passed through eventually to filter. + */ +static int netlink_talk_info(int (*filter)(struct nlmsghdr *, ns_id_t, + int startup), + struct nlmsghdr *n, + struct zebra_dplane_info *dp_info, bool startup) +{ + struct nlsock *nl; + + nl = kernel_netlink_nlsock_lookup(dp_info->sock); + n->nlmsg_seq = dp_info->seq; + n->nlmsg_pid = nl->snl.nl_pid; + + if (IS_ZEBRA_DEBUG_KERNEL) + zlog_debug( + "netlink_talk: %s type %s(%u), len=%d seq=%u flags 0x%x", + nl->name, nl_msg_type_to_str(n->nlmsg_type), + n->nlmsg_type, n->nlmsg_len, n->nlmsg_seq, + n->nlmsg_flags); + + if (netlink_send_msg(nl, n, n->nlmsg_len) == -1) + return -1; + + /* + * Get reply from netlink socket. + * The reply should either be an acknowlegement or an error. + */ + return netlink_parse_info(filter, nl, dp_info, 0, startup); +} + +/* + * Synchronous version of netlink_talk_info. Converts args to suit the + * common version, which is suitable for both sync and async use. + */ +int netlink_talk(int (*filter)(struct nlmsghdr *, ns_id_t, int startup), + struct nlmsghdr *n, struct nlsock *nl, struct zebra_ns *zns, + bool startup) +{ + struct zebra_dplane_info dp_info; + + /* Increment sequence number before capturing snapshot of ns socket + * info. 
+ */ + nl->seq++; + + /* Capture info in intermediate info struct */ + zebra_dplane_info_from_zns(&dp_info, zns, (nl == &(zns->netlink_cmd))); + + return netlink_talk_info(filter, n, &dp_info, startup); +} + +/* Issue request message to kernel via netlink socket. GET messages + * are issued through this interface. + */ +int netlink_request(struct nlsock *nl, void *req) +{ + struct nlmsghdr *n = (struct nlmsghdr *)req; + + /* Check netlink socket. */ + if (nl->sock < 0) { + flog_err_sys(EC_LIB_SOCKET, "%s socket isn't active.", + nl->name); + return -1; + } + + /* Fill common fields for all requests. */ + n->nlmsg_pid = nl->snl.nl_pid; + n->nlmsg_seq = ++nl->seq; + + if (netlink_send_msg(nl, req, n->nlmsg_len) == -1) + return -1; + + return 0; +} + +static int nl_batch_read_resp(struct nl_batch *bth) +{ + struct nlmsghdr *h; + struct sockaddr_nl snl; + struct msghdr msg = {}; + int status, seq; + struct nlsock *nl; + struct zebra_dplane_ctx *ctx; + bool ignore_msg; + + nl = kernel_netlink_nlsock_lookup(bth->zns->sock); + + msg.msg_name = (void *)&snl; + msg.msg_namelen = sizeof(snl); + + /* + * The responses are not batched, so we need to read and process one + * message at a time. + */ + while (true) { + status = netlink_recv_msg(nl, &msg); + /* + * status == -1 is a full on failure somewhere + * since we don't know where the problem happened + * we must mark all as failed + * + * Else we mark everything as worked + * + */ + if (status == -1 || status == 0) { + while ((ctx = dplane_ctx_dequeue(&(bth->ctx_list))) != + NULL) { + if (status == -1) + dplane_ctx_set_status( + ctx, + ZEBRA_DPLANE_REQUEST_FAILURE); + dplane_ctx_enqueue_tail(bth->ctx_out_q, ctx); + } + return status; + } + + h = (struct nlmsghdr *)nl->buf; + ignore_msg = false; + seq = h->nlmsg_seq; + /* + * Find the corresponding context object. 
Received responses are + * in the same order as requests we sent, so we can simply + * iterate over the context list and match responses with + * requests at same time. + */ + while (true) { + ctx = dplane_ctx_get_head(&(bth->ctx_list)); + if (ctx == NULL) { + /* + * This is a situation where we have gotten + * into a bad spot. We need to know that + * this happens( does it? ) + */ + zlog_err( + "%s:WARNING Received netlink Response for an error and no Contexts to associate with it", + __func__); + break; + } + + /* + * 'update' context objects take two consecutive + * sequence numbers. + */ + if (dplane_ctx_is_update(ctx) && + dplane_ctx_get_ns(ctx)->seq + 1 == seq) { + /* + * This is the situation where we get a response + * to a message that should be ignored. + */ + ignore_msg = true; + break; + } + + ctx = dplane_ctx_dequeue(&(bth->ctx_list)); + dplane_ctx_enqueue_tail(bth->ctx_out_q, ctx); + + /* We have found corresponding context object. */ + if (dplane_ctx_get_ns(ctx)->seq == seq) + break; + + if (dplane_ctx_get_ns(ctx)->seq > seq) + zlog_warn( + "%s:WARNING Received %u is less than any context on the queue ctx->seq %u", + __func__, seq, + dplane_ctx_get_ns(ctx)->seq); + } + + if (ignore_msg) { + /* + * If we ignore the message due to an update + * above we should still fricking decode the + * message for our operator to understand + * what is going on + */ + int err = netlink_parse_error(nl, h, bth->zns->is_cmd, + false); + + zlog_debug("%s: netlink error message seq=%d %d", + __func__, h->nlmsg_seq, err); + continue; + } + + /* + * We received a message with the sequence number that isn't + * associated with any dplane context object. 
+ */ + if (ctx == NULL) { + if (IS_ZEBRA_DEBUG_KERNEL) + zlog_debug( + "%s: skipping unassociated response, seq number %d NS %u", + __func__, h->nlmsg_seq, + bth->zns->ns_id); + continue; + } + + if (h->nlmsg_type == NLMSG_ERROR) { + int err = netlink_parse_error(nl, h, bth->zns->is_cmd, + false); + + if (err == -1) + dplane_ctx_set_status( + ctx, ZEBRA_DPLANE_REQUEST_FAILURE); + + if (IS_ZEBRA_DEBUG_KERNEL) + zlog_debug("%s: netlink error message seq=%d ", + __func__, h->nlmsg_seq); + continue; + } + + /* + * If we get here then we did not receive neither the ack nor + * the error and instead received some other message in an + * unexpected way. + */ + if (IS_ZEBRA_DEBUG_KERNEL) + zlog_debug("%s: ignoring message type 0x%04x(%s) NS %u", + __func__, h->nlmsg_type, + nl_msg_type_to_str(h->nlmsg_type), + bth->zns->ns_id); + } + + return 0; +} + +static void nl_batch_reset(struct nl_batch *bth) +{ + bth->buf_head = bth->buf; + bth->curlen = 0; + bth->msgcnt = 0; + bth->zns = NULL; + + TAILQ_INIT(&(bth->ctx_list)); +} + +static void nl_batch_init(struct nl_batch *bth, struct dplane_ctx_q *ctx_out_q) +{ + /* + * If the size of the buffer has changed, free and then allocate a new + * one. 
+ */ + size_t bufsize = + atomic_load_explicit(&nl_batch_bufsize, memory_order_relaxed); + if (bufsize != nl_batch_tx_bufsize) { + if (nl_batch_tx_buf) + XFREE(MTYPE_NL_BUF, nl_batch_tx_buf); + + nl_batch_tx_buf = XCALLOC(MTYPE_NL_BUF, bufsize); + nl_batch_tx_bufsize = bufsize; + } + + bth->buf = nl_batch_tx_buf; + bth->bufsiz = bufsize; + bth->limit = atomic_load_explicit(&nl_batch_send_threshold, + memory_order_relaxed); + + bth->ctx_out_q = ctx_out_q; + + nl_batch_reset(bth); +} + +static void nl_batch_send(struct nl_batch *bth) +{ + struct zebra_dplane_ctx *ctx; + bool err = false; + + if (bth->curlen != 0 && bth->zns != NULL) { + struct nlsock *nl = + kernel_netlink_nlsock_lookup(bth->zns->sock); + + if (IS_ZEBRA_DEBUG_KERNEL) + zlog_debug("%s: %s, batch size=%zu, msg cnt=%zu", + __func__, nl->name, bth->curlen, + bth->msgcnt); + + if (netlink_send_msg(nl, bth->buf, bth->curlen) == -1) + err = true; + + if (!err) { + if (nl_batch_read_resp(bth) == -1) + err = true; + } + } + + /* Move remaining contexts to the outbound queue. */ + while (true) { + ctx = dplane_ctx_dequeue(&(bth->ctx_list)); + if (ctx == NULL) + break; + + if (err) + dplane_ctx_set_status(ctx, + ZEBRA_DPLANE_REQUEST_FAILURE); + + dplane_ctx_enqueue_tail(bth->ctx_out_q, ctx); + } + + nl_batch_reset(bth); +} + +enum netlink_msg_status netlink_batch_add_msg( + struct nl_batch *bth, struct zebra_dplane_ctx *ctx, + ssize_t (*msg_encoder)(struct zebra_dplane_ctx *, void *, size_t), + bool ignore_res) +{ + int seq; + ssize_t size; + struct nlmsghdr *msgh; + struct nlsock *nl; + + size = (*msg_encoder)(ctx, bth->buf_head, bth->bufsiz - bth->curlen); + + /* + * If there was an error while encoding the message (other than buffer + * overflow) then return an error. + */ + if (size < 0) + return FRR_NETLINK_ERROR; + + /* + * If the message doesn't fit entirely in the buffer then send the batch + * and retry. 
+ */ + if (size == 0) { + nl_batch_send(bth); + size = (*msg_encoder)(ctx, bth->buf_head, + bth->bufsiz - bth->curlen); + /* + * If the message doesn't fit in the empty buffer then just + * return an error. + */ + if (size <= 0) + return FRR_NETLINK_ERROR; + } + + seq = dplane_ctx_get_ns(ctx)->seq; + nl = kernel_netlink_nlsock_lookup(dplane_ctx_get_ns_sock(ctx)); + + if (ignore_res) + seq++; + + msgh = (struct nlmsghdr *)bth->buf_head; + msgh->nlmsg_seq = seq; + msgh->nlmsg_pid = nl->snl.nl_pid; + + bth->zns = dplane_ctx_get_ns(ctx); + bth->buf_head = ((char *)bth->buf_head) + size; + bth->curlen += size; + bth->msgcnt++; + + return FRR_NETLINK_QUEUED; +} + +static enum netlink_msg_status nl_put_msg(struct nl_batch *bth, + struct zebra_dplane_ctx *ctx) +{ + if (dplane_ctx_is_skip_kernel(ctx)) + return FRR_NETLINK_SUCCESS; + + switch (dplane_ctx_get_op(ctx)) { + + case DPLANE_OP_ROUTE_INSTALL: + case DPLANE_OP_ROUTE_UPDATE: + case DPLANE_OP_ROUTE_DELETE: + return netlink_put_route_update_msg(bth, ctx); + + case DPLANE_OP_NH_INSTALL: + case DPLANE_OP_NH_UPDATE: + case DPLANE_OP_NH_DELETE: + return netlink_put_nexthop_update_msg(bth, ctx); + + case DPLANE_OP_LSP_INSTALL: + case DPLANE_OP_LSP_UPDATE: + case DPLANE_OP_LSP_DELETE: + return netlink_put_lsp_update_msg(bth, ctx); + + case DPLANE_OP_PW_INSTALL: + case DPLANE_OP_PW_UNINSTALL: + return netlink_put_pw_update_msg(bth, ctx); + + case DPLANE_OP_ADDR_INSTALL: + case DPLANE_OP_ADDR_UNINSTALL: + return netlink_put_address_update_msg(bth, ctx); + + case DPLANE_OP_MAC_INSTALL: + case DPLANE_OP_MAC_DELETE: + return netlink_put_mac_update_msg(bth, ctx); + + case DPLANE_OP_NEIGH_INSTALL: + case DPLANE_OP_NEIGH_UPDATE: + case DPLANE_OP_NEIGH_DELETE: + case DPLANE_OP_VTEP_ADD: + case DPLANE_OP_VTEP_DELETE: + case DPLANE_OP_NEIGH_DISCOVER: + case DPLANE_OP_NEIGH_IP_INSTALL: + case DPLANE_OP_NEIGH_IP_DELETE: + case DPLANE_OP_NEIGH_TABLE_UPDATE: + return netlink_put_neigh_update_msg(bth, ctx); + + case DPLANE_OP_RULE_ADD: + 
case DPLANE_OP_RULE_DELETE: + case DPLANE_OP_RULE_UPDATE: + return netlink_put_rule_update_msg(bth, ctx); + + case DPLANE_OP_SYS_ROUTE_ADD: + case DPLANE_OP_SYS_ROUTE_DELETE: + case DPLANE_OP_ROUTE_NOTIFY: + case DPLANE_OP_LSP_NOTIFY: + case DPLANE_OP_BR_PORT_UPDATE: + return FRR_NETLINK_SUCCESS; + + case DPLANE_OP_IPTABLE_ADD: + case DPLANE_OP_IPTABLE_DELETE: + case DPLANE_OP_IPSET_ADD: + case DPLANE_OP_IPSET_DELETE: + case DPLANE_OP_IPSET_ENTRY_ADD: + case DPLANE_OP_IPSET_ENTRY_DELETE: + return FRR_NETLINK_ERROR; + + case DPLANE_OP_GRE_SET: + return netlink_put_gre_set_msg(bth, ctx); + + case DPLANE_OP_INTF_ADDR_ADD: + case DPLANE_OP_INTF_ADDR_DEL: + case DPLANE_OP_NONE: + return FRR_NETLINK_ERROR; + + case DPLANE_OP_INTF_NETCONFIG: + return netlink_put_intf_netconfig(bth, ctx); + + case DPLANE_OP_INTF_INSTALL: + case DPLANE_OP_INTF_UPDATE: + case DPLANE_OP_INTF_DELETE: + return netlink_put_intf_update_msg(bth, ctx); + + case DPLANE_OP_TC_INSTALL: + case DPLANE_OP_TC_UPDATE: + case DPLANE_OP_TC_DELETE: + return netlink_put_tc_update_msg(bth, ctx); + } + + return FRR_NETLINK_ERROR; +} + +void kernel_update_multi(struct dplane_ctx_q *ctx_list) +{ + struct nl_batch batch; + struct zebra_dplane_ctx *ctx; + struct dplane_ctx_q handled_list; + enum netlink_msg_status res; + + TAILQ_INIT(&handled_list); + nl_batch_init(&batch, &handled_list); + + while (true) { + ctx = dplane_ctx_dequeue(ctx_list); + if (ctx == NULL) + break; + + if (batch.zns != NULL + && batch.zns->ns_id != dplane_ctx_get_ns(ctx)->ns_id) + nl_batch_send(&batch); + + /* + * Assume all messages will succeed and then mark only the ones + * that failed. 
+ */ + dplane_ctx_set_status(ctx, ZEBRA_DPLANE_REQUEST_SUCCESS); + + res = nl_put_msg(&batch, ctx); + + dplane_ctx_enqueue_tail(&(batch.ctx_list), ctx); + if (res == FRR_NETLINK_ERROR) + dplane_ctx_set_status(ctx, + ZEBRA_DPLANE_REQUEST_FAILURE); + + if (batch.curlen > batch.limit) + nl_batch_send(&batch); + } + + nl_batch_send(&batch); + + TAILQ_INIT(ctx_list); + dplane_ctx_list_append(ctx_list, &handled_list); +} + +struct nlsock *kernel_netlink_nlsock_lookup(int sock) +{ + struct nlsock lookup, *retval; + + lookup.sock = sock; + + NLSOCK_LOCK(); + retval = hash_lookup(nlsock_hash, &lookup); + NLSOCK_UNLOCK(); + + return retval; +} + +/* Insert nlsock entry into hash */ +static void kernel_netlink_nlsock_insert(struct nlsock *nls) +{ + NLSOCK_LOCK(); + (void)hash_get(nlsock_hash, nls, hash_alloc_intern); + NLSOCK_UNLOCK(); +} + +/* Remove nlsock entry from hash */ +static void kernel_netlink_nlsock_remove(struct nlsock *nls) +{ + NLSOCK_LOCK(); + (void)hash_release(nlsock_hash, nls); + NLSOCK_UNLOCK(); +} + +static uint32_t kernel_netlink_nlsock_key(const void *arg) +{ + const struct nlsock *nl = arg; + + return nl->sock; +} + +static bool kernel_netlink_nlsock_hash_equal(const void *arg1, const void *arg2) +{ + const struct nlsock *nl1 = arg1; + const struct nlsock *nl2 = arg2; + + if (nl1->sock == nl2->sock) + return true; + + return false; +} + +/* Exported interface function. This function simply calls + netlink_socket (). 
*/ +void kernel_init(struct zebra_ns *zns) +{ + uint32_t groups, dplane_groups, ext_groups; +#if defined SOL_NETLINK + int one, ret; +#endif + + /* + * Initialize netlink sockets + * + * If RTMGRP_XXX exists use that, but at some point + * I think the kernel developers realized that + * keeping track of all the different values would + * lead to confusion, so we need to convert the + * RTNLGRP_XXX to a bit position for ourself + */ + groups = RTMGRP_LINK | + RTMGRP_IPV4_ROUTE | + RTMGRP_IPV4_IFADDR | + RTMGRP_IPV6_ROUTE | + RTMGRP_IPV6_IFADDR | + RTMGRP_IPV4_MROUTE | + RTMGRP_NEIGH | + ((uint32_t) 1 << (RTNLGRP_IPV4_RULE - 1)) | + ((uint32_t) 1 << (RTNLGRP_IPV6_RULE - 1)) | + ((uint32_t) 1 << (RTNLGRP_NEXTHOP - 1)); + + dplane_groups = (RTMGRP_LINK | + RTMGRP_IPV4_IFADDR | + RTMGRP_IPV6_IFADDR | + ((uint32_t) 1 << (RTNLGRP_IPV4_NETCONF - 1)) | + ((uint32_t) 1 << (RTNLGRP_IPV6_NETCONF - 1)) | + ((uint32_t) 1 << (RTNLGRP_MPLS_NETCONF - 1))); + + /* Use setsockopt for > 31 group */ + ext_groups = RTNLGRP_TUNNEL; + + snprintf(zns->netlink.name, sizeof(zns->netlink.name), + "netlink-listen (NS %u)", zns->ns_id); + zns->netlink.sock = -1; + if (netlink_socket(&zns->netlink, groups, &ext_groups, 1, zns->ns_id) < + 0) { + zlog_err("Failure to create %s socket", + zns->netlink.name); + exit(-1); + } + + kernel_netlink_nlsock_insert(&zns->netlink); + + snprintf(zns->netlink_cmd.name, sizeof(zns->netlink_cmd.name), + "netlink-cmd (NS %u)", zns->ns_id); + zns->netlink_cmd.sock = -1; + if (netlink_socket(&zns->netlink_cmd, 0, 0, 0, zns->ns_id) < 0) { + zlog_err("Failure to create %s socket", + zns->netlink_cmd.name); + exit(-1); + } + + kernel_netlink_nlsock_insert(&zns->netlink_cmd); + + /* Outbound socket for dplane programming of the host OS. 
*/ + snprintf(zns->netlink_dplane_out.name, + sizeof(zns->netlink_dplane_out.name), "netlink-dp (NS %u)", + zns->ns_id); + zns->netlink_dplane_out.sock = -1; + if (netlink_socket(&zns->netlink_dplane_out, 0, 0, 0, zns->ns_id) < 0) { + zlog_err("Failure to create %s socket", + zns->netlink_dplane_out.name); + exit(-1); + } + + kernel_netlink_nlsock_insert(&zns->netlink_dplane_out); + + /* Inbound socket for OS events coming to the dplane. */ + snprintf(zns->netlink_dplane_in.name, + sizeof(zns->netlink_dplane_in.name), "netlink-dp-in (NS %u)", + zns->ns_id); + zns->netlink_dplane_in.sock = -1; + if (netlink_socket(&zns->netlink_dplane_in, dplane_groups, 0, 0, + zns->ns_id) < 0) { + zlog_err("Failure to create %s socket", + zns->netlink_dplane_in.name); + exit(-1); + } + + kernel_netlink_nlsock_insert(&zns->netlink_dplane_in); + + /* + * SOL_NETLINK is not available on all platforms yet + * apparently. It's in bits/socket.h which I am not + * sure that we want to pull into our build system. + */ +#if defined SOL_NETLINK + /* + * Let's tell the kernel that we want to receive extended + * ACKS over our command socket(s) + */ + one = 1; + ret = setsockopt(zns->netlink_cmd.sock, SOL_NETLINK, NETLINK_EXT_ACK, + &one, sizeof(one)); + + if (ret < 0) + zlog_notice("Registration for extended cmd ACK failed : %d %s", + errno, safe_strerror(errno)); + + one = 1; + ret = setsockopt(zns->netlink_dplane_out.sock, SOL_NETLINK, + NETLINK_EXT_ACK, &one, sizeof(one)); + + if (ret < 0) + zlog_notice("Registration for extended dp ACK failed : %d %s", + errno, safe_strerror(errno)); + + /* + * Trim off the payload of the original netlink message in the + * acknowledgment. This option is available since Linux 4.2, so if + * setsockopt fails, ignore the error. 
+ */ + one = 1; + ret = setsockopt(zns->netlink_dplane_out.sock, SOL_NETLINK, + NETLINK_CAP_ACK, &one, sizeof(one)); + if (ret < 0) + zlog_notice( + "Registration for reduced ACK packet size failed, probably running an early kernel"); +#endif + + /* Register kernel socket. */ + if (fcntl(zns->netlink.sock, F_SETFL, O_NONBLOCK) < 0) + flog_err_sys(EC_LIB_SOCKET, "Can't set %s socket flags: %s", + zns->netlink.name, safe_strerror(errno)); + + if (fcntl(zns->netlink_cmd.sock, F_SETFL, O_NONBLOCK) < 0) + zlog_err("Can't set %s socket error: %s(%d)", + zns->netlink_cmd.name, safe_strerror(errno), errno); + + if (fcntl(zns->netlink_dplane_out.sock, F_SETFL, O_NONBLOCK) < 0) + zlog_err("Can't set %s socket error: %s(%d)", + zns->netlink_dplane_out.name, safe_strerror(errno), + errno); + + if (fcntl(zns->netlink_dplane_in.sock, F_SETFL, O_NONBLOCK) < 0) + zlog_err("Can't set %s socket error: %s(%d)", + zns->netlink_dplane_in.name, safe_strerror(errno), + errno); + + /* Set receive buffer size if it's set from command line */ + if (rcvbufsize) { + netlink_recvbuf(&zns->netlink, rcvbufsize); + netlink_recvbuf(&zns->netlink_cmd, rcvbufsize); + netlink_recvbuf(&zns->netlink_dplane_out, rcvbufsize); + netlink_recvbuf(&zns->netlink_dplane_in, rcvbufsize); + } + + /* Set filter for inbound sockets, to exclude events we've generated + * ourselves. 
+ */ + netlink_install_filter(zns->netlink.sock, zns->netlink_cmd.snl.nl_pid, + zns->netlink_dplane_out.snl.nl_pid); + + netlink_install_filter(zns->netlink_dplane_in.sock, + zns->netlink_cmd.snl.nl_pid, + zns->netlink_dplane_out.snl.nl_pid); + + zns->t_netlink = NULL; + + thread_add_read(zrouter.master, kernel_read, zns, + zns->netlink.sock, &zns->t_netlink); + + rt_netlink_init(); +} + +/* Helper to clean up an nlsock */ +static void kernel_nlsock_fini(struct nlsock *nls) +{ + if (nls && nls->sock >= 0) { + kernel_netlink_nlsock_remove(nls); + close(nls->sock); + nls->sock = -1; + XFREE(MTYPE_NL_BUF, nls->buf); + nls->buflen = 0; + } +} + +void kernel_terminate(struct zebra_ns *zns, bool complete) +{ + THREAD_OFF(zns->t_netlink); + + kernel_nlsock_fini(&zns->netlink); + + kernel_nlsock_fini(&zns->netlink_cmd); + + kernel_nlsock_fini(&zns->netlink_dplane_in); + + /* During zebra shutdown, we need to leave the dataplane socket + * around until all work is done. + */ + if (complete) + kernel_nlsock_fini(&zns->netlink_dplane_out); +} + +/* + * Global init for platform-/OS-specific things + */ +void kernel_router_init(void) +{ + /* Init nlsock hash and lock */ + pthread_mutex_init(&nlsock_mutex, NULL); + nlsock_hash = hash_create_size(8, kernel_netlink_nlsock_key, + kernel_netlink_nlsock_hash_equal, + "Netlink Socket Hash"); +} + +/* + * Global deinit for platform-/OS-specific things + */ +void kernel_router_terminate(void) +{ + pthread_mutex_destroy(&nlsock_mutex); + + hash_free(nlsock_hash); + nlsock_hash = NULL; +} + +#endif /* HAVE_NETLINK */ diff --git a/zebra/kernel_netlink.h b/zebra/kernel_netlink.h new file mode 100644 index 0000000..08cd706 --- /dev/null +++ b/zebra/kernel_netlink.h @@ -0,0 +1,193 @@ +/* Declarations and definitions for kernel interaction over netlink + * Copyright (C) 2016 Cumulus Networks, Inc. + * + * This file is part of GNU Zebra. 
+ * + * GNU Zebra is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License as published by the + * Free Software Foundation; either version 2, or (at your option) any + * later version. + * + * GNU Zebra is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. + * + * You should have received a copy of the GNU General Public License along + * with this program; see the file COPYING; if not, write to the Free Software + * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA + */ + +#ifndef _ZEBRA_KERNEL_NETLINK_H +#define _ZEBRA_KERNEL_NETLINK_H + +#ifdef __cplusplus +extern "C" { +#endif + +#ifdef HAVE_NETLINK + +#define RTM_NHA(h) \ + ((struct rtattr *)(((char *)(h)) + NLMSG_ALIGN(sizeof(struct nhmsg)))) + + +#define NL_RCV_PKT_BUF_SIZE 32768 +#define NL_PKT_BUF_SIZE 8192 + +/* + * nl_attr_put - add an attribute to the Netlink message. + * + * Returns true if the attribute could be added to the message (fits into the + * buffer), otherwise false is returned. + */ +extern bool nl_attr_put(struct nlmsghdr *n, unsigned int maxlen, int type, + const void *data, unsigned int alen); +extern bool nl_attr_put8(struct nlmsghdr *n, unsigned int maxlen, int type, + uint8_t data); +extern bool nl_attr_put16(struct nlmsghdr *n, unsigned int maxlen, int type, + uint16_t data); +extern bool nl_attr_put32(struct nlmsghdr *n, unsigned int maxlen, int type, + uint32_t data); + +/* + * nl_attr_nest - start an attribute nest. + * + * Returns a valid pointer to the beginning of the nest if the attribute + * describing the nest could be added to the message (fits into the buffer), + * otherwise NULL is returned. 
+ */ +extern struct rtattr *nl_attr_nest(struct nlmsghdr *n, unsigned int maxlen, + int type); + +/* + * nl_attr_nest_end - finalize nesting of attributes. + * + * Updates the length field of the attribute header to include the appeneded + * attributes. Returns a total length of the Netlink message. + */ +extern int nl_attr_nest_end(struct nlmsghdr *n, struct rtattr *nest); + +/* + * nl_attr_rtnh - append a rtnexthop record to the Netlink message. + * + * Returns a valid pointer to the rtnexthop struct if it could be added to + * the message (fits into the buffer), otherwise NULL is returned. + */ +extern struct rtnexthop *nl_attr_rtnh(struct nlmsghdr *n, unsigned int maxlen); + +/* + * nl_attr_rtnh_end - finalize adding a rtnexthop record. + * + * Updates the length field of the rtnexthop to include the appeneded + * attributes. + */ +extern void nl_attr_rtnh_end(struct nlmsghdr *n, struct rtnexthop *rtnh); + +extern void netlink_parse_rtattr(struct rtattr **tb, int max, + struct rtattr *rta, int len); +extern void netlink_parse_rtattr_flags(struct rtattr **tb, int max, + struct rtattr *rta, int len, + unsigned short flags); +extern void netlink_parse_rtattr_nested(struct rtattr **tb, int max, + struct rtattr *rta); +/* + * nl_addraw_l copies raw form the netlink message buffer into netlink + * message header pointer. It ensures the aligned data buffer does not + * override past max length. + * return value is 0 if its successful + */ +extern bool nl_addraw_l(struct nlmsghdr *n, unsigned int maxlen, + const void *data, unsigned int len); +/* + * nl_rta_put - add an additional optional attribute(rtattr) to the + * Netlink message buffer. + * + * Returns true if the attribute could be added to the message (fits into the + * buffer), otherwise false is returned. 
+ */ +extern bool nl_rta_put(struct rtattr *rta, unsigned int maxlen, int type, + const void *data, int alen); +extern bool nl_rta_put16(struct rtattr *rta, unsigned int maxlen, int type, + uint16_t data); +extern bool nl_rta_put64(struct rtattr *rta, unsigned int maxlen, int type, + uint64_t data); +/* + * nl_rta_nest - start an additional optional attribute (rtattr) nest. + * + * Returns a valid pointer to the beginning of the nest if the attribute + * describing the nest could be added to the message (fits into the buffer), + * otherwise NULL is returned. + */ +extern struct rtattr *nl_rta_nest(struct rtattr *rta, unsigned int maxlen, + int type); +/* + * nl_rta_nest_end - finalize nesting of an aditionl optionl attributes. + * + * Updates the length field of the attribute header to include the appeneded + * attributes. Returns a total length of the Netlink message. + */ +extern int nl_rta_nest_end(struct rtattr *rta, struct rtattr *nest); +extern const char *nl_msg_type_to_str(uint16_t msg_type); +extern const char *nl_rtproto_to_str(uint8_t rtproto); +extern const char *nl_family_to_str(uint8_t family); +extern const char *nl_rttype_to_str(uint8_t rttype); + +extern int netlink_parse_info(int (*filter)(struct nlmsghdr *, ns_id_t, int), + struct nlsock *nl, + const struct zebra_dplane_info *dp_info, + int count, bool startup); +extern int netlink_talk_filter(struct nlmsghdr *h, ns_id_t ns, int startup); +extern int netlink_talk(int (*filter)(struct nlmsghdr *, ns_id_t, int startup), + struct nlmsghdr *n, struct nlsock *nl, + struct zebra_ns *zns, bool startup); +extern int netlink_request(struct nlsock *nl, void *req); + +enum netlink_msg_status { + FRR_NETLINK_SUCCESS, + FRR_NETLINK_ERROR, + FRR_NETLINK_QUEUED, +}; + +struct nl_batch; + +/* + * netlink_batch_add_msg - add message to the netlink batch using dplane + * context object. + * + * @ctx: Dataplane context + * @msg_encoder: A function that encodes dplane context object into + * netlink message. 
Should take dplane context object, + * pointer to a buffer and buffer's length as parameters + * and should return -1 on error, 0 on buffer overflow or + * size of the encoded message. + * @ignore_res: Whether the result of this message should be ignored. + * This should be used in some 'update' cases where we + * need to send two messages for one context object. + * + * Return: Status of the message. + */ +extern enum netlink_msg_status netlink_batch_add_msg( + struct nl_batch *bth, struct zebra_dplane_ctx *ctx, + ssize_t (*msg_encoder)(struct zebra_dplane_ctx *, void *, size_t), + bool ignore_res); + +/* + * Vty/cli apis + */ +extern int netlink_config_write_helper(struct vty *vty); + +/* + * Configure size of the batch buffer and sending threshold. If 'unset', reset + * to default value. + */ +extern void netlink_set_batch_buffer_size(uint32_t size, uint32_t threshold, + bool set); + +extern struct nlsock *kernel_netlink_nlsock_lookup(int sock); +#endif /* HAVE_NETLINK */ + +#ifdef __cplusplus +} +#endif + +#endif /* _ZEBRA_KERNEL_NETLINK_H */ diff --git a/zebra/kernel_socket.c b/zebra/kernel_socket.c new file mode 100644 index 0000000..e76d8c0 --- /dev/null +++ b/zebra/kernel_socket.c @@ -0,0 +1,1644 @@ +/* Kernel communication using routing socket. + * Copyright (C) 1999 Kunihiro Ishiguro + * + * This file is part of GNU Zebra. + * + * GNU Zebra is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License as published by the + * Free Software Foundation; either version 2, or (at your option) any + * later version. + * + * GNU Zebra is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. 
+ * + * You should have received a copy of the GNU General Public License along + * with this program; see the file COPYING; if not, write to the Free Software + * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA + */ + +#include <zebra.h> + +#ifndef HAVE_NETLINK + +#include <net/if_types.h> +#ifdef __OpenBSD__ +#include <netmpls/mpls.h> +#endif + +#include "if.h" +#include "prefix.h" +#include "sockunion.h" +#include "connected.h" +#include "memory.h" +#include "ioctl.h" +#include "log.h" +#include "table.h" +#include "rib.h" +#include "privs.h" +#include "vrf.h" +#include "lib_errors.h" + +#include "zebra/rt.h" +#include "zebra/interface.h" +#include "zebra/zebra_router.h" +#include "zebra/debug.h" +#include "zebra/kernel_socket.h" +#include "zebra/rib.h" +#include "zebra/zebra_errors.h" +#include "zebra/zebra_ptm.h" + +extern struct zebra_privs_t zserv_privs; + +/* + * Historically, the BSD routing socket has aligned data following a + * struct sockaddr to sizeof(long), which was 4 bytes on some + * platforms, and 8 bytes on others. NetBSD 6 changed the routing + * socket to align to sizeof(uint64_t), which is 8 bytes. OS X + * appears to align to sizeof(int), which is 4 bytes. + * + * Alignment of zero-sized sockaddrs is nonsensical, but historically + * BSD defines RT_ROUNDUP(0) to be the alignment interval (rather than + * 0). We follow this practice without questioning it, but it is a + * bug if frr calls ROUNDUP with 0. + */ +#ifdef __APPLE__ +#define ROUNDUP_TYPE int +#else +#define ROUNDUP_TYPE long +#endif + +/* + * Because of these varying conventions, the only sane approach is for + * the <net/route.h> header to define some flavor of ROUNDUP macro. 
+ */ + +/* OS X (Xcode as of 2014-12) is known not to define RT_ROUNDUP */ +#if defined(RT_ROUNDUP) +#define ROUNDUP(a) RT_ROUNDUP(a) +#endif /* defined(RT_ROUNDUP) */ + +/* + * If ROUNDUP has not yet been defined in terms of platform-provided + * defines, attempt to cope with heuristics. + */ +#if !defined(ROUNDUP) + +/* + * If you're porting to a platform that changed RT_ROUNDUP but doesn't + * have it in its headers, this will break rather obviously and you'll + * have to fix it here. + */ +#define ROUNDUP(a) \ + ((a) > 0 ? (1 + (((a)-1) | (sizeof(ROUNDUP_TYPE) - 1))) \ + : sizeof(ROUNDUP_TYPE)) + +#endif /* defined(ROUNDUP) */ + + +#if defined(SA_SIZE) +/* SAROUNDUP is the only thing we need, and SA_SIZE provides that */ +#define SAROUNDUP(a) SA_SIZE(a) +#else /* !SA_SIZE */ +/* + * Given a pointer (sockaddr or void *), return the number of bytes + * taken up by the sockaddr and any padding needed for alignment. + */ +#if defined(HAVE_STRUCT_SOCKADDR_SA_LEN) +#define SAROUNDUP(X) ROUNDUP(((struct sockaddr *)(X))->sa_len) +#else +/* + * One would hope all fixed-size structure definitions are aligned, + * but round them up nonetheless. + */ +#define SAROUNDUP(X) \ + (((struct sockaddr *)(X))->sa_family == AF_INET \ + ? ROUNDUP(sizeof(struct sockaddr_in)) \ + : (((struct sockaddr *)(X))->sa_family == AF_INET6 \ + ? ROUNDUP(sizeof(struct sockaddr_in6)) \ + : (((struct sockaddr *)(X))->sa_family == AF_LINK \ + ? ROUNDUP(sizeof(struct sockaddr_dl)) \ + : sizeof(struct sockaddr)))) +#endif /* HAVE_STRUCT_SOCKADDR_SA_LEN */ + +#endif /* !SA_SIZE */ + +/* Routing socket message types. 
*/ +const struct message rtm_type_str[] = {{RTM_ADD, "RTM_ADD"}, + {RTM_DELETE, "RTM_DELETE"}, + {RTM_CHANGE, "RTM_CHANGE"}, + {RTM_GET, "RTM_GET"}, + {RTM_LOSING, "RTM_LOSING"}, + {RTM_REDIRECT, "RTM_REDIRECT"}, + {RTM_MISS, "RTM_MISS"}, +#ifdef RTM_LOCK + {RTM_LOCK, "RTM_LOCK"}, +#endif /* RTM_LOCK */ +#ifdef OLDADD + {RTM_OLDADD, "RTM_OLDADD"}, +#endif /* RTM_OLDADD */ +#ifdef RTM_OLDDEL + {RTM_OLDDEL, "RTM_OLDDEL"}, +#endif /* RTM_OLDDEL */ +#ifdef RTM_RESOLVE + {RTM_RESOLVE, "RTM_RESOLVE"}, +#endif /* RTM_RESOLVE */ + {RTM_NEWADDR, "RTM_NEWADDR"}, + {RTM_DELADDR, "RTM_DELADDR"}, + {RTM_IFINFO, "RTM_IFINFO"}, +#ifdef RTM_OIFINFO + {RTM_OIFINFO, "RTM_OIFINFO"}, +#endif /* RTM_OIFINFO */ +#ifdef RTM_NEWMADDR + {RTM_NEWMADDR, "RTM_NEWMADDR"}, +#endif /* RTM_NEWMADDR */ +#ifdef RTM_DELMADDR + {RTM_DELMADDR, "RTM_DELMADDR"}, +#endif /* RTM_DELMADDR */ +#ifdef RTM_IFANNOUNCE + {RTM_IFANNOUNCE, "RTM_IFANNOUNCE"}, +#endif /* RTM_IFANNOUNCE */ +#ifdef RTM_IEEE80211 + {RTM_IEEE80211, "RTM_IEEE80211"}, +#endif + {0}}; + +static const struct message rtm_flag_str[] = {{RTF_UP, "UP"}, + {RTF_GATEWAY, "GATEWAY"}, + {RTF_HOST, "HOST"}, + {RTF_REJECT, "REJECT"}, + {RTF_DYNAMIC, "DYNAMIC"}, + {RTF_MODIFIED, "MODIFIED"}, + {RTF_DONE, "DONE"}, +#ifdef RTF_MASK + {RTF_MASK, "MASK"}, +#endif /* RTF_MASK */ +#ifdef RTF_CLONING + {RTF_CLONING, "CLONING"}, +#endif /* RTF_CLONING */ +#ifdef RTF_XRESOLVE + {RTF_XRESOLVE, "XRESOLVE"}, +#endif /* RTF_XRESOLVE */ +#ifdef RTF_LLINFO + {RTF_LLINFO, "LLINFO"}, +#endif /* RTF_LLINFO */ + {RTF_STATIC, "STATIC"}, + {RTF_BLACKHOLE, "BLACKHOLE"}, +#ifdef RTF_PRIVATE + {RTF_PRIVATE, "PRIVATE"}, +#endif /* RTF_PRIVATE */ + {RTF_PROTO1, "PROTO1"}, + {RTF_PROTO2, "PROTO2"}, +#ifdef RTF_PRCLONING + {RTF_PRCLONING, "PRCLONING"}, +#endif /* RTF_PRCLONING */ +#ifdef RTF_WASCLONED + {RTF_WASCLONED, "WASCLONED"}, +#endif /* RTF_WASCLONED */ +#ifdef RTF_PROTO3 + {RTF_PROTO3, "PROTO3"}, +#endif /* RTF_PROTO3 */ +#ifdef RTF_PINNED + {RTF_PINNED, "PINNED"}, 
+#endif /* RTF_PINNED */ +#ifdef RTF_LOCAL + {RTF_LOCAL, "LOCAL"}, +#endif /* RTF_LOCAL */ +#ifdef RTF_BROADCAST + {RTF_BROADCAST, "BROADCAST"}, +#endif /* RTF_BROADCAST */ +#ifdef RTF_MULTICAST + {RTF_MULTICAST, "MULTICAST"}, +#endif /* RTF_MULTICAST */ +#ifdef RTF_MULTIRT + {RTF_MULTIRT, "MULTIRT"}, +#endif /* RTF_MULTIRT */ +#ifdef RTF_SETSRC + {RTF_SETSRC, "SETSRC"}, +#endif /* RTF_SETSRC */ + {0}}; + +/* Kernel routing update socket. */ +int routing_sock = -1; + +/* Kernel dataplane routing update socket, used in the dataplane pthread + * context. + */ +int dplane_routing_sock = -1; + +/* Yes I'm checking ugly routing socket behavior. */ +/* #define DEBUG */ + +size_t _rta_get(caddr_t sap, void *destp, size_t destlen, bool checkaf); +size_t rta_get(caddr_t sap, void *dest, size_t destlen); +size_t rta_getattr(caddr_t sap, void *destp, size_t destlen); +size_t rta_getsdlname(caddr_t sap, void *dest, short *destlen); +const char *rtatostr(unsigned int flags, char *buf, size_t buflen); + +/* Supported address family check. */ +static inline int af_check(int family) +{ + if (family == AF_INET) + return 1; + if (family == AF_INET6) + return 1; + return 0; +} + +size_t _rta_get(caddr_t sap, void *destp, size_t destlen, bool checkaf) +{ + struct sockaddr *sa = (struct sockaddr *)sap; + struct sockaddr_dl *sdl; + uint8_t *dest = destp; + size_t tlen, copylen; + +#ifdef HAVE_STRUCT_SOCKADDR_SA_LEN + copylen = sa->sa_len; + tlen = (copylen == 0) ? sizeof(ROUNDUP_TYPE) : ROUNDUP(copylen); +#else /* !HAVE_STRUCT_SOCKADDR_SA_LEN */ + copylen = tlen = SAROUNDUP(sap); +#endif /* !HAVE_STRUCT_SOCKADDR_SA_LEN */ + + if (copylen > 0 && dest != NULL) { + if (checkaf && af_check(sa->sa_family) == 0) + return tlen; + /* + * Handle sockaddr_dl corner case: + * RTA_NETMASK might be AF_LINK, but it doesn't anything + * relevant (e.g. zeroed out fields). Check for this + * case and avoid warning log message. 
+ */ + if (sa->sa_family == AF_LINK) { + sdl = (struct sockaddr_dl *)sa; + if (sdl->sdl_index == 0 || sdl->sdl_nlen == 0) + copylen = destlen; + } + + if (copylen > destlen) { + zlog_warn( + "%s: destination buffer too small (%zu vs %zu)", + __func__, copylen, destlen); + memcpy(dest, sap, destlen); + } else + memcpy(dest, sap, copylen); + } + + return tlen; +} + +size_t rta_get(caddr_t sap, void *destp, size_t destlen) +{ + return _rta_get(sap, destp, destlen, true); +} + +size_t rta_getattr(caddr_t sap, void *destp, size_t destlen) +{ + return _rta_get(sap, destp, destlen, false); +} + +size_t rta_getsdlname(caddr_t sap, void *destp, short *destlen) +{ + struct sockaddr_dl *sdl = (struct sockaddr_dl *)sap; + uint8_t *dest = destp; + size_t tlen, copylen; + + copylen = sdl->sdl_nlen; +#ifdef HAVE_STRUCT_SOCKADDR_SA_LEN + struct sockaddr *sa = (struct sockaddr *)sap; + + tlen = (sa->sa_len == 0) ? sizeof(ROUNDUP_TYPE) : ROUNDUP(sa->sa_len); +#else /* !HAVE_STRUCT_SOCKADDR_SA_LEN */ + tlen = SAROUNDUP(sap); +#endif /* !HAVE_STRUCT_SOCKADDR_SA_LEN */ + + if (copylen > 0 && dest != NULL && sdl->sdl_family == AF_LINK) { + if (copylen > IFNAMSIZ) { + zlog_warn( + "%s: destination buffer too small (%zu vs %d)", + __func__, copylen, IFNAMSIZ); + memcpy(dest, sdl->sdl_data, IFNAMSIZ); + dest[IFNAMSIZ] = 0; + *destlen = IFNAMSIZ; + } else { + memcpy(dest, sdl->sdl_data, copylen); + dest[copylen] = 0; + *destlen = copylen; + } + } else + *destlen = 0; + + return tlen; +} + +const char *rtatostr(unsigned int flags, char *buf, size_t buflen) +{ + const char *flagstr, *bufstart; + int bit, wlen; + char ustr[32]; + + /* Hold the pointer to the buffer beginning. 
*/ + bufstart = buf; + + for (bit = 1; bit; bit <<= 1) { + if ((flags & bit) == 0) + continue; + + switch (bit) { + case RTA_DST: + flagstr = "DST"; + break; + case RTA_GATEWAY: + flagstr = "GATEWAY"; + break; + case RTA_NETMASK: + flagstr = "NETMASK"; + break; +#ifdef RTA_GENMASK + case RTA_GENMASK: + flagstr = "GENMASK"; + break; +#endif /* RTA_GENMASK */ + case RTA_IFP: + flagstr = "IFP"; + break; + case RTA_IFA: + flagstr = "IFA"; + break; +#ifdef RTA_AUTHOR + case RTA_AUTHOR: + flagstr = "AUTHOR"; + break; +#endif /* RTA_AUTHOR */ + case RTA_BRD: + flagstr = "BRD"; + break; +#ifdef RTA_SRC + case RTA_SRC: + flagstr = "SRC"; + break; +#endif /* RTA_SRC */ +#ifdef RTA_SRCMASK + case RTA_SRCMASK: + flagstr = "SRCMASK"; + break; +#endif /* RTA_SRCMASK */ +#ifdef RTA_LABEL + case RTA_LABEL: + flagstr = "LABEL"; + break; +#endif /* RTA_LABEL */ + + default: + snprintf(ustr, sizeof(ustr), "0x%x", bit); + flagstr = ustr; + break; + } + + wlen = snprintf(buf, buflen, "%s,", flagstr); + buf += wlen; + buflen -= wlen; + } + + /* Check for empty buffer. */ + if (bufstart != buf) + buf--; + + /* Remove the last comma. */ + *buf = 0; + + return bufstart; +} + +/* Dump routing table flag for debug purpose. 
 */
/* Log the set of RTF_* flags in 'flag', using the rtm_flag_str table. */
static void rtm_flag_dump(int flag)
{
	const struct message *mes;
	static char buf[BUFSIZ];

	buf[0] = '\0';
	for (mes = rtm_flag_str; mes->key != 0; mes++) {
		if (mes->key & flag) {
			strlcat(buf, mes->str, BUFSIZ);
			strlcat(buf, " ", BUFSIZ);
		}
	}
	zlog_debug("Kernel: %s", buf);
}

#ifdef RTM_IFANNOUNCE
/* Handle an RTM_IFANNOUNCE message: create the interface on IFAN_ARRIVAL
 * (or fill in a pre-configured larval interface), delete it on
 * IFAN_DEPARTURE, and refresh flags/MTU/metric for any surviving ifp.
 */
static int ifan_read(struct if_announcemsghdr *ifan)
{
	struct interface *ifp;

	ifp = if_lookup_by_index(ifan->ifan_index, VRF_DEFAULT);

	/* A known interface must either match the announced index or still
	 * be a config-only placeholder awaiting its kernel index. */
	if (ifp)
		assert((ifp->ifindex == ifan->ifan_index)
		       || (ifp->ifindex == IFINDEX_INTERNAL));

	if ((ifp == NULL) || ((ifp->ifindex == IFINDEX_INTERNAL)
			      && (ifan->ifan_what == IFAN_ARRIVAL))) {
		if (IS_ZEBRA_DEBUG_KERNEL)
			zlog_debug(
				"%s: creating interface for ifindex %d, name %s",
				__func__, ifan->ifan_index, ifan->ifan_name);

		/* Create Interface */
		ifp = if_get_by_name(ifan->ifan_name, VRF_DEFAULT,
				     VRF_DEFAULT_NAME);
		if_set_index(ifp, ifan->ifan_index);

		if_get_metric(ifp);
		if_add_update(ifp);
	} else if (ifp != NULL && ifan->ifan_what == IFAN_DEPARTURE)
		if_delete_update(&ifp);

	/* ifp may have been NULLed by if_delete_update() above. */
	if (ifp) {
		if_get_flags(ifp);
		if_get_mtu(ifp);
		if_get_metric(ifp);
	}
	if (IS_ZEBRA_DEBUG_KERNEL)
		zlog_debug("%s: interface %s index %d", __func__,
			   ifan->ifan_name, ifan->ifan_index);

	return 0;
}
#endif /* RTM_IFANNOUNCE */

#ifdef HAVE_BSD_IFI_LINK_STATE
/* Fold the BSD ifi_link_state value into the IFF_RUNNING flag so the
 * generic link-detect code only has to look at ifm_flags. */
static void bsd_linkdetect_translate(struct if_msghdr *ifm)
{
	if ((ifm->ifm_data.ifi_link_state >= LINK_STATE_UP)
	    || (ifm->ifm_data.ifi_link_state == LINK_STATE_UNKNOWN))
		SET_FLAG(ifm->ifm_flags, IFF_RUNNING);
	else
		UNSET_FLAG(ifm->ifm_flags, IFF_RUNNING);
}
#endif /* HAVE_BSD_IFI_LINK_STATE */

/* Map a BSD IFT_* interface type (from sockaddr_dl.sdl_type) to zebra's
 * link-layer type enum; unknown values map to ZEBRA_LLT_UNKNOWN. */
static enum zebra_link_type sdl_to_zebra_link_type(unsigned int sdlt)
{
	switch (sdlt) {
	case IFT_ETHER:
		return ZEBRA_LLT_ETHER;
	case IFT_X25:
		return ZEBRA_LLT_X25;
	case IFT_FDDI:
		return ZEBRA_LLT_FDDI;
	case IFT_PPP:
		return
ZEBRA_LLT_PPP; + case IFT_LOOP: + return ZEBRA_LLT_LOOPBACK; + case IFT_SLIP: + return ZEBRA_LLT_SLIP; + case IFT_ARCNET: + return ZEBRA_LLT_ARCNET; + case IFT_ATM: + return ZEBRA_LLT_ATM; + case IFT_LOCALTALK: + return ZEBRA_LLT_LOCALTLK; + case IFT_HIPPI: + return ZEBRA_LLT_HIPPI; +#ifdef IFT_IEEE1394 + case IFT_IEEE1394: + return ZEBRA_LLT_IEEE1394; +#endif + + default: + return ZEBRA_LLT_UNKNOWN; + } +} + +/* + * Handle struct if_msghdr obtained from reading routing socket or + * sysctl (from interface_list). There may or may not be sockaddrs + * present after the header. + */ +int ifm_read(struct if_msghdr *ifm) +{ + struct interface *ifp = NULL; + struct sockaddr_dl *sdl = NULL; + char ifname[IFNAMSIZ]; + short ifnlen = 0; + int maskbit; + caddr_t cp; + char fbuf[64]; + + /* terminate ifname at head (for strnlen) and tail (for safety) */ + ifname[IFNAMSIZ - 1] = '\0'; + + /* paranoia: sanity check structure */ + if (ifm->ifm_msglen < sizeof(struct if_msghdr)) { + flog_err(EC_ZEBRA_NETLINK_LENGTH_ERROR, + "%s: ifm->ifm_msglen %d too short", __func__, + ifm->ifm_msglen); + return -1; + } + + /* + * Check for a sockaddr_dl following the message. First, point to + * where a socakddr might be if one follows the message. + */ + cp = (void *)(ifm + 1); + + /* Look up for RTA_IFP and skip others. */ + for (maskbit = 1; maskbit; maskbit <<= 1) { + if ((maskbit & ifm->ifm_addrs) == 0) + continue; + if (maskbit != RTA_IFP) { + cp += rta_get(cp, NULL, 0); + continue; + } + + /* Save the pointer to the structure. */ + sdl = (struct sockaddr_dl *)cp; + cp += rta_getsdlname(cp, ifname, &ifnlen); + } + + if (IS_ZEBRA_DEBUG_KERNEL) + zlog_debug("%s: sdl ifname %s addrs {%s}", __func__, + (ifnlen ? ifname : "(nil)"), + rtatostr(ifm->ifm_addrs, fbuf, sizeof(fbuf))); + + /* + * Look up on ifindex first, because ifindices are the primary handle + * for + * interfaces across the user/kernel boundary, for most systems. 
(Some + * messages, such as up/down status changes on NetBSD, do not include a + * sockaddr_dl). + */ + if ((ifp = if_lookup_by_index(ifm->ifm_index, VRF_DEFAULT)) != NULL) { + /* we have an ifp, verify that the name matches as some systems, + * eg Solaris, have a 1:many association of ifindex:ifname + * if they dont match, we dont have the correct ifp and should + * set it back to NULL to let next check do lookup by name + */ + if (ifnlen && (strncmp(ifp->name, ifname, IFNAMSIZ) != 0)) { + if (IS_ZEBRA_DEBUG_KERNEL) + zlog_debug( + "%s: ifp name %s doesn't match sdl name %s", + __func__, ifp->name, ifname); + ifp = NULL; + } + } + + /* + * If we dont have an ifp, try looking up by name. Particularly as some + * systems (Solaris) have a 1:many mapping of ifindex:ifname - the + * ifname + * is therefore our unique handle to that interface. + * + * Interfaces specified in the configuration file for which the ifindex + * has not been determined will have ifindex == IFINDEX_INTERNAL, and + * such + * interfaces are found by this search, and then their ifindex values + * can + * be filled in. + */ + if ((ifp == NULL) && ifnlen) + ifp = if_lookup_by_name(ifname, VRF_DEFAULT); + + /* + * If ifp still does not exist or has an invalid index + * (IFINDEX_INTERNAL), + * create or fill in an interface. + */ + if ((ifp == NULL) || (ifp->ifindex == IFINDEX_INTERNAL)) { + /* + * To create or fill in an interface, a sockaddr_dl (via + * RTA_IFP) is required. + */ + if (!ifnlen) { + zlog_debug("Interface index %d (new) missing ifname", + ifm->ifm_index); + return -1; + } + +#ifndef RTM_IFANNOUNCE + /* Down->Down interface should be ignored here. + * See further comment below. + */ + if (!CHECK_FLAG(ifm->ifm_flags, IFF_UP)) + return 0; +#endif /* !RTM_IFANNOUNCE */ + + if (ifp == NULL) { + /* Interface that zebra was not previously aware of, so + * create. 
*/ + ifp = if_get_by_name(ifname, VRF_DEFAULT, + VRF_DEFAULT_NAME); + if (IS_ZEBRA_DEBUG_KERNEL) + zlog_debug("%s: creating ifp for ifindex %d", + __func__, ifm->ifm_index); + } + + if (IS_ZEBRA_DEBUG_KERNEL) + zlog_debug( + "%s: updated/created ifp, ifname %s, ifindex %d", + __func__, ifp->name, ifp->ifindex); + /* + * Fill in newly created interface structure, or larval + * structure with ifindex IFINDEX_INTERNAL. + */ + if_set_index(ifp, ifm->ifm_index); + +#ifdef HAVE_BSD_IFI_LINK_STATE /* translate BSD kernel msg for link-state */ + bsd_linkdetect_translate(ifm); +#endif /* HAVE_BSD_IFI_LINK_STATE */ + + if_flags_update(ifp, ifm->ifm_flags); +#if defined(__bsdi__) + if_kvm_get_mtu(ifp); +#else + if_get_mtu(ifp); +#endif /* __bsdi__ */ + if_get_metric(ifp); + + /* + * XXX sockaddr_dl contents can be larger than the structure + * definition. There are 2 big families here: + * - BSD has sdl_len + sdl_data[16] + overruns sdl_data + * we MUST use sdl_len here or we'll truncate data. + * - Solaris has no sdl_len, but sdl_data[244] + * presumably, it's not going to run past that, so sizeof() + * is fine here. + * a nonzero ifnlen from rta_getsdlname() means sdl is valid + */ + ifp->ll_type = ZEBRA_LLT_UNKNOWN; + ifp->hw_addr_len = 0; + if (ifnlen) { +#ifdef HAVE_STRUCT_SOCKADDR_DL_SDL_LEN + memcpy(&((struct zebra_if *)ifp->info)->sdl, sdl, + sdl->sdl_len); +#else + memcpy(&((struct zebra_if *)ifp->info)->sdl, sdl, + sizeof(struct sockaddr_dl)); +#endif /* HAVE_STRUCT_SOCKADDR_DL_SDL_LEN */ + + ifp->ll_type = sdl_to_zebra_link_type(sdl->sdl_type); + if (sdl->sdl_alen <= sizeof(ifp->hw_addr)) { + memcpy(ifp->hw_addr, LLADDR(sdl), + sdl->sdl_alen); + ifp->hw_addr_len = sdl->sdl_alen; + } + } + + if_add_update(ifp); + } else + /* + * Interface structure exists. Adjust stored flags from + * notification. If interface has up->down or down->up + * transition, call state change routines (to adjust routes, + * notify routing daemons, etc.). 
(Other flag changes are stored + * but apparently do not trigger action.) + */ + { + if (ifp->ifindex != ifm->ifm_index) { + zlog_debug( + "%s: index mismatch, ifname %s, ifp index %d, ifm index %d", + __func__, ifp->name, ifp->ifindex, + ifm->ifm_index); + return -1; + } + +#ifdef HAVE_BSD_IFI_LINK_STATE /* translate BSD kernel msg for link-state */ + bsd_linkdetect_translate(ifm); +#endif /* HAVE_BSD_IFI_LINK_STATE */ + + /* update flags and handle operative->inoperative transition, if + * any */ + if_flags_update(ifp, ifm->ifm_flags); + +#ifndef RTM_IFANNOUNCE + if (!if_is_up(ifp)) { + /* No RTM_IFANNOUNCE on this platform, so we can never + * distinguish between ~IFF_UP and delete. We must + * presume + * it has been deleted. + * Eg, Solaris will not notify us of unplumb. + * + * XXX: Fixme - this should be runtime detected + * So that a binary compiled on a system with IFANNOUNCE + * will still behave correctly if run on a platform + * without + */ + if_delete_update(&ifp); + } +#endif /* RTM_IFANNOUNCE */ + if (ifp && if_is_up(ifp)) { +#if defined(__bsdi__) + if_kvm_get_mtu(ifp); +#else + if_get_mtu(ifp); +#endif /* __bsdi__ */ + if_get_metric(ifp); + } + } + + if (ifp) { +#ifdef HAVE_NET_RT_IFLIST + ifp->stats = ifm->ifm_data; +#endif /* HAVE_NET_RT_IFLIST */ + ifp->speed = ifm->ifm_data.ifi_baudrate / 1000000; + + if (IS_ZEBRA_DEBUG_KERNEL) + zlog_debug("%s: interface %s index %d", __func__, + ifp->name, ifp->ifindex); + } + + return 0; +} + +/* Address read from struct ifa_msghdr. 
*/ +static void ifam_read_mesg(struct ifa_msghdr *ifm, union sockunion *addr, + union sockunion *mask, union sockunion *brd, + char *ifname, short *ifnlen) +{ + caddr_t pnt, end; + union sockunion dst; + union sockunion gateway; + int maskbit; + char fbuf[64]; + + pnt = (caddr_t)(ifm + 1); + end = ((caddr_t)ifm) + ifm->ifam_msglen; + + /* Be sure structure is cleared */ + memset(mask, 0, sizeof(union sockunion)); + memset(addr, 0, sizeof(union sockunion)); + memset(brd, 0, sizeof(union sockunion)); + memset(&dst, 0, sizeof(union sockunion)); + memset(&gateway, 0, sizeof(union sockunion)); + + /* We fetch each socket variable into sockunion. */ + for (maskbit = 1; maskbit; maskbit <<= 1) { + if ((maskbit & ifm->ifam_addrs) == 0) + continue; + + switch (maskbit) { + case RTA_DST: + pnt += rta_get(pnt, &dst, sizeof(dst)); + break; + case RTA_GATEWAY: + pnt += rta_get(pnt, &gateway, sizeof(gateway)); + break; + case RTA_NETMASK: + pnt += rta_getattr(pnt, mask, sizeof(*mask)); + break; + case RTA_IFP: + pnt += rta_getsdlname(pnt, ifname, ifnlen); + break; + case RTA_IFA: + pnt += rta_get(pnt, addr, sizeof(*addr)); + break; + case RTA_BRD: + pnt += rta_get(pnt, brd, sizeof(*brd)); + break; + + default: + pnt += rta_get(pnt, NULL, 0); + break; + } + + if (pnt > end) { + zlog_warn("%s: overflow detected (pnt:%p end:%p)", + __func__, pnt, end); + break; + } + } + + if (IS_ZEBRA_DEBUG_KERNEL) { + switch (sockunion_family(addr)) { + case AF_INET: + case AF_INET6: { + int masklen = + (sockunion_family(addr) == AF_INET) + ? ip_masklen(mask->sin.sin_addr) + : ip6_masklen(mask->sin6.sin6_addr); + zlog_debug( + "%s: ifindex %d, ifname %s, ifam_addrs {%s}, ifam_flags 0x%x, addr %pSU/%d broad %pSU dst %pSU gateway %pSU", + __func__, ifm->ifam_index, + (ifnlen ? 
ifname : "(nil)"), + rtatostr(ifm->ifam_addrs, fbuf, sizeof(fbuf)), + ifm->ifam_flags, addr, masklen, brd, &dst, + &gateway); + } break; + default: + zlog_debug("%s: ifindex %d, ifname %s, ifam_addrs {%s}", + __func__, ifm->ifam_index, + (ifnlen ? ifname : "(nil)"), + rtatostr(ifm->ifam_addrs, fbuf, + sizeof(fbuf))); + break; + } + } + + /* Assert read up end point matches to end point */ + pnt = (caddr_t)ROUNDUP((size_t)pnt); + if (pnt != (caddr_t)ROUNDUP((size_t)end)) + zlog_debug("ifam_read() doesn't read all socket data"); +} + +/* Interface's address information get. */ +int ifam_read(struct ifa_msghdr *ifam) +{ + struct interface *ifp = NULL; + union sockunion addr, mask, brd; + bool dest_same = false; + char ifname[INTERFACE_NAMSIZ]; + short ifnlen = 0; + bool isalias = false; + uint32_t flags = 0; + + ifname[0] = ifname[INTERFACE_NAMSIZ - 1] = '\0'; + + /* Allocate and read address information. */ + ifam_read_mesg(ifam, &addr, &mask, &brd, ifname, &ifnlen); + + if ((ifp = if_lookup_by_index(ifam->ifam_index, VRF_DEFAULT)) == NULL) { + flog_warn(EC_ZEBRA_UNKNOWN_INTERFACE, + "%s: no interface for ifname %s, index %d", __func__, + ifname, ifam->ifam_index); + return -1; + } + + if (ifnlen && strncmp(ifp->name, ifname, INTERFACE_NAMSIZ)) + isalias = true; + + /* + * Mark the alias prefixes as secondary + */ + if (isalias) + SET_FLAG(flags, ZEBRA_IFA_SECONDARY); + + /* N.B. The info in ifa_msghdr does not tell us whether the RTA_BRD + field contains a broadcast address or a peer address, so we are + forced to + rely upon the interface type. */ + if (if_is_pointopoint(ifp)) + SET_FLAG(flags, ZEBRA_IFA_PEER); + else { + if (memcmp(&addr, &brd, sizeof(addr)) == 0) + dest_same = true; + } + +#if 0 + /* it might seem cute to grab the interface metric here, however + * we're processing an address update message, and so some systems + * (e.g. FBSD) dont bother to fill in ifam_metric. Disabled, but left + * in deliberately, as comment. 
+ */ + ifp->metric = ifam->ifam_metric; +#endif + + /* Add connected address. */ + switch (sockunion_family(&addr)) { + case AF_INET: + if (ifam->ifam_type == RTM_NEWADDR) + connected_add_ipv4(ifp, flags, &addr.sin.sin_addr, + ip_masklen(mask.sin.sin_addr), + dest_same ? NULL : &brd.sin.sin_addr, + (isalias ? ifname : NULL), + METRIC_MAX); + else + connected_delete_ipv4(ifp, flags, &addr.sin.sin_addr, + ip_masklen(mask.sin.sin_addr), + dest_same ? NULL + : &brd.sin.sin_addr); + break; + case AF_INET6: + /* Unset interface index from link-local address when IPv6 stack + is KAME. */ + if (IN6_IS_ADDR_LINKLOCAL(&addr.sin6.sin6_addr)) { + SET_IN6_LINKLOCAL_IFINDEX(addr.sin6.sin6_addr, 0); + } + + if (ifam->ifam_type == RTM_NEWADDR) + connected_add_ipv6(ifp, flags, &addr.sin6.sin6_addr, + NULL, + ip6_masklen(mask.sin6.sin6_addr), + (isalias ? ifname : NULL), + METRIC_MAX); + else + connected_delete_ipv6(ifp, &addr.sin6.sin6_addr, NULL, + ip6_masklen(mask.sin6.sin6_addr)); + break; + default: + /* Unsupported family silently ignore... */ + break; + } + + /* Check interface flag for implicit up of the interface. */ + if_refresh(ifp); + + return 0; +} + +/* Interface function for reading kernel routing table information. */ +static int rtm_read_mesg(struct rt_msghdr *rtm, union sockunion *dest, + union sockunion *mask, union sockunion *gate, + char *ifname, short *ifnlen) +{ + caddr_t pnt, end; + int maskbit; + + /* Pnt points out socket data start point. */ + pnt = (caddr_t)(rtm + 1); + end = ((caddr_t)rtm) + rtm->rtm_msglen; + + /* rt_msghdr version check. */ + if (rtm->rtm_version != RTM_VERSION) + flog_warn(EC_ZEBRA_RTM_VERSION_MISMATCH, + "Routing message version different %d should be %d.This may cause problem", + rtm->rtm_version, RTM_VERSION); + + /* Be sure structure is cleared */ + memset(dest, 0, sizeof(union sockunion)); + memset(gate, 0, sizeof(union sockunion)); + memset(mask, 0, sizeof(union sockunion)); + + /* We fetch each socket variable into sockunion. 
*/ + /* We fetch each socket variable into sockunion. */ + for (maskbit = 1; maskbit; maskbit <<= 1) { + if ((maskbit & rtm->rtm_addrs) == 0) + continue; + + switch (maskbit) { + case RTA_DST: + pnt += rta_get(pnt, dest, sizeof(*dest)); + break; + case RTA_GATEWAY: + pnt += rta_get(pnt, gate, sizeof(*gate)); + break; + case RTA_NETMASK: + pnt += rta_getattr(pnt, mask, sizeof(*mask)); + break; + case RTA_IFP: + pnt += rta_getsdlname(pnt, ifname, ifnlen); + break; + + default: + pnt += rta_get(pnt, NULL, 0); + break; + } + + if (pnt > end) { + zlog_warn("%s: overflow detected (pnt:%p end:%p)", + __func__, pnt, end); + break; + } + } + + /* If there is netmask information set it's family same as + destination family*/ + if (rtm->rtm_addrs & RTA_NETMASK) + mask->sa.sa_family = dest->sa.sa_family; + + /* Assert read up to the end of pointer. */ + if (pnt != end) + zlog_debug("rtm_read() doesn't read all socket data."); + + return rtm->rtm_flags; +} + +void rtm_read(struct rt_msghdr *rtm) +{ + int flags; + uint32_t zebra_flags; + union sockunion dest, mask, gate; + char ifname[INTERFACE_NAMSIZ + 1]; + short ifnlen = 0; + struct nexthop nh; + struct prefix p; + ifindex_t ifindex = 0; + afi_t afi; + char fbuf[64]; + int32_t proto = ZEBRA_ROUTE_KERNEL; + uint8_t distance = 0; + + zebra_flags = 0; + + /* Read destination and netmask and gateway from rtm message + structure. 
*/ + flags = rtm_read_mesg(rtm, &dest, &mask, &gate, ifname, &ifnlen); + if (!(flags & RTF_DONE)) + return; + if (IS_ZEBRA_DEBUG_KERNEL) + zlog_debug("%s: got rtm of type %d (%s) addrs {%s}", __func__, + rtm->rtm_type, + lookup_msg(rtm_type_str, rtm->rtm_type, NULL), + rtatostr(rtm->rtm_addrs, fbuf, sizeof(fbuf))); + +#ifdef RTF_CLONED /*bsdi, netbsd 1.6*/ + if (flags & RTF_CLONED) + return; +#endif +#ifdef RTF_WASCLONED /*freebsd*/ + if (flags & RTF_WASCLONED) + return; +#endif + + if ((rtm->rtm_type == RTM_ADD || rtm->rtm_type == RTM_CHANGE) + && !(flags & RTF_UP)) + return; + + /* This is connected route. */ + if (!(flags & RTF_GATEWAY)) + return; + + if (flags & RTF_PROTO1) { + SET_FLAG(zebra_flags, ZEBRA_FLAG_SELFROUTE); + proto = ZEBRA_ROUTE_STATIC; + distance = 255; + } + + memset(&nh, 0, sizeof(nh)); + + nh.vrf_id = VRF_DEFAULT; + /* This is a reject or blackhole route */ + if (flags & RTF_REJECT) { + nh.type = NEXTHOP_TYPE_BLACKHOLE; + nh.bh_type = BLACKHOLE_REJECT; + } else if (flags & RTF_BLACKHOLE) { + nh.type = NEXTHOP_TYPE_BLACKHOLE; + nh.bh_type = BLACKHOLE_NULL; + } + + /* + * Ignore our own messages. + */ + if (rtm->rtm_type != RTM_GET && rtm->rtm_pid == pid) + return; + + if (dest.sa.sa_family == AF_INET) { + afi = AFI_IP; + p.family = AF_INET; + p.u.prefix4 = dest.sin.sin_addr; + if (flags & RTF_HOST) + p.prefixlen = IPV4_MAX_BITLEN; + else + p.prefixlen = ip_masklen(mask.sin.sin_addr); + + if (!nh.type) { + nh.type = NEXTHOP_TYPE_IPV4; + nh.gate.ipv4 = gate.sin.sin_addr; + } + } else if (dest.sa.sa_family == AF_INET6) { + afi = AFI_IP6; + p.family = AF_INET6; + p.u.prefix6 = dest.sin6.sin6_addr; + if (flags & RTF_HOST) + p.prefixlen = IPV6_MAX_BITLEN; + else + p.prefixlen = ip6_masklen(mask.sin6.sin6_addr); + +#ifdef KAME + if (IN6_IS_ADDR_LINKLOCAL(&gate.sin6.sin6_addr)) { + ifindex = IN6_LINKLOCAL_IFINDEX(gate.sin6.sin6_addr); + SET_IN6_LINKLOCAL_IFINDEX(gate.sin6.sin6_addr, 0); + } +#endif /* KAME */ + + if (!nh.type) { + nh.type = ifindex ? 
NEXTHOP_TYPE_IPV6_IFINDEX
+ : NEXTHOP_TYPE_IPV6;
+ nh.gate.ipv6 = gate.sin6.sin6_addr;
+ nh.ifindex = ifindex;
+ }
+ } else
+ return;
+
+ if (rtm->rtm_type == RTM_GET || rtm->rtm_type == RTM_ADD
+ || rtm->rtm_type == RTM_CHANGE)
+ rib_add(afi, SAFI_UNICAST, VRF_DEFAULT, proto, 0, zebra_flags,
+ &p, NULL, &nh, 0, RT_TABLE_MAIN, 0, 0, distance, 0,
+ false);
+ else
+ rib_delete(afi, SAFI_UNICAST, VRF_DEFAULT, proto, 0,
+ zebra_flags, &p, NULL, &nh, 0, RT_TABLE_MAIN, 0,
+ distance, true);
+}
+
+/* Interface function for the kernel routing table updates. Support
+ * for RTM_CHANGE will be needed.
+ * Exported only for rt_socket.c
+ */
+int rtm_write(int message, union sockunion *dest, union sockunion *mask,
+ union sockunion *gate, union sockunion *mpls, unsigned int index,
+ enum blackhole_type bh_type, int metric)
+{
+ int ret;
+ caddr_t pnt;
+ struct interface *ifp;
+
+ /* Sequential number of routing message. */
+ static int msg_seq = 0;
+
+ /* Struct of rt_msghdr and buffer for storing socket's data. */
+ struct {
+ struct rt_msghdr rtm;
+ char buf[512];
+ } msg;
+
+ if (dplane_routing_sock < 0)
+ return ZEBRA_ERR_EPERM;
+
+ /* Clear and set rt_msghdr values */
+ memset(&msg, 0, sizeof(msg));
+ msg.rtm.rtm_version = RTM_VERSION;
+ msg.rtm.rtm_type = message;
+ msg.rtm.rtm_seq = msg_seq++;
+ msg.rtm.rtm_addrs = RTA_DST;
+ msg.rtm.rtm_addrs |= RTA_GATEWAY;
+ msg.rtm.rtm_flags = RTF_UP;
+#ifdef __OpenBSD__
+ msg.rtm.rtm_flags |= RTF_MPATH;
+ msg.rtm.rtm_fmask = RTF_MPLS;
+#endif
+ msg.rtm.rtm_index = index;
+
+ if (metric != 0) {
+ msg.rtm.rtm_rmx.rmx_hopcount = metric;
+ msg.rtm.rtm_inits |= RTV_HOPCOUNT;
+ }
+
+ ifp = if_lookup_by_index(index, VRF_DEFAULT);
+
+ if (gate && (message == RTM_ADD || message == RTM_CHANGE))
+ msg.rtm.rtm_flags |= RTF_GATEWAY;
+
+/* When RTF_CLONING is unavailable on BSD, should we set some
+ * other flag instead? 
+ */ +#ifdef RTF_CLONING + if (!gate && (message == RTM_ADD || message == RTM_CHANGE) && ifp + && (ifp->flags & IFF_POINTOPOINT) == 0) + msg.rtm.rtm_flags |= RTF_CLONING; +#endif /* RTF_CLONING */ + + /* If no protocol specific gateway is specified, use link + address for gateway. */ + if (!gate) { + if (!ifp) { + char dest_buf[INET_ADDRSTRLEN] = "NULL", + mask_buf[INET_ADDRSTRLEN] = "255.255.255.255"; + if (dest) + inet_ntop(AF_INET, &dest->sin.sin_addr, + dest_buf, INET_ADDRSTRLEN); + if (mask) + inet_ntop(AF_INET, &mask->sin.sin_addr, + mask_buf, INET_ADDRSTRLEN); + flog_warn( + EC_ZEBRA_RTM_NO_GATEWAY, + "%s: %s/%s: gate == NULL and no gateway found for ifindex %d", + __func__, dest_buf, mask_buf, index); + return -1; + } + gate = (union sockunion *)&((struct zebra_if *)ifp->info)->sdl; + } + + if (mask) + msg.rtm.rtm_addrs |= RTA_NETMASK; + else if (message == RTM_ADD || message == RTM_CHANGE) + msg.rtm.rtm_flags |= RTF_HOST; + +#ifdef __OpenBSD__ + if (mpls) { + msg.rtm.rtm_addrs |= RTA_SRC; + msg.rtm.rtm_flags |= RTF_MPLS; + + if (mpls->smpls.smpls_label + != htonl(MPLS_LABEL_IMPLICIT_NULL << MPLS_LABEL_OFFSET)) + msg.rtm.rtm_mpls = MPLS_OP_PUSH; + } +#endif + + /* Tagging route with flags */ + msg.rtm.rtm_flags |= (RTF_PROTO1); + + switch (bh_type) { + case BLACKHOLE_UNSPEC: + break; + case BLACKHOLE_REJECT: + msg.rtm.rtm_flags |= RTF_REJECT; + break; + default: + msg.rtm.rtm_flags |= RTF_BLACKHOLE; + break; + } + + +#define SOCKADDRSET(X, R) \ + if (msg.rtm.rtm_addrs & (R)) { \ + int len = SAROUNDUP(X); \ + memcpy(pnt, (caddr_t)(X), len); \ + pnt += len; \ + } + + pnt = (caddr_t)msg.buf; + + /* Write each socket data into rtm message buffer */ + SOCKADDRSET(dest, RTA_DST); + SOCKADDRSET(gate, RTA_GATEWAY); + SOCKADDRSET(mask, RTA_NETMASK); +#ifdef __OpenBSD__ + SOCKADDRSET(mpls, RTA_SRC); +#endif + + msg.rtm.rtm_msglen = pnt - (caddr_t)&msg; + + ret = write(dplane_routing_sock, &msg, msg.rtm.rtm_msglen); + + if (ret != msg.rtm.rtm_msglen) { + if (errno == 
EEXIST) + return ZEBRA_ERR_RTEXIST; + if (errno == ENETUNREACH) + return ZEBRA_ERR_RTUNREACH; + if (errno == ESRCH) + return ZEBRA_ERR_RTNOEXIST; + + flog_err_sys(EC_LIB_SOCKET, "%s: write : %s (%d)", __func__, + safe_strerror(errno), errno); + return ZEBRA_ERR_KERNEL; + } + return ZEBRA_ERR_NOERROR; +} + + +#include "thread.h" +#include "zebra/zserv.h" + +/* For debug purpose. */ +static void rtmsg_debug(struct rt_msghdr *rtm) +{ + char fbuf[64]; + + zlog_debug("Kernel: Len: %d Type: %s", rtm->rtm_msglen, + lookup_msg(rtm_type_str, rtm->rtm_type, NULL)); + rtm_flag_dump(rtm->rtm_flags); + zlog_debug("Kernel: message seq %d", rtm->rtm_seq); + zlog_debug("Kernel: pid %lld, rtm_addrs {%s}", (long long)rtm->rtm_pid, + rtatostr(rtm->rtm_addrs, fbuf, sizeof(fbuf))); +} + +/* This is pretty gross, better suggestions welcome -- mhandler */ +#ifndef RTAX_MAX +#ifdef RTA_NUMBITS +#define RTAX_MAX RTA_NUMBITS +#else +#define RTAX_MAX 8 +#endif /* RTA_NUMBITS */ +#endif /* RTAX_MAX */ + +/* Kernel routing table and interface updates via routing socket. */ +static void kernel_read(struct thread *thread) +{ + int sock; + int nbytes; + struct rt_msghdr *rtm; + + /* + * This must be big enough for any message the kernel might send. + * Rather than determining how many sockaddrs of what size might be + * in each particular message, just use RTAX_MAX of sockaddr_storage + * for each. Note that the sockaddrs must be after each message + * definition, or rather after whichever happens to be the largest, + * since the buffer needs to be big enough for a message and the + * sockaddrs together. + */ + union { + /* Routing information. */ + struct { + struct rt_msghdr rtm; + struct sockaddr_storage addr[RTAX_MAX]; + } r; + + /* Interface information. */ + struct { + struct if_msghdr ifm; + struct sockaddr_storage addr[RTAX_MAX]; + } im; + + /* Interface address information. 
*/ + struct { + struct ifa_msghdr ifa; + struct sockaddr_storage addr[RTAX_MAX]; + } ia; + +#ifdef RTM_IFANNOUNCE + /* Interface arrival/departure */ + struct { + struct if_announcemsghdr ifan; + struct sockaddr_storage addr[RTAX_MAX]; + } ian; +#endif /* RTM_IFANNOUNCE */ + + } buf; + + /* Fetch routing socket. */ + sock = THREAD_FD(thread); + + nbytes = read(sock, &buf, sizeof(buf)); + + if (nbytes < 0) { + if (errno == ENOBUFS) { +#ifdef __FreeBSD__ + /* + * ENOBUFS indicates a temporary resource + * shortage and is not harmful for consistency of + * reading the routing socket. Ignore it. + */ + thread_add_read(zrouter.master, kernel_read, NULL, sock, + NULL); + return; +#else + flog_err(EC_ZEBRA_RECVMSG_OVERRUN, + "routing socket overrun: %s", + safe_strerror(errno)); + /* + * In this case we are screwed. + * There is no good way to + * recover zebra at this point. + */ + exit(-1); +#endif + } + if (errno != EAGAIN && errno != EWOULDBLOCK) + flog_err_sys(EC_LIB_SOCKET, "routing socket error: %s", + safe_strerror(errno)); + return; + } + + if (nbytes == 0) + return; + + thread_add_read(zrouter.master, kernel_read, NULL, sock, NULL); + + if (IS_ZEBRA_DEBUG_KERNEL) + rtmsg_debug(&buf.r.rtm); + + rtm = &buf.r.rtm; + + /* + * Ensure that we didn't drop any data, so that processing routines + * can assume they have the whole message. 
+ */ + if (rtm->rtm_msglen != nbytes) { + zlog_debug("%s: rtm->rtm_msglen %d, nbytes %d, type %d", + __func__, rtm->rtm_msglen, nbytes, rtm->rtm_type); + return; + } + + switch (rtm->rtm_type) { + case RTM_ADD: + case RTM_DELETE: + case RTM_CHANGE: + rtm_read(rtm); + break; + case RTM_IFINFO: + ifm_read(&buf.im.ifm); + break; + case RTM_NEWADDR: + case RTM_DELADDR: + ifam_read(&buf.ia.ifa); + break; +#ifdef RTM_IFANNOUNCE + case RTM_IFANNOUNCE: + ifan_read(&buf.ian.ifan); + break; +#endif /* RTM_IFANNOUNCE */ + default: + if (IS_ZEBRA_DEBUG_KERNEL) + zlog_debug( + "Unprocessed RTM_type: %s(%d)", + lookup_msg(rtm_type_str, rtm->rtm_type, NULL), + rtm->rtm_type); + break; + } +} + +/* Make routing socket. */ +static void routing_socket(struct zebra_ns *zns) +{ + uint32_t default_rcvbuf; + socklen_t optlen; + + frr_with_privs(&zserv_privs) { + routing_sock = ns_socket(AF_ROUTE, SOCK_RAW, 0, zns->ns_id); + + dplane_routing_sock = + ns_socket(AF_ROUTE, SOCK_RAW, 0, zns->ns_id); + } + + if (routing_sock < 0) { + flog_err_sys(EC_LIB_SOCKET, "Can't init kernel routing socket"); + return; + } + + if (dplane_routing_sock < 0) { + flog_err_sys(EC_LIB_SOCKET, + "Can't init kernel dataplane routing socket"); + return; + } + +#ifdef SO_RERROR + /* Allow reporting of route(4) buffer overflow errors */ + int n = 1; + + if (setsockopt(routing_sock, SOL_SOCKET, SO_RERROR, &n, sizeof(n)) < 0) + flog_err_sys(EC_LIB_SOCKET, + "Can't set SO_RERROR on routing socket"); +#endif + + /* XXX: Socket should be NONBLOCK, however as we currently + * discard failed writes, this will lead to inconsistencies. + * For now, socket must be blocking. 
+ */ + /*if (fcntl (routing_sock, F_SETFL, O_NONBLOCK) < 0) + zlog_warn ("Can't set O_NONBLOCK to routing socket");*/ + + /* + * Attempt to set a more useful receive buffer size + */ + optlen = sizeof(default_rcvbuf); + if (getsockopt(routing_sock, SOL_SOCKET, SO_RCVBUF, &default_rcvbuf, + &optlen) == -1) + flog_err_sys(EC_LIB_SOCKET, + "routing_sock sockopt SOL_SOCKET SO_RCVBUF"); + else { + for (; rcvbufsize > default_rcvbuf && + setsockopt(routing_sock, SOL_SOCKET, SO_RCVBUF, + &rcvbufsize, sizeof(rcvbufsize)) == -1 && + errno == ENOBUFS; + rcvbufsize /= 2) + ; + } + + /* kernel_read needs rewrite. */ + thread_add_read(zrouter.master, kernel_read, NULL, routing_sock, NULL); +} + +/* Exported interface function. This function simply calls + routing_socket (). */ +void kernel_init(struct zebra_ns *zns) +{ + routing_socket(zns); +} + +void kernel_terminate(struct zebra_ns *zns, bool complete) +{ + return; +} + +/* + * Global init for platform-/OS-specific things + */ +void kernel_router_init(void) +{ +} + +/* + * Global deinit for platform-/OS-specific things + */ +void kernel_router_terminate(void) +{ +} + +/* + * Called by the dplane pthread to read incoming OS messages and dispatch them. + */ +int kernel_dplane_read(struct zebra_dplane_info *info) +{ + return 0; +} + +void kernel_update_multi(struct dplane_ctx_q *ctx_list) +{ + struct zebra_dplane_ctx *ctx; + struct dplane_ctx_q handled_list; + enum zebra_dplane_result res = ZEBRA_DPLANE_REQUEST_SUCCESS; + + TAILQ_INIT(&handled_list); + + while (true) { + ctx = dplane_ctx_dequeue(ctx_list); + if (ctx == NULL) + break; + + /* + * A previous provider plugin may have asked to skip the + * kernel update. 
+ */ + if (dplane_ctx_is_skip_kernel(ctx)) { + res = ZEBRA_DPLANE_REQUEST_SUCCESS; + goto skip_one; + } + + switch (dplane_ctx_get_op(ctx)) { + + case DPLANE_OP_ROUTE_INSTALL: + case DPLANE_OP_ROUTE_UPDATE: + case DPLANE_OP_ROUTE_DELETE: + res = kernel_route_update(ctx); + break; + + case DPLANE_OP_NH_INSTALL: + case DPLANE_OP_NH_UPDATE: + case DPLANE_OP_NH_DELETE: + res = kernel_nexthop_update(ctx); + break; + + case DPLANE_OP_LSP_INSTALL: + case DPLANE_OP_LSP_UPDATE: + case DPLANE_OP_LSP_DELETE: + res = kernel_lsp_update(ctx); + break; + + case DPLANE_OP_PW_INSTALL: + case DPLANE_OP_PW_UNINSTALL: + res = kernel_pw_update(ctx); + break; + + case DPLANE_OP_ADDR_INSTALL: + case DPLANE_OP_ADDR_UNINSTALL: + res = kernel_address_update_ctx(ctx); + break; + + case DPLANE_OP_MAC_INSTALL: + case DPLANE_OP_MAC_DELETE: + res = kernel_mac_update_ctx(ctx); + break; + + case DPLANE_OP_NEIGH_INSTALL: + case DPLANE_OP_NEIGH_UPDATE: + case DPLANE_OP_NEIGH_DELETE: + case DPLANE_OP_VTEP_ADD: + case DPLANE_OP_VTEP_DELETE: + case DPLANE_OP_NEIGH_DISCOVER: + res = kernel_neigh_update_ctx(ctx); + break; + + case DPLANE_OP_RULE_ADD: + case DPLANE_OP_RULE_DELETE: + case DPLANE_OP_RULE_UPDATE: + res = kernel_pbr_rule_update(ctx); + break; + + case DPLANE_OP_INTF_INSTALL: + case DPLANE_OP_INTF_UPDATE: + case DPLANE_OP_INTF_DELETE: + res = kernel_intf_update(ctx); + break; + + case DPLANE_OP_TC_INSTALL: + case DPLANE_OP_TC_UPDATE: + case DPLANE_OP_TC_DELETE: + res = kernel_tc_update(ctx); + break; + + /* Ignore 'notifications' - no-op */ + case DPLANE_OP_SYS_ROUTE_ADD: + case DPLANE_OP_SYS_ROUTE_DELETE: + case DPLANE_OP_ROUTE_NOTIFY: + case DPLANE_OP_LSP_NOTIFY: + res = ZEBRA_DPLANE_REQUEST_SUCCESS; + break; + + case DPLANE_OP_INTF_NETCONFIG: + res = kernel_intf_netconf_update(ctx); + break; + + case DPLANE_OP_NONE: + case DPLANE_OP_BR_PORT_UPDATE: + case DPLANE_OP_IPTABLE_ADD: + case DPLANE_OP_IPTABLE_DELETE: + case DPLANE_OP_IPSET_ADD: + case DPLANE_OP_IPSET_DELETE: + case 
DPLANE_OP_IPSET_ENTRY_ADD: + case DPLANE_OP_IPSET_ENTRY_DELETE: + case DPLANE_OP_NEIGH_IP_INSTALL: + case DPLANE_OP_NEIGH_IP_DELETE: + case DPLANE_OP_NEIGH_TABLE_UPDATE: + case DPLANE_OP_GRE_SET: + case DPLANE_OP_INTF_ADDR_ADD: + case DPLANE_OP_INTF_ADDR_DEL: + zlog_err("Unhandled dplane data for %s", + dplane_op2str(dplane_ctx_get_op(ctx))); + res = ZEBRA_DPLANE_REQUEST_FAILURE; + } + + skip_one: + dplane_ctx_set_status(ctx, res); + + dplane_ctx_enqueue_tail(&handled_list, ctx); + } + + TAILQ_INIT(ctx_list); + dplane_ctx_list_append(ctx_list, &handled_list); +} + +#endif /* !HAVE_NETLINK */ diff --git a/zebra/kernel_socket.h b/zebra/kernel_socket.h new file mode 100644 index 0000000..15079d7 --- /dev/null +++ b/zebra/kernel_socket.h @@ -0,0 +1,49 @@ +/* + * Exported kernel_socket functions, exported only for convenience of + * sysctl methods. + * + * This file is part of Quagga. + * + * Quagga is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License as published by the + * Free Software Foundation; either version 2, or (at your option) any + * later version. + * + * Quagga is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. + * + * You should have received a copy of the GNU General Public License along + * with this program; see the file COPYING; if not, write to the Free Software + * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA + */ + +#ifndef __ZEBRA_KERNEL_SOCKET_H +#define __ZEBRA_KERNEL_SOCKET_H + +#ifdef __cplusplus +extern "C" { +#endif + +/* Error codes of zebra. 
*/ +#define ZEBRA_ERR_NOERROR 0 +#define ZEBRA_ERR_RTEXIST -1 +#define ZEBRA_ERR_RTUNREACH -2 +#define ZEBRA_ERR_EPERM -3 +#define ZEBRA_ERR_RTNOEXIST -4 +#define ZEBRA_ERR_KERNEL -5 + +extern void rtm_read(struct rt_msghdr *); +extern int ifam_read(struct ifa_msghdr *); +extern int ifm_read(struct if_msghdr *); +extern int rtm_write(int, union sockunion *, union sockunion *, + union sockunion *, union sockunion *, unsigned int, + enum blackhole_type, int); +extern const struct message rtm_type_str[]; + +#ifdef __cplusplus +} +#endif + +#endif /* __ZEBRA_KERNEL_SOCKET_H */ diff --git a/zebra/label_manager.c b/zebra/label_manager.c new file mode 100644 index 0000000..2634a33 --- /dev/null +++ b/zebra/label_manager.c @@ -0,0 +1,489 @@ +/* + * Label Manager for FRR + * + * Copyright (C) 2017 by Bingen Eguzkitza, + * Volta Networks Inc. + * + * This file is part of FRRouting (FRR) + * + * FRR is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License as published by the + * Free Software Foundation; either version 2, or (at your option) any + * later version. + * + * FRR is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. 
+ * + * You should have received a copy of the GNU General Public License along + * with this program; see the file COPYING; if not, write to the Free Software + * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA + */ + +#include <zebra.h> +#include <stdio.h> +#include <string.h> +#include <sys/types.h> + +#include "lib/log.h" +#include "lib/memory.h" +#include "lib/mpls.h" +#include "lib/network.h" +#include "lib/stream.h" +#include "lib/zclient.h" +#include "lib/libfrr.h" + +//#include "zebra/zserv.h" +#include "zebra/zebra_router.h" +#include "zebra/label_manager.h" +#include "zebra/zebra_errors.h" +#include "zebra/zapi_msg.h" +#include "zebra/debug.h" + +#define CONNECTION_DELAY 5 + +struct label_manager lbl_mgr; + +DEFINE_MGROUP(LBL_MGR, "Label Manager"); +DEFINE_MTYPE_STATIC(LBL_MGR, LM_CHUNK, "Label Manager Chunk"); + +/* define hooks for the basic API, so that it can be specialized or served + * externally + */ + +DEFINE_HOOK(lm_client_connect, (struct zserv *client, vrf_id_t vrf_id), + (client, vrf_id)); +DEFINE_HOOK(lm_client_disconnect, (struct zserv *client), (client)); +DEFINE_HOOK(lm_get_chunk, + (struct label_manager_chunk * *lmc, struct zserv *client, + uint8_t keep, uint32_t size, uint32_t base, vrf_id_t vrf_id), + (lmc, client, keep, size, base, vrf_id)); +DEFINE_HOOK(lm_release_chunk, + (struct zserv *client, uint32_t start, uint32_t end), + (client, start, end)); +DEFINE_HOOK(lm_cbs_inited, (), ()); + +/* define wrappers to be called in zapi_msg.c (as hooks must be called in + * source file where they were defined) + */ +void lm_client_connect_call(struct zserv *client, vrf_id_t vrf_id) +{ + hook_call(lm_client_connect, client, vrf_id); +} +void lm_get_chunk_call(struct label_manager_chunk **lmc, struct zserv *client, + uint8_t keep, uint32_t size, uint32_t base, + vrf_id_t vrf_id) +{ + hook_call(lm_get_chunk, lmc, client, keep, size, base, vrf_id); +} +void lm_release_chunk_call(struct zserv *client, uint32_t start, 
uint32_t end) +{ + hook_call(lm_release_chunk, client, start, end); +} + +/* forward declarations of the static functions to be used for some hooks */ +static int label_manager_connect(struct zserv *client, vrf_id_t vrf_id); +static int label_manager_disconnect(struct zserv *client); +static int label_manager_get_chunk(struct label_manager_chunk **lmc, + struct zserv *client, uint8_t keep, + uint32_t size, uint32_t base, + vrf_id_t vrf_id); +static int label_manager_release_label_chunk(struct zserv *client, + uint32_t start, uint32_t end); + +void delete_label_chunk(void *val) +{ + XFREE(MTYPE_LM_CHUNK, val); +} + +/** + * Release label chunks from a client. + * + * Called on client disconnection or reconnection. It only releases chunks + * with empty keep value. + * + * @param proto Daemon protocol of client, to identify the owner + * @param instance Instance, to identify the owner + * @return Number of chunks released + */ +int release_daemon_label_chunks(struct zserv *client) +{ + struct listnode *node; + struct label_manager_chunk *lmc; + int count = 0; + int ret; + + if (IS_ZEBRA_DEBUG_PACKET) + zlog_debug("%s: Releasing chunks for client proto %s, instance %d, session %u", + __func__, zebra_route_string(client->proto), + client->instance, client->session_id); + + for (ALL_LIST_ELEMENTS_RO(lbl_mgr.lc_list, node, lmc)) { + if (lmc->proto == client->proto && + lmc->instance == client->instance && + lmc->session_id == client->session_id && lmc->keep == 0) { + ret = release_label_chunk(lmc->proto, lmc->instance, + lmc->session_id, + lmc->start, lmc->end); + if (ret == 0) + count++; + } + } + + if (IS_ZEBRA_DEBUG_PACKET) + zlog_debug("%s: Released %d label chunks", __func__, count); + + return count; +} + +int lm_client_disconnect_cb(struct zserv *client) +{ + hook_call(lm_client_disconnect, client); + return 0; +} + +void lm_hooks_register(void) +{ + hook_register(lm_client_connect, label_manager_connect); + hook_register(lm_client_disconnect, 
label_manager_disconnect); + hook_register(lm_get_chunk, label_manager_get_chunk); + hook_register(lm_release_chunk, label_manager_release_label_chunk); +} +void lm_hooks_unregister(void) +{ + hook_unregister(lm_client_connect, label_manager_connect); + hook_unregister(lm_client_disconnect, label_manager_disconnect); + hook_unregister(lm_get_chunk, label_manager_get_chunk); + hook_unregister(lm_release_chunk, label_manager_release_label_chunk); +} + +/** + * Init label manager (or proxy to an external one) + */ +void label_manager_init(void) +{ + lbl_mgr.lc_list = list_new(); + lbl_mgr.lc_list->del = delete_label_chunk; + hook_register(zserv_client_close, lm_client_disconnect_cb); + + /* register default hooks for the label manager actions */ + lm_hooks_register(); + + /* notify any external module that we are done */ + hook_call(lm_cbs_inited); +} + +/* alloc and fill a label chunk */ +struct label_manager_chunk * +create_label_chunk(uint8_t proto, unsigned short instance, uint32_t session_id, + uint8_t keep, uint32_t start, uint32_t end) +{ + /* alloc chunk, fill it and return it */ + struct label_manager_chunk *lmc = + XCALLOC(MTYPE_LM_CHUNK, sizeof(struct label_manager_chunk)); + + lmc->start = start; + lmc->end = end; + lmc->proto = proto; + lmc->instance = instance; + lmc->session_id = session_id; + lmc->keep = keep; + + return lmc; +} + +/* attempt to get a specific label chunk */ +static struct label_manager_chunk * +assign_specific_label_chunk(uint8_t proto, unsigned short instance, + uint32_t session_id, uint8_t keep, uint32_t size, + uint32_t base) +{ + struct label_manager_chunk *lmc; + struct listnode *node, *next = NULL; + struct listnode *first_node = NULL; + struct listnode *last_node = NULL; + struct listnode *insert_node = NULL; + + /* precompute last label from base and size */ + uint32_t end = base + size - 1; + + /* sanities */ + if ((base < MPLS_LABEL_UNRESERVED_MIN) + || (end > MPLS_LABEL_UNRESERVED_MAX)) { + zlog_err("Invalid LM request 
arguments: base: %u, size: %u", + base, size); + return NULL; + } + + /* Scan the existing chunks to see if the requested range of labels + * falls inside any of such chunks */ + for (ALL_LIST_ELEMENTS_RO(lbl_mgr.lc_list, node, lmc)) { + + /* skip chunks for labels < base */ + if (base > lmc->end) + continue; + + /* requested range is not covered by any existing, free chunk. + * Therefore, need to insert a chunk */ + if ((end < lmc->start) && !first_node) { + insert_node = node; + break; + } + + if (!first_node) + first_node = node; + + /* if chunk is used, cannot honor request */ + if (lmc->proto != NO_PROTO) + return NULL; + + if (end <= lmc->end) { + last_node = node; + break; + } + } + + /* insert chunk between existing chunks */ + if (insert_node) { + lmc = create_label_chunk(proto, instance, session_id, keep, + base, end); + listnode_add_before(lbl_mgr.lc_list, insert_node, lmc); + return lmc; + } + + if (first_node) { + /* get node past the last one, if there */ + if (last_node) + last_node = listnextnode(last_node); + + /* delete node coming after the above chunk whose labels are + * included in the previous one */ + for (node = first_node; node && (node != last_node); + node = next) { + struct label_manager_chunk *death; + + next = listnextnode(node); + death = listgetdata(node); + list_delete_node(lbl_mgr.lc_list, node); + delete_label_chunk(death); + } + + lmc = create_label_chunk(proto, instance, session_id, keep, + base, end); + if (last_node) + listnode_add_before(lbl_mgr.lc_list, last_node, lmc); + else + listnode_add(lbl_mgr.lc_list, lmc); + + return lmc; + } else { + /* create a new chunk past all the existing ones and link at + * tail */ + lmc = create_label_chunk(proto, instance, session_id, keep, + base, end); + listnode_add(lbl_mgr.lc_list, lmc); + return lmc; + } +} + +/** + * Core function, assigns label chunks + * + * It first searches through the list to check if there's one available + * (previously released). 
Otherwise it creates and assigns a new one
+ *
+ * @param proto Daemon protocol of client, to identify the owner
+ * @param instance Instance, to identify the owner
+ * @param keep If set, avoid garbage collection
+ * @param size Size of the label chunk
+ * @param base Desired starting label of the chunk; if MPLS_LABEL_BASE_ANY it does not apply
+ * @return Pointer to the assigned label chunk, or NULL if the request could not be satisfied
+ */
+struct label_manager_chunk *
+assign_label_chunk(uint8_t proto, unsigned short instance, uint32_t session_id,
+ uint8_t keep, uint32_t size, uint32_t base)
+{
+ struct label_manager_chunk *lmc;
+ struct listnode *node;
+ uint32_t prev_end = MPLS_LABEL_UNRESERVED_MIN;
+
+ /* handle chunks request with a specific base label */
+ if (base != MPLS_LABEL_BASE_ANY)
+ return assign_specific_label_chunk(proto, instance, session_id,
+ keep, size, base);
+
+ /* appease scan-build, who gets confused by the use of macros */
+ assert(lbl_mgr.lc_list);
+
+ /* first check if there's one available */
+ for (ALL_LIST_ELEMENTS_RO(lbl_mgr.lc_list, node, lmc)) {
+ if (lmc->proto == NO_PROTO
+ && lmc->end - lmc->start + 1 == size) {
+ lmc->proto = proto;
+ lmc->instance = instance;
+ lmc->session_id = session_id;
+ lmc->keep = keep;
+ return lmc;
+ }
+ /* check if we have a "hole" behind us that we can squeeze into
+ */
+ if ((lmc->start > prev_end) && (lmc->start - prev_end > size)) {
+ lmc = create_label_chunk(proto, instance, session_id,
+ keep, prev_end + 1,
+ prev_end + size);
+ listnode_add_before(lbl_mgr.lc_list, node, lmc);
+ return lmc;
+ }
+ prev_end = lmc->end;
+ }
+ /* otherwise create a new one */
+ uint32_t start_free;
+
+ if (list_isempty(lbl_mgr.lc_list))
+ start_free = MPLS_LABEL_UNRESERVED_MIN;
+ else
+ start_free = ((struct label_manager_chunk *)listgetdata(
+ listtail(lbl_mgr.lc_list)))
+ ->end
+ + 1;
+
+ if (start_free > MPLS_LABEL_UNRESERVED_MAX - size + 1) {
+ flog_err(EC_ZEBRA_LM_EXHAUSTED_LABELS,
+ "Reached max labels. 
Start: %u, size: %u", start_free, + size); + return NULL; + } + + /* create chunk and link at tail */ + lmc = create_label_chunk(proto, instance, session_id, keep, start_free, + start_free + size - 1); + listnode_add(lbl_mgr.lc_list, lmc); + return lmc; +} + +/** + * Release label chunks from a client. + * + * Called on client disconnection or reconnection. It only releases chunks + * with empty keep value. + * + * @param client Client zapi session + * @param start First label of the chunk + * @param end Last label of the chunk + * @return 0 on success + */ +static int label_manager_release_label_chunk(struct zserv *client, + uint32_t start, uint32_t end) +{ + return release_label_chunk(client->proto, client->instance, + client->session_id, start, end); +} + +/** + * Core function, release no longer used label chunks + * + * @param proto Daemon protocol of client, to identify the owner + * @param instance Instance, to identify the owner + * @param session_id Zclient session ID, to identify the zclient session + * @param start First label of the chunk + * @param end Last label of the chunk + * @return 0 on success, -1 otherwise + */ +int release_label_chunk(uint8_t proto, unsigned short instance, + uint32_t session_id, uint32_t start, uint32_t end) +{ + struct listnode *node; + struct label_manager_chunk *lmc; + int ret = -1; + + /* check that size matches */ + if (IS_ZEBRA_DEBUG_PACKET) + zlog_debug("Releasing label chunk: %u - %u", start, end); + /* find chunk and disown */ + for (ALL_LIST_ELEMENTS_RO(lbl_mgr.lc_list, node, lmc)) { + if (lmc->start != start) + continue; + if (lmc->end != end) + continue; + if (lmc->proto != proto || lmc->instance != instance || + lmc->session_id != session_id) { + flog_err(EC_ZEBRA_LM_DAEMON_MISMATCH, + "%s: Daemon mismatch!!", __func__); + continue; + } + lmc->proto = NO_PROTO; + lmc->instance = 0; + lmc->session_id = 0; + lmc->keep = 0; + ret = 0; + break; + } + if (ret != 0) + flog_err(EC_ZEBRA_LM_UNRELEASED_CHUNK, + "%s: Label 
chunk not released!!", __func__); + + return ret; +} + +/* default functions to be called on hooks */ +static int label_manager_connect(struct zserv *client, vrf_id_t vrf_id) +{ + /* + * Release previous labels of same protocol and instance. + * This is done in case it restarted from an unexpected shutdown. + */ + release_daemon_label_chunks(client); + return zsend_label_manager_connect_response(client, vrf_id, 0); +} +static int label_manager_disconnect(struct zserv *client) +{ + release_daemon_label_chunks(client); + return 0; +} +static int label_manager_get_chunk(struct label_manager_chunk **lmc, + struct zserv *client, uint8_t keep, + uint32_t size, uint32_t base, + vrf_id_t vrf_id) +{ + *lmc = assign_label_chunk(client->proto, client->instance, + client->session_id, keep, size, base); + return lm_get_chunk_response(*lmc, client, vrf_id); +} + +/* Respond to a connect request */ +int lm_client_connect_response(uint8_t proto, uint16_t instance, + uint32_t session_id, vrf_id_t vrf_id, + uint8_t result) +{ + struct zserv *client = zserv_find_client_session(proto, instance, + session_id); + if (!client) { + zlog_err("%s: could not find client for daemon %s instance %u session %u", + __func__, zebra_route_string(proto), instance, + session_id); + return 1; + } + return zsend_label_manager_connect_response(client, vrf_id, result); +} + +/* Respond to a get_chunk request */ +int lm_get_chunk_response(struct label_manager_chunk *lmc, struct zserv *client, + vrf_id_t vrf_id) +{ + if (!lmc) + flog_err(EC_ZEBRA_LM_CANNOT_ASSIGN_CHUNK, + "Unable to assign Label Chunk to %s instance %u", + zebra_route_string(client->proto), client->instance); + else if (IS_ZEBRA_DEBUG_PACKET) + zlog_debug("Assigned Label Chunk %u - %u to %s instance %u", + lmc->start, lmc->end, + zebra_route_string(client->proto), client->instance); + + return zsend_assign_label_chunk_response(client, vrf_id, lmc); +} + +void label_manager_close(void) +{ + list_delete(&lbl_mgr.lc_list); +} diff --git 
a/zebra/label_manager.h b/zebra/label_manager.h new file mode 100644 index 0000000..8636c79 --- /dev/null +++ b/zebra/label_manager.h @@ -0,0 +1,129 @@ +/* + * Label Manager header + * + * Copyright (C) 2017 by Bingen Eguzkitza, + * Volta Networks Inc. + * + * This file is part of FRRouting (FRR) + * + * FRR is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License as published by the + * Free Software Foundation; either version 2, or (at your option) any + * later version. + * + * FRR is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. + * + * You should have received a copy of the GNU General Public License along + * with this program; see the file COPYING; if not, write to the Free Software + * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA + */ + +#ifndef _LABEL_MANAGER_H +#define _LABEL_MANAGER_H + +#include <stdint.h> + +#include "lib/linklist.h" +#include "lib/thread.h" +#include "lib/hook.h" + +#include "zebra/zserv.h" + +#ifdef __cplusplus +extern "C" { +#endif + +#define NO_PROTO 0 + +/* + * Label chunk struct + * Client daemon which the chunk belongs to can be identified by a tuple of: + * proto (daemon protocol) + instance + zapi session_id + * If the client then passes a non-empty value to keep field when it requests + * for chunks, the chunks won't be garbage collected and the client will be + * responsible for releasing them. 
+ * Otherwise, if the keep field is not set (value 0) for the chunk, it will be + * automatically released when the client disconnects or when it reconnects + * (in case it died unexpectedly, we can know it's the same because it will have + * the same proto+instance+session values) + */ +struct label_manager_chunk { + uint8_t proto; + unsigned short instance; + uint32_t session_id; + uint8_t keep; + uint32_t start; /* First label of the chunk */ + uint32_t end; /* Last label of the chunk */ +}; + +/* declare hooks for the basic API, so that it can be specialized or served + * externally. Also declare a hook when those functions have been registered, + * so that any external module wanting to replace those can react + */ + +DECLARE_HOOK(lm_client_connect, (struct zserv *client, vrf_id_t vrf_id), + (client, vrf_id)); +DECLARE_HOOK(lm_client_disconnect, (struct zserv *client), (client)); +DECLARE_HOOK(lm_get_chunk, + (struct label_manager_chunk * *lmc, struct zserv *client, + uint8_t keep, uint32_t size, uint32_t base, vrf_id_t vrf_id), + (lmc, client, keep, size, base, vrf_id)); +DECLARE_HOOK(lm_release_chunk, + (struct zserv *client, uint32_t start, uint32_t end), + (client, start, end)); +DECLARE_HOOK(lm_cbs_inited, (), ()); + + +/* declare wrappers to be called in zapi_msg.c (as hooks must be called in + * source file where they were defined) + */ +void lm_client_connect_call(struct zserv *client, vrf_id_t vrf_id); +void lm_get_chunk_call(struct label_manager_chunk **lmc, struct zserv *client, + uint8_t keep, uint32_t size, uint32_t base, + vrf_id_t vrf_id); +void lm_release_chunk_call(struct zserv *client, uint32_t start, + uint32_t end); + +/* API for an external LM to return responses for requests */ +int lm_client_connect_response(uint8_t proto, uint16_t instance, + uint32_t session_id, vrf_id_t vrf_id, + uint8_t result); +int lm_get_chunk_response(struct label_manager_chunk *lmc, struct zserv *client, + vrf_id_t vrf_id); + +/* convenience function to allocate 
an lmc to be consumed by the above API */ +struct label_manager_chunk * +create_label_chunk(uint8_t proto, unsigned short instance, uint32_t session_id, + uint8_t keep, uint32_t start, uint32_t end); +void delete_label_chunk(void *val); + +/* register/unregister callbacks for hooks */ +void lm_hooks_register(void); +void lm_hooks_unregister(void); + +/* + * Main label manager struct + * Holds a linked list of label chunks. + */ +struct label_manager { + struct list *lc_list; +}; + +void label_manager_init(void); +struct label_manager_chunk * +assign_label_chunk(uint8_t proto, unsigned short instance, uint32_t session_id, + uint8_t keep, uint32_t size, uint32_t base); +int release_label_chunk(uint8_t proto, unsigned short instance, + uint32_t session_id, uint32_t start, uint32_t end); +int lm_client_disconnect_cb(struct zserv *client); +int release_daemon_label_chunks(struct zserv *client); +void label_manager_close(void); + +#ifdef __cplusplus +} +#endif + +#endif /* _LABEL_MANAGER_H */ diff --git a/zebra/main.c b/zebra/main.c new file mode 100644 index 0000000..3a8aa4c --- /dev/null +++ b/zebra/main.c @@ -0,0 +1,482 @@ +/* zebra daemon main routine. + * Copyright (C) 1997, 98 Kunihiro Ishiguro + * + * This file is part of GNU Zebra. + * + * GNU Zebra is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License as published by the + * Free Software Foundation; either version 2, or (at your option) any + * later version. + * + * GNU Zebra is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. 
+ * + * You should have received a copy of the GNU General Public License along + * with this program; see the file COPYING; if not, write to the Free Software + * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA + */ + +#include <zebra.h> + +#include <lib/version.h> +#include "getopt.h" +#include "command.h" +#include "thread.h" +#include "filter.h" +#include "memory.h" +#include "prefix.h" +#include "log.h" +#include "plist.h" +#include "privs.h" +#include "sigevent.h" +#include "vrf.h" +#include "libfrr.h" +#include "routemap.h" +#include "routing_nb.h" + +#include "zebra/zebra_router.h" +#include "zebra/zebra_errors.h" +#include "zebra/rib.h" +#include "zebra/zserv.h" +#include "zebra/debug.h" +#include "zebra/router-id.h" +#include "zebra/irdp.h" +#include "zebra/rtadv.h" +#include "zebra/zebra_ptm.h" +#include "zebra/zebra_ns.h" +#include "zebra/redistribute.h" +#include "zebra/zebra_mpls.h" +#include "zebra/label_manager.h" +#include "zebra/zebra_netns_notify.h" +#include "zebra/zebra_rnh.h" +#include "zebra/zebra_pbr.h" +#include "zebra/zebra_vxlan.h" +#include "zebra/zebra_routemap.h" +#include "zebra/zebra_nb.h" +#include "zebra/zebra_opaque.h" +#include "zebra/zebra_srte.h" +#include "zebra/zebra_srv6.h" +#include "zebra/zebra_srv6_vty.h" + +#define ZEBRA_PTM_SUPPORT + +/* process id. */ +pid_t pid; + +/* Pacify zclient.o in libfrr, which expects this variable. */ +struct thread_master *master; + +/* Route retain mode flag. */ +int retain_mode = 0; + +int graceful_restart; + +bool v6_rr_semantics = false; + +/* Receive buffer size for kernel control sockets */ +#define RCVBUFSIZE_MIN 4194304 +#ifdef HAVE_NETLINK +uint32_t rcvbufsize = RCVBUFSIZE_MIN; +#else +uint32_t rcvbufsize = 128 * 1024; +#endif + +#define OPTION_V6_RR_SEMANTICS 2000 +#define OPTION_ASIC_OFFLOAD 2001 + +/* Command line options. 
*/ +const struct option longopts[] = { + {"batch", no_argument, NULL, 'b'}, + {"allow_delete", no_argument, NULL, 'a'}, + {"socket", required_argument, NULL, 'z'}, + {"ecmp", required_argument, NULL, 'e'}, + {"retain", no_argument, NULL, 'r'}, + {"graceful_restart", required_argument, NULL, 'K'}, + {"asic-offload", optional_argument, NULL, OPTION_ASIC_OFFLOAD}, +#ifdef HAVE_NETLINK + {"vrfwnetns", no_argument, NULL, 'n'}, + {"nl-bufsize", required_argument, NULL, 's'}, + {"v6-rr-semantics", no_argument, NULL, OPTION_V6_RR_SEMANTICS}, +#endif /* HAVE_NETLINK */ + {0}}; + +zebra_capabilities_t _caps_p[] = {ZCAP_NET_ADMIN, ZCAP_SYS_ADMIN, + ZCAP_NET_RAW, +#ifdef HAVE_DPDK + ZCAP_IPC_LOCK, ZCAP_READ_SEARCH, + ZCAP_SYS_RAWIO +#endif +}; + +/* zebra privileges to run with */ +struct zebra_privs_t zserv_privs = { +#if defined(FRR_USER) && defined(FRR_GROUP) + .user = FRR_USER, + .group = FRR_GROUP, +#endif +#ifdef VTY_GROUP + .vty_group = VTY_GROUP, +#endif + .caps_p = _caps_p, + .cap_num_p = array_size(_caps_p), + .cap_num_i = 0}; + +/* SIGHUP handler. */ +static void sighup(void) +{ + zlog_info("SIGHUP received"); + + /* Reload of config file. */ + ; +} + +/* SIGINT handler. */ +static void sigint(void) +{ + struct vrf *vrf; + struct zebra_vrf *zvrf; + struct listnode *ln, *nn; + struct zserv *client; + static bool sigint_done; + + if (sigint_done) + return; + + sigint_done = true; + + zlog_notice("Terminating on signal"); + + atomic_store_explicit(&zrouter.in_shutdown, true, + memory_order_relaxed); + + /* send RA lifetime of 0 before stopping. rfc4861/6.2.5 */ + rtadv_stop_ra_all(); + + frr_early_fini(); + + /* Stop the opaque module pthread */ + zebra_opaque_stop(); + + zebra_dplane_pre_finish(); + + /* Clean up GR related info. 
*/ + zebra_gr_stale_client_cleanup(zrouter.stale_client_list); + list_delete_all_node(zrouter.stale_client_list); + + /* Clean up zapi clients and server module */ + for (ALL_LIST_ELEMENTS(zrouter.client_list, ln, nn, client)) + zserv_close_client(client); + + zserv_close(); + list_delete_all_node(zrouter.client_list); + + /* Once all the zclients are cleaned up, clean up the opaque module */ + zebra_opaque_finish(); + + zebra_ptm_finish(); + + if (retain_mode) { + zebra_nhg_mark_keep(); + RB_FOREACH (vrf, vrf_name_head, &vrfs_by_name) { + zvrf = vrf->info; + if (zvrf) + SET_FLAG(zvrf->flags, ZEBRA_VRF_RETAIN); + } + } + + if (zrouter.lsp_process_q) + work_queue_free_and_null(&zrouter.lsp_process_q); + + vrf_terminate(); + + ns_walk_func(zebra_ns_early_shutdown, NULL, NULL); + zebra_ns_notify_close(); + + access_list_reset(); + prefix_list_reset(); + /* + * zebra_routemap_finish will + * 1 set rmap upd timer to 0 so that rmap update wont be scheduled again + * 2 Put off the rmap update thread + * 3 route_map_finish + */ + zebra_routemap_finish(); + + rib_update_finish(); + + list_delete(&zrouter.client_list); + + /* Indicate that all new dplane work has been enqueued. When that + * work is complete, the dataplane will enqueue an event + * with the 'finalize' function. + */ + zebra_dplane_finish(); +} + +/* + * Final shutdown step for the zebra main thread. This is run after all + * async update processing has completed. + */ +void zebra_finalize(struct thread *dummy) +{ + zlog_info("Zebra final shutdown"); + + /* Final shutdown of ns resources */ + ns_walk_func(zebra_ns_final_shutdown, NULL, NULL); + + /* Stop dplane thread and finish any cleanup */ + zebra_dplane_shutdown(); + + zebra_router_terminate(); + + ns_terminate(); + frr_fini(); + exit(0); +} + +/* SIGUSR1 handler. 
*/ +static void sigusr1(void) +{ + zlog_rotate(); +} + +struct frr_signal_t zebra_signals[] = { + { + .signal = SIGHUP, + .handler = &sighup, + }, + { + .signal = SIGUSR1, + .handler = &sigusr1, + }, + { + .signal = SIGINT, + .handler = &sigint, + }, + { + .signal = SIGTERM, + .handler = &sigint, + }, +}; + +static const struct frr_yang_module_info *const zebra_yang_modules[] = { + &frr_filter_info, + &frr_interface_info, + &frr_route_map_info, + &frr_zebra_info, + &frr_vrf_info, + &frr_routing_info, + &frr_zebra_route_map_info, +}; + +FRR_DAEMON_INFO( + zebra, ZEBRA, .vty_port = ZEBRA_VTY_PORT, .flags = FRR_NO_ZCLIENT, + + .proghelp = + "Daemon which manages kernel routing table management and\nredistribution between different routing protocols.", + + .signals = zebra_signals, .n_signals = array_size(zebra_signals), + + .privs = &zserv_privs, + + .yang_modules = zebra_yang_modules, + .n_yang_modules = array_size(zebra_yang_modules), +); + +/* Main startup routine. */ +int main(int argc, char **argv) +{ + // int batch_mode = 0; + char *zserv_path = NULL; + struct sockaddr_storage dummy; + socklen_t dummylen; + bool asic_offload = false; + bool notify_on_ack = true; + + graceful_restart = 0; + vrf_configure_backend(VRF_BACKEND_VRF_LITE); + + frr_preinit(&zebra_di, argc, argv); + + frr_opt_add( + "baz:e:rK:s:" +#ifdef HAVE_NETLINK + "n" +#endif + , + longopts, + " -b, --batch Runs in batch mode\n" + " -a, --allow_delete Allow other processes to delete zebra routes\n" + " -z, --socket Set path of zebra socket\n" + " -e, --ecmp Specify ECMP to use.\n" + " -r, --retain When program terminates, retain added route by zebra.\n" + " -K, --graceful_restart Graceful restart at the kernel level, timer in seconds for expiration\n" + " -A, --asic-offload FRR is interacting with an asic underneath the linux kernel\n" +#ifdef HAVE_NETLINK + " -s, --nl-bufsize Set netlink receive buffer size\n" + " -n, --vrfwnetns Use NetNS as VRF backend\n" + " --v6-rr-semantics Use v6 RR 
semantics\n" +#else + " -s, Set kernel socket receive buffer size\n" +#endif /* HAVE_NETLINK */ + ); + + while (1) { + int opt = frr_getopt(argc, argv, NULL); + + if (opt == EOF) + break; + + switch (opt) { + case 0: + break; + case 'b': + // batch_mode = 1; + break; + case 'a': + zrouter.allow_delete = true; + break; + case 'e': { + unsigned long int parsed_multipath = + strtoul(optarg, NULL, 10); + if (parsed_multipath == 0 + || parsed_multipath > MULTIPATH_NUM + || parsed_multipath > UINT32_MAX) { + flog_err( + EC_ZEBRA_BAD_MULTIPATH_NUM, + "Multipath Number specified must be less than %u and greater than 0", + MULTIPATH_NUM); + return 1; + } + zrouter.multipath_num = parsed_multipath; + break; + } + case 'z': + zserv_path = optarg; + if (!frr_zclient_addr(&dummy, &dummylen, optarg)) { + fprintf(stderr, + "Invalid zserv socket path: %s\n", + optarg); + exit(1); + } + break; + case 'r': + retain_mode = 1; + break; + case 'K': + graceful_restart = atoi(optarg); + break; + case 's': + rcvbufsize = atoi(optarg); + if (rcvbufsize < RCVBUFSIZE_MIN) + fprintf(stderr, + "Rcvbufsize is smaller than recommended value: %d\n", + RCVBUFSIZE_MIN); + break; +#ifdef HAVE_NETLINK + case 'n': + vrf_configure_backend(VRF_BACKEND_NETNS); + break; + case OPTION_V6_RR_SEMANTICS: + v6_rr_semantics = true; + break; + case OPTION_ASIC_OFFLOAD: + if (!strcmp(optarg, "notify_on_offload")) + notify_on_ack = false; + if (!strcmp(optarg, "notify_on_ack")) + notify_on_ack = true; + asic_offload = true; + break; +#endif /* HAVE_NETLINK */ + default: + frr_help_exit(1); + } + } + + zrouter.master = frr_init(); + + /* Zebra related initialize. */ + zebra_router_init(asic_offload, notify_on_ack); + zserv_init(); + rib_init(); + zebra_if_init(); + zebra_debug_init(); + + /* + * Initialize NS( and implicitly the VRF module), and make kernel + * routing socket. 
*/ + zebra_ns_init(); + router_id_cmd_init(); + zebra_vty_init(); + access_list_init(); + prefix_list_init(); + rtadv_cmd_init(); +/* PTM socket */ +#ifdef ZEBRA_PTM_SUPPORT + zebra_ptm_init(); +#endif + + zebra_mpls_init(); + zebra_mpls_vty_init(); + zebra_pw_vty_init(); + zebra_pbr_init(); + zebra_opaque_init(); + zebra_srte_init(); + zebra_srv6_init(); + zebra_srv6_vty_init(); + + /* For debug purpose. */ + /* SET_FLAG (zebra_debug_event, ZEBRA_DEBUG_EVENT); */ + + /* Process the configuration file. Among other configuration + * directives we can meet those installing static routes. Such + * requests will not be executed immediately, but queued in + * zebra->ribq structure until we enter the main execution loop. + * The notifications from kernel will show originating PID equal + * to that after daemon() completes (if ever called). + */ + frr_config_fork(); + + /* After we have successfully acquired the pidfile, we can be sure + * about being the only copy of zebra process, which is submitting + * changes to the FIB. + * Clean up zebra-originated routes. The requests will be sent to OS + * immediately, so originating PID in notifications from kernel + * will be equal to the current getpid(). To know about such routes, + * we have to have route_read() called before. + */ + zrouter.startup_time = monotime(NULL); + thread_add_timer(zrouter.master, rib_sweep_route, NULL, + graceful_restart, &zrouter.sweeper); + + /* Needed for BSD routing socket. */ + pid = getpid(); + + /* Start dataplane system */ + zebra_dplane_start(); + + /* Start the ted module, before zserv */ + zebra_opaque_start(); + + /* Start Zebra API server */ + zserv_start(zserv_path); + + /* Init label manager */ + label_manager_init(); + + /* RNH init */ + zebra_rnh_init(); + + /* Config handler Init */ + zebra_evpn_init(); + + /* Error init */ + zebra_error_init(); + + frr_run(zrouter.master); + + /* Not reached... 
*/ + return 0; +} diff --git a/zebra/netconf_netlink.c b/zebra/netconf_netlink.c new file mode 100644 index 0000000..4c30544 --- /dev/null +++ b/zebra/netconf_netlink.c @@ -0,0 +1,243 @@ +/* + * netconf_netlink.c - netconf interaction with the kernel using + * netlink + * Copyright (C) 2021 Nvidia, Inc. + * Donald Sharp + * + * This file is part of FRR. + * + * FRR is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License as published by the + * Free Software Foundation; either version 2, or (at your option) any + * later version. + * + * FRR is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with FRR; see the file COPYING. If not, write to the Free + * Software Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA + * 02111-1307, USA. + */ +#include <zebra.h> + +#ifdef HAVE_NETLINK /* Netlink OSes only */ + +#include <ns.h> + +#include "linux/netconf.h" + +#include "lib/lib_errors.h" +#include "zebra/zebra_ns.h" +#include "zebra/zebra_dplane.h" +#include "zebra/kernel_netlink.h" +#include "zebra/netconf_netlink.h" +#include "zebra/debug.h" + +static struct rtattr *netconf_rta(struct netconfmsg *ncm) +{ + return (struct rtattr *)((char *)ncm + + NLMSG_ALIGN(sizeof(struct netconfmsg))); +} + +/* + * Handle netconf update about a single interface: create dplane + * context, and enqueue for processing in the main zebra pthread. 
+ */ +static int +netlink_netconf_dplane_update(ns_id_t ns_id, afi_t afi, ifindex_t ifindex, + enum dplane_netconf_status_e mpls_on, + enum dplane_netconf_status_e mcast_on, + enum dplane_netconf_status_e linkdown_on) +{ + struct zebra_dplane_ctx *ctx; + + ctx = dplane_ctx_alloc(); + dplane_ctx_set_op(ctx, DPLANE_OP_INTF_NETCONFIG); + dplane_ctx_set_ns_id(ctx, ns_id); + dplane_ctx_set_afi(ctx, afi); + dplane_ctx_set_ifindex(ctx, ifindex); + + dplane_ctx_set_netconf_mpls(ctx, mpls_on); + dplane_ctx_set_netconf_mcast(ctx, mcast_on); + dplane_ctx_set_netconf_linkdown(ctx, linkdown_on); + + /* Enqueue ctx for main pthread to process */ + dplane_provider_enqueue_to_zebra(ctx); + + return 0; +} + +/* + * Parse and process an incoming netlink netconf update. + */ +int netlink_netconf_change(struct nlmsghdr *h, ns_id_t ns_id, int startup) +{ + struct netconfmsg *ncm; + struct rtattr *tb[NETCONFA_MAX + 1] = {}; + int len; + ifindex_t ifindex; + uint32_t ival; + afi_t afi; + enum dplane_netconf_status_e mpls_on = DPLANE_NETCONF_STATUS_UNKNOWN; + enum dplane_netconf_status_e mcast_on = DPLANE_NETCONF_STATUS_UNKNOWN; + enum dplane_netconf_status_e linkdown_on = + DPLANE_NETCONF_STATUS_UNKNOWN; + + if (h->nlmsg_type != RTM_NEWNETCONF && h->nlmsg_type != RTM_DELNETCONF) + return 0; + + len = h->nlmsg_len - NLMSG_LENGTH(sizeof(struct netconfmsg)); + if (len < 0) { + zlog_err("%s: Message received from netlink is of a broken size: %d, min %zu", + __func__, h->nlmsg_len, + (size_t)NLMSG_LENGTH(sizeof(struct netconfmsg))); + return -1; + } + + ncm = NLMSG_DATA(h); + + /* + * FRR does not have an internal representation of afi_t for + * the MPLS Address Family that the kernel has. So let's + * just call it v4. 
This is ok because the kernel appears + * to do a good job of not sending data that is mixed/matched + * across families + */ +#ifdef AF_MPLS + if (ncm->ncm_family == AF_MPLS) + afi = AFI_IP; + else +#endif /* AF_MPLS */ + afi = family2afi(ncm->ncm_family); + + netlink_parse_rtattr(tb, NETCONFA_MAX, netconf_rta(ncm), len); + + if (!tb[NETCONFA_IFINDEX]) { + zlog_err("NETCONF message received from netlink without an ifindex"); + return 0; + } + + ifindex = *(ifindex_t *)RTA_DATA(tb[NETCONFA_IFINDEX]); + + if (tb[NETCONFA_INPUT]) { + ival = *(uint32_t *)RTA_DATA(tb[NETCONFA_INPUT]); + if (ival != 0) + mpls_on = DPLANE_NETCONF_STATUS_ENABLED; + else + mpls_on = DPLANE_NETCONF_STATUS_DISABLED; + } + + if (tb[NETCONFA_MC_FORWARDING]) { + ival = *(uint32_t *)RTA_DATA(tb[NETCONFA_MC_FORWARDING]); + if (ival != 0) + mcast_on = DPLANE_NETCONF_STATUS_ENABLED; + else + mcast_on = DPLANE_NETCONF_STATUS_DISABLED; + } + + if (tb[NETCONFA_IGNORE_ROUTES_WITH_LINKDOWN]) { + ival = *(uint32_t *)RTA_DATA( + tb[NETCONFA_IGNORE_ROUTES_WITH_LINKDOWN]); + if (ival != 0) + linkdown_on = DPLANE_NETCONF_STATUS_ENABLED; + else + linkdown_on = DPLANE_NETCONF_STATUS_DISABLED; + } + + if (IS_ZEBRA_DEBUG_KERNEL) + zlog_debug( + "%s: interface %u is mpls on: %d multicast on: %d linkdown: %d", + __func__, ifindex, mpls_on, mcast_on, linkdown_on); + + /* Create a dplane context and pass it along for processing */ + netlink_netconf_dplane_update(ns_id, afi, ifindex, mpls_on, mcast_on, + linkdown_on); + + return 0; +} + +/* + * Request info from the host OS. This only sends the request; any replies + * are processed asynchronously. + */ +int netlink_request_netconf(int sockfd) +{ + struct nlsock *nls; + struct { + struct nlmsghdr n; + struct netconfmsg ncm; + char buf[1024]; + } req = {}; + + nls = kernel_netlink_nlsock_lookup(sockfd); + + if (IS_ZEBRA_DEBUG_KERNEL) + zlog_debug("%s: nlsock %s", __func__, nls ? 
nls->name : "NULL"); + + if (nls == NULL) + return -1; + + req.n.nlmsg_len = NLMSG_LENGTH(sizeof(struct netconfmsg)); + req.n.nlmsg_flags = NLM_F_DUMP | NLM_F_REQUEST; + req.n.nlmsg_type = RTM_GETNETCONF; + req.ncm.ncm_family = AF_UNSPEC; + + return netlink_request(nls, &req); +} + +extern struct zebra_privs_t zserv_privs; +/* + * Currently netconf has no ability to set from netlink. + * So we've received a request to do this work in the data plane. + * as such we need to set the value via the /proc system + */ +enum netlink_msg_status netlink_put_intf_netconfig(struct nl_batch *bth, + struct zebra_dplane_ctx *ctx) +{ + const char *ifname = dplane_ctx_get_ifname(ctx); + enum dplane_netconf_status_e mpls_on = dplane_ctx_get_netconf_mpls(ctx); + char set[64]; + char mpls_proc[PATH_MAX]; + int fd, ret = FRR_NETLINK_ERROR; + + snprintf(mpls_proc, sizeof(mpls_proc), + "/proc/sys/net/mpls/conf/%s/input", ifname); + + if (mpls_on == DPLANE_NETCONF_STATUS_ENABLED) + snprintf(set, sizeof(set), "1\n"); + else if (mpls_on == DPLANE_NETCONF_STATUS_DISABLED) + snprintf(set, sizeof(set), "0\n"); + else { + flog_err_sys( + EC_LIB_DEVELOPMENT, + "%s: Expected interface %s to be set to ENABLED or DISABLED was %d", + __func__, ifname, mpls_on); + return ret; + } + + frr_with_privs (&zserv_privs) { + fd = open(mpls_proc, O_WRONLY); + if (fd < 0) { + flog_err_sys( + EC_LIB_SOCKET, + "%s: Unable to open %s for writing: %s(%d)", + __func__, mpls_proc, safe_strerror(errno), + errno); + return ret; + } + if (write(fd, set, 2) == 2) + ret = FRR_NETLINK_SUCCESS; + else + flog_err_sys(EC_LIB_SOCKET, + "%s: Unsuccessful write to %s: %s(%d)", + __func__, mpls_proc, safe_strerror(errno), + errno); + close(fd); + } + return ret; +} + +#endif /* HAVE_NETLINK */ diff --git a/zebra/netconf_netlink.h b/zebra/netconf_netlink.h new file mode 100644 index 0000000..1b3450b --- /dev/null +++ b/zebra/netconf_netlink.h @@ -0,0 +1,52 @@ +/* + * netconf_netlink.h - netconf interaction with the kernel using + 
#endif /* __NETCONF_NETLINK_H__ */
	 * Make sure that what we are called with actually makes sense
if (!vrf_bitmap_check(client->redist_default[afi], vrf_id)) + continue; + + /* Lookup table. */ + table = zebra_vrf_table(afi, SAFI_UNICAST, vrf_id); + if (!table) + continue; + + /* Lookup default route. */ + memset(&p, 0, sizeof(p)); + p.family = afi2family(afi); + rn = route_node_lookup(table, &p); + if (!rn) + continue; + + RNODE_FOREACH_RE (rn, newre) { + if (CHECK_FLAG(newre->flags, ZEBRA_FLAG_SELECTED)) + zsend_redistribute_route( + ZEBRA_REDISTRIBUTE_ROUTE_ADD, client, + rn, newre); + } + + route_unlock_node(rn); + } +} + +/* Redistribute routes. */ +static void zebra_redistribute(struct zserv *client, int type, + unsigned short instance, vrf_id_t vrf_id, + int afi) +{ + struct route_entry *newre; + struct route_table *table; + struct route_node *rn; + + table = zebra_vrf_table(afi, SAFI_UNICAST, vrf_id); + if (!table) + return; + + for (rn = route_top(table); rn; rn = srcdest_route_next(rn)) + RNODE_FOREACH_RE (rn, newre) { + if (IS_ZEBRA_DEBUG_RIB) + zlog_debug( + "%s: client %s %pRN(%u:%u) checking: selected=%d, type=%d, distance=%d, metric=%d zebra_check_addr=%d", + __func__, + zebra_route_string(client->proto), rn, + vrf_id, newre->instance, + !!CHECK_FLAG(newre->flags, + ZEBRA_FLAG_SELECTED), + newre->type, newre->distance, + newre->metric, + zebra_check_addr(&rn->p)); + + if (!CHECK_FLAG(newre->flags, ZEBRA_FLAG_SELECTED)) + continue; + if ((type != ZEBRA_ROUTE_ALL + && (newre->type != type + || newre->instance != instance))) + continue; + if (!zebra_check_addr(&rn->p)) + continue; + + zsend_redistribute_route(ZEBRA_REDISTRIBUTE_ROUTE_ADD, + client, rn, newre); + } +} + +/* + * Function to check if prefix is candidate for + * redistribute. 
	/* If redistribution is enabled for zebra route all */
zebra_route_string(prev_re->type) : "None"); + + if (!zebra_check_addr(&rn->p)) { + if (IS_ZEBRA_DEBUG_RIB) + zlog_debug("Redist update filter prefix %pRN", rn); + return; + } + + + for (ALL_LIST_ELEMENTS(zrouter.client_list, node, nnode, client)) { + if (zebra_redistribute_check(rn, re, client)) { + if (IS_ZEBRA_DEBUG_RIB) { + zlog_debug( + "%s: client %s %pRN(%u:%u), type=%d, distance=%d, metric=%d", + __func__, + zebra_route_string(client->proto), rn, + re->vrf_id, re->table, re->type, + re->distance, re->metric); + } + zsend_redistribute_route(ZEBRA_REDISTRIBUTE_ROUTE_ADD, + client, rn, re); + } else if (zebra_redistribute_check(rn, prev_re, client)) + zsend_redistribute_route(ZEBRA_REDISTRIBUTE_ROUTE_DEL, + client, rn, prev_re); + } +} + +/* + * During a route delete, where 'new_re' is NULL, redist a delete to all + * clients registered for the type of 'old_re'. + * During a route update, redist a delete to any clients who will not see + * an update when the new route is installed. There are cases when a client + * may have seen a redist for 'old_re', but will not see + * the redist for 'new_re'. + */ +void redistribute_delete(const struct route_node *rn, + const struct route_entry *old_re, + const struct route_entry *new_re) +{ + struct listnode *node, *nnode; + struct zserv *client; + vrf_id_t vrfid; + + if (old_re) + vrfid = old_re->vrf_id; + else if (new_re) + vrfid = new_re->vrf_id; + else + return; + + if (IS_ZEBRA_DEBUG_RIB) { + uint8_t old_inst, new_inst; + uint32_t table = 0; + + old_inst = new_inst = 0; + + if (old_re) { + old_inst = old_re->instance; + table = old_re->table; + } + if (new_re) { + new_inst = new_re->instance; + table = new_re->table; + } + + zlog_debug( + "%u:%u%pRN: Redist del: re %p (%u:%s), new re %p (%u:%s)", + vrfid, table, rn, old_re, old_inst, + old_re ? zebra_route_string(old_re->type) : "None", + new_re, new_inst, + new_re ? zebra_route_string(new_re->type) : "None"); + } + + /* Skip invalid (e.g. 
linklocal) prefix */ + if (!zebra_check_addr(&rn->p)) { + if (IS_ZEBRA_DEBUG_RIB) { + zlog_debug( + "%u:%pRN: Redist del old: skipping invalid prefix", + vrfid, rn); + } + return; + } + + for (ALL_LIST_ELEMENTS(zrouter.client_list, node, nnode, client)) { + /* Do not send unsolicited messages to synchronous clients. */ + if (client->synchronous) + continue; + /* + * Skip this client if it will receive an update for the + * 'new' re + */ + if (zebra_redistribute_check(rn, new_re, client)) + continue; + + /* Send a delete for the 'old' re to any subscribed client. */ + if (zebra_redistribute_check(rn, old_re, client)) + zsend_redistribute_route(ZEBRA_REDISTRIBUTE_ROUTE_DEL, + client, rn, old_re); + } +} + + +void zebra_redistribute_add(ZAPI_HANDLER_ARGS) +{ + afi_t afi = 0; + int type = 0; + unsigned short instance; + + STREAM_GETC(msg, afi); + STREAM_GETC(msg, type); + STREAM_GETW(msg, instance); + + if (IS_ZEBRA_DEBUG_EVENT) + zlog_debug( + "%s: client proto %s afi=%d, wants %s, vrf %s(%u), instance=%d", + __func__, zebra_route_string(client->proto), afi, + zebra_route_string(type), VRF_LOGNAME(zvrf->vrf), + zvrf_id(zvrf), instance); + + if (afi == 0 || afi >= AFI_MAX) { + flog_warn(EC_ZEBRA_REDISTRIBUTE_UNKNOWN_AF, + "%s: Specified afi %d does not exist", __func__, afi); + return; + } + + if (type == 0 || type >= ZEBRA_ROUTE_MAX) { + zlog_debug("%s: Specified Route Type %d does not exist", + __func__, type); + return; + } + + if (instance) { + if (!redist_check_instance(&client->mi_redist[afi][type], + instance)) { + redist_add_instance(&client->mi_redist[afi][type], + instance); + zebra_redistribute(client, type, instance, + zvrf_id(zvrf), afi); + } + } else { + if (!vrf_bitmap_check(client->redist[afi][type], + zvrf_id(zvrf))) { + if (IS_ZEBRA_DEBUG_EVENT) + zlog_debug( + "%s: setting vrf %s(%u) redist bitmap", + __func__, VRF_LOGNAME(zvrf->vrf), + zvrf_id(zvrf)); + vrf_bitmap_set(client->redist[afi][type], + zvrf_id(zvrf)); + zebra_redistribute(client, type, 0, 
zvrf_id(zvrf), afi); + } + } + +stream_failure: + return; +} + +void zebra_redistribute_delete(ZAPI_HANDLER_ARGS) +{ + afi_t afi = 0; + int type = 0; + unsigned short instance; + + STREAM_GETC(msg, afi); + STREAM_GETC(msg, type); + STREAM_GETW(msg, instance); + + if (IS_ZEBRA_DEBUG_EVENT) + zlog_debug( + "%s: client proto %s afi=%d, no longer wants %s, vrf %s(%u), instance=%d", + __func__, zebra_route_string(client->proto), afi, + zebra_route_string(type), VRF_LOGNAME(zvrf->vrf), + zvrf_id(zvrf), instance); + + + if (afi == 0 || afi >= AFI_MAX) { + flog_warn(EC_ZEBRA_REDISTRIBUTE_UNKNOWN_AF, + "%s: Specified afi %d does not exist", __func__, afi); + return; + } + + if (type == 0 || type >= ZEBRA_ROUTE_MAX) { + zlog_debug("%s: Specified Route Type %d does not exist", + __func__, type); + return; + } + + /* + * NOTE: no need to withdraw the previously advertised routes. The + * clients + * themselves should keep track of the received routes from zebra and + * withdraw them when necessary. + */ + if (instance) + redist_del_instance(&client->mi_redist[afi][type], instance); + else + vrf_bitmap_unset(client->redist[afi][type], zvrf_id(zvrf)); + +stream_failure: + return; +} + +void zebra_redistribute_default_add(ZAPI_HANDLER_ARGS) +{ + afi_t afi = 0; + + STREAM_GETC(msg, afi); + + if (afi == 0 || afi >= AFI_MAX) { + flog_warn(EC_ZEBRA_REDISTRIBUTE_UNKNOWN_AF, + "%s: Specified afi %u does not exist", __func__, afi); + return; + } + + vrf_bitmap_set(client->redist_default[afi], zvrf_id(zvrf)); + zebra_redistribute_default(client, zvrf_id(zvrf)); + +stream_failure: + return; +} + +void zebra_redistribute_default_delete(ZAPI_HANDLER_ARGS) +{ + afi_t afi = 0; + + STREAM_GETC(msg, afi); + + if (afi == 0 || afi >= AFI_MAX) { + flog_warn(EC_ZEBRA_REDISTRIBUTE_UNKNOWN_AF, + "%s: Specified afi %u does not exist", __func__, afi); + return; + } + + vrf_bitmap_unset(client->redist_default[afi], zvrf_id(zvrf)); + +stream_failure: + return; +} + +/* Interface up information. 
*/ +void zebra_interface_up_update(struct interface *ifp) +{ + struct listnode *node, *nnode; + struct zserv *client; + + if (IS_ZEBRA_DEBUG_EVENT) + zlog_debug("MESSAGE: ZEBRA_INTERFACE_UP %s vrf %s(%u)", + ifp->name, ifp->vrf->name, ifp->vrf->vrf_id); + + if (ifp->ptm_status || !ifp->ptm_enable) { + for (ALL_LIST_ELEMENTS(zrouter.client_list, node, nnode, + client)) { + /* Do not send unsolicited messages to synchronous + * clients. + */ + if (client->synchronous) + continue; + + zsend_interface_update(ZEBRA_INTERFACE_UP, + client, ifp); + zsend_interface_link_params(client, ifp); + } + } +} + +/* Interface down information. */ +void zebra_interface_down_update(struct interface *ifp) +{ + struct listnode *node, *nnode; + struct zserv *client; + + if (IS_ZEBRA_DEBUG_EVENT) + zlog_debug("MESSAGE: ZEBRA_INTERFACE_DOWN %s vrf %s(%u)", + ifp->name, ifp->vrf->name, ifp->vrf->vrf_id); + + for (ALL_LIST_ELEMENTS(zrouter.client_list, node, nnode, client)) { + /* Do not send unsolicited messages to synchronous clients. */ + if (client->synchronous) + continue; + + zsend_interface_update(ZEBRA_INTERFACE_DOWN, client, ifp); + } +} + +/* Interface information update. */ +void zebra_interface_add_update(struct interface *ifp) +{ + struct listnode *node, *nnode; + struct zserv *client; + + if (IS_ZEBRA_DEBUG_EVENT) + zlog_debug("MESSAGE: ZEBRA_INTERFACE_ADD %s vrf %s(%u)", + ifp->name, ifp->vrf->name, ifp->vrf->vrf_id); + + for (ALL_LIST_ELEMENTS(zrouter.client_list, node, nnode, client)) { + /* Do not send unsolicited messages to synchronous clients. 
*/ + if (client->synchronous) + continue; + + client->ifadd_cnt++; + zsend_interface_add(client, ifp); + zsend_interface_link_params(client, ifp); + } +} + +void zebra_interface_delete_update(struct interface *ifp) +{ + struct listnode *node, *nnode; + struct zserv *client; + + if (IS_ZEBRA_DEBUG_EVENT) + zlog_debug("MESSAGE: ZEBRA_INTERFACE_DELETE %s vrf %s(%u)", + ifp->name, ifp->vrf->name, ifp->vrf->vrf_id); + + for (ALL_LIST_ELEMENTS(zrouter.client_list, node, nnode, client)) { + /* Do not send unsolicited messages to synchronous clients. */ + if (client->synchronous) + continue; + + client->ifdel_cnt++; + zsend_interface_delete(client, ifp); + } +} + +/* Interface address addition. */ +void zebra_interface_address_add_update(struct interface *ifp, + struct connected *ifc) +{ + struct listnode *node, *nnode; + struct zserv *client; + + if (IS_ZEBRA_DEBUG_EVENT) + zlog_debug( + "MESSAGE: ZEBRA_INTERFACE_ADDRESS_ADD %pFX on %s vrf %s(%u)", + ifc->address, ifp->name, ifp->vrf->name, + ifp->vrf->vrf_id); + + if (!CHECK_FLAG(ifc->conf, ZEBRA_IFC_REAL)) + flog_warn( + EC_ZEBRA_ADVERTISING_UNUSABLE_ADDR, + "advertising address to clients that is not yet usable."); + + zebra_vxlan_add_del_gw_macip(ifp, ifc->address, 1); + + router_id_add_address(ifc); + + for (ALL_LIST_ELEMENTS(zrouter.client_list, node, nnode, client)) { + /* Do not send unsolicited messages to synchronous clients. */ + if (client->synchronous) + continue; + + if (CHECK_FLAG(ifc->conf, ZEBRA_IFC_REAL)) { + client->connected_rt_add_cnt++; + zsend_interface_address(ZEBRA_INTERFACE_ADDRESS_ADD, + client, ifp, ifc); + } + } +} + +/* Interface address deletion. 
*/ +void zebra_interface_address_delete_update(struct interface *ifp, + struct connected *ifc) +{ + struct listnode *node, *nnode; + struct zserv *client; + + if (IS_ZEBRA_DEBUG_EVENT) + zlog_debug( + "MESSAGE: ZEBRA_INTERFACE_ADDRESS_DELETE %pFX on %s vrf %s(%u)", + ifc->address, ifp->name, ifp->vrf->name, + ifp->vrf->vrf_id); + + zebra_vxlan_add_del_gw_macip(ifp, ifc->address, 0); + + router_id_del_address(ifc); + + for (ALL_LIST_ELEMENTS(zrouter.client_list, node, nnode, client)) { + /* Do not send unsolicited messages to synchronous clients. */ + if (client->synchronous) + continue; + + if (CHECK_FLAG(ifc->conf, ZEBRA_IFC_REAL)) { + client->connected_rt_del_cnt++; + zsend_interface_address(ZEBRA_INTERFACE_ADDRESS_DELETE, + client, ifp, ifc); + } + } +} + +/* Interface VRF change. May need to delete from clients not interested in + * the new VRF. Note that this function is invoked *prior* to the VRF change. + */ +void zebra_interface_vrf_update_del(struct interface *ifp, vrf_id_t new_vrf_id) +{ + struct listnode *node, *nnode; + struct zserv *client; + + if (IS_ZEBRA_DEBUG_EVENT) + zlog_debug( + "MESSAGE: ZEBRA_INTERFACE_VRF_UPDATE/DEL %s VRF Id %u -> %u", + ifp->name, ifp->vrf->vrf_id, new_vrf_id); + + for (ALL_LIST_ELEMENTS(zrouter.client_list, node, nnode, client)) { + /* Do not send unsolicited messages to synchronous clients. */ + if (client->synchronous) + continue; + + /* Need to delete if the client is not interested in the new + * VRF. */ + zsend_interface_update(ZEBRA_INTERFACE_DOWN, client, ifp); + client->ifdel_cnt++; + zsend_interface_delete(client, ifp); + zsend_interface_vrf_update(client, ifp, new_vrf_id); + } +} + +/* Interface VRF change. This function is invoked *post* VRF change and sends an + * add to clients who are interested in the new VRF but not in the old VRF. 
+ */ +void zebra_interface_vrf_update_add(struct interface *ifp, vrf_id_t old_vrf_id) +{ + struct listnode *node, *nnode; + struct zserv *client; + + if (IS_ZEBRA_DEBUG_EVENT) + zlog_debug( + "MESSAGE: ZEBRA_INTERFACE_VRF_UPDATE/ADD %s VRF Id %u -> %u", + ifp->name, old_vrf_id, ifp->vrf->vrf_id); + + for (ALL_LIST_ELEMENTS(zrouter.client_list, node, nnode, client)) { + /* Do not send unsolicited messages to synchronous clients. */ + if (client->synchronous) + continue; + + /* Need to add if the client is interested in the new VRF. */ + client->ifadd_cnt++; + zsend_interface_add(client, ifp); + zsend_interface_addresses(client, ifp); + } +} + +int zebra_add_import_table_entry(struct zebra_vrf *zvrf, struct route_node *rn, + struct route_entry *re, const char *rmap_name) +{ + struct route_entry *newre; + struct route_entry *same; + struct prefix p; + struct nexthop_group *ng; + route_map_result_t ret = RMAP_PERMITMATCH; + afi_t afi; + + afi = family2afi(rn->p.family); + if (rmap_name) + ret = zebra_import_table_route_map_check( + afi, re->type, re->instance, &rn->p, + re->nhe->nhg.nexthop, + zvrf->vrf->vrf_id, re->tag, rmap_name); + + if (ret != RMAP_PERMITMATCH) { + UNSET_FLAG(re->flags, ZEBRA_FLAG_SELECTED); + zebra_del_import_table_entry(zvrf, rn, re); + return 0; + } + + prefix_copy(&p, &rn->p); + + RNODE_FOREACH_RE (rn, same) { + if (CHECK_FLAG(same->status, ROUTE_ENTRY_REMOVED)) + continue; + + if (same->type == re->type && same->instance == re->instance + && same->table == re->table + && same->type != ZEBRA_ROUTE_CONNECT) + break; + } + + if (same) { + UNSET_FLAG(same->flags, ZEBRA_FLAG_SELECTED); + zebra_del_import_table_entry(zvrf, rn, same); + } + + newre = zebra_rib_route_entry_new( + 0, ZEBRA_ROUTE_TABLE, re->table, re->flags, re->nhe_id, + zvrf->table_id, re->metric, re->mtu, + zebra_import_table_distance[afi][re->table], re->tag); + + ng = nexthop_group_new(); + copy_nexthops(&ng->nexthop, re->nhe->nhg.nexthop, NULL); + + rib_add_multipath(afi, 
SAFI_UNICAST, &p, NULL, newre, ng, false); + + return 0; +} + +int zebra_del_import_table_entry(struct zebra_vrf *zvrf, struct route_node *rn, + struct route_entry *re) +{ + struct prefix p; + afi_t afi; + + afi = family2afi(rn->p.family); + prefix_copy(&p, &rn->p); + + rib_delete(afi, SAFI_UNICAST, zvrf->vrf->vrf_id, ZEBRA_ROUTE_TABLE, + re->table, re->flags, &p, NULL, re->nhe->nhg.nexthop, + re->nhe_id, zvrf->table_id, re->metric, re->distance, + false); + + return 0; +} + +/* Assuming no one calls this with the main routing table */ +int zebra_import_table(afi_t afi, vrf_id_t vrf_id, uint32_t table_id, + uint32_t distance, const char *rmap_name, int add) +{ + struct route_table *table; + struct route_entry *re; + struct route_node *rn; + struct zebra_vrf *zvrf = zebra_vrf_lookup_by_id(vrf_id); + + if (!is_zebra_valid_kernel_table(table_id) + || (table_id == RT_TABLE_MAIN)) + return -1; + + if (afi >= AFI_MAX) + return -1; + + table = zebra_vrf_get_table_with_table_id(afi, SAFI_UNICAST, vrf_id, + table_id); + if (table == NULL) { + return 0; + } else if (IS_ZEBRA_DEBUG_RIB) { + zlog_debug("%s routes from table %d", + add ? 
"Importing" : "Unimporting", table_id); + } + + if (add) { + if (rmap_name) + zebra_add_import_table_route_map(afi, rmap_name, + table_id); + else { + rmap_name = + zebra_get_import_table_route_map(afi, table_id); + if (rmap_name) { + zebra_del_import_table_route_map(afi, table_id); + rmap_name = NULL; + } + } + + zebra_import_table_used[afi][table_id] = 1; + zebra_import_table_distance[afi][table_id] = distance; + } else { + zebra_import_table_used[afi][table_id] = 0; + zebra_import_table_distance[afi][table_id] = + ZEBRA_TABLE_DISTANCE_DEFAULT; + + rmap_name = zebra_get_import_table_route_map(afi, table_id); + if (rmap_name) { + zebra_del_import_table_route_map(afi, table_id); + rmap_name = NULL; + } + } + + for (rn = route_top(table); rn; rn = route_next(rn)) { + /* For each entry in the non-default routing table, + * add the entry in the main table + */ + if (!rn->info) + continue; + + RNODE_FOREACH_RE (rn, re) { + if (CHECK_FLAG(re->status, ROUTE_ENTRY_REMOVED)) + continue; + break; + } + + if (!re) + continue; + + if (((afi == AFI_IP) && (rn->p.family == AF_INET)) + || ((afi == AFI_IP6) && (rn->p.family == AF_INET6))) { + if (add) + zebra_add_import_table_entry(zvrf, rn, re, + rmap_name); + else + zebra_del_import_table_entry(zvrf, rn, re); + } + } + return 0; +} + +int zebra_import_table_config(struct vty *vty, vrf_id_t vrf_id) +{ + int i; + afi_t afi; + int write = 0; + char afi_str[AFI_MAX][10] = {"", "ip", "ipv6", "ethernet"}; + const char *rmap_name; + + for (afi = AFI_IP; afi < AFI_MAX; afi++) { + for (i = 1; i < ZEBRA_KERNEL_TABLE_MAX; i++) { + if (!is_zebra_import_table_enabled(afi, vrf_id, i)) + continue; + + if (zebra_import_table_distance[afi][i] + != ZEBRA_TABLE_DISTANCE_DEFAULT) { + vty_out(vty, "%s import-table %d distance %d", + afi_str[afi], i, + zebra_import_table_distance[afi][i]); + } else { + vty_out(vty, "%s import-table %d", afi_str[afi], + i); + } + + rmap_name = zebra_get_import_table_route_map(afi, i); + if (rmap_name) + vty_out(vty, 
" route-map %s", rmap_name); + + vty_out(vty, "\n"); + write = 1; + } + } + + return write; +} + +static void zebra_import_table_rm_update_vrf_afi(struct zebra_vrf *zvrf, + afi_t afi, int table_id, + const char *rmap) +{ + struct route_table *table; + struct route_entry *re; + struct route_node *rn; + const char *rmap_name; + + rmap_name = zebra_get_import_table_route_map(afi, table_id); + if ((!rmap_name) || (strcmp(rmap_name, rmap) != 0)) + return; + + table = zebra_vrf_get_table_with_table_id(afi, SAFI_UNICAST, + zvrf->vrf->vrf_id, table_id); + if (!table) { + if (IS_ZEBRA_DEBUG_RIB_DETAILED) + zlog_debug("%s: Table id=%d not found", __func__, + table_id); + return; + } + + for (rn = route_top(table); rn; rn = route_next(rn)) { + /* + * For each entry in the non-default routing table, + * add the entry in the main table + */ + if (!rn->info) + continue; + + RNODE_FOREACH_RE (rn, re) { + if (CHECK_FLAG(re->status, ROUTE_ENTRY_REMOVED)) + continue; + break; + } + + if (!re) + continue; + + if (((afi == AFI_IP) && (rn->p.family == AF_INET)) + || ((afi == AFI_IP6) && (rn->p.family == AF_INET6))) + zebra_add_import_table_entry(zvrf, rn, re, rmap_name); + } + + return; +} + +static void zebra_import_table_rm_update_vrf(struct zebra_vrf *zvrf, + const char *rmap) +{ + afi_t afi; + int i; + + for (afi = AFI_IP; afi < AFI_MAX; afi++) { + for (i = 1; i < ZEBRA_KERNEL_TABLE_MAX; i++) { + if (!is_zebra_import_table_enabled( + afi, zvrf->vrf->vrf_id, i)) + continue; + + zebra_import_table_rm_update_vrf_afi(zvrf, afi, i, + rmap); + } + } +} + +void zebra_import_table_rm_update(const char *rmap) +{ + struct vrf *vrf; + struct zebra_vrf *zvrf; + + RB_FOREACH (vrf, vrf_name_head, &vrfs_by_name) { + zvrf = vrf->info; + + if (!zvrf) + continue; + + zebra_import_table_rm_update_vrf(zvrf, rmap); + } +} + +/* Interface parameters update */ +void zebra_interface_parameters_update(struct interface *ifp) +{ + struct listnode *node, *nnode; + struct zserv *client; + + if 
(IS_ZEBRA_DEBUG_EVENT) + zlog_debug("MESSAGE: ZEBRA_INTERFACE_LINK_PARAMS %s vrf %s(%u)", + ifp->name, ifp->vrf->name, ifp->vrf->vrf_id); + + for (ALL_LIST_ELEMENTS(zrouter.client_list, node, nnode, client)) { + /* Do not send unsolicited messages to synchronous clients. */ + if (client->synchronous) + continue; + + zsend_interface_link_params(client, ifp); + } +} diff --git a/zebra/redistribute.h b/zebra/redistribute.h new file mode 100644 index 0000000..ac257d6 --- /dev/null +++ b/zebra/redistribute.h @@ -0,0 +1,96 @@ +/* + * Redistribution Handler + * Copyright (C) 1999 Kunihiro Ishiguro + * + * This file is part of GNU Zebra. + * + * GNU Zebra is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License as published by the + * Free Software Foundation; either version 2, or (at your option) any + * later version. + * + * GNU Zebra is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. 
+ * + * You should have received a copy of the GNU General Public License along + * with this program; see the file COPYING; if not, write to the Free Software + * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA + */ + +#ifndef _ZEBRA_REDISTRIBUTE_H +#define _ZEBRA_REDISTRIBUTE_H + +#include "table.h" +#include "vty.h" +#include "vrf.h" + +#include "zebra/zserv.h" +#include "zebra/rib.h" + +#ifdef __cplusplus +extern "C" { +#endif + +/* ZAPI command handlers */ +extern void zebra_redistribute_add(ZAPI_HANDLER_ARGS); +extern void zebra_redistribute_delete(ZAPI_HANDLER_ARGS); +extern void zebra_redistribute_default_add(ZAPI_HANDLER_ARGS); +extern void zebra_redistribute_default_delete(ZAPI_HANDLER_ARGS); +/* ----------------- */ + +extern void redistribute_update(const struct route_node *rn, + const struct route_entry *re, + const struct route_entry *prev_re); +/* + * During a route delete, where 'new_re' is NULL, redist a delete to all + * clients registered for the type of 'old_re'. + * During a route update, redist a delete to any clients who will not see + * an update when the new route is installed. There are cases when a client + * may have seen a redist for 'old_re', but will not see + * the redist for 'new_re'. 
+ */ +void redistribute_delete(const struct route_node *rn, + const struct route_entry *old_re, + const struct route_entry *new_re); + +extern void zebra_interface_up_update(struct interface *ifp); +extern void zebra_interface_down_update(struct interface *ifp); + +extern void zebra_interface_add_update(struct interface *ifp); +extern void zebra_interface_delete_update(struct interface *ifp); + +extern void zebra_interface_address_add_update(struct interface *ifp, + struct connected *c); +extern void zebra_interface_address_delete_update(struct interface *ifp, + struct connected *c); +extern void zebra_interface_parameters_update(struct interface *ifp); +extern void zebra_interface_vrf_update_del(struct interface *ifp, + vrf_id_t new_vrf_id); +extern void zebra_interface_vrf_update_add(struct interface *ifp, + vrf_id_t old_vrf_id); + +extern int zebra_import_table(afi_t afi, vrf_id_t vrf_id, + uint32_t table_id, uint32_t distance, + const char *rmap_name, int add); + +extern int zebra_add_import_table_entry(struct zebra_vrf *zvrf, + struct route_node *rn, + struct route_entry *re, + const char *rmap_name); +extern int zebra_del_import_table_entry(struct zebra_vrf *zvrf, + struct route_node *rn, + struct route_entry *re); +extern int is_zebra_import_table_enabled(afi_t, vrf_id_t vrf_id, + uint32_t table_id); + +extern int zebra_import_table_config(struct vty *, vrf_id_t vrf_id); + +extern void zebra_import_table_rm_update(const char *rmap); + +#ifdef __cplusplus +} +#endif + +#endif /* _ZEBRA_REDISTRIBUTE_H */ diff --git a/zebra/rib.h b/zebra/rib.h new file mode 100644 index 0000000..d6e4e78 --- /dev/null +++ b/zebra/rib.h @@ -0,0 +1,633 @@ +/* + * Routing Information Base header + * Copyright (C) 1997 Kunihiro Ishiguro + * + * This file is part of GNU Zebra. 
+ * + * GNU Zebra is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License as published by the + * Free Software Foundation; either version 2, or (at your option) any + * later version. + * + * GNU Zebra is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. + * + * You should have received a copy of the GNU General Public License along + * with this program; see the file COPYING; if not, write to the Free Software + * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA + */ + +#ifndef _ZEBRA_RIB_H +#define _ZEBRA_RIB_H + +#include "zebra.h" +#include "memory.h" +#include "hook.h" +#include "typesafe.h" +#include "linklist.h" +#include "prefix.h" +#include "table.h" +#include "queue.h" +#include "nexthop.h" +#include "nexthop_group.h" +#include "vrf.h" +#include "if.h" +#include "mpls.h" +#include "srcdest_table.h" +#include "zebra/zebra_nhg.h" + +#ifdef __cplusplus +extern "C" { +#endif + +DECLARE_MGROUP(ZEBRA); + +DECLARE_MTYPE(RE); + +PREDECL_LIST(rnh_list); + +/* Nexthop structure. */ +struct rnh { + uint8_t flags; + +#define ZEBRA_NHT_CONNECTED 0x1 +#define ZEBRA_NHT_DELETED 0x2 +#define ZEBRA_NHT_RESOLVE_VIA_DEFAULT 0x4 + + /* VRF identifier. 
*/ + vrf_id_t vrf_id; + + afi_t afi; + safi_t safi; + + uint32_t seqno; + + struct route_entry *state; + struct prefix resolved_route; + struct list *client_list; + + /* pseudowires dependent on this nh */ + struct list *zebra_pseudowire_list; + + struct route_node *node; + + /* + * if this has been filtered for the client + */ + int filtered[ZEBRA_ROUTE_MAX]; + + struct rnh_list_item rnh_list_item; +}; + +#define DISTANCE_INFINITY 255 +#define ZEBRA_KERNEL_TABLE_MAX 252 /* support for no more than this rt tables */ + +PREDECL_LIST(re_list); + +struct re_opaque { + uint16_t length; + uint8_t data[]; +}; + +struct route_entry { + /* Link list. */ + struct re_list_item next; + + /* Nexthop group, shared/refcounted, based on the nexthop(s) + * provided by the owner of the route + */ + struct nhg_hash_entry *nhe; + + /* Nexthop group from FIB (optional), reflecting what is actually + * installed in the FIB if that differs. The 'backup' group is used + * when backup nexthops are present in the route's nhg. + */ + struct nexthop_group fib_ng; + struct nexthop_group fib_backup_ng; + + /* Nexthop group hash entry IDs. The "installed" id is the id + * used in linux/netlink, if available. + */ + uint32_t nhe_id; + uint32_t nhe_installed_id; + + /* Tag */ + route_tag_t tag; + + /* Uptime. */ + time_t uptime; + + /* Type of this route. */ + int type; + + /* VRF identifier. */ + vrf_id_t vrf_id; + + /* Which routing table */ + uint32_t table; + + /* Metric */ + uint32_t metric; + + /* MTU */ + uint32_t mtu; + uint32_t nexthop_mtu; + + /* Flags of this route. 
+ * This flag's definition is in lib/zebra.h ZEBRA_FLAG_* and is exposed + * to clients via Zserv + */ + uint32_t flags; + + /* RIB internal status */ + uint32_t status; +#define ROUTE_ENTRY_REMOVED 0x1 +/* The Route Entry has changed */ +#define ROUTE_ENTRY_CHANGED 0x2 +/* The Label has changed on the Route entry */ +#define ROUTE_ENTRY_LABELS_CHANGED 0x4 +/* Route is queued for Installation into the Data Plane */ +#define ROUTE_ENTRY_QUEUED 0x8 +/* Route is installed into the Data Plane */ +#define ROUTE_ENTRY_INSTALLED 0x10 +/* Route has Failed installation into the Data Plane in some manner */ +#define ROUTE_ENTRY_FAILED 0x20 +/* Route has a 'fib' set of nexthops, probably because the installed set + * differs from the rib/normal set of nexthops. + */ +#define ROUTE_ENTRY_USE_FIB_NHG 0x40 + + /* Sequence value incremented for each dataplane operation */ + uint32_t dplane_sequence; + + /* Source protocol instance */ + uint16_t instance; + + /* Distance. */ + uint8_t distance; + + struct re_opaque *opaque; +}; + +#define RIB_SYSTEM_ROUTE(R) RSYSTEM_ROUTE((R)->type) + +#define RIB_KERNEL_ROUTE(R) RKERNEL_ROUTE((R)->type) + +/* meta-queue structure: + * sub-queue 0: nexthop group objects + * sub-queue 1: EVPN/VxLAN objects + * sub-queue 2: Early Route Processing + * sub-queue 3: Early Label Processing + * sub-queue 4: connected + * sub-queue 5: kernel + * sub-queue 6: static + * sub-queue 7: RIP, RIPng, OSPF, OSPF6, IS-IS, EIGRP, NHRP + * sub-queue 8: iBGP, eBGP + * sub-queue 9: any other origin (if any) typically those that + * don't generate routes + */ +#define MQ_SIZE 10 +struct meta_queue { + struct list *subq[MQ_SIZE]; + uint32_t size; /* sum of lengths of all subqueues */ +}; + +/* + * Structure that represents a single destination (prefix). + */ +typedef struct rib_dest_t_ { + + /* + * Back pointer to the route node for this destination. This helps + * us get to the prefix that this structure is for. 
+ */ + struct route_node *rnode; + + /* + * Doubly-linked list of routes for this prefix. + */ + struct re_list_head routes; + + struct route_entry *selected_fib; + + /* + * Flags, see below. + */ + uint32_t flags; + + /* + * The list of nht prefixes that have ended up + * depending on this route node. + * After route processing is returned from + * the data plane we will run evaluate_rnh + * on these prefixes. + */ + struct rnh_list_head nht; + + /* + * Linkage to put dest on the FPM processing queue. + */ + TAILQ_ENTRY(rib_dest_t_) fpm_q_entries; + +} rib_dest_t; + +DECLARE_LIST(rnh_list, struct rnh, rnh_list_item); +DECLARE_LIST(re_list, struct route_entry, next); + +#define RIB_ROUTE_QUEUED(x) (1 << (x)) +// If MQ_SIZE is modified this value needs to be updated. +#define RIB_ROUTE_ANY_QUEUED 0x3F + +/* + * The maximum qindex that can be used. + */ +#define ZEBRA_MAX_QINDEX (MQ_SIZE - 1) + +/* + * This flag indicates that a given prefix has been 'advertised' to + * the FPM to be installed in the forwarding plane. + */ +#define RIB_DEST_SENT_TO_FPM (1 << (ZEBRA_MAX_QINDEX + 1)) + +/* + * This flag is set when we need to send an update to the FPM about a + * dest. + */ +#define RIB_DEST_UPDATE_FPM (1 << (ZEBRA_MAX_QINDEX + 2)) + +#define RIB_DEST_UPDATE_LSPS (1 << (ZEBRA_MAX_QINDEX + 3)) + +/* + * Macro to iterate over each route for a destination (prefix). + */ +#define RE_DEST_FOREACH_ROUTE(dest, re) \ + for ((re) = (dest) ? re_list_first(&((dest)->routes)) : NULL; (re); \ + (re) = re_list_next(&((dest)->routes), (re))) + +/* + * Same as above, but allows the current node to be unlinked. + */ +#define RE_DEST_FOREACH_ROUTE_SAFE(dest, re, next) \ + for ((re) = (dest) ? re_list_first(&((dest)->routes)) : NULL; \ + (re) && ((next) = re_list_next(&((dest)->routes), (re)), 1); \ + (re) = (next)) + +#define RE_DEST_FIRST_ROUTE(dest, re) \ + ((re) = (dest) ? re_list_first(&((dest)->routes)) : NULL) + +#define RE_DEST_NEXT_ROUTE(dest, re) \ + ((re) = (dest) ? 
re_list_next(&((dest)->routes), (re)) : NULL) + +#define RNODE_FOREACH_RE(rn, re) \ + RE_DEST_FOREACH_ROUTE (rib_dest_from_rnode(rn), re) + +#define RNODE_FOREACH_RE_SAFE(rn, re, next) \ + RE_DEST_FOREACH_ROUTE_SAFE (rib_dest_from_rnode(rn), re, next) + +#define RNODE_FIRST_RE(rn, re) RE_DEST_FIRST_ROUTE(rib_dest_from_rnode(rn), re) + +#define RNODE_NEXT_RE(rn, re) RE_DEST_NEXT_ROUTE(rib_dest_from_rnode(rn), re) + +/* + * rib_table_info_t + * + * Structure that is hung off of a route_table that holds information about + * the table. + */ +struct rib_table_info { + + /* + * Back pointer to zebra_vrf. + */ + struct zebra_vrf *zvrf; + afi_t afi; + safi_t safi; + uint32_t table_id; +}; + +enum rib_tables_iter_state { + RIB_TABLES_ITER_S_INIT, + RIB_TABLES_ITER_S_ITERATING, + RIB_TABLES_ITER_S_DONE +}; + +/* + * Structure that holds state for iterating over all tables in the + * Routing Information Base. + */ +typedef struct rib_tables_iter_t_ { + vrf_id_t vrf_id; + int afi_safi_ix; + + enum rib_tables_iter_state state; +} rib_tables_iter_t; + +/* Events/reasons triggering a RIB update. 
*/ +enum rib_update_event { + RIB_UPDATE_KERNEL, + RIB_UPDATE_RMAP_CHANGE, + RIB_UPDATE_OTHER, + RIB_UPDATE_MAX +}; +void rib_update_finish(void); + +int route_entry_update_nhe(struct route_entry *re, + struct nhg_hash_entry *new_nhghe); + +/* NHG replace has happend, we have to update route_entry pointers to new one */ +void rib_handle_nhg_replace(struct nhg_hash_entry *old_entry, + struct nhg_hash_entry *new_entry); + +#define route_entry_dump(prefix, src, re) _route_entry_dump(__func__, prefix, src, re) +extern void _route_entry_dump(const char *func, union prefixconstptr pp, + union prefixconstptr src_pp, + const struct route_entry *re); + +struct route_entry * +zebra_rib_route_entry_new(vrf_id_t vrf_id, int type, uint8_t instance, + uint32_t flags, uint32_t nhe_id, uint32_t table_id, + uint32_t metric, uint32_t mtu, uint8_t distance, + route_tag_t tag); + +#define ZEBRA_RIB_LOOKUP_ERROR -1 +#define ZEBRA_RIB_FOUND_EXACT 0 +#define ZEBRA_RIB_FOUND_NOGATE 1 +#define ZEBRA_RIB_FOUND_CONNECTED 2 +#define ZEBRA_RIB_NOTFOUND 3 + +extern int is_zebra_valid_kernel_table(uint32_t table_id); +extern int is_zebra_main_routing_table(uint32_t table_id); +extern int zebra_check_addr(const struct prefix *p); + +extern void rib_delnode(struct route_node *rn, struct route_entry *re); +extern void rib_install_kernel(struct route_node *rn, struct route_entry *re, + struct route_entry *old); +extern void rib_uninstall_kernel(struct route_node *rn, struct route_entry *re); + +/* NOTE: + * All rib_add function will not just add prefix into RIB, but + * also implicitly withdraw equal prefix of same type. */ +extern int rib_add(afi_t afi, safi_t safi, vrf_id_t vrf_id, int type, + unsigned short instance, uint32_t flags, struct prefix *p, + struct prefix_ipv6 *src_p, const struct nexthop *nh, + uint32_t nhe_id, uint32_t table_id, uint32_t metric, + uint32_t mtu, uint8_t distance, route_tag_t tag, + bool startup); +/* + * Multipath route apis. 
+ */ +extern int rib_add_multipath(afi_t afi, safi_t safi, struct prefix *p, + struct prefix_ipv6 *src_p, struct route_entry *re, + struct nexthop_group *ng, bool startup); +/* + * -1 -> some sort of error + * 0 -> an add + * 1 -> an update + */ +extern int rib_add_multipath_nhe(afi_t afi, safi_t safi, struct prefix *p, + struct prefix_ipv6 *src_p, + struct route_entry *re, + struct nhg_hash_entry *nhe, bool startup); + +extern void rib_delete(afi_t afi, safi_t safi, vrf_id_t vrf_id, int type, + unsigned short instance, uint32_t flags, + struct prefix *p, struct prefix_ipv6 *src_p, + const struct nexthop *nh, uint32_t nhe_id, + uint32_t table_id, uint32_t metric, uint8_t distance, + bool fromkernel); + +extern struct route_entry *rib_match(afi_t afi, safi_t safi, vrf_id_t vrf_id, + const union g_addr *addr, + struct route_node **rn_out); +extern struct route_entry *rib_match_ipv4_multicast(vrf_id_t vrf_id, + struct in_addr addr, + struct route_node **rn_out); +extern struct route_entry *rib_match_ipv6_multicast(vrf_id_t vrf_id, + struct in6_addr addr, + struct route_node **rn_out); + +extern struct route_entry *rib_lookup_ipv4(struct prefix_ipv4 *p, + vrf_id_t vrf_id); + +extern void rib_update(enum rib_update_event event); +extern void rib_update_table(struct route_table *table, + enum rib_update_event event, int rtype); +extern void rib_sweep_route(struct thread *t); +extern void rib_sweep_table(struct route_table *table); +extern void rib_close_table(struct route_table *table); +extern void rib_init(void); +extern unsigned long rib_score_proto(uint8_t proto, unsigned short instance); +extern unsigned long rib_score_proto_table(uint8_t proto, + unsigned short instance, + struct route_table *table); + +extern int rib_queue_add(struct route_node *rn); + +struct nhg_ctx; /* Forward declaration */ + +/* Enqueue incoming nhg from OS for processing */ +extern int rib_queue_nhg_ctx_add(struct nhg_ctx *ctx); + +/* Enqueue incoming nhg from proto daemon for processing */ 
+extern int rib_queue_nhe_add(struct nhg_hash_entry *nhe); + +/* Enqueue evpn route for processing */ +int zebra_rib_queue_evpn_route_add(vrf_id_t vrf_id, const struct ethaddr *rmac, + const struct ipaddr *vtep_ip, + const struct prefix *host_prefix); +int zebra_rib_queue_evpn_route_del(vrf_id_t vrf_id, + const struct ipaddr *vtep_ip, + const struct prefix *host_prefix); +/* Enqueue EVPN remote ES for processing */ +int zebra_rib_queue_evpn_rem_es_add(const esi_t *esi, + const struct in_addr *vtep_ip, + bool esr_rxed, uint8_t df_alg, + uint16_t df_pref); +int zebra_rib_queue_evpn_rem_es_del(const esi_t *esi, + const struct in_addr *vtep_ip); +/* Enqueue EVPN remote macip update for processing */ +int zebra_rib_queue_evpn_rem_macip_del(vni_t vni, const struct ethaddr *macaddr, + const struct ipaddr *ip, + struct in_addr vtep_ip); +int zebra_rib_queue_evpn_rem_macip_add(vni_t vni, const struct ethaddr *macaddr, + const struct ipaddr *ipaddr, + uint8_t flags, uint32_t seq, + struct in_addr vtep_ip, + const esi_t *esi); +/* Enqueue VXLAN remote vtep update for processing */ +int zebra_rib_queue_evpn_rem_vtep_add(vrf_id_t vrf_id, vni_t vni, + struct in_addr vtep_ip, + int flood_control); +int zebra_rib_queue_evpn_rem_vtep_del(vrf_id_t vrf_id, vni_t vni, + struct in_addr vtep_ip); + +extern void meta_queue_free(struct meta_queue *mq, struct zebra_vrf *zvrf); +extern int zebra_rib_labeled_unicast(struct route_entry *re); +extern struct route_table *rib_table_ipv6; + +extern void rib_unlink(struct route_node *rn, struct route_entry *re); +extern int rib_gc_dest(struct route_node *rn); +extern struct route_table *rib_tables_iter_next(rib_tables_iter_t *iter); + +extern uint8_t route_distance(int type); + +extern void zebra_rib_evaluate_rn_nexthops(struct route_node *rn, uint32_t seq, + bool rt_delete); + +/* + * rib_find_rn_from_ctx + * + * Returns a lock increased route_node for the appropriate + * table and prefix specified by the context. 
Developer
 * should unlock the node when done.
 */
extern struct route_node *
rib_find_rn_from_ctx(const struct zebra_dplane_ctx *ctx);

/*
 * Inline functions.
 */

/*
 * rib_table_info
 *
 * Accessor for the rib_table_info stashed in a route_table's info pointer.
 */
static inline struct rib_table_info *rib_table_info(struct route_table *table)
{
	return (struct rib_table_info *)route_table_get_info(table);
}

/*
 * rib_dest_from_rnode
 *
 * Accessor for the rib_dest_t hung off a route_node; NULL when no
 * destination has been attached to the node.
 */
static inline rib_dest_t *rib_dest_from_rnode(struct route_node *rn)
{
	return (rib_dest_t *)rn->info;
}

/*
 * rnode_to_ribs
 *
 * Returns a pointer to the list of routes corresponding to the given
 * route_node.  Returns NULL when the node carries no rib_dest_t.
 */
static inline struct route_entry *rnode_to_ribs(struct route_node *rn)
{
	rib_dest_t *dest;

	dest = rib_dest_from_rnode(rn);
	if (!dest)
		return NULL;

	return re_list_first(&dest->routes);
}

/*
 * rib_dest_prefix
 *
 * Prefix of the route_node this destination hangs off of.
 */
static inline struct prefix *rib_dest_prefix(rib_dest_t *dest)
{
	return &dest->rnode->p;
}

/*
 * rib_dest_af
 *
 * Returns the address family that the destination is for.
 */
static inline uint8_t rib_dest_af(rib_dest_t *dest)
{
	return dest->rnode->p.family;
}

/*
 * rib_dest_table
 *
 * Route table that owns this destination's route_node.
 */
static inline struct route_table *rib_dest_table(rib_dest_t *dest)
{
	return srcdest_rnode_table(dest->rnode);
}

/*
 * rib_dest_vrf
 *
 * VRF owning the table this destination lives in.
 */
static inline struct zebra_vrf *rib_dest_vrf(rib_dest_t *dest)
{
	return rib_table_info(rib_dest_table(dest))->zvrf;
}

/*
 * Create the rib_dest_t and attach it to the specified node
 */
extern rib_dest_t *zebra_rib_create_dest(struct route_node *rn);

/*
 * rib_tables_iter_init
 *
 * Zero the iterator and place it in its initial (not-yet-started) state.
 */
static inline void rib_tables_iter_init(rib_tables_iter_t *iter)

{
	memset(iter, 0, sizeof(*iter));
	iter->state = RIB_TABLES_ITER_S_INIT;
}

/*
 * rib_tables_iter_started
 *
 * Returns true if this iterator has started iterating over the set of
 * tables.
 */
static inline int rib_tables_iter_started(rib_tables_iter_t *iter)
{
	return iter->state != RIB_TABLES_ITER_S_INIT;
}

/*
 * rib_tables_iter_cleanup
 *
 * Mark the iterator done; it must be re-initialized before reuse.
 */
static inline void rib_tables_iter_cleanup(rib_tables_iter_t *iter)
{
	iter->state = RIB_TABLES_ITER_S_DONE;
}

DECLARE_HOOK(rib_update, (struct route_node * rn, const char *reason),
	     (rn, reason));
DECLARE_HOOK(rib_shutdown, (struct route_node * rn), (rn));

/*
 * Access installed/fib nexthops, which may be a subset of the
 * rib nexthops.
 */
static inline struct nexthop_group *rib_get_fib_nhg(struct route_entry *re)
{
	/* If the fib set is a subset of the active rib set,
	 * use the dedicated fib list.
	 */
	if (CHECK_FLAG(re->status, ROUTE_ENTRY_USE_FIB_NHG))
		return &(re->fib_ng);
	else
		return &(re->nhe->nhg);
}

/*
 * Access backup nexthop-group that represents the installed backup nexthops;
 * any installed backup will be on the fib list.
 */
static inline struct nexthop_group *rib_get_fib_backup_nhg(
	struct route_entry *re)
{
	return &(re->fib_backup_ng);
}

extern void zebra_vty_init(void);

extern pid_t pid;

extern bool v6_rr_semantics;

#ifdef __cplusplus
}
#endif

#endif /*_ZEBRA_RIB_H */
diff --git a/zebra/router-id.c b/zebra/router-id.c
new file mode 100644
index 0000000..9f56cf0
--- /dev/null
+++ b/zebra/router-id.c
@@ -0,0 +1,614 @@
/*
 * Router ID for zebra daemon.
 *
 * Copyright (C) 2004 James R. Leu
 *
 * This file is part of Quagga routing suite.
 *
 * Quagga is free software; you can redistribute it and/or modify it
 * under the terms of the GNU General Public License as published by the
 * Free Software Foundation; either version 2, or (at your option) any
 * later version.
 *
 * Quagga is distributed in the hope that it will be useful, but
 * WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
 * General Public License for more details.
+ * + * You should have received a copy of the GNU General Public License along + * with this program; see the file COPYING; if not, write to the Free Software + * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA + */ + +#include <zebra.h> + +#include "if.h" +#include "vty.h" +#include "sockunion.h" +#include "prefix.h" +#include "stream.h" +#include "command.h" +#include "memory.h" +#include "ioctl.h" +#include "connected.h" +#include "network.h" +#include "log.h" +#include "table.h" +#include "rib.h" +#include "vrf.h" + +#include "zebra/zebra_router.h" +#include "zebra/zapi_msg.h" +#include "zebra/zebra_vrf.h" +#include "zebra/router-id.h" +#include "zebra/redistribute.h" + +static struct connected *router_id_find_node(struct list *l, + struct connected *ifc) +{ + struct listnode *node; + struct connected *c; + + for (ALL_LIST_ELEMENTS_RO(l, node, c)) + if (prefix_same(ifc->address, c->address)) + return c; + + return NULL; +} + +static int router_id_bad_address(struct connected *ifc) +{ + /* non-redistributable addresses shouldn't be used for RIDs either */ + if (!zebra_check_addr(ifc->address)) + return 1; + + return 0; +} + +static bool router_id_v6_is_any(struct prefix *p) +{ + return memcmp(&p->u.prefix6, &in6addr_any, sizeof(struct in6_addr)) + == 0; +} + +int router_id_get(afi_t afi, struct prefix *p, struct zebra_vrf *zvrf) +{ + struct listnode *node; + struct connected *c; + struct in6_addr *addr = NULL; + + switch (afi) { + case AFI_IP: + p->u.prefix4.s_addr = INADDR_ANY; + p->family = AF_INET; + p->prefixlen = IPV4_MAX_BITLEN; + if (zvrf->rid_user_assigned.u.prefix4.s_addr != INADDR_ANY) + p->u.prefix4.s_addr = + zvrf->rid_user_assigned.u.prefix4.s_addr; + else if (!list_isempty(zvrf->rid_lo_sorted_list)) { + node = listtail(zvrf->rid_lo_sorted_list); + c = listgetdata(node); + p->u.prefix4.s_addr = c->address->u.prefix4.s_addr; + } else if (!list_isempty(zvrf->rid_all_sorted_list)) { + node = 
listtail(zvrf->rid_all_sorted_list); + c = listgetdata(node); + p->u.prefix4.s_addr = c->address->u.prefix4.s_addr; + } + return 0; + case AFI_IP6: + p->u.prefix6 = in6addr_any; + p->family = AF_INET6; + p->prefixlen = IPV6_MAX_BITLEN; + if (!router_id_v6_is_any(&zvrf->rid6_user_assigned)) + addr = &zvrf->rid6_user_assigned.u.prefix6; + else if (!list_isempty(zvrf->rid6_lo_sorted_list)) { + node = listtail(zvrf->rid6_lo_sorted_list); + c = listgetdata(node); + addr = &c->address->u.prefix6; + } else if (!list_isempty(zvrf->rid6_all_sorted_list)) { + node = listtail(zvrf->rid6_all_sorted_list); + c = listgetdata(node); + addr = &c->address->u.prefix6; + } + if (addr) + memcpy(&p->u.prefix6, addr, sizeof(struct in6_addr)); + return 0; + default: + return -1; + } +} + +static int router_id_set(afi_t afi, struct prefix *p, struct zebra_vrf *zvrf) +{ + struct prefix after, before; + struct listnode *node; + struct zserv *client; + + router_id_get(afi, &before, zvrf); + + switch (afi) { + case AFI_IP: + zvrf->rid_user_assigned.u.prefix4.s_addr = p->u.prefix4.s_addr; + break; + case AFI_IP6: + zvrf->rid6_user_assigned.u.prefix6 = p->u.prefix6; + break; + default: + return -1; + } + + router_id_get(afi, &after, zvrf); + + /* + * If we've been told that the router-id is exactly the same + * do we need to really do anything here? 
+ */ + if (prefix_same(&before, &after)) + return 0; + + for (ALL_LIST_ELEMENTS_RO(zrouter.client_list, node, client)) + zsend_router_id_update(client, afi, &after, zvrf->vrf->vrf_id); + + return 0; +} + +void router_id_add_address(struct connected *ifc) +{ + struct list *l = NULL; + struct listnode *node; + struct prefix before; + struct prefix after; + struct zserv *client; + struct zebra_vrf *zvrf = ifc->ifp->vrf->info; + afi_t afi; + struct list *rid_lo; + struct list *rid_all; + + if (router_id_bad_address(ifc)) + return; + + switch (ifc->address->family) { + case AF_INET: + afi = AFI_IP; + rid_lo = zvrf->rid_lo_sorted_list; + rid_all = zvrf->rid_all_sorted_list; + break; + case AF_INET6: + afi = AFI_IP6; + rid_lo = zvrf->rid6_lo_sorted_list; + rid_all = zvrf->rid6_all_sorted_list; + break; + default: + return; + } + + router_id_get(afi, &before, zvrf); + + l = if_is_loopback(ifc->ifp) ? rid_lo : rid_all; + + if (!router_id_find_node(l, ifc)) + listnode_add_sort(l, ifc); + + router_id_get(afi, &after, zvrf); + + if (prefix_same(&before, &after)) + return; + + for (ALL_LIST_ELEMENTS_RO(zrouter.client_list, node, client)) + zsend_router_id_update(client, afi, &after, zvrf_id(zvrf)); +} + +void router_id_del_address(struct connected *ifc) +{ + struct connected *c; + struct list *l; + struct prefix after; + struct prefix before; + struct listnode *node; + struct zserv *client; + struct zebra_vrf *zvrf = ifc->ifp->vrf->info; + afi_t afi; + struct list *rid_lo; + struct list *rid_all; + + if (router_id_bad_address(ifc)) + return; + + switch (ifc->address->family) { + case AF_INET: + afi = AFI_IP; + rid_lo = zvrf->rid_lo_sorted_list; + rid_all = zvrf->rid_all_sorted_list; + break; + case AF_INET6: + afi = AFI_IP6; + rid_lo = zvrf->rid6_lo_sorted_list; + rid_all = zvrf->rid6_all_sorted_list; + break; + default: + return; + } + + router_id_get(afi, &before, zvrf); + + if (if_is_loopback(ifc->ifp)) + l = rid_lo; + else + l = rid_all; + + if ((c = router_id_find_node(l, 
ifc))) + listnode_delete(l, c); + + router_id_get(afi, &after, zvrf); + + if (prefix_same(&before, &after)) + return; + + for (ALL_LIST_ELEMENTS_RO(zrouter.client_list, node, client)) + zsend_router_id_update(client, afi, &after, zvrf_id(zvrf)); +} + +void router_id_write(struct vty *vty, struct zebra_vrf *zvrf) +{ + char space[2]; + + memset(space, 0, sizeof(space)); + + if (zvrf_id(zvrf) != VRF_DEFAULT) + snprintf(space, sizeof(space), "%s", " "); + + if (zvrf->rid_user_assigned.u.prefix4.s_addr != INADDR_ANY) { + vty_out(vty, "%sip router-id %pI4\n", space, + &zvrf->rid_user_assigned.u.prefix4); + } + if (!router_id_v6_is_any(&zvrf->rid6_user_assigned)) { + vty_out(vty, "%sipv6 router-id %pI6\n", space, + &zvrf->rid_user_assigned.u.prefix6); + } +} + +DEFUN (ip_router_id, + ip_router_id_cmd, + "ip router-id A.B.C.D vrf NAME", + IP_STR + "Manually set the router-id\n" + "IP address to use for router-id\n" + VRF_CMD_HELP_STR) +{ + int idx = 0; + struct prefix rid; + vrf_id_t vrf_id; + struct zebra_vrf *zvrf; + + argv_find(argv, argc, "A.B.C.D", &idx); + + if (!inet_pton(AF_INET, argv[idx]->arg, &rid.u.prefix4)) + return CMD_WARNING_CONFIG_FAILED; + + rid.prefixlen = IPV4_MAX_BITLEN; + rid.family = AF_INET; + + argv_find(argv, argc, "NAME", &idx); + VRF_GET_ID(vrf_id, argv[idx]->arg, false); + + zvrf = vrf_info_lookup(vrf_id); + router_id_set(AFI_IP, &rid, zvrf); + + return CMD_SUCCESS; +} + +ALIAS (ip_router_id, + router_id_cmd, + "router-id A.B.C.D vrf NAME", + "Manually set the router-id\n" + "IP address to use for router-id\n" + VRF_CMD_HELP_STR); + +DEFUN (ipv6_router_id, + ipv6_router_id_cmd, + "ipv6 router-id X:X::X:X vrf NAME", + IPV6_STR + "Manually set the router-id\n" + "IPv6 address to use for router-id\n" + VRF_CMD_HELP_STR) +{ + int idx = 0; + struct prefix rid; + vrf_id_t vrf_id; + struct zebra_vrf *zvrf; + + argv_find(argv, argc, "X:X::X:X", &idx); + + if (!inet_pton(AF_INET6, argv[idx]->arg, &rid.u.prefix6)) + return CMD_WARNING_CONFIG_FAILED; + + 
	rid.prefixlen = IPV6_MAX_BITLEN;
	rid.family = AF_INET6;

	argv_find(argv, argc, "NAME", &idx);
	VRF_GET_ID(vrf_id, argv[idx]->arg, false);

	zvrf = vrf_info_lookup(vrf_id);
	router_id_set(AFI_IP6, &rid, zvrf);

	return CMD_SUCCESS;
}


/* "ip router-id A.B.C.D" -- set the v4 router-id in the current vrf
 * context.
 */
DEFUN (ip_router_id_in_vrf,
       ip_router_id_in_vrf_cmd,
       "ip router-id A.B.C.D",
       IP_STR
       "Manually set the router-id\n"
       "IP address to use for router-id\n")
{
	ZEBRA_DECLVAR_CONTEXT_VRF(vrf, zvrf);
	int idx = 0;
	struct prefix rid;

	argv_find(argv, argc, "A.B.C.D", &idx);

	if (!inet_pton(AF_INET, argv[idx]->arg, &rid.u.prefix4))
		return CMD_WARNING_CONFIG_FAILED;

	rid.prefixlen = IPV4_MAX_BITLEN;
	rid.family = AF_INET;

	router_id_set(AFI_IP, &rid, zvrf);

	return CMD_SUCCESS;
}

ALIAS (ip_router_id_in_vrf,
       router_id_in_vrf_cmd,
       "router-id A.B.C.D",
       "Manually set the router-id\n"
       "IP address to use for router-id\n");

/* "ipv6 router-id X:X::X:X" -- set the v6 router-id in the current vrf
 * context.
 */
DEFUN (ipv6_router_id_in_vrf,
       ipv6_router_id_in_vrf_cmd,
       "ipv6 router-id X:X::X:X",
       IP6_STR
       "Manually set the IPv6 router-id\n"
       "IPV6 address to use for router-id\n")
{
	ZEBRA_DECLVAR_CONTEXT_VRF(vrf, zvrf);
	int idx = 0;
	struct prefix rid;

	argv_find(argv, argc, "X:X::X:X", &idx);

	if (!inet_pton(AF_INET6, argv[idx]->arg, &rid.u.prefix6))
		return CMD_WARNING_CONFIG_FAILED;

	rid.prefixlen = IPV6_MAX_BITLEN;
	rid.family = AF_INET6;

	router_id_set(AFI_IP6, &rid, zvrf);

	return CMD_SUCCESS;
}

/* "no ip router-id ..." -- clear the v4 router-id, reverting to the
 * automatically selected one.
 */
DEFUN (no_ip_router_id,
       no_ip_router_id_cmd,
       "no ip router-id [A.B.C.D vrf NAME]",
       NO_STR
       IP_STR
       "Remove the manually configured router-id\n"
       "IP address to use for router-id\n"
       VRF_CMD_HELP_STR)
{
	int idx = 0;
	struct prefix rid;
	vrf_id_t vrf_id = VRF_DEFAULT;
	struct zebra_vrf *zvrf;

	/* Only these fields are read by router_id_set() for AFI_IP. */
	rid.u.prefix4.s_addr = 0;
	rid.prefixlen = 0;
	rid.family = AF_INET;

	if (argv_find(argv, argc, "NAME", &idx))
		VRF_GET_ID(vrf_id, argv[idx]->arg, false);

	zvrf = vrf_info_lookup(vrf_id);
	router_id_set(AFI_IP, &rid, zvrf);

	return CMD_SUCCESS;
}

ALIAS (no_ip_router_id,
       no_router_id_cmd,
       "no router-id [A.B.C.D vrf NAME]",
       NO_STR
       "Remove the manually configured router-id\n"
       "IP address to use for router-id\n"
       VRF_CMD_HELP_STR);

/* "no ipv6 router-id ..." -- clear the v6 router-id. */
DEFUN (no_ipv6_router_id,
       no_ipv6_router_id_cmd,
       "no ipv6 router-id [X:X::X:X vrf NAME]",
       NO_STR
       IPV6_STR
       "Remove the manually configured IPv6 router-id\n"
       "IPv6 address to use for router-id\n"
       VRF_CMD_HELP_STR)
{
	int idx = 0;
	struct prefix rid;
	vrf_id_t vrf_id = VRF_DEFAULT;
	struct zebra_vrf *zvrf;

	/* NOTE(review): family is left AF_INET on this v6 path;
	 * router_id_set(AFI_IP6) only copies u.prefix6 (zeroed by the
	 * memset), so behavior is correct -- but AF_INET6 looks intended;
	 * confirm.
	 */
	memset(&rid, 0, sizeof(rid));
	rid.family = AF_INET;

	if (argv_find(argv, argc, "NAME", &idx))
		VRF_GET_ID(vrf_id, argv[idx]->arg, false);

	zvrf = vrf_info_lookup(vrf_id);
	router_id_set(AFI_IP6, &rid, zvrf);

	return CMD_SUCCESS;
}

/* vrf-context variant of "no ip router-id". */
DEFUN (no_ip_router_id_in_vrf,
       no_ip_router_id_in_vrf_cmd,
       "no ip router-id [A.B.C.D]",
       NO_STR
       IP_STR
       "Remove the manually configured router-id\n"
       "IP address to use for router-id\n")
{
	ZEBRA_DECLVAR_CONTEXT_VRF(vrf, zvrf);

	struct prefix rid;

	rid.u.prefix4.s_addr = 0;
	rid.prefixlen = 0;
	rid.family = AF_INET;

	router_id_set(AFI_IP, &rid, zvrf);

	return CMD_SUCCESS;
}

ALIAS (no_ip_router_id_in_vrf,
       no_router_id_in_vrf_cmd,
       "no router-id [A.B.C.D]",
       NO_STR
       "Remove the manually configured router-id\n"
       "IP address to use for router-id\n");

/* vrf-context variant of "no ipv6 router-id". */
DEFUN (no_ipv6_router_id_in_vrf,
       no_ipv6_router_id_in_vrf_cmd,
       "no ipv6 router-id [X:X::X:X]",
       NO_STR
       IP6_STR
       "Remove the manually configured IPv6 router-id\n"
       "IPv6 address to use for router-id\n")
{
	ZEBRA_DECLVAR_CONTEXT_VRF(vrf, zvrf);

	struct prefix rid;

	/* See NOTE in no_ipv6_router_id: the family value is unused here. */
	memset(&rid, 0, sizeof(rid));
	rid.family = AF_INET;

	router_id_set(AFI_IP6, &rid, zvrf);

	return CMD_SUCCESS;
}

/* "show [ip|ipv6] router-id" -- display only an explicitly configured
 * router-id; prints nothing when it is unset.
 */
DEFUN (show_ip_router_id,
       show_ip_router_id_cmd,
       "show [ip|ipv6] router-id [vrf NAME]",
       SHOW_STR
       IP_STR
       IPV6_STR
       "Show the configured router-id\n"
       VRF_CMD_HELP_STR)
{
	int idx = 0;
	vrf_id_t vrf_id = VRF_DEFAULT;
	struct zebra_vrf *zvrf;
	const char *vrf_name = "default";
	char addr_name[INET6_ADDRSTRLEN];
	int is_ipv6 = 0;

	is_ipv6 = argv_find(argv, argc, "ipv6", &idx);

	if (argv_find(argv, argc, "NAME", &idx)) {
		VRF_GET_ID(vrf_id, argv[idx]->arg, false);
		vrf_name = argv[idx]->arg;
	}

	zvrf = vrf_info_lookup(vrf_id);

	if (zvrf != NULL) {
		if (is_ipv6) {
			if (router_id_v6_is_any(&zvrf->rid6_user_assigned))
				return CMD_SUCCESS;
			inet_ntop(AF_INET6, &zvrf->rid6_user_assigned.u.prefix6,
				  addr_name, sizeof(addr_name));
		} else {
			if (zvrf->rid_user_assigned.u.prefix4.s_addr
			    == INADDR_ANY)
				return CMD_SUCCESS;
			inet_ntop(AF_INET, &zvrf->rid_user_assigned.u.prefix4,
				  addr_name, sizeof(addr_name));
		}

		vty_out(vty, "zebra:\n");
		vty_out(vty, " router-id %s vrf %s\n", addr_name, vrf_name);
	}

	return CMD_SUCCESS;
}

/* Comparator keeping the v4 candidate lists sorted; router_id_get()
 * selects the list tail.
 */
static int router_id_cmp(void *a, void *b)
{
	const struct connected *ifa = (const struct connected *)a;
	const struct connected *ifb = (const struct connected *)b;

	return IPV4_ADDR_CMP(&ifa->address->u.prefix4.s_addr,
			     &ifb->address->u.prefix4.s_addr);
}

/* Comparator keeping the v6 candidate lists sorted. */
static int router_id_v6_cmp(void *a, void *b)
{
	const struct connected *ifa = (const struct connected *)a;
	const struct connected *ifb = (const struct connected *)b;

	return IPV6_ADDR_CMP(&ifa->address->u.prefix6,
			     &ifb->address->u.prefix6);
}

/* Register all router-id CLI commands. */
void router_id_cmd_init(void)
{
	install_element(CONFIG_NODE, &ip_router_id_cmd);
	install_element(CONFIG_NODE, &router_id_cmd);
	install_element(CONFIG_NODE, &ipv6_router_id_cmd);
	install_element(CONFIG_NODE, &no_ip_router_id_cmd);
	install_element(CONFIG_NODE, &no_router_id_cmd);
	install_element(CONFIG_NODE, &ip_router_id_in_vrf_cmd);
	install_element(VRF_NODE, &ip_router_id_in_vrf_cmd);
	install_element(CONFIG_NODE, &router_id_in_vrf_cmd);
	install_element(VRF_NODE, &router_id_in_vrf_cmd);
	install_element(CONFIG_NODE, &ipv6_router_id_in_vrf_cmd);
	install_element(VRF_NODE, &ipv6_router_id_in_vrf_cmd);
	install_element(CONFIG_NODE, &no_ipv6_router_id_cmd);
	install_element(CONFIG_NODE, &no_ip_router_id_in_vrf_cmd);
	install_element(VRF_NODE, &no_ip_router_id_in_vrf_cmd);
	install_element(CONFIG_NODE, &no_router_id_in_vrf_cmd);
	install_element(VRF_NODE, &no_router_id_in_vrf_cmd);
	install_element(CONFIG_NODE, &no_ipv6_router_id_in_vrf_cmd);
	install_element(VRF_NODE, &no_ipv6_router_id_in_vrf_cmd);
	install_element(VIEW_NODE, &show_ip_router_id_cmd);
}

/* Per-vrf initialization of the candidate lists and the user-assigned
 * router-id prefixes.
 */
void router_id_init(struct zebra_vrf *zvrf)
{
	zvrf->rid_all_sorted_list = &zvrf->_rid_all_sorted_list;
	zvrf->rid_lo_sorted_list = &zvrf->_rid_lo_sorted_list;
	zvrf->rid6_all_sorted_list = &zvrf->_rid6_all_sorted_list;
	zvrf->rid6_lo_sorted_list = &zvrf->_rid6_lo_sorted_list;

	memset(zvrf->rid_all_sorted_list, 0,
	       sizeof(zvrf->_rid_all_sorted_list));
	memset(zvrf->rid_lo_sorted_list, 0, sizeof(zvrf->_rid_lo_sorted_list));
	memset(&zvrf->rid_user_assigned, 0, sizeof(zvrf->rid_user_assigned));
	memset(zvrf->rid6_all_sorted_list, 0,
	       sizeof(zvrf->_rid6_all_sorted_list));
	memset(zvrf->rid6_lo_sorted_list, 0,
	       sizeof(zvrf->_rid6_lo_sorted_list));
	memset(&zvrf->rid6_user_assigned, 0, sizeof(zvrf->rid6_user_assigned));

	zvrf->rid_all_sorted_list->cmp = router_id_cmp;
	zvrf->rid_lo_sorted_list->cmp = router_id_cmp;
	zvrf->rid6_all_sorted_list->cmp = router_id_v6_cmp;
	zvrf->rid6_lo_sorted_list->cmp = router_id_v6_cmp;

	zvrf->rid_user_assigned.family = AF_INET;
	zvrf->rid_user_assigned.prefixlen = IPV4_MAX_BITLEN;
	zvrf->rid6_user_assigned.family = AF_INET6;
	zvrf->rid6_user_assigned.prefixlen = IPV6_MAX_BITLEN;
}
diff --git a/zebra/router-id.h b/zebra/router-id.h
new file mode 100644
index 0000000..4a35f66
--- /dev/null
+++ b/zebra/router-id.h
@@ -0,0 +1,48 @@
/*
 * Router ID for zebra daemon.
 *
 * Copyright (C) 2004 James R. Leu
 *
 * This file is part of Quagga routing suite.
+ * + * Quagga is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License as published by the + * Free Software Foundation; either version 2, or (at your option) any + * later version. + * + * Quagga is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. + * + * You should have received a copy of the GNU General Public License along + * with this program; see the file COPYING; if not, write to the Free Software + * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA + */ + +#ifndef _ROUTER_ID_H_ +#define _ROUTER_ID_H_ + +#include <zebra.h> + +#include "memory.h" +#include "prefix.h" +#include "zclient.h" +#include "if.h" + +#ifdef __cplusplus +extern "C" { +#endif + +extern void router_id_add_address(struct connected *c); +extern void router_id_del_address(struct connected *c); +extern void router_id_init(struct zebra_vrf *zvrf); +extern void router_id_cmd_init(void); +extern void router_id_write(struct vty *vty, struct zebra_vrf *zvrf); +extern int router_id_get(afi_t afi, struct prefix *p, struct zebra_vrf *zvrf); + +#ifdef __cplusplus +} +#endif + +#endif diff --git a/zebra/rt.h b/zebra/rt.h new file mode 100644 index 0000000..d8a22d2 --- /dev/null +++ b/zebra/rt.h @@ -0,0 +1,133 @@ +/* + * kernel routing table update prototype. + * Copyright (C) 1998 Kunihiro Ishiguro + * + * This file is part of GNU Zebra. + * + * GNU Zebra is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License as published by the + * Free Software Foundation; either version 2, or (at your option) any + * later version. 
+ * + * GNU Zebra is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. + * + * You should have received a copy of the GNU General Public License along + * with this program; see the file COPYING; if not, write to the Free Software + * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA + */ + +#ifndef _ZEBRA_RT_H +#define _ZEBRA_RT_H + +#include "prefix.h" +#include "if.h" +#include "vlan.h" +#include "vxlan.h" +#include "zebra/rib.h" +#include "zebra/zebra_ns.h" +#include "zebra/zebra_mpls.h" +#include "zebra/zebra_dplane.h" + +#ifdef __cplusplus +extern "C" { +#endif + +#define RKERNEL_ROUTE(type) ((type) == ZEBRA_ROUTE_KERNEL) + +#define RSYSTEM_ROUTE(type) \ + ((RKERNEL_ROUTE(type)) || (type) == ZEBRA_ROUTE_CONNECT) + +#ifndef HAVE_NETLINK +/* + * Update or delete a route, nexthop, LSP, pseudowire, or vxlan MAC from the + * kernel, using info from a dataplane context. 
+ */ +extern enum zebra_dplane_result kernel_route_update( + struct zebra_dplane_ctx *ctx); + +extern enum zebra_dplane_result +kernel_nexthop_update(struct zebra_dplane_ctx *ctx); + +extern enum zebra_dplane_result kernel_lsp_update( + struct zebra_dplane_ctx *ctx); + +enum zebra_dplane_result kernel_pw_update(struct zebra_dplane_ctx *ctx); + +enum zebra_dplane_result kernel_address_update_ctx( + struct zebra_dplane_ctx *ctx); + +enum zebra_dplane_result kernel_mac_update_ctx(struct zebra_dplane_ctx *ctx); + +enum zebra_dplane_result kernel_neigh_update_ctx(struct zebra_dplane_ctx *ctx); + +extern enum zebra_dplane_result +kernel_pbr_rule_update(struct zebra_dplane_ctx *ctx); + +extern enum zebra_dplane_result +kernel_intf_update(struct zebra_dplane_ctx *ctx); + +extern enum zebra_dplane_result +kernel_intf_netconf_update(struct zebra_dplane_ctx *ctx); +extern enum zebra_dplane_result kernel_tc_update(struct zebra_dplane_ctx *ctx); + +#endif /* !HAVE_NETLINK */ + +extern int kernel_neigh_update(int cmd, int ifindex, void *addr, char *lla, + int llalen, ns_id_t ns_id, uint8_t family, + bool permanent); +extern int kernel_neigh_register(vrf_id_t vrf_id, struct zserv *client, + bool reg); +extern int kernel_interface_set_master(struct interface *master, + struct interface *slave); + +extern int mpls_kernel_init(void); + +/* Global init and deinit for platform-/OS-specific things */ +void kernel_router_init(void); +void kernel_router_terminate(void); + +extern uint32_t kernel_get_speed(struct interface *ifp, int *error); +extern int kernel_get_ipmr_sg_stats(struct zebra_vrf *zvrf, void *mroute); + +/* + * Southbound Initialization routines to get initial starting + * state. 
+ */ +extern void interface_list(struct zebra_ns *zns); +extern void kernel_init(struct zebra_ns *zns); +extern void kernel_terminate(struct zebra_ns *zns, bool complete); +extern void macfdb_read(struct zebra_ns *zns); +extern void macfdb_read_for_bridge(struct zebra_ns *zns, struct interface *ifp, + struct interface *br_if); +extern void macfdb_read_specific_mac(struct zebra_ns *zns, + struct interface *br_if, + const struct ethaddr *mac, vlanid_t vid); +extern void neigh_read(struct zebra_ns *zns); +extern void neigh_read_for_vlan(struct zebra_ns *zns, struct interface *ifp); +extern void neigh_read_specific_ip(const struct ipaddr *ip, + struct interface *vlan_if); +extern void route_read(struct zebra_ns *zns); +extern int kernel_upd_mac_nh(uint32_t nh_id, struct in_addr vtep_ip); +extern int kernel_del_mac_nh(uint32_t nh_id); +extern int kernel_upd_mac_nhg(uint32_t nhg_id, uint32_t nh_cnt, + struct nh_grp *nh_ids); +extern int kernel_del_mac_nhg(uint32_t nhg_id); + +/* + * Message batching interface. + */ +extern void kernel_update_multi(struct dplane_ctx_q *ctx_list); + +/* + * Called by the dplane pthread to read incoming OS messages and dispatch them. + */ +int kernel_dplane_read(struct zebra_dplane_info *info); + +#ifdef __cplusplus +} +#endif + +#endif /* _ZEBRA_RT_H */ diff --git a/zebra/rt_netlink.c b/zebra/rt_netlink.c new file mode 100644 index 0000000..e883033 --- /dev/null +++ b/zebra/rt_netlink.c @@ -0,0 +1,4720 @@ +/* Kernel routing table updates using netlink over GNU/Linux system. + * Copyright (C) 1997, 98, 99 Kunihiro Ishiguro + * + * This file is part of GNU Zebra. + * + * GNU Zebra is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License as published by the + * Free Software Foundation; either version 2, or (at your option) any + * later version. 
+ * + * GNU Zebra is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. + * + * You should have received a copy of the GNU General Public License along + * with this program; see the file COPYING; if not, write to the Free Software + * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA + */ + +#include <zebra.h> + +#ifdef HAVE_NETLINK + +/* The following definition is to workaround an issue in the Linux kernel + * header files with redefinition of 'struct in6_addr' in both + * netinet/in.h and linux/in6.h. + * Reference - https://sourceware.org/ml/libc-alpha/2013-01/msg00599.html + */ +#define _LINUX_IN6_H + +#include <net/if_arp.h> +#include <linux/lwtunnel.h> +#include <linux/mpls_iptunnel.h> +#include <linux/seg6_iptunnel.h> +#include <linux/seg6_local.h> +#include <linux/neighbour.h> +#include <linux/rtnetlink.h> +#include <linux/nexthop.h> + +/* Hack for GNU libc version 2. 
*/ +#ifndef MSG_TRUNC +#define MSG_TRUNC 0x20 +#endif /* MSG_TRUNC */ + +#include "linklist.h" +#include "if.h" +#include "log.h" +#include "prefix.h" +#include "plist.h" +#include "plist_int.h" +#include "connected.h" +#include "table.h" +#include "memory.h" +#include "rib.h" +#include "thread.h" +#include "privs.h" +#include "nexthop.h" +#include "vrf.h" +#include "vty.h" +#include "mpls.h" +#include "vxlan.h" +#include "printfrr.h" + +#include "zebra/zapi_msg.h" +#include "zebra/zebra_ns.h" +#include "zebra/zebra_vrf.h" +#include "zebra/rt.h" +#include "zebra/redistribute.h" +#include "zebra/interface.h" +#include "zebra/debug.h" +#include "zebra/rtadv.h" +#include "zebra/zebra_ptm.h" +#include "zebra/zebra_mpls.h" +#include "zebra/kernel_netlink.h" +#include "zebra/rt_netlink.h" +#include "zebra/zebra_nhg.h" +#include "zebra/zebra_mroute.h" +#include "zebra/zebra_vxlan.h" +#include "zebra/zebra_errors.h" +#include "zebra/zebra_evpn_mh.h" +#include "zebra/zebra_trace.h" +#include "zebra/zebra_neigh.h" + +#ifndef AF_MPLS +#define AF_MPLS 28 +#endif + +/* Re-defining as I am unable to include <linux/if_bridge.h> which has the + * UAPI for MAC sync. */ +#ifndef _UAPI_LINUX_IF_BRIDGE_H +#define BR_SPH_LIST_SIZE 10 +#endif + +static vlanid_t filter_vlan = 0; + +/* We capture whether the current kernel supports nexthop ids; by + * default, we'll use them if possible. There's also a configuration + * available to _disable_ use of kernel nexthops. + */ +static bool supports_nh; + +struct gw_family_t { + uint16_t filler; + uint16_t family; + union g_addr gate; +}; + +static const char ipv4_ll_buf[16] = "169.254.0.1"; +static struct in_addr ipv4_ll; + +/* Is this a ipv4 over ipv6 route? 
*/ +static bool is_route_v4_over_v6(unsigned char rtm_family, + enum nexthop_types_t nexthop_type) +{ + if (rtm_family == AF_INET + && (nexthop_type == NEXTHOP_TYPE_IPV6 + || nexthop_type == NEXTHOP_TYPE_IPV6_IFINDEX)) + return true; + + return false; +} + +/* Helper to control use of kernel-level nexthop ids */ +static bool kernel_nexthops_supported(void) +{ + return (supports_nh && !vrf_is_backend_netns() + && zebra_nhg_kernel_nexthops_enabled()); +} + +/* + * Some people may only want to use NHGs created by protos and not + * implicitly created by Zebra. This check accounts for that. + */ +static bool proto_nexthops_only(void) +{ + return zebra_nhg_proto_nexthops_only(); +} + +/* Is this a proto created NHG? */ +static bool is_proto_nhg(uint32_t id, int type) +{ + /* If type is available, use it as the source of truth */ + if (type) { + if (type != ZEBRA_ROUTE_NHG) + return true; + return false; + } + + if (id >= ZEBRA_NHG_PROTO_LOWER) + return true; + + return false; +} + +/* + * The ipv4_ll data structure is used for all 5549 + * additions to the kernel. 
Let's figure out the
 * correct value one time instead for every
 * install/remove of a 5549 type route
 */
void rt_netlink_init(void)
{
	/* Pre-compute the IPv4 link-local gateway used for RFC 5549
	 * (IPv4 routes with IPv6 nexthops). */
	inet_pton(AF_INET, ipv4_ll_buf, &ipv4_ll);
}

/*
 * Mapping from dataplane neighbor flags to netlink flags
 */
static uint8_t neigh_flags_to_netlink(uint8_t dplane_flags)
{
	uint8_t flags = 0;

	if (dplane_flags & DPLANE_NTF_EXT_LEARNED)
		flags |= NTF_EXT_LEARNED;
	if (dplane_flags & DPLANE_NTF_ROUTER)
		flags |= NTF_ROUTER;
	if (dplane_flags & DPLANE_NTF_USE)
		flags |= NTF_USE;

	return flags;
}

/*
 * Mapping from dataplane neighbor state to netlink state
 */
static uint16_t neigh_state_to_netlink(uint16_t dplane_state)
{
	uint16_t state = 0;

	if (dplane_state & DPLANE_NUD_REACHABLE)
		state |= NUD_REACHABLE;
	if (dplane_state & DPLANE_NUD_STALE)
		state |= NUD_STALE;
	if (dplane_state & DPLANE_NUD_NOARP)
		state |= NUD_NOARP;
	if (dplane_state & DPLANE_NUD_PROBE)
		state |= NUD_PROBE;
	if (dplane_state & DPLANE_NUD_INCOMPLETE)
		state |= NUD_INCOMPLETE;
	if (dplane_state & DPLANE_NUD_PERMANENT)
		state |= NUD_PERMANENT;
	if (dplane_state & DPLANE_NUD_FAILED)
		state |= NUD_FAILED;

	return state;
}


/*
 * Return true if the kernel route protocol value is one that zebra
 * itself installs routes under; such notifications are echoes of our
 * own updates and are normally skipped when read back from the kernel.
 */
static inline bool is_selfroute(int proto)
{
	if ((proto == RTPROT_BGP) || (proto == RTPROT_OSPF)
	    || (proto == RTPROT_ZSTATIC) || (proto == RTPROT_ZEBRA)
	    || (proto == RTPROT_ISIS) || (proto == RTPROT_RIPNG)
	    || (proto == RTPROT_NHRP) || (proto == RTPROT_EIGRP)
	    || (proto == RTPROT_LDP) || (proto == RTPROT_BABEL)
	    || (proto == RTPROT_RIP) || (proto == RTPROT_SHARP)
	    || (proto == RTPROT_PBR) || (proto == RTPROT_OPENFABRIC)
	    || (proto == RTPROT_SRTE)) {
		return true;
	}

	return false;
}

/* Map a ZEBRA_ROUTE_* type to the RTPROT_* value carried in netlink. */
static inline int zebra2proto(int proto)
{
	switch (proto) {
	case ZEBRA_ROUTE_BABEL:
		proto = RTPROT_BABEL;
		break;
	case ZEBRA_ROUTE_BGP:
		proto = RTPROT_BGP;
		break;
	case ZEBRA_ROUTE_OSPF:
	case ZEBRA_ROUTE_OSPF6:
		proto = RTPROT_OSPF;
		break;
	case ZEBRA_ROUTE_STATIC:
		proto = RTPROT_ZSTATIC;
		break;
	case ZEBRA_ROUTE_ISIS:
		proto = RTPROT_ISIS;
		break;
	case ZEBRA_ROUTE_RIP:
		proto = RTPROT_RIP;
		break;
	case ZEBRA_ROUTE_RIPNG:
		proto = RTPROT_RIPNG;
		break;
	case ZEBRA_ROUTE_NHRP:
		proto = RTPROT_NHRP;
		break;
	case ZEBRA_ROUTE_EIGRP:
		proto = RTPROT_EIGRP;
		break;
	case ZEBRA_ROUTE_LDP:
		proto = RTPROT_LDP;
		break;
	case ZEBRA_ROUTE_SHARP:
		proto = RTPROT_SHARP;
		break;
	case ZEBRA_ROUTE_PBR:
		proto = RTPROT_PBR;
		break;
	case ZEBRA_ROUTE_OPENFABRIC:
		proto = RTPROT_OPENFABRIC;
		break;
	case ZEBRA_ROUTE_SRTE:
		proto = RTPROT_SRTE;
		break;
	case ZEBRA_ROUTE_TABLE:
	case ZEBRA_ROUTE_NHG:
		proto = RTPROT_ZEBRA;
		break;
	case ZEBRA_ROUTE_CONNECT:
	case ZEBRA_ROUTE_KERNEL:
		proto = RTPROT_KERNEL;
		break;
	default:
		/*
		 * When a user adds a new protocol this will show up
		 * to let them know to do something about it.  This
		 * is intentionally a warn because we should see
		 * this as part of development of a new protocol
		 */
		zlog_debug(
			"%s: Please add this protocol(%d) to proper rt_netlink.c handling",
			__func__, proto);
		proto = RTPROT_ZEBRA;
		break;
	}

	return proto;
}

/*
 * Map a kernel RTPROT_* value back to a ZEBRA_ROUTE_* type.
 * family disambiguates OSPF vs OSPF6 (they share RTPROT_OSPF);
 * is_nexthop selects the NHG type for RTPROT_ZEBRA nexthop-object
 * notifications.
 */
static inline int proto2zebra(int proto, int family, bool is_nexthop)
{
	switch (proto) {
	case RTPROT_BABEL:
		proto = ZEBRA_ROUTE_BABEL;
		break;
	case RTPROT_BGP:
		proto = ZEBRA_ROUTE_BGP;
		break;
	case RTPROT_OSPF:
		proto = (family == AF_INET) ? ZEBRA_ROUTE_OSPF
					    : ZEBRA_ROUTE_OSPF6;
		break;
	case RTPROT_ISIS:
		proto = ZEBRA_ROUTE_ISIS;
		break;
	case RTPROT_RIP:
		proto = ZEBRA_ROUTE_RIP;
		break;
	case RTPROT_RIPNG:
		proto = ZEBRA_ROUTE_RIPNG;
		break;
	case RTPROT_NHRP:
		proto = ZEBRA_ROUTE_NHRP;
		break;
	case RTPROT_EIGRP:
		proto = ZEBRA_ROUTE_EIGRP;
		break;
	case RTPROT_LDP:
		proto = ZEBRA_ROUTE_LDP;
		break;
	case RTPROT_STATIC:
	case RTPROT_ZSTATIC:
		proto = ZEBRA_ROUTE_STATIC;
		break;
	case RTPROT_SHARP:
		proto = ZEBRA_ROUTE_SHARP;
		break;
	case RTPROT_PBR:
		proto = ZEBRA_ROUTE_PBR;
		break;
	case RTPROT_OPENFABRIC:
		proto = ZEBRA_ROUTE_OPENFABRIC;
		break;
	case RTPROT_SRTE:
		proto = ZEBRA_ROUTE_SRTE;
		break;
	case RTPROT_UNSPEC:
	case RTPROT_REDIRECT:
	case RTPROT_KERNEL:
	case RTPROT_BOOT:
	case RTPROT_GATED:
	case RTPROT_RA:
	case RTPROT_MRT:
	case RTPROT_BIRD:
	case RTPROT_DNROUTED:
	case RTPROT_XORP:
	case RTPROT_NTK:
	case RTPROT_MROUTED:
	case RTPROT_KEEPALIVED:
	case RTPROT_OPENR:
		proto = ZEBRA_ROUTE_KERNEL;
		break;
	case RTPROT_ZEBRA:
		if (is_nexthop) {
			proto = ZEBRA_ROUTE_NHG;
			break;
		}
		/* Intentional fall thru */
	default:
		/*
		 * When a user adds a new protocol this will show up
		 * to let them know to do something about it.  This
		 * is intentionally a warn because we should see
		 * this as part of development of a new protocol
		 */
		zlog_debug(
			"%s: Please add this protocol(%d) to proper rt_netlink.c handling",
			__func__, proto);
		proto = ZEBRA_ROUTE_KERNEL;
		break;
	}
	return proto;
}

/*
 * Map a kernel table id (plus the originating namespace) to a VRF id.
 *
 * Pending: create an efficient table_id (in a tree/hash) based lookup)
 *
 * NOTE(review): in the netns backend only ns_id is matched and table_id
 * is ignored entirely -- confirm this is intended for non-default
 * tables inside a netns.  Unmatched tables fall back to VRF_DEFAULT.
 */
vrf_id_t vrf_lookup_by_table(uint32_t table_id, ns_id_t ns_id)
{
	struct vrf *vrf;
	struct zebra_vrf *zvrf;

	RB_FOREACH (vrf, vrf_id_head, &vrfs_by_id) {
		zvrf = vrf->info;
		if (zvrf == NULL)
			continue;
		/* case vrf with netns : match the netnsid */
		if (vrf_is_backend_netns()) {
			if (ns_id == zvrf_id(zvrf))
				return zvrf_id(zvrf);
		} else {
			/* VRF is VRF_BACKEND_VRF_LITE */
			if (zvrf->table_id != table_id)
				continue;
			return zvrf_id(zvrf);
		}
	}

	return VRF_DEFAULT;
}

/**
 * @parse_encap_mpls() - Parses encapsulated mpls attributes
 * @tb: Pointer to rtattr to look for nested items in.
 * @labels: Pointer to store labels in.
 *
 * Return: Number of mpls labels found.
 */
static int parse_encap_mpls(struct rtattr *tb, mpls_label_t *labels)
{
	struct rtattr *tb_encap[MPLS_IPTUNNEL_MAX + 1] = {0};
	mpls_lse_t *lses = NULL;
	int num_labels = 0;
	uint32_t ttl = 0;
	uint32_t bos = 0;
	uint32_t exp = 0;
	mpls_label_t label = 0;

	netlink_parse_rtattr_nested(tb_encap, MPLS_IPTUNNEL_MAX, tb);
	/* NOTE(review): assumes MPLS_IPTUNNEL_DST is always present when
	 * the encap type is LWTUNNEL_ENCAP_MPLS; RTA_DATA() on a NULL
	 * attribute would dereference NULL -- confirm kernel guarantees
	 * this attribute for MPLS lwtunnels. */
	lses = (mpls_lse_t *)RTA_DATA(tb_encap[MPLS_IPTUNNEL_DST]);
	/* Decode stack entries until the bottom-of-stack bit is seen. */
	while (!bos && num_labels < MPLS_MAX_LABELS) {
		mpls_lse_decode(lses[num_labels], &label, &ttl, &exp, &bos);
		labels[num_labels++] = label;
	}

	return num_labels;
}

/*
 * Parse a nested seg6local lwtunnel encap: returns the seg6local
 * action (UNSPEC if absent) and fills ctx with whatever parameters
 * (nh4/nh6/table) were present in the message.
 */
static enum seg6local_action_t
parse_encap_seg6local(struct rtattr *tb,
		      struct seg6local_context *ctx)
{
	struct rtattr *tb_encap[SEG6_LOCAL_MAX + 1] = {};
	enum seg6local_action_t act = ZEBRA_SEG6_LOCAL_ACTION_UNSPEC;

	netlink_parse_rtattr_nested(tb_encap, SEG6_LOCAL_MAX, tb);

	if (tb_encap[SEG6_LOCAL_ACTION])
		act = *(uint32_t *)RTA_DATA(tb_encap[SEG6_LOCAL_ACTION]);

	if (tb_encap[SEG6_LOCAL_NH4])
		ctx->nh4 = *(struct in_addr *)RTA_DATA(
			tb_encap[SEG6_LOCAL_NH4]);

	if (tb_encap[SEG6_LOCAL_NH6])
		ctx->nh6 = *(struct in6_addr *)RTA_DATA(
			tb_encap[SEG6_LOCAL_NH6]);

	if (tb_encap[SEG6_LOCAL_TABLE])
		ctx->table = *(uint32_t *)RTA_DATA(tb_encap[SEG6_LOCAL_TABLE]);

	/* VRFTABLE overwrites TABLE when both are present. */
	if (tb_encap[SEG6_LOCAL_VRFTABLE])
		ctx->table =
			*(uint32_t *)RTA_DATA(tb_encap[SEG6_LOCAL_VRFTABLE]);

	return act;
}

/*
 * Parse a nested seg6 (SRv6 transit) lwtunnel encap into a single SID.
 * Returns the number of SIDs extracted (0 or 1).
 */
static int parse_encap_seg6(struct rtattr *tb, struct in6_addr *segs)
{
	struct rtattr *tb_encap[SEG6_IPTUNNEL_MAX + 1] = {};
	struct seg6_iptunnel_encap *ipt = NULL;
	struct in6_addr *segments = NULL;

	netlink_parse_rtattr_nested(tb_encap, SEG6_IPTUNNEL_MAX, tb);

	/*
	 * TODO: Multiple SID lists are not supported yet; only the first
	 * segment of the first SRH is returned.
	 */
	if (tb_encap[SEG6_IPTUNNEL_SRH]) {
		ipt = (struct seg6_iptunnel_encap *)
			RTA_DATA(tb_encap[SEG6_IPTUNNEL_SRH]);
		segments = ipt->srh[0].segments;
		*segs = segments[0];
		return 1;
	}

	return 0;
}


/*
 * Build a single nexthop from one netlink route message (non-multipath
 * case).  bh_type selects a blackhole nexthop; otherwise the nexthop
 * type is derived from the presence of an ifindex and/or gateway.
 * Any MPLS / seg6local / seg6 lwtunnel encap found on the route is
 * attached to the returned nexthop.
 */
static struct nexthop
parse_nexthop_unicast(ns_id_t ns_id, struct rtmsg *rtm, struct rtattr **tb,
		      enum blackhole_type bh_type, int index, void *prefsrc,
		      void *gate, afi_t afi, vrf_id_t vrf_id)
{
	struct interface *ifp = NULL;
	struct nexthop nh = {0};
	mpls_label_t labels[MPLS_MAX_LABELS] = {0};
	int num_labels = 0;
	enum seg6local_action_t seg6l_act = ZEBRA_SEG6_LOCAL_ACTION_UNSPEC;
	struct seg6local_context seg6l_ctx = {};
	struct in6_addr seg6_segs = {};
	int num_segs = 0;

	vrf_id_t nh_vrf_id = vrf_id;
	/* Address copy size depends on family. */
	size_t sz = (afi == AFI_IP) ? 4 : 16;

	if (bh_type == BLACKHOLE_UNSPEC) {
		if (index && !gate)
			nh.type = NEXTHOP_TYPE_IFINDEX;
		else if (index && gate)
			nh.type = (afi == AFI_IP) ? NEXTHOP_TYPE_IPV4_IFINDEX
						  : NEXTHOP_TYPE_IPV6_IFINDEX;
		else if (!index && gate)
			nh.type = (afi == AFI_IP) ? NEXTHOP_TYPE_IPV4
						  : NEXTHOP_TYPE_IPV6;
		else {
			/* No ifindex and no gateway: treat as blackhole. */
			nh.type = NEXTHOP_TYPE_BLACKHOLE;
			nh.bh_type = bh_type;
		}
	} else {
		nh.type = NEXTHOP_TYPE_BLACKHOLE;
		nh.bh_type = bh_type;
	}
	nh.ifindex = index;
	if (prefsrc)
		memcpy(&nh.src, prefsrc, sz);
	if (gate)
		memcpy(&nh.gate, gate, sz);

	/* Resolve the nexthop VRF from the outgoing interface when known. */
	if (index) {
		ifp = if_lookup_by_index_per_ns(zebra_ns_lookup(ns_id), index);
		if (ifp)
			nh_vrf_id = ifp->vrf->vrf_id;
	}
	nh.vrf_id = nh_vrf_id;

	if (tb[RTA_ENCAP] && tb[RTA_ENCAP_TYPE]
	    && *(uint16_t *)RTA_DATA(tb[RTA_ENCAP_TYPE])
		       == LWTUNNEL_ENCAP_MPLS) {
		num_labels = parse_encap_mpls(tb[RTA_ENCAP], labels);
	}
	if (tb[RTA_ENCAP] && tb[RTA_ENCAP_TYPE]
	    && *(uint16_t *)RTA_DATA(tb[RTA_ENCAP_TYPE])
		       == LWTUNNEL_ENCAP_SEG6_LOCAL) {
		seg6l_act = parse_encap_seg6local(tb[RTA_ENCAP], &seg6l_ctx);
	}
	if (tb[RTA_ENCAP] && tb[RTA_ENCAP_TYPE]
	    && *(uint16_t *)RTA_DATA(tb[RTA_ENCAP_TYPE])
		       == LWTUNNEL_ENCAP_SEG6) {
		num_segs = parse_encap_seg6(tb[RTA_ENCAP], &seg6_segs);
	}

	if (rtm->rtm_flags & RTNH_F_ONLINK)
		SET_FLAG(nh.flags, NEXTHOP_FLAG_ONLINK);

	if (rtm->rtm_flags & RTNH_F_LINKDOWN)
		SET_FLAG(nh.flags, NEXTHOP_FLAG_LINKDOWN);

	if (num_labels)
		nexthop_add_labels(&nh, ZEBRA_LSP_STATIC, num_labels, labels);

	if (seg6l_act != ZEBRA_SEG6_LOCAL_ACTION_UNSPEC)
		nexthop_add_srv6_seg6local(&nh, seg6l_act, &seg6l_ctx);

	if (num_segs)
		nexthop_add_srv6_seg6(&nh, &seg6_segs);

	return nh;
}

/*
 * Walk the RTA_MULTIPATH attribute of a route message and append one
 * nexthop per rtnexthop entry to ng.  Returns the number of nexthops
 * collected.
 */
static uint8_t parse_multipath_nexthops_unicast(ns_id_t ns_id,
						struct nexthop_group *ng,
						struct rtmsg *rtm,
						struct rtnexthop *rtnh,
						struct rtattr **tb,
						void *prefsrc, vrf_id_t vrf_id)
{
	void *gate = NULL;
	struct interface *ifp = NULL;
	int index = 0;
	/* MPLS labels */
	mpls_label_t labels[MPLS_MAX_LABELS] = {0};
	int num_labels = 0;
	enum seg6local_action_t seg6l_act = ZEBRA_SEG6_LOCAL_ACTION_UNSPEC;
	struct seg6local_context seg6l_ctx = {};
	struct in6_addr seg6_segs = {};
	int num_segs = 0;
	struct rtattr
		*rtnh_tb[RTA_MAX + 1] = {};

	int len = RTA_PAYLOAD(tb[RTA_MULTIPATH]);
	vrf_id_t nh_vrf_id = vrf_id;

	/* NOTE(review): gate, labels, seg6 state and rtnh_tb are not
	 * reset between loop iterations; attributes parsed from an
	 * earlier rtnexthop can leak into a later one that omits them
	 * -- confirm whether this is relied upon or a latent bug. */
	for (;;) {
		struct nexthop *nh = NULL;

		/* Stop when the remaining payload cannot hold another
		 * well-formed rtnexthop. */
		if (len < (int)sizeof(*rtnh) || rtnh->rtnh_len > len)
			break;

		index = rtnh->rtnh_ifindex;
		if (index) {
			/*
			 * Yes we are looking this up
			 * for every nexthop and just
			 * using the last one looked
			 * up right now
			 */
			ifp = if_lookup_by_index_per_ns(zebra_ns_lookup(ns_id),
							index);
			if (ifp)
				nh_vrf_id = ifp->vrf->vrf_id;
			else {
				flog_warn(
					EC_ZEBRA_UNKNOWN_INTERFACE,
					"%s: Unknown interface %u specified, defaulting to VRF_DEFAULT",
					__func__, index);
				nh_vrf_id = VRF_DEFAULT;
			}
		} else
			nh_vrf_id = vrf_id;

		/* Per-nexthop attributes (gateway, lwtunnel encap). */
		if (rtnh->rtnh_len > sizeof(*rtnh)) {
			netlink_parse_rtattr(rtnh_tb, RTA_MAX, RTNH_DATA(rtnh),
					     rtnh->rtnh_len - sizeof(*rtnh));
			if (rtnh_tb[RTA_GATEWAY])
				gate = RTA_DATA(rtnh_tb[RTA_GATEWAY]);
			if (rtnh_tb[RTA_ENCAP] && rtnh_tb[RTA_ENCAP_TYPE]
			    && *(uint16_t *)RTA_DATA(rtnh_tb[RTA_ENCAP_TYPE])
				       == LWTUNNEL_ENCAP_MPLS) {
				num_labels = parse_encap_mpls(
					rtnh_tb[RTA_ENCAP], labels);
			}
			if (rtnh_tb[RTA_ENCAP] && rtnh_tb[RTA_ENCAP_TYPE]
			    && *(uint16_t *)RTA_DATA(rtnh_tb[RTA_ENCAP_TYPE])
				       == LWTUNNEL_ENCAP_SEG6_LOCAL) {
				seg6l_act = parse_encap_seg6local(
					rtnh_tb[RTA_ENCAP], &seg6l_ctx);
			}
			if (rtnh_tb[RTA_ENCAP] && rtnh_tb[RTA_ENCAP_TYPE]
			    && *(uint16_t *)RTA_DATA(rtnh_tb[RTA_ENCAP_TYPE])
				       == LWTUNNEL_ENCAP_SEG6) {
				num_segs = parse_encap_seg6(rtnh_tb[RTA_ENCAP],
							    &seg6_segs);
			}
		}

		if (gate && rtm->rtm_family == AF_INET) {
			if (index)
				nh = nexthop_from_ipv4_ifindex(
					gate, prefsrc, index, nh_vrf_id);
			else
				nh = nexthop_from_ipv4(gate, prefsrc,
						       nh_vrf_id);
		} else if (gate && rtm->rtm_family == AF_INET6) {
			if (index)
				nh = nexthop_from_ipv6_ifindex(
					gate, index, nh_vrf_id);
			else
				nh = nexthop_from_ipv6(gate, nh_vrf_id);
		} else
			nh = nexthop_from_ifindex(index, nh_vrf_id);

		if (nh) {
			/* Kernel weight is 0-based; zebra's is 1-based. */
			nh->weight = rtnh->rtnh_hops + 1;

			if (num_labels)
				nexthop_add_labels(nh, ZEBRA_LSP_STATIC,
						   num_labels, labels);

			if (seg6l_act != ZEBRA_SEG6_LOCAL_ACTION_UNSPEC)
				nexthop_add_srv6_seg6local(nh, seg6l_act,
							   &seg6l_ctx);

			if (num_segs)
				nexthop_add_srv6_seg6(nh, &seg6_segs);

			if (rtnh->rtnh_flags & RTNH_F_ONLINK)
				SET_FLAG(nh->flags, NEXTHOP_FLAG_ONLINK);

			/* Add to temporary list */
			nexthop_group_add_sorted(ng, nh);
		}

		if (rtnh->rtnh_len == 0)
			break;

		len -= NLMSG_ALIGN(rtnh->rtnh_len);
		rtnh = RTNH_NEXT(rtnh);
	}

	uint8_t nhop_num = nexthop_group_nexthop_num(ng);

	return nhop_num;
}

/* Looking up routing table by netlink interface. */
static int netlink_route_change_read_unicast(struct nlmsghdr *h, ns_id_t ns_id,
					     int startup)
{
	int len;
	struct rtmsg *rtm;
	struct rtattr *tb[RTA_MAX + 1];
	uint32_t flags = 0;
	struct prefix p;
	struct prefix_ipv6 src_p = {};
	vrf_id_t vrf_id;
	bool selfroute;

	char anyaddr[16] = {0};

	int proto = ZEBRA_ROUTE_KERNEL;
	int index = 0;
	int table;
	int metric = 0;
	uint32_t mtu = 0;
	uint8_t distance = 0;
	route_tag_t tag = 0;
	uint32_t nhe_id = 0;

	void *dest = NULL;
	void *gate = NULL;
	void *prefsrc = NULL; /* IPv4 preferred source host address */
	void *src = NULL;     /* IPv6 srcdest source prefix */
	enum blackhole_type bh_type = BLACKHOLE_UNSPEC;

	frrtrace(3, frr_zebra, netlink_route_change_read_unicast, h, ns_id,
		 startup);

	rtm = NLMSG_DATA(h);

	/* During startup we only read additions, never deletions. */
	if (startup && h->nlmsg_type != RTM_NEWROUTE)
		return 0;
	switch (rtm->rtm_type) {
	case RTN_UNICAST:
		break;
	case RTN_BLACKHOLE:
		bh_type = BLACKHOLE_NULL;
		break;
	case RTN_UNREACHABLE:
		bh_type = BLACKHOLE_REJECT;
		break;
	case RTN_PROHIBIT:
		bh_type = BLACKHOLE_ADMINPROHIB;
		break;
	default:
		if (IS_ZEBRA_DEBUG_KERNEL)
			zlog_debug("Route rtm_type: %s(%d) intentionally ignoring",
				   nl_rttype_to_str(rtm->rtm_type),
				   rtm->rtm_type);
		return 0;
	}

	len = h->nlmsg_len - NLMSG_LENGTH(sizeof(struct rtmsg));
	if (len <
0) {
		zlog_err(
			"%s: Message received from netlink is of a broken size %d %zu",
			__func__, h->nlmsg_len,
			(size_t)NLMSG_LENGTH(sizeof(struct rtmsg)));
		return -1;
	}

	netlink_parse_rtattr(tb, RTA_MAX, RTM_RTA(rtm), len);

	/* Cloned (cache) entries and redirect/kernel-origin routes are
	 * not of interest. */
	if (rtm->rtm_flags & RTM_F_CLONED)
		return 0;
	if (rtm->rtm_protocol == RTPROT_REDIRECT)
		return 0;
	if (rtm->rtm_protocol == RTPROT_KERNEL)
		return 0;

	selfroute = is_selfroute(rtm->rtm_protocol);

	/* Skip echoes of our own installs, unless the dataplane is
	 * ASIC-offloaded (then the kernel's ack carries state we need). */
	if (!startup && selfroute
	    && h->nlmsg_type == RTM_NEWROUTE
	    && !zrouter.asic_offloaded) {
		if (IS_ZEBRA_DEBUG_KERNEL)
			zlog_debug("Route type: %d Received that we think we have originated, ignoring",
				   rtm->rtm_protocol);
		return 0;
	}

	/* We don't care about change notifications for the MPLS table. */
	/* TODO: Revisit this. */
	if (rtm->rtm_family == AF_MPLS)
		return 0;

	/* Table corresponding to route. */
	if (tb[RTA_TABLE])
		table = *(int *)RTA_DATA(tb[RTA_TABLE]);
	else
		table = rtm->rtm_table;

	/* Map to VRF */
	vrf_id = vrf_lookup_by_table(table, ns_id);
	if (vrf_id == VRF_DEFAULT) {
		if (!is_zebra_valid_kernel_table(table)
		    && !is_zebra_main_routing_table(table))
			return 0;
	}

	if (rtm->rtm_flags & RTM_F_TRAP)
		flags |= ZEBRA_FLAG_TRAPPED;
	if (rtm->rtm_flags & RTM_F_OFFLOAD)
		flags |= ZEBRA_FLAG_OFFLOADED;
	if (rtm->rtm_flags & RTM_F_OFFLOAD_FAILED)
		flags |= ZEBRA_FLAG_OFFLOAD_FAILED;

	/* Route which inserted by Zebra. */
	if (selfroute) {
		flags |= ZEBRA_FLAG_SELFROUTE;
		proto = proto2zebra(rtm->rtm_protocol, rtm->rtm_family, false);
	}
	if (tb[RTA_OIF])
		index = *(int *)RTA_DATA(tb[RTA_OIF]);

	if (tb[RTA_DST])
		dest = RTA_DATA(tb[RTA_DST]);
	else
		dest = anyaddr;

	if (tb[RTA_SRC])
		src = RTA_DATA(tb[RTA_SRC]);
	else
		src = anyaddr;

	if (tb[RTA_PREFSRC])
		prefsrc = RTA_DATA(tb[RTA_PREFSRC]);

	if (tb[RTA_GATEWAY])
		gate = RTA_DATA(tb[RTA_GATEWAY]);

	if (tb[RTA_NH_ID])
		nhe_id = *(uint32_t *)RTA_DATA(tb[RTA_NH_ID]);

	if (tb[RTA_PRIORITY])
		metric = *(int *)RTA_DATA(tb[RTA_PRIORITY]);

#if defined(SUPPORT_REALMS)
	if (tb[RTA_FLOW])
		tag = *(uint32_t *)RTA_DATA(tb[RTA_FLOW]);
#endif

	if (tb[RTA_METRICS]) {
		struct rtattr *mxrta[RTAX_MAX + 1];

		netlink_parse_rtattr(mxrta, RTAX_MAX, RTA_DATA(tb[RTA_METRICS]),
				     RTA_PAYLOAD(tb[RTA_METRICS]));

		if (mxrta[RTAX_MTU])
			mtu = *(uint32_t *)RTA_DATA(mxrta[RTAX_MTU]);
	}

	if (rtm->rtm_family == AF_INET) {
		p.family = AF_INET;
		if (rtm->rtm_dst_len > IPV4_MAX_BITLEN) {
			zlog_err(
				"Invalid destination prefix length: %u received from kernel route change",
				rtm->rtm_dst_len);
			return -1;
		}
		memcpy(&p.u.prefix4, dest, 4);
		p.prefixlen = rtm->rtm_dst_len;

		/* srcdest routing only exists for IPv6. */
		if (rtm->rtm_src_len != 0) {
			flog_warn(
				EC_ZEBRA_UNSUPPORTED_V4_SRCDEST,
				"unsupported IPv4 sourcedest route (dest %pFX vrf %u)",
				&p, vrf_id);
			return 0;
		}

		/* Force debug below to not display anything for source */
		src_p.prefixlen = 0;
	} else if (rtm->rtm_family == AF_INET6) {
		p.family = AF_INET6;
		if (rtm->rtm_dst_len > IPV6_MAX_BITLEN) {
			zlog_err(
				"Invalid destination prefix length: %u received from kernel route change",
				rtm->rtm_dst_len);
			return -1;
		}
		memcpy(&p.u.prefix6, dest, 16);
		p.prefixlen = rtm->rtm_dst_len;

		src_p.family = AF_INET6;
		if (rtm->rtm_src_len > IPV6_MAX_BITLEN) {
			zlog_err(
				"Invalid source prefix length: %u received from kernel route change",
				rtm->rtm_src_len);
			return -1;
		}
		memcpy(&src_p.prefix, src, 16);
		src_p.prefixlen = rtm->rtm_src_len;
	} else {
		/* We only handle the AFs we handle... */
		if (IS_ZEBRA_DEBUG_KERNEL)
			zlog_debug("%s: unknown address-family %u", __func__,
				   rtm->rtm_family);
		return 0;
	}

	/*
	 * For ZEBRA_ROUTE_KERNEL types:
	 *
	 * The metric/priority of the route received from the kernel
	 * is a 32 bit number.  We are going to interpret the high
	 * order byte as the Admin Distance and the low order 3 bytes
	 * as the metric.
	 *
	 * This will allow us to do two things:
	 * 1) Allow the creation of kernel routes that can be
	 *    overridden by zebra.
	 * 2) Allow the old behavior for 'most' kernel route types
	 *    if a user enters 'ip route ...' v4 routes get a metric
	 *    of 0 and v6 routes get a metric of 1024.  Both of these
	 *    values will end up with a admin distance of 0, which
	 *    will cause them to win for the purposes of zebra.
	 */
	if (proto == ZEBRA_ROUTE_KERNEL) {
		distance = (metric >> 24) & 0xFF;
		metric = (metric & 0x00FFFFFF);
	}

	if (IS_ZEBRA_DEBUG_KERNEL) {
		char buf2[PREFIX_STRLEN];

		zlog_debug(
			"%s %pFX%s%s vrf %s(%u) table_id: %u metric: %d Admin Distance: %d",
			nl_msg_type_to_str(h->nlmsg_type), &p,
			src_p.prefixlen ? " from " : "",
			src_p.prefixlen ? prefix2str(&src_p, buf2, sizeof(buf2))
					: "",
			vrf_id_to_name(vrf_id), vrf_id, table, metric,
			distance);
	}

	afi_t afi = AFI_IP;
	if (rtm->rtm_family == AF_INET6)
		afi = AFI_IP6;

	if (h->nlmsg_type == RTM_NEWROUTE) {
		struct route_entry *re;
		struct nexthop_group *ng = NULL;

		re = zebra_rib_route_entry_new(vrf_id, proto, 0, flags, nhe_id,
					       table, metric, mtu, distance,
					       tag);
		/* With an nhe_id the kernel references a nexthop object;
		 * otherwise we must build the nexthop group ourselves. */
		if (!nhe_id)
			ng = nexthop_group_new();

		if (!tb[RTA_MULTIPATH]) {
			struct nexthop *nexthop, nh;

			if (!nhe_id) {
				nh = parse_nexthop_unicast(
					ns_id, rtm, tb, bh_type, index, prefsrc,
					gate, afi, vrf_id);

				nexthop = nexthop_new();
				*nexthop = nh;
				nexthop_group_add_sorted(ng, nexthop);
			}
		} else {
			/* This is a multipath route */
			struct rtnexthop *rtnh =
				(struct rtnexthop *)RTA_DATA(tb[RTA_MULTIPATH]);

			if (!nhe_id) {
				uint8_t nhop_num;

				/* Use temporary list of nexthops; parse
				 * message payload's nexthops.
				 */
				nhop_num =
					parse_multipath_nexthops_unicast(
						ns_id, ng, rtm, rtnh, tb,
						prefsrc, vrf_id);

				zserv_nexthop_num_warn(
					__func__, (const struct prefix *)&p,
					nhop_num);

				if (nhop_num == 0) {
					nexthop_group_delete(&ng);
					ng = NULL;
				}
			}
		}
		if (nhe_id || ng)
			rib_add_multipath(afi, SAFI_UNICAST, &p, &src_p, re, ng,
					  startup);
		else {
			/*
			 * I really don't see how this is possible
			 * but since we are testing for it let's
			 * let the end user know why the route
			 * that was just received was swallowed
			 * up and forgotten
			 */
			zlog_err(
				"%s: %pFX multipath RTM_NEWROUTE has a invalid nexthop group from the kernel",
				__func__, &p);
			XFREE(MTYPE_RE, re);
		}
	} else {
		if (nhe_id) {
			rib_delete(afi, SAFI_UNICAST, vrf_id, proto, 0, flags,
				   &p, &src_p, NULL, nhe_id, table, metric,
				   distance, true);
		} else {
			if (!tb[RTA_MULTIPATH]) {
				struct nexthop nh;

				nh = parse_nexthop_unicast(
					ns_id, rtm, tb, bh_type, index, prefsrc,
					gate, afi, vrf_id);
				rib_delete(afi, SAFI_UNICAST, vrf_id, proto, 0,
					   flags, &p, &src_p, &nh, 0, table,
					   metric, distance, true);
			} else {
				/* XXX: need to compare the entire list of
				 * nexthops here for NLM_F_APPEND stupidity */
				rib_delete(afi, SAFI_UNICAST, vrf_id, proto, 0,
					   flags, &p, &src_p, NULL, 0, table,
					   metric, distance, true);
			}
		}
	}

	return 0;
}

/* Scratch buffer filled in by RTM_GETROUTE mroute queries; set by the
 * caller before issuing the request. */
static struct mcast_route_data *mroute = NULL;

/*
 * Read one multicast route answer (RTM_GETROUTE response) into the
 * global mroute scratch buffer.
 */
static int netlink_route_change_read_multicast(struct nlmsghdr *h,
					       ns_id_t ns_id, int startup)
{
	int len;
	struct rtmsg *rtm;
	struct rtattr *tb[RTA_MAX + 1];
	struct mcast_route_data *m;
	int iif = 0;
	int count;
	int oif[256];
	int oif_count = 0;
	char oif_list[256] = "\0";
	vrf_id_t vrf;
	int table;

	assert(mroute);
	m = mroute;

	rtm = NLMSG_DATA(h);

	len = h->nlmsg_len - NLMSG_LENGTH(sizeof(struct rtmsg));

	netlink_parse_rtattr(tb, RTA_MAX, RTM_RTA(rtm), len);

	if (tb[RTA_TABLE])
		table = *(int *)RTA_DATA(tb[RTA_TABLE]);
	else
		table = rtm->rtm_table;

	vrf = vrf_lookup_by_table(table, ns_id);

	if (tb[RTA_IIF])
		iif = *(int *)RTA_DATA(tb[RTA_IIF]);

	if (tb[RTA_SRC]) {
		if (rtm->rtm_family == RTNL_FAMILY_IPMR)
			m->src.ipaddr_v4 =
				*(struct in_addr *)RTA_DATA(tb[RTA_SRC]);
		else
			m->src.ipaddr_v6 =
				*(struct in6_addr *)RTA_DATA(tb[RTA_SRC]);
	}

	if (tb[RTA_DST]) {
		if (rtm->rtm_family == RTNL_FAMILY_IPMR)
			m->grp.ipaddr_v4 =
				*(struct in_addr *)RTA_DATA(tb[RTA_DST]);
		else
			m->grp.ipaddr_v6 =
				*(struct in6_addr *)RTA_DATA(tb[RTA_DST]);
	}

	if (tb[RTA_EXPIRES])
		m->lastused = *(unsigned long long *)RTA_DATA(tb[RTA_EXPIRES]);

	if (tb[RTA_MULTIPATH]) {
		struct rtnexthop *rtnh =
			(struct rtnexthop *)RTA_DATA(tb[RTA_MULTIPATH]);

		len = RTA_PAYLOAD(tb[RTA_MULTIPATH]);
		/* NOTE(review): oif[] has 256 entries but oif_count is
		 * appended without a bounds check; the kernel's MAXVIFS
		 * keeps this small in practice -- consider guarding. */
		for (;;) {
			if (len < (int)sizeof(*rtnh) || rtnh->rtnh_len > len)
				break;

			oif[oif_count] = rtnh->rtnh_ifindex;
			oif_count++;

			if (rtnh->rtnh_len == 0)
				break;

			len -= NLMSG_ALIGN(rtnh->rtnh_len);
			rtnh = RTNH_NEXT(rtnh);
		}
	}

	if (rtm->rtm_family ==
RTNL_FAMILY_IPMR) {
		SET_IPADDR_V4(&m->src);
		SET_IPADDR_V4(&m->grp);
	} else if (rtm->rtm_family == RTNL_FAMILY_IP6MR) {
		SET_IPADDR_V6(&m->src);
		SET_IPADDR_V6(&m->grp);
	} else {
		zlog_warn("%s: Invalid rtm_family received", __func__);
		return 0;
	}

	if (IS_ZEBRA_DEBUG_KERNEL) {
		struct interface *ifp = NULL;
		struct zebra_vrf *zvrf = NULL;

		/* Render the outgoing interface list for the debug log. */
		for (count = 0; count < oif_count; count++) {
			ifp = if_lookup_by_index(oif[count], vrf);
			char temp[256];

			snprintf(temp, sizeof(temp), "%s(%d) ",
				 ifp ? ifp->name : "Unknown", oif[count]);
			strlcat(oif_list, temp, sizeof(oif_list));
		}
		zvrf = zebra_vrf_lookup_by_id(vrf);
		ifp = if_lookup_by_index(iif, vrf);
		zlog_debug(
			"MCAST VRF: %s(%d) %s (%pIA,%pIA) IIF: %s(%d) OIF: %s jiffies: %lld",
			zvrf_name(zvrf), vrf, nl_msg_type_to_str(h->nlmsg_type),
			&m->src, &m->grp, ifp ? ifp->name : "Unknown", iif,
			oif_list, m->lastused);
	}
	return 0;
}

/*
 * Top-level netlink route message dispatcher: validates the message,
 * filters out families/types zebra does not track, and hands unicast
 * changes to netlink_route_change_read_unicast().
 */
int netlink_route_change(struct nlmsghdr *h, ns_id_t ns_id, int startup)
{
	int len;
	struct rtmsg *rtm;

	rtm = NLMSG_DATA(h);

	if (!(h->nlmsg_type == RTM_NEWROUTE || h->nlmsg_type == RTM_DELROUTE)) {
		/* If this is not route add/delete message print warning. */
		zlog_debug("Kernel message: %s NS %u",
			   nl_msg_type_to_str(h->nlmsg_type), ns_id);
		return 0;
	}

	switch (rtm->rtm_family) {
	case AF_INET:
	case AF_INET6:
		break;

	case RTNL_FAMILY_IPMR:
	case RTNL_FAMILY_IP6MR:
		/* notifications on IPMR are irrelevant to zebra, we only care
		 * about responses to RTM_GETROUTE requests we sent.
		 */
		return 0;

	default:
		flog_warn(
			EC_ZEBRA_UNKNOWN_FAMILY,
			"Invalid address family: %u received from kernel route change: %s",
			rtm->rtm_family, nl_msg_type_to_str(h->nlmsg_type));
		return 0;
	}

	/* Connected route. */
	if (IS_ZEBRA_DEBUG_KERNEL)
		zlog_debug("%s %s %s proto %s NS %u",
			   nl_msg_type_to_str(h->nlmsg_type),
			   nl_family_to_str(rtm->rtm_family),
			   nl_rttype_to_str(rtm->rtm_type),
			   nl_rtproto_to_str(rtm->rtm_protocol), ns_id);


	len = h->nlmsg_len - NLMSG_LENGTH(sizeof(struct rtmsg));
	if (len < 0) {
		zlog_err(
			"%s: Message received from netlink is of a broken size: %d %zu",
			__func__, h->nlmsg_len,
			(size_t)NLMSG_LENGTH(sizeof(struct rtmsg)));
		return -1;
	}

	/* these are "magic" kernel-managed *unicast* routes used for
	 * outputting locally generated multicast traffic (which uses unicast
	 * handling on Linux because ~reasons~.
	 */
	if (rtm->rtm_type == RTN_MULTICAST)
		return 0;

	netlink_route_change_read_unicast(h, ns_id, startup);
	return 0;
}

/* Request for specific route information from the kernel */
static int netlink_request_route(struct zebra_ns *zns, int family, int type)
{
	struct {
		struct nlmsghdr n;
		struct rtmsg rtm;
	} req;

	/* Form the request, specifying filter (rtattr) if needed. */
	memset(&req, 0, sizeof(req));
	req.n.nlmsg_type = type;
	req.n.nlmsg_flags = NLM_F_ROOT | NLM_F_MATCH | NLM_F_REQUEST;
	req.n.nlmsg_len = NLMSG_LENGTH(sizeof(struct rtmsg));
	req.rtm.rtm_family = family;

	return netlink_request(&zns->netlink_cmd, &req);
}

/* Routing table read function using netlink interface.  Only called
   bootstrap time. */
int netlink_route_read(struct zebra_ns *zns)
{
	int ret;
	struct zebra_dplane_info dp_info;

	zebra_dplane_info_from_zns(&dp_info, zns, true /*is_cmd*/);

	/* Get IPv4 routing table. */
	ret = netlink_request_route(zns, AF_INET, RTM_GETROUTE);
	if (ret < 0)
		return ret;
	ret = netlink_parse_info(netlink_route_change_read_unicast,
				 &zns->netlink_cmd, &dp_info, 0, true);
	if (ret < 0)
		return ret;

	/* Get IPv6 routing table. */
	ret = netlink_request_route(zns, AF_INET6, RTM_GETROUTE);
	if (ret < 0)
		return ret;
	ret = netlink_parse_info(netlink_route_change_read_unicast,
				 &zns->netlink_cmd, &dp_info, 0, true);
	if (ret < 0)
		return ret;

	return 0;
}

/*
 * The function returns true if the gateway info could be added
 * to the message, otherwise false is returned.
 */
static bool _netlink_route_add_gateway_info(uint8_t route_family,
					    uint8_t gw_family,
					    struct nlmsghdr *nlmsg,
					    size_t req_size, int bytelen,
					    const struct nexthop *nexthop)
{
	if (route_family == AF_MPLS) {
		/* MPLS routes carry the gateway in an RTA_VIA attribute
		 * (family prefix + address). */
		struct gw_family_t gw_fam;

		gw_fam.family = gw_family;
		if (gw_family == AF_INET)
			memcpy(&gw_fam.gate.ipv4, &nexthop->gate.ipv4, bytelen);
		else
			memcpy(&gw_fam.gate.ipv6, &nexthop->gate.ipv6, bytelen);
		if (!nl_attr_put(nlmsg, req_size, RTA_VIA, &gw_fam.family,
				 bytelen + 2))
			return false;
	} else {
		/* Skip RFC 5549 mapped-IPv6 gateways; those are encoded
		 * via the ipv4_ll workaround elsewhere. */
		if (!(nexthop->rparent
		      && IS_MAPPED_IPV6(&nexthop->rparent->gate.ipv6))) {
			if (gw_family == AF_INET) {
				if (!nl_attr_put(nlmsg, req_size, RTA_GATEWAY,
						 &nexthop->gate.ipv4, bytelen))
					return false;
			} else {
				if (!nl_attr_put(nlmsg, req_size, RTA_GATEWAY,
						 &nexthop->gate.ipv6, bytelen))
					return false;
			}
		}
	}

	return true;
}

/*
 * Flatten an MPLS label stack into wire-format LSEs, skipping
 * implicit-null labels.  label_buf is only filled under debug and
 * receives a human-readable "label A/B/..." string.  Returns the
 * number of LSEs written to out_lse.
 */
static int build_label_stack(struct mpls_label_stack *nh_label,
			     mpls_lse_t *out_lse, char *label_buf,
			     size_t label_buf_size)
{
	char label_buf1[20];
	int num_labels = 0;

	for (int i = 0; nh_label && i < nh_label->num_labels; i++) {
		if (nh_label->label[i] == MPLS_LABEL_IMPLICIT_NULL)
			continue;

		if (IS_ZEBRA_DEBUG_KERNEL) {
			if (!num_labels)
				snprintf(label_buf, label_buf_size, "label %u",
					 nh_label->label[i]);
			else {
				snprintf(label_buf1, sizeof(label_buf1), "/%u",
					 nh_label->label[i]);
				strlcat(label_buf, label_buf1, label_buf_size);
			}
		}

		out_lse[num_labels] =
			mpls_lse_encode(nh_label->label[i], 0, 0, 0);
		num_labels++;
	}

	return num_labels;
}

static bool _netlink_route_encode_label_info(struct
					     mpls_label_stack *nh_label,
					     struct nlmsghdr *nlmsg,
					     size_t buflen, struct rtmsg *rtmsg,
					     char *label_buf,
					     size_t label_buf_size)
{
	mpls_lse_t out_lse[MPLS_MAX_LABELS];
	int num_labels;

	/*
	 * label_buf is *only* currently used within debugging.
	 * As such when we assign it we are guarding it inside
	 * a debug test.  If you want to change this make sure
	 * you fix this assumption
	 */
	label_buf[0] = '\0';

	num_labels =
		build_label_stack(nh_label, out_lse, label_buf, label_buf_size);

	if (num_labels) {
		/* Set the BoS bit */
		out_lse[num_labels - 1] |= htonl(1 << MPLS_LS_S_SHIFT);

		if (rtmsg->rtm_family == AF_MPLS) {
			/* Label swap on an MPLS route: new stack goes in
			 * RTA_NEWDST. */
			if (!nl_attr_put(nlmsg, buflen, RTA_NEWDST, &out_lse,
					 num_labels * sizeof(mpls_lse_t)))
				return false;
		} else {
			/* IP route with labels: encode as an MPLS
			 * lwtunnel encap (RTA_ENCAP_TYPE + nested
			 * RTA_ENCAP). */
			struct rtattr *nest;

			if (!nl_attr_put16(nlmsg, buflen, RTA_ENCAP_TYPE,
					   LWTUNNEL_ENCAP_MPLS))
				return false;

			nest = nl_attr_nest(nlmsg, buflen, RTA_ENCAP);
			if (!nest)
				return false;

			if (!nl_attr_put(nlmsg, buflen, MPLS_IPTUNNEL_DST,
					 &out_lse,
					 num_labels * sizeof(mpls_lse_t)))
				return false;
			nl_attr_nest_end(nlmsg, nest);
		}
	}

	return true;
}

/*
 * Encode the preferred-source address for a nexthop, favoring a
 * route-map supplied source over the nexthop's own source address.
 * Returns false only on attribute-encoding failure.
 */
static bool _netlink_route_encode_nexthop_src(const struct nexthop *nexthop,
					      int family,
					      struct nlmsghdr *nlmsg,
					      size_t buflen, int bytelen)
{
	if (family == AF_INET) {
		if (nexthop->rmap_src.ipv4.s_addr != INADDR_ANY) {
			if (!nl_attr_put(nlmsg, buflen, RTA_PREFSRC,
					 &nexthop->rmap_src.ipv4, bytelen))
				return false;
		} else if (nexthop->src.ipv4.s_addr != INADDR_ANY) {
			if (!nl_attr_put(nlmsg, buflen, RTA_PREFSRC,
					 &nexthop->src.ipv4, bytelen))
				return false;
		}
	} else if (family == AF_INET6) {
		if (!IN6_IS_ADDR_UNSPECIFIED(&nexthop->rmap_src.ipv6)) {
			if (!nl_attr_put(nlmsg, buflen, RTA_PREFSRC,
					 &nexthop->rmap_src.ipv6, bytelen))
				return false;
		} else if (!IN6_IS_ADDR_UNSPECIFIED(&nexthop->src.ipv6)) {
			if (!nl_attr_put(nlmsg, buflen, RTA_PREFSRC,
					 &nexthop->src.ipv6, bytelen))
				return false;
		}
	}

	return true;
}

/*
 * Fill buffer with a single-SID SEG6_IPTUNNEL_SRH payload for seg.
 * Returns the number of bytes written, or -1 if buflen is too small.
 */
static ssize_t fill_seg6ipt_encap(char *buffer, size_t buflen,
				  const struct in6_addr *seg)
{
	struct seg6_iptunnel_encap *ipt;
	struct ipv6_sr_hdr *srh;
	/* SRH header (8 bytes) + one 16-byte segment. */
	const size_t srhlen = 24;

	/*
	 * Caution: Support only SINGLE-SID, not MULTI-SID
	 * This function only supports the case where segs represents
	 * a single SID. If you want to extend the SRv6 functionality,
	 * you should improve the Boundary Check.
	 * Ex. In case of set a SID-List include multiple-SIDs as an
	 * argument of the Transit Behavior, we must support variable
	 * boundary check for buflen.
	 */
	if (buflen < (sizeof(struct seg6_iptunnel_encap) +
		      sizeof(struct ipv6_sr_hdr) + 16))
		return -1;

	memset(buffer, 0, buflen);

	ipt = (struct seg6_iptunnel_encap *)buffer;
	ipt->mode = SEG6_IPTUN_MODE_ENCAP;
	srh = ipt->srh;
	/* hdrlen is in 8-octet units, excluding the first 8 octets. */
	srh->hdrlen = (srhlen >> 3) - 1;
	/* IPv6 Routing Header type 4 = Segment Routing Header (RFC 8754). */
	srh->type = 4;
	srh->segments_left = 0;
	srh->first_segment = 0;
	memcpy(&srh->segments[0], seg, sizeof(struct in6_addr));

	/* SRH plus the seg6_iptunnel_encap mode word. */
	return srhlen + 4;
}

/* This function takes a nexthop as argument and adds
 * the appropriate netlink attributes to an existing
 * netlink message.
 *
 * @param routedesc: Human readable description of route type
 *                   (direct/recursive, single-/multipath)
 * @param bytelen: Length of addresses in bytes.
 * @param nexthop: Nexthop information
 * @param nlmsg: nlmsghdr structure to fill in.
 * @param req_size: The size allocated for the message.
 *
 * The function returns true if the nexthop could be added
 * to the message, otherwise false is returned.
 */
static bool _netlink_route_build_singlepath(const struct prefix *p,
					    const char *routedesc, int bytelen,
					    const struct nexthop *nexthop,
					    struct nlmsghdr *nlmsg,
					    struct rtmsg *rtmsg,
					    size_t req_size, int cmd)
{

	char label_buf[256];
	struct vrf *vrf;
	char addrstr[INET6_ADDRSTRLEN];

	assert(nexthop);

	vrf = vrf_lookup_by_id(nexthop->vrf_id);

	if (!_netlink_route_encode_label_info(nexthop->nh_label, nlmsg,
					      req_size, rtmsg, label_buf,
					      sizeof(label_buf)))
		return false;

	if (nexthop->nh_srv6) {
		/* seg6local behavior (END, END.X, END.T, END.DX4,
		 * END.DT4/6) encoded as a nested lwtunnel encap. */
		if (nexthop->nh_srv6->seg6local_action !=
		    ZEBRA_SEG6_LOCAL_ACTION_UNSPEC) {
			struct rtattr *nest;
			const struct seg6local_context *ctx;

			ctx = &nexthop->nh_srv6->seg6local_ctx;
			if (!nl_attr_put16(nlmsg, req_size, RTA_ENCAP_TYPE,
					   LWTUNNEL_ENCAP_SEG6_LOCAL))
				return false;

			nest = nl_attr_nest(nlmsg, req_size, RTA_ENCAP);
			if (!nest)
				return false;

			switch (nexthop->nh_srv6->seg6local_action) {
			case ZEBRA_SEG6_LOCAL_ACTION_END:
				if (!nl_attr_put32(nlmsg, req_size,
						   SEG6_LOCAL_ACTION,
						   SEG6_LOCAL_ACTION_END))
					return false;
				break;
			case ZEBRA_SEG6_LOCAL_ACTION_END_X:
				if (!nl_attr_put32(nlmsg, req_size,
						   SEG6_LOCAL_ACTION,
						   SEG6_LOCAL_ACTION_END_X))
					return false;
				if (!nl_attr_put(nlmsg, req_size,
						 SEG6_LOCAL_NH6, &ctx->nh6,
						 sizeof(struct in6_addr)))
					return false;
				break;
			case ZEBRA_SEG6_LOCAL_ACTION_END_T:
				if (!nl_attr_put32(nlmsg, req_size,
						   SEG6_LOCAL_ACTION,
						   SEG6_LOCAL_ACTION_END_T))
					return false;
				if (!nl_attr_put32(nlmsg, req_size,
						   SEG6_LOCAL_TABLE,
						   ctx->table))
					return false;
				break;
			case ZEBRA_SEG6_LOCAL_ACTION_END_DX4:
				if (!nl_attr_put32(nlmsg, req_size,
						   SEG6_LOCAL_ACTION,
						   SEG6_LOCAL_ACTION_END_DX4))
					return false;
				if (!nl_attr_put(nlmsg, req_size,
						 SEG6_LOCAL_NH4, &ctx->nh4,
						 sizeof(struct in_addr)))
					return false;
				break;
			case ZEBRA_SEG6_LOCAL_ACTION_END_DT6:
				if (!nl_attr_put32(nlmsg, req_size,
						   SEG6_LOCAL_ACTION,
						   SEG6_LOCAL_ACTION_END_DT6))
					return false;
				if (!nl_attr_put32(nlmsg, req_size,
						   SEG6_LOCAL_TABLE,
						   ctx->table))
					return false;
				break;
			case ZEBRA_SEG6_LOCAL_ACTION_END_DT4:
				if (!nl_attr_put32(nlmsg, req_size,
						   SEG6_LOCAL_ACTION,
						   SEG6_LOCAL_ACTION_END_DT4))
					return false;
				if (!nl_attr_put32(nlmsg, req_size,
						   SEG6_LOCAL_VRFTABLE,
						   ctx->table))
					return false;
				break;
			default:
				zlog_err("%s: unsupport seg6local behaviour action=%u",
					 __func__,
					 nexthop->nh_srv6->seg6local_action);
				return false;
			}
			nl_attr_nest_end(nlmsg, nest);
		}

		/* SRv6 transit (encap) SID list. */
		if (!sid_zero(&nexthop->nh_srv6->seg6_segs)) {
			char tun_buf[4096];
			ssize_t tun_len;
			struct rtattr *nest;

			if (!nl_attr_put16(nlmsg, req_size, RTA_ENCAP_TYPE,
					   LWTUNNEL_ENCAP_SEG6))
				return false;
			nest = nl_attr_nest(nlmsg, req_size, RTA_ENCAP);
			if (!nest)
				return false;
			tun_len = fill_seg6ipt_encap(tun_buf, sizeof(tun_buf),
						     &nexthop->nh_srv6->seg6_segs);
			if (tun_len < 0)
				return false;
			if (!nl_attr_put(nlmsg, req_size, SEG6_IPTUNNEL_SRH,
					 tun_buf, tun_len))
				return false;
			nl_attr_nest_end(nlmsg, nest);
		}
	}

	if (CHECK_FLAG(nexthop->flags, NEXTHOP_FLAG_ONLINK))
		rtmsg->rtm_flags |= RTNH_F_ONLINK;

	/* RFC 5549: IPv4 route over an IPv6 nexthop is encoded with the
	 * pre-computed IPv4 link-local gateway and forced onlink. */
	if (is_route_v4_over_v6(rtmsg->rtm_family, nexthop->type)) {
		rtmsg->rtm_flags |= RTNH_F_ONLINK;
		if (!nl_attr_put(nlmsg, req_size, RTA_GATEWAY, &ipv4_ll, 4))
			return false;
		if (!nl_attr_put32(nlmsg, req_size, RTA_OIF, nexthop->ifindex))
			return false;

		if (cmd == RTM_NEWROUTE) {
			if (!_netlink_route_encode_nexthop_src(
				    nexthop, AF_INET, nlmsg, req_size, bytelen))
				return false;
		}

		if (IS_ZEBRA_DEBUG_KERNEL)
			zlog_debug("%s: 5549 (%s): %pFX nexthop via %s %s if %u vrf %s(%u)",
				   __func__, routedesc, p, ipv4_ll_buf,
				   label_buf, nexthop->ifindex,
				   VRF_LOGNAME(vrf), nexthop->vrf_id);
		return true;
	}

	if (nexthop->type == NEXTHOP_TYPE_IPV4
	    || nexthop->type == NEXTHOP_TYPE_IPV4_IFINDEX) {
		/* Send deletes to the kernel without specifying the next-hop */
		if (cmd != RTM_DELROUTE) {
			if (!_netlink_route_add_gateway_info(
				    rtmsg->rtm_family, AF_INET, nlmsg, req_size,
				    bytelen, nexthop))
				return false;
		}

		if (cmd == RTM_NEWROUTE) {
			if (!_netlink_route_encode_nexthop_src(
				    nexthop, AF_INET, nlmsg, req_size, bytelen))
				return false;
		}

		if (IS_ZEBRA_DEBUG_KERNEL) {
			inet_ntop(AF_INET, &nexthop->gate.ipv4, addrstr,
				  sizeof(addrstr));
			zlog_debug("%s: (%s): %pFX nexthop via %s %s if %u vrf %s(%u)",
				   __func__, routedesc, p, addrstr, label_buf,
				   nexthop->ifindex, VRF_LOGNAME(vrf),
				   nexthop->vrf_id);
		}
	}

	if (nexthop->type == NEXTHOP_TYPE_IPV6
	    || nexthop->type == NEXTHOP_TYPE_IPV6_IFINDEX) {
		if (!_netlink_route_add_gateway_info(rtmsg->rtm_family,
						     AF_INET6, nlmsg, req_size,
						     bytelen, nexthop))
			return false;

		if (cmd == RTM_NEWROUTE) {
			if (!_netlink_route_encode_nexthop_src(
				    nexthop, AF_INET6, nlmsg, req_size,
				    bytelen))
				return false;
		}

		if (IS_ZEBRA_DEBUG_KERNEL) {
			inet_ntop(AF_INET6, &nexthop->gate.ipv6, addrstr,
				  sizeof(addrstr));
			zlog_debug("%s: (%s): %pFX nexthop via %s %s if %u vrf %s(%u)",
				   __func__, routedesc, p, addrstr, label_buf,
				   nexthop->ifindex, VRF_LOGNAME(vrf),
				   nexthop->vrf_id);
		}
	}

	/*
	 * We have the ifindex so we should always send it
	 * This is especially useful if we are doing route
	 * leaking.
	 */
	if (nexthop->type != NEXTHOP_TYPE_BLACKHOLE) {
		if (!nl_attr_put32(nlmsg, req_size, RTA_OIF, nexthop->ifindex))
			return false;
	}

	if (nexthop->type == NEXTHOP_TYPE_IFINDEX) {
		if (cmd == RTM_NEWROUTE) {
			if (!_netlink_route_encode_nexthop_src(
				    nexthop, AF_INET, nlmsg, req_size, bytelen))
				return false;
		}

		if (IS_ZEBRA_DEBUG_KERNEL)
			zlog_debug("%s: (%s): %pFX nexthop via if %u vrf %s(%u)",
				   __func__, routedesc, p, nexthop->ifindex,
				   VRF_LOGNAME(vrf), nexthop->vrf_id);
	}

	return true;
}

/* This function appends tag value as rtnl flow attribute
 * to the given netlink msg only if value is less than 256.
 * Used only if SUPPORT_REALMS enabled.
+ * + * @param nlmsg: nlmsghdr structure to fill in. + * @param maxlen: The size allocated for the message. + * @param tag: The route tag. + * + * The function returns true if the flow attribute could + * be added to the message, otherwise false is returned. + */ +static inline bool _netlink_set_tag(struct nlmsghdr *n, unsigned int maxlen, + route_tag_t tag) +{ + if (tag > 0 && tag <= 255) { + if (!nl_attr_put32(n, maxlen, RTA_FLOW, tag)) + return false; + } + return true; +} + +/* This function takes a nexthop as argument and + * appends to the given netlink msg. If the nexthop + * defines a preferred source, the src parameter + * will be modified to point to that src, otherwise + * it will be kept unmodified. + * + * @param routedesc: Human readable description of route type + * (direct/recursive, single-/multipath) + * @param bytelen: Length of addresses in bytes. + * @param nexthop: Nexthop information + * @param nlmsg: nlmsghdr structure to fill in. + * @param req_size: The size allocated for the message. + * @param src: pointer pointing to a location where + * the prefsrc should be stored. + * + * The function returns true if the nexthop could be added + * to the message, otherwise false is returned. 
+ */ +static bool _netlink_route_build_multipath( + const struct prefix *p, const char *routedesc, int bytelen, + const struct nexthop *nexthop, struct nlmsghdr *nlmsg, size_t req_size, + struct rtmsg *rtmsg, const union g_addr **src, route_tag_t tag) +{ + char label_buf[256]; + struct vrf *vrf; + struct rtnexthop *rtnh; + + rtnh = nl_attr_rtnh(nlmsg, req_size); + if (rtnh == NULL) + return false; + + assert(nexthop); + + vrf = vrf_lookup_by_id(nexthop->vrf_id); + + if (!_netlink_route_encode_label_info(nexthop->nh_label, nlmsg, + req_size, rtmsg, label_buf, + sizeof(label_buf))) + return false; + + if (CHECK_FLAG(nexthop->flags, NEXTHOP_FLAG_ONLINK)) + rtnh->rtnh_flags |= RTNH_F_ONLINK; + + if (is_route_v4_over_v6(rtmsg->rtm_family, nexthop->type)) { + rtnh->rtnh_flags |= RTNH_F_ONLINK; + if (!nl_attr_put(nlmsg, req_size, RTA_GATEWAY, &ipv4_ll, 4)) + return false; + rtnh->rtnh_ifindex = nexthop->ifindex; + if (nexthop->weight) + rtnh->rtnh_hops = nexthop->weight - 1; + + if (nexthop->rmap_src.ipv4.s_addr != INADDR_ANY) + *src = &nexthop->rmap_src; + else if (nexthop->src.ipv4.s_addr != INADDR_ANY) + *src = &nexthop->src; + + if (IS_ZEBRA_DEBUG_KERNEL) + zlog_debug( + "%s: 5549 (%s): %pFX nexthop via %s %s if %u vrf %s(%u)", + __func__, routedesc, p, ipv4_ll_buf, label_buf, + nexthop->ifindex, VRF_LOGNAME(vrf), + nexthop->vrf_id); + nl_attr_rtnh_end(nlmsg, rtnh); + return true; + } + + if (nexthop->type == NEXTHOP_TYPE_IPV4 + || nexthop->type == NEXTHOP_TYPE_IPV4_IFINDEX) { + if (!_netlink_route_add_gateway_info(rtmsg->rtm_family, AF_INET, + nlmsg, req_size, bytelen, + nexthop)) + return false; + + if (nexthop->rmap_src.ipv4.s_addr != INADDR_ANY) + *src = &nexthop->rmap_src; + else if (nexthop->src.ipv4.s_addr != INADDR_ANY) + *src = &nexthop->src; + + if (IS_ZEBRA_DEBUG_KERNEL) + zlog_debug("%s: (%s): %pFX nexthop via %pI4 %s if %u vrf %s(%u)", + __func__, routedesc, p, &nexthop->gate.ipv4, + label_buf, nexthop->ifindex, + VRF_LOGNAME(vrf), nexthop->vrf_id); + } + 
if (nexthop->type == NEXTHOP_TYPE_IPV6 + || nexthop->type == NEXTHOP_TYPE_IPV6_IFINDEX) { + if (!_netlink_route_add_gateway_info(rtmsg->rtm_family, + AF_INET6, nlmsg, req_size, + bytelen, nexthop)) + return false; + + if (!IN6_IS_ADDR_UNSPECIFIED(&nexthop->rmap_src.ipv6)) + *src = &nexthop->rmap_src; + else if (!IN6_IS_ADDR_UNSPECIFIED(&nexthop->src.ipv6)) + *src = &nexthop->src; + + if (IS_ZEBRA_DEBUG_KERNEL) + zlog_debug("%s: (%s): %pFX nexthop via %pI6 %s if %u vrf %s(%u)", + __func__, routedesc, p, &nexthop->gate.ipv6, + label_buf, nexthop->ifindex, + VRF_LOGNAME(vrf), nexthop->vrf_id); + } + + /* + * We have figured out the ifindex so we should always send it + * This is especially useful if we are doing route + * leaking. + */ + if (nexthop->type != NEXTHOP_TYPE_BLACKHOLE) + rtnh->rtnh_ifindex = nexthop->ifindex; + + /* ifindex */ + if (nexthop->type == NEXTHOP_TYPE_IFINDEX) { + if (nexthop->rmap_src.ipv4.s_addr != INADDR_ANY) + *src = &nexthop->rmap_src; + else if (nexthop->src.ipv4.s_addr != INADDR_ANY) + *src = &nexthop->src; + + if (IS_ZEBRA_DEBUG_KERNEL) + zlog_debug("%s: (%s): %pFX nexthop via if %u vrf %s(%u)", + __func__, routedesc, p, nexthop->ifindex, + VRF_LOGNAME(vrf), nexthop->vrf_id); + } + + if (nexthop->weight) + rtnh->rtnh_hops = nexthop->weight - 1; + + if (!_netlink_set_tag(nlmsg, req_size, tag)) + return false; + + nl_attr_rtnh_end(nlmsg, rtnh); + return true; +} + +static inline bool +_netlink_mpls_build_singlepath(const struct prefix *p, const char *routedesc, + const struct zebra_nhlfe *nhlfe, + struct nlmsghdr *nlmsg, struct rtmsg *rtmsg, + size_t req_size, int cmd) +{ + int bytelen; + uint8_t family; + + family = NHLFE_FAMILY(nhlfe); + bytelen = (family == AF_INET ? 
4 : 16); + return _netlink_route_build_singlepath(p, routedesc, bytelen, + nhlfe->nexthop, nlmsg, rtmsg, + req_size, cmd); +} + + +static inline bool +_netlink_mpls_build_multipath(const struct prefix *p, const char *routedesc, + const struct zebra_nhlfe *nhlfe, + struct nlmsghdr *nlmsg, size_t req_size, + struct rtmsg *rtmsg, const union g_addr **src) +{ + int bytelen; + uint8_t family; + + family = NHLFE_FAMILY(nhlfe); + bytelen = (family == AF_INET ? 4 : 16); + return _netlink_route_build_multipath(p, routedesc, bytelen, + nhlfe->nexthop, nlmsg, req_size, + rtmsg, src, 0); +} + +static void _netlink_mpls_debug(int cmd, uint32_t label, const char *routedesc) +{ + if (IS_ZEBRA_DEBUG_KERNEL) + zlog_debug("netlink_mpls_multipath_msg_encode() (%s): %s %u/20", + routedesc, nl_msg_type_to_str(cmd), label); +} + +static int netlink_neigh_update(int cmd, int ifindex, void *addr, char *lla, + int llalen, ns_id_t ns_id, uint8_t family, + bool permanent, uint8_t protocol) +{ + struct { + struct nlmsghdr n; + struct ndmsg ndm; + char buf[256]; + } req; + + struct zebra_ns *zns = zebra_ns_lookup(ns_id); + + memset(&req, 0, sizeof(req)); + + req.n.nlmsg_len = NLMSG_LENGTH(sizeof(struct ndmsg)); + req.n.nlmsg_flags = NLM_F_CREATE | NLM_F_REQUEST; + req.n.nlmsg_type = cmd; // RTM_NEWNEIGH or RTM_DELNEIGH + req.n.nlmsg_pid = zns->netlink_cmd.snl.nl_pid; + + req.ndm.ndm_family = family; + req.ndm.ndm_ifindex = ifindex; + req.ndm.ndm_type = RTN_UNICAST; + if (cmd == RTM_NEWNEIGH) { + if (!permanent) + req.ndm.ndm_state = NUD_REACHABLE; + else + req.ndm.ndm_state = NUD_PERMANENT; + } else + req.ndm.ndm_state = NUD_FAILED; + + nl_attr_put(&req.n, sizeof(req), NDA_PROTOCOL, &protocol, + sizeof(protocol)); + req.ndm.ndm_type = RTN_UNICAST; + nl_attr_put(&req.n, sizeof(req), NDA_DST, addr, + family2addrsize(family)); + if (lla) + nl_attr_put(&req.n, sizeof(req), NDA_LLADDR, lla, llalen); + + return netlink_talk(netlink_talk_filter, &req.n, &zns->netlink_cmd, zns, + false); +} + +static 
bool nexthop_set_src(const struct nexthop *nexthop, int family, + union g_addr *src) +{ + if (family == AF_INET) { + if (nexthop->rmap_src.ipv4.s_addr != INADDR_ANY) { + src->ipv4 = nexthop->rmap_src.ipv4; + return true; + } else if (nexthop->src.ipv4.s_addr != INADDR_ANY) { + src->ipv4 = nexthop->src.ipv4; + return true; + } + } else if (family == AF_INET6) { + if (!IN6_IS_ADDR_UNSPECIFIED(&nexthop->rmap_src.ipv6)) { + src->ipv6 = nexthop->rmap_src.ipv6; + return true; + } else if (!IN6_IS_ADDR_UNSPECIFIED(&nexthop->src.ipv6)) { + src->ipv6 = nexthop->src.ipv6; + return true; + } + } + + return false; +} + +/* + * The function returns true if the attribute could be added + * to the message, otherwise false is returned. + */ +static int netlink_route_nexthop_encap(struct nlmsghdr *n, size_t nlen, + struct nexthop *nh) +{ + struct rtattr *nest; + + switch (nh->nh_encap_type) { + case NET_VXLAN: + if (!nl_attr_put16(n, nlen, RTA_ENCAP_TYPE, nh->nh_encap_type)) + return false; + + nest = nl_attr_nest(n, nlen, RTA_ENCAP); + if (!nest) + return false; + + if (!nl_attr_put32(n, nlen, 0 /* VXLAN_VNI */, + nh->nh_encap.vni)) + return false; + nl_attr_nest_end(n, nest); + break; + } + + return true; +} + +/* + * Routing table change via netlink interface, using a dataplane context object + * + * Returns -1 on failure, 0 when the msg doesn't fit entirely in the buffer + * otherwise the number of bytes written to buf. 
+ */ +ssize_t netlink_route_multipath_msg_encode(int cmd, + struct zebra_dplane_ctx *ctx, + uint8_t *data, size_t datalen, + bool fpm, bool force_nhg) +{ + int bytelen; + struct nexthop *nexthop = NULL; + unsigned int nexthop_num; + const char *routedesc; + bool setsrc = false; + union g_addr src; + const struct prefix *p, *src_p; + uint32_t table_id; + struct nlsock *nl; + route_tag_t tag = 0; + + struct { + struct nlmsghdr n; + struct rtmsg r; + char buf[]; + } *req = (void *)data; + + p = dplane_ctx_get_dest(ctx); + src_p = dplane_ctx_get_src(ctx); + + if (datalen < sizeof(*req)) + return 0; + + nl = kernel_netlink_nlsock_lookup(dplane_ctx_get_ns_sock(ctx)); + + memset(req, 0, sizeof(*req)); + + bytelen = (p->family == AF_INET ? 4 : 16); + + req->n.nlmsg_len = NLMSG_LENGTH(sizeof(struct rtmsg)); + req->n.nlmsg_flags = NLM_F_CREATE | NLM_F_REQUEST; + + if ((cmd == RTM_NEWROUTE) && + ((p->family == AF_INET) || v6_rr_semantics)) + req->n.nlmsg_flags |= NLM_F_REPLACE; + + req->n.nlmsg_type = cmd; + + req->n.nlmsg_pid = nl->snl.nl_pid; + + req->r.rtm_family = p->family; + req->r.rtm_dst_len = p->prefixlen; + req->r.rtm_src_len = src_p ? src_p->prefixlen : 0; + req->r.rtm_scope = RT_SCOPE_UNIVERSE; + + if (cmd == RTM_DELROUTE) + req->r.rtm_protocol = zebra2proto(dplane_ctx_get_old_type(ctx)); + else + req->r.rtm_protocol = zebra2proto(dplane_ctx_get_type(ctx)); + + /* + * blackhole routes are not RTN_UNICAST, they are + * RTN_ BLACKHOLE|UNREACHABLE|PROHIBIT + * so setting this value as a RTN_UNICAST would + * cause the route lookup of just the prefix + * to fail. So no need to specify this for + * the RTM_DELROUTE case + */ + if (cmd != RTM_DELROUTE) + req->r.rtm_type = RTN_UNICAST; + + if (!nl_attr_put(&req->n, datalen, RTA_DST, &p->u.prefix, bytelen)) + return 0; + if (src_p) { + if (!nl_attr_put(&req->n, datalen, RTA_SRC, &src_p->u.prefix, + bytelen)) + return 0; + } + + /* Metric. */ + /* Hardcode the metric for all routes coming from zebra. 
Metric isn't + * used + * either by the kernel or by zebra. Its purely for calculating best + * path(s) + * by the routing protocol and for communicating with protocol peers. + */ + if (!nl_attr_put32(&req->n, datalen, RTA_PRIORITY, + NL_DEFAULT_ROUTE_METRIC)) + return 0; + +#if defined(SUPPORT_REALMS) + if (cmd == RTM_DELROUTE) + tag = dplane_ctx_get_old_tag(ctx); + else + tag = dplane_ctx_get_tag(ctx); +#endif + + /* Table corresponding to this route. */ + table_id = dplane_ctx_get_table(ctx); + if (table_id < 256) + req->r.rtm_table = table_id; + else { + req->r.rtm_table = RT_TABLE_UNSPEC; + if (!nl_attr_put32(&req->n, datalen, RTA_TABLE, table_id)) + return 0; + } + + if (IS_ZEBRA_DEBUG_KERNEL) + zlog_debug( + "%s: %s %pFX vrf %u(%u)", __func__, + nl_msg_type_to_str(cmd), p, dplane_ctx_get_vrf(ctx), + table_id); + + /* + * If we are not updating the route and we have received + * a route delete, then all we need to fill in is the + * prefix information to tell the kernel to schwack + * it. + */ + if (cmd == RTM_DELROUTE) { + if (!_netlink_set_tag(&req->n, datalen, tag)) + return 0; + return NLMSG_ALIGN(req->n.nlmsg_len); + } + + if (dplane_ctx_get_mtu(ctx) || dplane_ctx_get_nh_mtu(ctx)) { + struct rtattr *nest; + uint32_t mtu = dplane_ctx_get_mtu(ctx); + uint32_t nexthop_mtu = dplane_ctx_get_nh_mtu(ctx); + + if (!mtu || (nexthop_mtu && nexthop_mtu < mtu)) + mtu = nexthop_mtu; + + nest = nl_attr_nest(&req->n, datalen, RTA_METRICS); + if (nest == NULL) + return 0; + + if (!nl_attr_put(&req->n, datalen, RTAX_MTU, &mtu, sizeof(mtu))) + return 0; + nl_attr_nest_end(&req->n, nest); + } + + /* + * Always install blackhole routes without using nexthops, because of + * the following kernel problems: + * 1. Kernel nexthops don't suport unreachable/prohibit route types. + * 2. Blackhole kernel nexthops are deleted when loopback is down. 
+ */ + nexthop = dplane_ctx_get_ng(ctx)->nexthop; + if (nexthop) { + if (CHECK_FLAG(nexthop->flags, NEXTHOP_FLAG_RECURSIVE)) + nexthop = nexthop->resolved; + + if (nexthop->type == NEXTHOP_TYPE_BLACKHOLE) { + switch (nexthop->bh_type) { + case BLACKHOLE_ADMINPROHIB: + req->r.rtm_type = RTN_PROHIBIT; + break; + case BLACKHOLE_REJECT: + req->r.rtm_type = RTN_UNREACHABLE; + break; + default: + req->r.rtm_type = RTN_BLACKHOLE; + break; + } + return NLMSG_ALIGN(req->n.nlmsg_len); + } + } + + if ((!fpm && kernel_nexthops_supported() + && (!proto_nexthops_only() + || is_proto_nhg(dplane_ctx_get_nhe_id(ctx), 0))) + || (fpm && force_nhg)) { + /* Kernel supports nexthop objects */ + if (IS_ZEBRA_DEBUG_KERNEL) + zlog_debug("%s: %pFX nhg_id is %u", __func__, p, + dplane_ctx_get_nhe_id(ctx)); + + if (!nl_attr_put32(&req->n, datalen, RTA_NH_ID, + dplane_ctx_get_nhe_id(ctx))) + return 0; + + /* Have to determine src still */ + for (ALL_NEXTHOPS_PTR(dplane_ctx_get_ng(ctx), nexthop)) { + if (setsrc) + break; + + setsrc = nexthop_set_src(nexthop, p->family, &src); + } + + if (setsrc) { + if (p->family == AF_INET) { + if (!nl_attr_put(&req->n, datalen, RTA_PREFSRC, + &src.ipv4, bytelen)) + return 0; + } else if (p->family == AF_INET6) { + if (!nl_attr_put(&req->n, datalen, RTA_PREFSRC, + &src.ipv6, bytelen)) + return 0; + } + } + + return NLMSG_ALIGN(req->n.nlmsg_len); + } + + /* Count overall nexthops so we can decide whether to use singlepath + * or multipath case. + */ + nexthop_num = 0; + for (ALL_NEXTHOPS_PTR(dplane_ctx_get_ng(ctx), nexthop)) { + if (CHECK_FLAG(nexthop->flags, NEXTHOP_FLAG_RECURSIVE)) + continue; + if (!NEXTHOP_IS_ACTIVE(nexthop->flags)) + continue; + + nexthop_num++; + } + + /* Singlepath case. 
*/ + if (nexthop_num == 1) { + nexthop_num = 0; + for (ALL_NEXTHOPS_PTR(dplane_ctx_get_ng(ctx), nexthop)) { + if (CHECK_FLAG(nexthop->flags, + NEXTHOP_FLAG_RECURSIVE)) { + + if (setsrc) + continue; + + setsrc = nexthop_set_src(nexthop, p->family, + &src); + continue; + } + + if (NEXTHOP_IS_ACTIVE(nexthop->flags)) { + routedesc = nexthop->rparent + ? "recursive, single-path" + : "single-path"; + + if (!_netlink_set_tag(&req->n, datalen, tag)) + return 0; + + if (!_netlink_route_build_singlepath( + p, routedesc, bytelen, nexthop, + &req->n, &req->r, datalen, cmd)) + return 0; + nexthop_num++; + break; + } + + /* + * Add encapsulation information when installing via + * FPM. + */ + if (fpm) { + if (!netlink_route_nexthop_encap( + &req->n, datalen, nexthop)) + return 0; + } + } + + if (setsrc) { + if (p->family == AF_INET) { + if (!nl_attr_put(&req->n, datalen, RTA_PREFSRC, + &src.ipv4, bytelen)) + return 0; + } else if (p->family == AF_INET6) { + if (!nl_attr_put(&req->n, datalen, RTA_PREFSRC, + &src.ipv6, bytelen)) + return 0; + } + } + } else { /* Multipath case */ + struct rtattr *nest; + const union g_addr *src1 = NULL; + + nest = nl_attr_nest(&req->n, datalen, RTA_MULTIPATH); + if (nest == NULL) + return 0; + + nexthop_num = 0; + for (ALL_NEXTHOPS_PTR(dplane_ctx_get_ng(ctx), nexthop)) { + if (CHECK_FLAG(nexthop->flags, + NEXTHOP_FLAG_RECURSIVE)) { + /* This only works for IPv4 now */ + if (setsrc) + continue; + + setsrc = nexthop_set_src(nexthop, p->family, + &src); + continue; + } + + if (NEXTHOP_IS_ACTIVE(nexthop->flags)) { + routedesc = nexthop->rparent + ? 
"recursive, multipath" + : "multipath"; + nexthop_num++; + + if (!_netlink_route_build_multipath( + p, routedesc, bytelen, nexthop, + &req->n, datalen, &req->r, &src1, + tag)) + return 0; + + if (!setsrc && src1) { + if (p->family == AF_INET) + src.ipv4 = src1->ipv4; + else if (p->family == AF_INET6) + src.ipv6 = src1->ipv6; + + setsrc = 1; + } + } + } + + nl_attr_nest_end(&req->n, nest); + + /* + * Add encapsulation information when installing via + * FPM. + */ + if (fpm) { + for (ALL_NEXTHOPS_PTR(dplane_ctx_get_ng(ctx), + nexthop)) { + if (CHECK_FLAG(nexthop->flags, + NEXTHOP_FLAG_RECURSIVE)) + continue; + if (!netlink_route_nexthop_encap( + &req->n, datalen, nexthop)) + return 0; + } + } + + + if (setsrc) { + if (p->family == AF_INET) { + if (!nl_attr_put(&req->n, datalen, RTA_PREFSRC, + &src.ipv4, bytelen)) + return 0; + } else if (p->family == AF_INET6) { + if (!nl_attr_put(&req->n, datalen, RTA_PREFSRC, + &src.ipv6, bytelen)) + return 0; + } + if (IS_ZEBRA_DEBUG_KERNEL) + zlog_debug("Setting source"); + } + } + + /* If there is no useful nexthop then return. 
*/ + if (nexthop_num == 0) { + if (IS_ZEBRA_DEBUG_KERNEL) + zlog_debug("%s: No useful nexthop.", __func__); + } + + return NLMSG_ALIGN(req->n.nlmsg_len); +} + +int kernel_get_ipmr_sg_stats(struct zebra_vrf *zvrf, void *in) +{ + uint32_t actual_table; + int suc = 0; + struct mcast_route_data *mr = (struct mcast_route_data *)in; + struct { + struct nlmsghdr n; + struct rtmsg rtm; + char buf[256]; + } req; + + mroute = mr; + struct zebra_ns *zns; + + zns = zvrf->zns; + memset(&req, 0, sizeof(req)); + + req.n.nlmsg_len = NLMSG_LENGTH(sizeof(struct rtmsg)); + req.n.nlmsg_flags = NLM_F_REQUEST; + req.n.nlmsg_pid = zns->netlink_cmd.snl.nl_pid; + + req.n.nlmsg_type = RTM_GETROUTE; + + if (mroute->family == AF_INET) { + req.rtm.rtm_family = RTNL_FAMILY_IPMR; + req.rtm.rtm_dst_len = IPV4_MAX_BITLEN; + req.rtm.rtm_src_len = IPV4_MAX_BITLEN; + + nl_attr_put(&req.n, sizeof(req), RTA_SRC, + &mroute->src.ipaddr_v4, + sizeof(mroute->src.ipaddr_v4)); + nl_attr_put(&req.n, sizeof(req), RTA_DST, + &mroute->grp.ipaddr_v4, + sizeof(mroute->grp.ipaddr_v4)); + } else { + req.rtm.rtm_family = RTNL_FAMILY_IP6MR; + req.rtm.rtm_dst_len = IPV6_MAX_BITLEN; + req.rtm.rtm_src_len = IPV6_MAX_BITLEN; + + nl_attr_put(&req.n, sizeof(req), RTA_SRC, + &mroute->src.ipaddr_v6, + sizeof(mroute->src.ipaddr_v6)); + nl_attr_put(&req.n, sizeof(req), RTA_DST, + &mroute->grp.ipaddr_v6, + sizeof(mroute->grp.ipaddr_v6)); + } + + /* + * What? + * + * So during the namespace cleanup we started storing + * the zvrf table_id for the default table as RT_TABLE_MAIN + * which is what the normal routing table for ip routing is. + * This change caused this to break our lookups of sg data + * because prior to this change the zvrf->table_id was 0 + * and when the pim multicast kernel code saw a 0, + * it was auto-translated to RT_TABLE_DEFAULT. But since + * we are now passing in RT_TABLE_MAIN there is no auto-translation + * and the kernel goes screw you and the delicious cookies you + * are trying to give me. 
So now we have this little hack. + */ + if (mroute->family == AF_INET) + actual_table = (zvrf->table_id == RT_TABLE_MAIN) + ? RT_TABLE_DEFAULT + : zvrf->table_id; + else + actual_table = zvrf->table_id; + + nl_attr_put32(&req.n, sizeof(req), RTA_TABLE, actual_table); + + suc = netlink_talk(netlink_route_change_read_multicast, &req.n, + &zns->netlink_cmd, zns, false); + + mroute = NULL; + return suc; +} + +/* Char length to debug ID with */ +#define ID_LENGTH 10 + +static bool _netlink_nexthop_build_group(struct nlmsghdr *n, size_t req_size, + uint32_t id, + const struct nh_grp *z_grp, + const uint8_t count) +{ + struct nexthop_grp grp[count]; + /* Need space for max group size, "/", and null term */ + char buf[(MULTIPATH_NUM * (ID_LENGTH + 1)) + 1]; + char buf1[ID_LENGTH + 2]; + + buf[0] = '\0'; + + memset(grp, 0, sizeof(grp)); + + if (count) { + for (int i = 0; i < count; i++) { + grp[i].id = z_grp[i].id; + grp[i].weight = z_grp[i].weight - 1; + + if (IS_ZEBRA_DEBUG_KERNEL) { + if (i == 0) + snprintf(buf, sizeof(buf1), "group %u", + grp[i].id); + else { + snprintf(buf1, sizeof(buf1), "/%u", + grp[i].id); + strlcat(buf, buf1, sizeof(buf)); + } + } + } + if (!nl_attr_put(n, req_size, NHA_GROUP, grp, + count * sizeof(*grp))) + return false; + } + + if (IS_ZEBRA_DEBUG_KERNEL) + zlog_debug("%s: ID (%u): %s", __func__, id, buf); + + return true; +} + +/** + * Next hop packet encoding helper function. + * + * \param[in] cmd netlink command. + * \param[in] ctx dataplane context (information snapshot). + * \param[out] buf buffer to hold the packet. + * \param[in] buflen amount of buffer bytes. + * + * \returns -1 on failure, 0 when the msg doesn't fit entirely in the buffer + * otherwise the number of bytes written to buf. 
+ */ +ssize_t netlink_nexthop_msg_encode(uint16_t cmd, + const struct zebra_dplane_ctx *ctx, + void *buf, size_t buflen) +{ + struct { + struct nlmsghdr n; + struct nhmsg nhm; + char buf[]; + } *req = buf; + + mpls_lse_t out_lse[MPLS_MAX_LABELS]; + char label_buf[256]; + int num_labels = 0; + uint32_t id = dplane_ctx_get_nhe_id(ctx); + int type = dplane_ctx_get_nhe_type(ctx); + struct rtattr *nest; + uint16_t encap; + struct nlsock *nl = + kernel_netlink_nlsock_lookup(dplane_ctx_get_ns_sock(ctx)); + + if (!id) { + flog_err( + EC_ZEBRA_NHG_FIB_UPDATE, + "Failed trying to update a nexthop group in the kernel that does not have an ID"); + return -1; + } + + /* + * Nothing to do if the kernel doesn't support nexthop objects or + * we dont want to install this type of NHG + */ + if (!kernel_nexthops_supported()) { + if (IS_ZEBRA_DEBUG_KERNEL || IS_ZEBRA_DEBUG_NHG) + zlog_debug( + "%s: nhg_id %u (%s): kernel nexthops not supported, ignoring", + __func__, id, zebra_route_string(type)); + return 0; + } + + if (proto_nexthops_only() && !is_proto_nhg(id, type)) { + if (IS_ZEBRA_DEBUG_KERNEL || IS_ZEBRA_DEBUG_NHG) + zlog_debug( + "%s: nhg_id %u (%s): proto-based nexthops only, ignoring", + __func__, id, zebra_route_string(type)); + return 0; + } + + label_buf[0] = '\0'; + + if (buflen < sizeof(*req)) + return 0; + + memset(req, 0, sizeof(*req)); + + req->n.nlmsg_len = NLMSG_LENGTH(sizeof(struct nhmsg)); + req->n.nlmsg_flags = NLM_F_CREATE | NLM_F_REQUEST; + + if (cmd == RTM_NEWNEXTHOP) + req->n.nlmsg_flags |= NLM_F_REPLACE; + + req->n.nlmsg_type = cmd; + req->n.nlmsg_pid = nl->snl.nl_pid; + + req->nhm.nh_family = AF_UNSPEC; + /* TODO: Scope? */ + + if (!nl_attr_put32(&req->n, buflen, NHA_ID, id)) + return 0; + + if (cmd == RTM_NEWNEXTHOP) { + /* + * We distinguish between a "group", which is a collection + * of ids, and a singleton nexthop with an id. The + * group is installed as an id that just refers to a list of + * other ids. 
+ */ + if (dplane_ctx_get_nhe_nh_grp_count(ctx)) { + if (!_netlink_nexthop_build_group( + &req->n, buflen, id, + dplane_ctx_get_nhe_nh_grp(ctx), + dplane_ctx_get_nhe_nh_grp_count(ctx))) + return 0; + } else { + const struct nexthop *nh = + dplane_ctx_get_nhe_ng(ctx)->nexthop; + afi_t afi = dplane_ctx_get_nhe_afi(ctx); + + if (afi == AFI_IP) + req->nhm.nh_family = AF_INET; + else if (afi == AFI_IP6) + req->nhm.nh_family = AF_INET6; + + switch (nh->type) { + case NEXTHOP_TYPE_IPV4: + case NEXTHOP_TYPE_IPV4_IFINDEX: + if (!nl_attr_put(&req->n, buflen, NHA_GATEWAY, + &nh->gate.ipv4, + IPV4_MAX_BYTELEN)) + return 0; + break; + case NEXTHOP_TYPE_IPV6: + case NEXTHOP_TYPE_IPV6_IFINDEX: + if (!nl_attr_put(&req->n, buflen, NHA_GATEWAY, + &nh->gate.ipv6, + IPV6_MAX_BYTELEN)) + return 0; + break; + case NEXTHOP_TYPE_BLACKHOLE: + if (!nl_attr_put(&req->n, buflen, NHA_BLACKHOLE, + NULL, 0)) + return 0; + /* Blackhole shouldn't have anymore attributes + */ + goto nexthop_done; + case NEXTHOP_TYPE_IFINDEX: + /* Don't need anymore info for this */ + break; + } + + if (!nh->ifindex) { + flog_err( + EC_ZEBRA_NHG_FIB_UPDATE, + "Context received for kernel nexthop update without an interface"); + return -1; + } + + if (!nl_attr_put32(&req->n, buflen, NHA_OIF, + nh->ifindex)) + return 0; + + if (CHECK_FLAG(nh->flags, NEXTHOP_FLAG_ONLINK)) + req->nhm.nh_flags |= RTNH_F_ONLINK; + + num_labels = + build_label_stack(nh->nh_label, out_lse, + label_buf, sizeof(label_buf)); + + if (num_labels) { + /* Set the BoS bit */ + out_lse[num_labels - 1] |= + htonl(1 << MPLS_LS_S_SHIFT); + + /* + * TODO: MPLS unsupported for now in kernel. 
+ */ + if (req->nhm.nh_family == AF_MPLS) + goto nexthop_done; + + encap = LWTUNNEL_ENCAP_MPLS; + if (!nl_attr_put16(&req->n, buflen, + NHA_ENCAP_TYPE, encap)) + return 0; + nest = nl_attr_nest(&req->n, buflen, NHA_ENCAP); + if (!nest) + return 0; + if (!nl_attr_put( + &req->n, buflen, MPLS_IPTUNNEL_DST, + &out_lse, + num_labels * sizeof(mpls_lse_t))) + return 0; + + nl_attr_nest_end(&req->n, nest); + } + + if (nh->nh_srv6) { + if (nh->nh_srv6->seg6local_action != + ZEBRA_SEG6_LOCAL_ACTION_UNSPEC) { + uint32_t action; + uint16_t encap; + struct rtattr *nest; + const struct seg6local_context *ctx; + + req->nhm.nh_family = AF_INET6; + action = nh->nh_srv6->seg6local_action; + ctx = &nh->nh_srv6->seg6local_ctx; + encap = LWTUNNEL_ENCAP_SEG6_LOCAL; + if (!nl_attr_put(&req->n, buflen, + NHA_ENCAP_TYPE, + &encap, + sizeof(uint16_t))) + return 0; + + nest = nl_attr_nest(&req->n, buflen, + NHA_ENCAP | NLA_F_NESTED); + if (!nest) + return 0; + + switch (action) { + case SEG6_LOCAL_ACTION_END: + if (!nl_attr_put32( + &req->n, buflen, + SEG6_LOCAL_ACTION, + SEG6_LOCAL_ACTION_END)) + return 0; + break; + case SEG6_LOCAL_ACTION_END_X: + if (!nl_attr_put32( + &req->n, buflen, + SEG6_LOCAL_ACTION, + SEG6_LOCAL_ACTION_END_X)) + return 0; + if (!nl_attr_put( + &req->n, buflen, + SEG6_LOCAL_NH6, &ctx->nh6, + sizeof(struct in6_addr))) + return 0; + break; + case SEG6_LOCAL_ACTION_END_T: + if (!nl_attr_put32( + &req->n, buflen, + SEG6_LOCAL_ACTION, + SEG6_LOCAL_ACTION_END_T)) + return 0; + if (!nl_attr_put32( + &req->n, buflen, + SEG6_LOCAL_TABLE, + ctx->table)) + return 0; + break; + case SEG6_LOCAL_ACTION_END_DX4: + if (!nl_attr_put32( + &req->n, buflen, + SEG6_LOCAL_ACTION, + SEG6_LOCAL_ACTION_END_DX4)) + return 0; + if (!nl_attr_put( + &req->n, buflen, + SEG6_LOCAL_NH4, &ctx->nh4, + sizeof(struct in_addr))) + return 0; + break; + case SEG6_LOCAL_ACTION_END_DT6: + if (!nl_attr_put32( + &req->n, buflen, + SEG6_LOCAL_ACTION, + SEG6_LOCAL_ACTION_END_DT6)) + return 0; + if 
(!nl_attr_put32( + &req->n, buflen, + SEG6_LOCAL_TABLE, + ctx->table)) + return 0; + break; + case SEG6_LOCAL_ACTION_END_DT4: + if (!nl_attr_put32( + &req->n, buflen, + SEG6_LOCAL_ACTION, + SEG6_LOCAL_ACTION_END_DT4)) + return 0; + if (!nl_attr_put32( + &req->n, buflen, + SEG6_LOCAL_VRFTABLE, + ctx->table)) + return 0; + break; + default: + zlog_err("%s: unsupport seg6local behaviour action=%u", + __func__, action); + return 0; + } + nl_attr_nest_end(&req->n, nest); + } + + if (!sid_zero(&nh->nh_srv6->seg6_segs)) { + char tun_buf[4096]; + ssize_t tun_len; + struct rtattr *nest; + + if (!nl_attr_put16(&req->n, buflen, + NHA_ENCAP_TYPE, + LWTUNNEL_ENCAP_SEG6)) + return 0; + nest = nl_attr_nest(&req->n, buflen, + NHA_ENCAP | NLA_F_NESTED); + if (!nest) + return 0; + tun_len = fill_seg6ipt_encap(tun_buf, + sizeof(tun_buf), + &nh->nh_srv6->seg6_segs); + if (tun_len < 0) + return 0; + if (!nl_attr_put(&req->n, buflen, + SEG6_IPTUNNEL_SRH, + tun_buf, tun_len)) + return 0; + nl_attr_nest_end(&req->n, nest); + } + } + +nexthop_done: + + if (IS_ZEBRA_DEBUG_KERNEL) + zlog_debug("%s: ID (%u): %pNHv(%d) vrf %s(%u) %s ", + __func__, id, nh, nh->ifindex, + vrf_id_to_name(nh->vrf_id), + nh->vrf_id, label_buf); + } + + req->nhm.nh_protocol = zebra2proto(type); + + } else if (cmd != RTM_DELNEXTHOP) { + flog_err( + EC_ZEBRA_NHG_FIB_UPDATE, + "Nexthop group kernel update command (%d) does not exist", + cmd); + return -1; + } + + if (IS_ZEBRA_DEBUG_KERNEL) + zlog_debug("%s: %s, id=%u", __func__, nl_msg_type_to_str(cmd), + id); + + return NLMSG_ALIGN(req->n.nlmsg_len); +} + +static ssize_t netlink_nexthop_msg_encoder(struct zebra_dplane_ctx *ctx, + void *buf, size_t buflen) +{ + enum dplane_op_e op; + int cmd = 0; + + op = dplane_ctx_get_op(ctx); + if (op == DPLANE_OP_NH_INSTALL || op == DPLANE_OP_NH_UPDATE) + cmd = RTM_NEWNEXTHOP; + else if (op == DPLANE_OP_NH_DELETE) + cmd = RTM_DELNEXTHOP; + else { + flog_err(EC_ZEBRA_NHG_FIB_UPDATE, + "Context received for kernel nexthop update with 
incorrect OP code (%u)", + op); + return -1; + } + + return netlink_nexthop_msg_encode(cmd, ctx, buf, buflen); +} + +enum netlink_msg_status +netlink_put_nexthop_update_msg(struct nl_batch *bth, + struct zebra_dplane_ctx *ctx) +{ + /* Nothing to do if the kernel doesn't support nexthop objects */ + if (!kernel_nexthops_supported()) + return FRR_NETLINK_SUCCESS; + + return netlink_batch_add_msg(bth, ctx, netlink_nexthop_msg_encoder, + false); +} + +static ssize_t netlink_newroute_msg_encoder(struct zebra_dplane_ctx *ctx, + void *buf, size_t buflen) +{ + return netlink_route_multipath_msg_encode(RTM_NEWROUTE, ctx, buf, + buflen, false, false); +} + +static ssize_t netlink_delroute_msg_encoder(struct zebra_dplane_ctx *ctx, + void *buf, size_t buflen) +{ + return netlink_route_multipath_msg_encode(RTM_DELROUTE, ctx, buf, + buflen, false, false); +} + +enum netlink_msg_status +netlink_put_route_update_msg(struct nl_batch *bth, struct zebra_dplane_ctx *ctx) +{ + int cmd; + const struct prefix *p = dplane_ctx_get_dest(ctx); + + if (dplane_ctx_get_op(ctx) == DPLANE_OP_ROUTE_DELETE) { + cmd = RTM_DELROUTE; + } else if (dplane_ctx_get_op(ctx) == DPLANE_OP_ROUTE_INSTALL) { + cmd = RTM_NEWROUTE; + } else if (dplane_ctx_get_op(ctx) == DPLANE_OP_ROUTE_UPDATE) { + + if (p->family == AF_INET || v6_rr_semantics) { + /* Single 'replace' operation */ + + /* + * With route replace semantics in place + * for v4 routes and the new route is a system + * route we do not install anything. + * The problem here is that the new system + * route should cause us to withdraw from + * the kernel the old non-system route + */ + if (RSYSTEM_ROUTE(dplane_ctx_get_type(ctx)) + && !RSYSTEM_ROUTE(dplane_ctx_get_old_type(ctx))) + return netlink_batch_add_msg( + bth, ctx, netlink_delroute_msg_encoder, + true); + } else { + /* + * So v6 route replace semantics are not in + * the kernel at this point as I understand it. + * so let's do a delete then an add. 
+ * In the future once v6 route replace semantics + * are in we can figure out what to do here to + * allow working with old and new kernels. + * + * I'm also intentionally ignoring the failure case + * of the route delete. If that happens yeah we're + * screwed. + */ + if (!RSYSTEM_ROUTE(dplane_ctx_get_old_type(ctx))) + netlink_batch_add_msg( + bth, ctx, netlink_delroute_msg_encoder, + true); + } + + cmd = RTM_NEWROUTE; + } else + return FRR_NETLINK_ERROR; + + if (RSYSTEM_ROUTE(dplane_ctx_get_type(ctx))) + return FRR_NETLINK_SUCCESS; + + return netlink_batch_add_msg(bth, ctx, + cmd == RTM_NEWROUTE + ? netlink_newroute_msg_encoder + : netlink_delroute_msg_encoder, + false); +} + +/** + * netlink_nexthop_process_nh() - Parse the gatway/if info from a new nexthop + * + * @tb: Netlink RTA data + * @family: Address family in the nhmsg + * @ifp: Interface connected - this should be NULL, we fill it in + * @ns_id: Namspace id + * + * Return: New nexthop + */ +static struct nexthop netlink_nexthop_process_nh(struct rtattr **tb, + unsigned char family, + struct interface **ifp, + ns_id_t ns_id) +{ + struct nexthop nh = {}; + void *gate = NULL; + enum nexthop_types_t type = 0; + int if_index = 0; + size_t sz = 0; + struct interface *ifp_lookup; + + if_index = *(int *)RTA_DATA(tb[NHA_OIF]); + + + if (tb[NHA_GATEWAY]) { + switch (family) { + case AF_INET: + type = NEXTHOP_TYPE_IPV4_IFINDEX; + sz = 4; + break; + case AF_INET6: + type = NEXTHOP_TYPE_IPV6_IFINDEX; + sz = 16; + break; + default: + flog_warn( + EC_ZEBRA_BAD_NHG_MESSAGE, + "Nexthop gateway with bad address family (%d) received from kernel", + family); + return nh; + } + gate = RTA_DATA(tb[NHA_GATEWAY]); + } else + type = NEXTHOP_TYPE_IFINDEX; + + if (type) + nh.type = type; + + if (gate) + memcpy(&(nh.gate), gate, sz); + + if (if_index) + nh.ifindex = if_index; + + ifp_lookup = + if_lookup_by_index_per_ns(zebra_ns_lookup(ns_id), nh.ifindex); + + if (ifp) + *ifp = ifp_lookup; + if (ifp_lookup) + nh.vrf_id = 
ifp_lookup->vrf->vrf_id; + else { + flog_warn( + EC_ZEBRA_UNKNOWN_INTERFACE, + "%s: Unknown nexthop interface %u received, defaulting to VRF_DEFAULT", + __func__, nh.ifindex); + + nh.vrf_id = VRF_DEFAULT; + } + + if (tb[NHA_ENCAP] && tb[NHA_ENCAP_TYPE]) { + uint16_t encap_type = *(uint16_t *)RTA_DATA(tb[NHA_ENCAP_TYPE]); + int num_labels = 0; + + mpls_label_t labels[MPLS_MAX_LABELS] = {0}; + + if (encap_type == LWTUNNEL_ENCAP_MPLS) + num_labels = parse_encap_mpls(tb[NHA_ENCAP], labels); + + if (num_labels) + nexthop_add_labels(&nh, ZEBRA_LSP_STATIC, num_labels, + labels); + } + + return nh; +} + +static int netlink_nexthop_process_group(struct rtattr **tb, + struct nh_grp *z_grp, int z_grp_size) +{ + uint8_t count = 0; + /* linux/nexthop.h group struct */ + struct nexthop_grp *n_grp = NULL; + + n_grp = (struct nexthop_grp *)RTA_DATA(tb[NHA_GROUP]); + count = (RTA_PAYLOAD(tb[NHA_GROUP]) / sizeof(*n_grp)); + + if (!count || (count * sizeof(*n_grp)) != RTA_PAYLOAD(tb[NHA_GROUP])) { + flog_warn(EC_ZEBRA_BAD_NHG_MESSAGE, + "Invalid nexthop group received from the kernel"); + return count; + } + + for (int i = 0; ((i < count) && (i < z_grp_size)); i++) { + z_grp[i].id = n_grp[i].id; + z_grp[i].weight = n_grp[i].weight + 1; + } + return count; +} + +/** + * netlink_nexthop_change() - Read in change about nexthops from the kernel + * + * @h: Netlink message header + * @ns_id: Namspace id + * @startup: Are we reading under startup conditions? 
+ * + * Return: Result status + */ +int netlink_nexthop_change(struct nlmsghdr *h, ns_id_t ns_id, int startup) +{ + int len; + /* nexthop group id */ + uint32_t id; + unsigned char family; + int type; + afi_t afi = AFI_UNSPEC; + vrf_id_t vrf_id = VRF_DEFAULT; + struct interface *ifp = NULL; + struct nhmsg *nhm = NULL; + struct nexthop nh = {}; + struct nh_grp grp[MULTIPATH_NUM] = {}; + /* Count of nexthops in group array */ + uint8_t grp_count = 0; + struct rtattr *tb[NHA_MAX + 1] = {}; + + frrtrace(3, frr_zebra, netlink_nexthop_change, h, ns_id, startup); + + nhm = NLMSG_DATA(h); + + if (ns_id) + vrf_id = ns_id; + + if (startup && h->nlmsg_type != RTM_NEWNEXTHOP) + return 0; + + len = h->nlmsg_len - NLMSG_LENGTH(sizeof(struct nhmsg)); + if (len < 0) { + zlog_warn( + "%s: Message received from netlink is of a broken size %d %zu", + __func__, h->nlmsg_len, + (size_t)NLMSG_LENGTH(sizeof(struct nhmsg))); + return -1; + } + + netlink_parse_rtattr_flags(tb, NHA_MAX, RTM_NHA(nhm), len, + NLA_F_NESTED); + + + if (!tb[NHA_ID]) { + flog_warn( + EC_ZEBRA_BAD_NHG_MESSAGE, + "Nexthop group without an ID received from the kernel"); + return -1; + } + + /* We use the ID key'd nhg table for kernel updates */ + id = *((uint32_t *)RTA_DATA(tb[NHA_ID])); + + if (zebra_evpn_mh_is_fdb_nh(id)) { + /* If this is a L2 NH just ignore it */ + if (IS_ZEBRA_DEBUG_KERNEL || IS_ZEBRA_DEBUG_EVPN_MH_NH) { + zlog_debug("Ignore kernel update (%u) for fdb-nh 0x%x", + h->nlmsg_type, id); + } + return 0; + } + + family = nhm->nh_family; + afi = family2afi(family); + + type = proto2zebra(nhm->nh_protocol, 0, true); + + if (IS_ZEBRA_DEBUG_KERNEL) + zlog_debug("%s ID (%u) %s NS %u", + nl_msg_type_to_str(h->nlmsg_type), id, + nl_family_to_str(family), ns_id); + + + if (h->nlmsg_type == RTM_NEWNEXTHOP) { + if (tb[NHA_GROUP]) { + /** + * If this is a group message its only going to have + * an array of nexthop IDs associated with it + */ + grp_count = netlink_nexthop_process_group( + tb, grp, 
array_size(grp)); + } else { + if (tb[NHA_BLACKHOLE]) { + /** + * This nexthop is just for blackhole-ing + * traffic, it should not have an OIF, GATEWAY, + * or ENCAP + */ + nh.type = NEXTHOP_TYPE_BLACKHOLE; + nh.bh_type = BLACKHOLE_UNSPEC; + } else if (tb[NHA_OIF]) + /** + * This is a true new nexthop, so we need + * to parse the gateway and device info + */ + nh = netlink_nexthop_process_nh(tb, family, + &ifp, ns_id); + else { + + flog_warn( + EC_ZEBRA_BAD_NHG_MESSAGE, + "Invalid Nexthop message received from the kernel with ID (%u)", + id); + return -1; + } + SET_FLAG(nh.flags, NEXTHOP_FLAG_ACTIVE); + if (nhm->nh_flags & RTNH_F_ONLINK) + SET_FLAG(nh.flags, NEXTHOP_FLAG_ONLINK); + vrf_id = nh.vrf_id; + } + + if (zebra_nhg_kernel_find(id, &nh, grp, grp_count, vrf_id, afi, + type, startup)) + return -1; + + } else if (h->nlmsg_type == RTM_DELNEXTHOP) + zebra_nhg_kernel_del(id, vrf_id); + + return 0; +} + +/** + * netlink_request_nexthop() - Request nextop information from the kernel + * @zns: Zebra namespace + * @family: AF_* netlink family + * @type: RTM_* route type + * + * Return: Result status + */ +static int netlink_request_nexthop(struct zebra_ns *zns, int family, int type) +{ + struct { + struct nlmsghdr n; + struct nhmsg nhm; + } req; + + /* Form the request, specifying filter (rtattr) if needed. */ + memset(&req, 0, sizeof(req)); + req.n.nlmsg_type = type; + req.n.nlmsg_flags = NLM_F_ROOT | NLM_F_MATCH | NLM_F_REQUEST; + req.n.nlmsg_len = NLMSG_LENGTH(sizeof(struct nhmsg)); + req.nhm.nh_family = family; + + return netlink_request(&zns->netlink_cmd, &req); +} + + +/** + * netlink_nexthop_read() - Nexthop read function using netlink interface + * + * @zns: Zebra name space + * + * Return: Result status + * Only called at bootstrap time. 
+ */ +int netlink_nexthop_read(struct zebra_ns *zns) +{ + int ret; + struct zebra_dplane_info dp_info; + + zebra_dplane_info_from_zns(&dp_info, zns, true /*is_cmd*/); + + /* Get nexthop objects */ + ret = netlink_request_nexthop(zns, AF_UNSPEC, RTM_GETNEXTHOP); + if (ret < 0) + return ret; + ret = netlink_parse_info(netlink_nexthop_change, &zns->netlink_cmd, + &dp_info, 0, true); + + if (!ret) + /* If we succesfully read in nexthop objects, + * this kernel must support them. + */ + supports_nh = true; + if (IS_ZEBRA_DEBUG_KERNEL || IS_ZEBRA_DEBUG_NHG) + zlog_debug("Nexthop objects %ssupported on this kernel", + supports_nh ? "" : "not "); + + zebra_router_set_supports_nhgs(supports_nh); + + return ret; +} + + +int kernel_neigh_update(int add, int ifindex, void *addr, char *lla, int llalen, + ns_id_t ns_id, uint8_t family, bool permanent) +{ + return netlink_neigh_update(add ? RTM_NEWNEIGH : RTM_DELNEIGH, ifindex, + addr, lla, llalen, ns_id, family, permanent, + RTPROT_ZEBRA); +} + +/** + * netlink_neigh_update_msg_encode() - Common helper api for encoding + * evpn neighbor update as netlink messages using dataplane context object. + * Here, a neighbor refers to a bridge forwarding database entry for + * either unicast forwarding or head-end replication or an IP neighbor + * entry. + * @ctx: Dataplane context + * @cmd: Netlink command (RTM_NEWNEIGH or RTM_DELNEIGH) + * @lla: A pointer to neighbor cache link layer address + * @llalen: Length of the pointer to neighbor cache link layer + * address + * @ip: A neighbor cache n/w layer destination address + * In the case of bridge FDB, this represnts the remote + * VTEP IP. 
+ * @replace_obj: Whether NEW request should replace existing object or + * add to the end of the list + * @family: AF_* netlink family + * @type: RTN_* route type + * @flags: NTF_* flags + * @state: NUD_* states + * @data: data buffer pointer + * @datalen: total amount of data buffer space + * @protocol: protocol information + * + * Return: 0 when the msg doesn't fit entirely in the buffer + * otherwise the number of bytes written to buf. + */ +static ssize_t netlink_neigh_update_msg_encode( + const struct zebra_dplane_ctx *ctx, int cmd, const void *lla, + int llalen, const struct ipaddr *ip, bool replace_obj, uint8_t family, + uint8_t type, uint8_t flags, uint16_t state, uint32_t nhg_id, bool nfy, + uint8_t nfy_flags, bool ext, uint32_t ext_flags, void *data, + size_t datalen, uint8_t protocol) +{ + struct { + struct nlmsghdr n; + struct ndmsg ndm; + char buf[]; + } *req = data; + int ipa_len; + enum dplane_op_e op; + + if (datalen < sizeof(*req)) + return 0; + memset(req, 0, sizeof(*req)); + + op = dplane_ctx_get_op(ctx); + + req->n.nlmsg_len = NLMSG_LENGTH(sizeof(struct ndmsg)); + req->n.nlmsg_flags = NLM_F_REQUEST; + if (cmd == RTM_NEWNEIGH) + req->n.nlmsg_flags |= + NLM_F_CREATE + | (replace_obj ? 
NLM_F_REPLACE : NLM_F_APPEND); + req->n.nlmsg_type = cmd; + req->ndm.ndm_family = family; + req->ndm.ndm_type = type; + req->ndm.ndm_state = state; + req->ndm.ndm_flags = flags; + req->ndm.ndm_ifindex = dplane_ctx_get_ifindex(ctx); + + if (!nl_attr_put(&req->n, datalen, NDA_PROTOCOL, &protocol, + sizeof(protocol))) + return 0; + + if (lla) { + if (!nl_attr_put(&req->n, datalen, NDA_LLADDR, lla, llalen)) + return 0; + } + + if (nfy) { + struct rtattr *nest; + + nest = nl_attr_nest(&req->n, datalen, + NDA_FDB_EXT_ATTRS | NLA_F_NESTED); + if (!nest) + return 0; + + if (!nl_attr_put(&req->n, datalen, NFEA_ACTIVITY_NOTIFY, + &nfy_flags, sizeof(nfy_flags))) + return 0; + if (!nl_attr_put(&req->n, datalen, NFEA_DONT_REFRESH, NULL, 0)) + return 0; + + nl_attr_nest_end(&req->n, nest); + } + + + if (ext) { + if (!nl_attr_put(&req->n, datalen, NDA_EXT_FLAGS, &ext_flags, + sizeof(ext_flags))) + return 0; + } + + if (nhg_id) { + if (!nl_attr_put32(&req->n, datalen, NDA_NH_ID, nhg_id)) + return 0; + } else { + ipa_len = + IS_IPADDR_V4(ip) ? IPV4_MAX_BYTELEN : IPV6_MAX_BYTELEN; + if (!nl_attr_put(&req->n, datalen, NDA_DST, &ip->ip.addr, + ipa_len)) + return 0; + } + + if (op == DPLANE_OP_MAC_INSTALL || op == DPLANE_OP_MAC_DELETE) { + vlanid_t vid = dplane_ctx_mac_get_vlan(ctx); + + if (vid > 0) { + if (!nl_attr_put16(&req->n, datalen, NDA_VLAN, vid)) + return 0; + } + + if (!nl_attr_put32(&req->n, datalen, NDA_MASTER, + dplane_ctx_mac_get_br_ifindex(ctx))) + return 0; + } + + return NLMSG_ALIGN(req->n.nlmsg_len); +} + +/* + * Add remote VTEP to the flood list for this VxLAN interface (VNI). This + * is done by adding an FDB entry with a MAC of 00:00:00:00:00:00. 
+ */ +static ssize_t +netlink_vxlan_flood_update_ctx(const struct zebra_dplane_ctx *ctx, int cmd, + void *buf, size_t buflen) +{ + struct ethaddr dst_mac = {.octet = {0}}; + int proto = RTPROT_ZEBRA; + + if (dplane_ctx_get_type(ctx) != 0) + proto = zebra2proto(dplane_ctx_get_type(ctx)); + + return netlink_neigh_update_msg_encode( + ctx, cmd, (const void *)&dst_mac, ETH_ALEN, + dplane_ctx_neigh_get_ipaddr(ctx), false, PF_BRIDGE, 0, NTF_SELF, + (NUD_NOARP | NUD_PERMANENT), 0 /*nhg*/, false /*nfy*/, + 0 /*nfy_flags*/, false /*ext*/, 0 /*ext_flags*/, buf, buflen, + proto); +} + +#ifndef NDA_RTA +#define NDA_RTA(r) \ + ((struct rtattr *)(((char *)(r)) + NLMSG_ALIGN(sizeof(struct ndmsg)))) +#endif + +static int netlink_macfdb_change(struct nlmsghdr *h, int len, ns_id_t ns_id) +{ + struct ndmsg *ndm; + struct interface *ifp; + struct zebra_if *zif; + struct rtattr *tb[NDA_MAX + 1]; + struct interface *br_if; + struct ethaddr mac; + vlanid_t vid = 0; + struct in_addr vtep_ip; + int vid_present = 0, dst_present = 0; + char vid_buf[20]; + char dst_buf[30]; + bool sticky; + bool local_inactive = false; + bool dp_static = false; + uint32_t nhg_id = 0; + + ndm = NLMSG_DATA(h); + + /* We only process macfdb notifications if EVPN is enabled */ + if (!is_evpn_enabled()) + return 0; + + /* Parse attributes and extract fields of interest. Do basic + * validation of the fields. 
+ */ + netlink_parse_rtattr_flags(tb, NDA_MAX, NDA_RTA(ndm), len, + NLA_F_NESTED); + + if (!tb[NDA_LLADDR]) { + if (IS_ZEBRA_DEBUG_KERNEL) + zlog_debug("%s AF_BRIDGE IF %u - no LLADDR", + nl_msg_type_to_str(h->nlmsg_type), + ndm->ndm_ifindex); + return 0; + } + + if (RTA_PAYLOAD(tb[NDA_LLADDR]) != ETH_ALEN) { + if (IS_ZEBRA_DEBUG_KERNEL) + zlog_debug( + "%s AF_BRIDGE IF %u - LLADDR is not MAC, len %lu", + nl_msg_type_to_str(h->nlmsg_type), ndm->ndm_ifindex, + (unsigned long)RTA_PAYLOAD(tb[NDA_LLADDR])); + return 0; + } + + memcpy(&mac, RTA_DATA(tb[NDA_LLADDR]), ETH_ALEN); + + if (tb[NDA_VLAN]) { + vid_present = 1; + vid = *(uint16_t *)RTA_DATA(tb[NDA_VLAN]); + snprintf(vid_buf, sizeof(vid_buf), " VLAN %u", vid); + } + + if (tb[NDA_DST]) { + /* TODO: Only IPv4 supported now. */ + dst_present = 1; + memcpy(&vtep_ip.s_addr, RTA_DATA(tb[NDA_DST]), + IPV4_MAX_BYTELEN); + snprintfrr(dst_buf, sizeof(dst_buf), " dst %pI4", + &vtep_ip); + } + + if (tb[NDA_NH_ID]) + nhg_id = *(uint32_t *)RTA_DATA(tb[NDA_NH_ID]); + + if (ndm->ndm_state & NUD_STALE) + local_inactive = true; + + if (tb[NDA_FDB_EXT_ATTRS]) { + struct rtattr *attr = tb[NDA_FDB_EXT_ATTRS]; + struct rtattr *nfea_tb[NFEA_MAX + 1] = {0}; + + netlink_parse_rtattr_nested(nfea_tb, NFEA_MAX, attr); + if (nfea_tb[NFEA_ACTIVITY_NOTIFY]) { + uint8_t nfy_flags; + + nfy_flags = *(uint8_t *)RTA_DATA( + nfea_tb[NFEA_ACTIVITY_NOTIFY]); + if (nfy_flags & FDB_NOTIFY_BIT) + dp_static = true; + if (nfy_flags & FDB_NOTIFY_INACTIVE_BIT) + local_inactive = true; + } + } + + if (IS_ZEBRA_DEBUG_KERNEL) + zlog_debug("Rx %s AF_BRIDGE IF %u%s st 0x%x fl 0x%x MAC %pEA%s nhg %d", + nl_msg_type_to_str(h->nlmsg_type), + ndm->ndm_ifindex, vid_present ? vid_buf : "", + ndm->ndm_state, ndm->ndm_flags, &mac, + dst_present ? dst_buf : "", nhg_id); + + /* The interface should exist. 
*/ + ifp = if_lookup_by_index_per_ns(zebra_ns_lookup(ns_id), + ndm->ndm_ifindex); + if (!ifp || !ifp->info) + return 0; + + /* The interface should be something we're interested in. */ + if (!IS_ZEBRA_IF_BRIDGE_SLAVE(ifp)) + return 0; + + zif = (struct zebra_if *)ifp->info; + if ((br_if = zif->brslave_info.br_if) == NULL) { + if (IS_ZEBRA_DEBUG_KERNEL) + zlog_debug( + "%s AF_BRIDGE IF %s(%u) brIF %u - no bridge master", + nl_msg_type_to_str(h->nlmsg_type), ifp->name, + ndm->ndm_ifindex, + zif->brslave_info.bridge_ifindex); + return 0; + } + + sticky = !!(ndm->ndm_flags & NTF_STICKY); + + if (filter_vlan && vid != filter_vlan) { + if (IS_ZEBRA_DEBUG_KERNEL) + zlog_debug(" Filtered due to filter vlan: %d", + filter_vlan); + return 0; + } + + /* If add or update, do accordingly if learnt on a "local" interface; if + * the notification is over VxLAN, this has to be related to + * multi-homing, + * so perform an implicit delete of any local entry (if it exists). + */ + if (h->nlmsg_type == RTM_NEWNEIGH) { + /* Drop "permanent" entries. */ + if (ndm->ndm_state & NUD_PERMANENT) { + if (IS_ZEBRA_DEBUG_KERNEL) + zlog_debug( + " Dropping entry because of NUD_PERMANENT"); + return 0; + } + + if (IS_ZEBRA_IF_VXLAN(ifp)) + return zebra_vxlan_dp_network_mac_add( + ifp, br_if, &mac, vid, nhg_id, sticky, + !!(ndm->ndm_flags & NTF_EXT_LEARNED)); + + return zebra_vxlan_local_mac_add_update(ifp, br_if, &mac, vid, + sticky, local_inactive, dp_static); + } + + /* This is a delete notification. + * Ignore the notification with IP dest as it may just signify that the + * MAC has moved from remote to local. The exception is the special + * all-zeros MAC that represents the BUM flooding entry; we may have + * to readd it. Otherwise, + * 1. For a MAC over VxLan, check if it needs to be refreshed(readded) + * 2. For a MAC over "local" interface, delete the mac + * Note: We will get notifications from both bridge driver and VxLAN + * driver. 
+ */ + if (nhg_id) + return 0; + + if (dst_present) { + u_char zero_mac[6] = {0x0, 0x0, 0x0, 0x0, 0x0, 0x0}; + + if (!memcmp(zero_mac, mac.octet, ETH_ALEN)) + return zebra_vxlan_check_readd_vtep(ifp, vtep_ip); + return 0; + } + + if (IS_ZEBRA_IF_VXLAN(ifp)) + return zebra_vxlan_dp_network_mac_del(ifp, br_if, &mac, vid); + + return zebra_vxlan_local_mac_del(ifp, br_if, &mac, vid); +} + +static int netlink_macfdb_table(struct nlmsghdr *h, ns_id_t ns_id, int startup) +{ + int len; + struct ndmsg *ndm; + + if (h->nlmsg_type != RTM_NEWNEIGH) + return 0; + + /* Length validity. */ + len = h->nlmsg_len - NLMSG_LENGTH(sizeof(struct ndmsg)); + if (len < 0) + return -1; + + /* We are interested only in AF_BRIDGE notifications. */ + ndm = NLMSG_DATA(h); + if (ndm->ndm_family != AF_BRIDGE) + return 0; + + return netlink_macfdb_change(h, len, ns_id); +} + +/* Request for MAC FDB information from the kernel */ +static int netlink_request_macs(struct nlsock *netlink_cmd, int family, + int type, ifindex_t master_ifindex) +{ + struct { + struct nlmsghdr n; + struct ifinfomsg ifm; + char buf[256]; + } req; + + /* Form the request, specifying filter (rtattr) if needed. */ + memset(&req, 0, sizeof(req)); + req.n.nlmsg_type = type; + req.n.nlmsg_flags = NLM_F_ROOT | NLM_F_MATCH | NLM_F_REQUEST; + req.n.nlmsg_len = NLMSG_LENGTH(sizeof(struct ifinfomsg)); + req.ifm.ifi_family = family; + if (master_ifindex) + nl_attr_put32(&req.n, sizeof(req), IFLA_MASTER, master_ifindex); + + return netlink_request(netlink_cmd, &req); +} + +/* + * MAC forwarding database read using netlink interface. This is invoked + * at startup. + */ +int netlink_macfdb_read(struct zebra_ns *zns) +{ + int ret; + struct zebra_dplane_info dp_info; + + zebra_dplane_info_from_zns(&dp_info, zns, true /*is_cmd*/); + + /* Get bridge FDB table. */ + ret = netlink_request_macs(&zns->netlink_cmd, AF_BRIDGE, RTM_GETNEIGH, + 0); + if (ret < 0) + return ret; + /* We are reading entire table. 
*/ + filter_vlan = 0; + ret = netlink_parse_info(netlink_macfdb_table, &zns->netlink_cmd, + &dp_info, 0, true); + + return ret; +} + +/* + * MAC forwarding database read using netlink interface. This is for a + * specific bridge and matching specific access VLAN (if VLAN-aware bridge). + */ +int netlink_macfdb_read_for_bridge(struct zebra_ns *zns, struct interface *ifp, + struct interface *br_if) +{ + struct zebra_if *br_zif; + struct zebra_if *zif; + struct zebra_l2info_vxlan *vxl; + struct zebra_dplane_info dp_info; + int ret = 0; + + zebra_dplane_info_from_zns(&dp_info, zns, true /*is_cmd*/); + + /* Save VLAN we're filtering on, if needed. */ + br_zif = (struct zebra_if *)br_if->info; + zif = (struct zebra_if *)ifp->info; + vxl = &zif->l2info.vxl; + if (IS_ZEBRA_IF_BRIDGE_VLAN_AWARE(br_zif)) + filter_vlan = vxl->access_vlan; + + /* Get bridge FDB table for specific bridge - we do the VLAN filtering. + */ + ret = netlink_request_macs(&zns->netlink_cmd, AF_BRIDGE, RTM_GETNEIGH, + br_if->ifindex); + if (ret < 0) + return ret; + ret = netlink_parse_info(netlink_macfdb_table, &zns->netlink_cmd, + &dp_info, 0, false); + + /* Reset VLAN filter. 
*/ + filter_vlan = 0; + return ret; +} + + +/* Request for MAC FDB for a specific MAC address in VLAN from the kernel */ +static int netlink_request_specific_mac_in_bridge(struct zebra_ns *zns, + int family, int type, + struct interface *br_if, + const struct ethaddr *mac, + vlanid_t vid) +{ + struct { + struct nlmsghdr n; + struct ndmsg ndm; + char buf[256]; + } req; + struct zebra_if *br_zif; + + memset(&req, 0, sizeof(req)); + req.n.nlmsg_len = NLMSG_LENGTH(sizeof(struct ndmsg)); + req.n.nlmsg_type = type; /* RTM_GETNEIGH */ + req.n.nlmsg_flags = NLM_F_REQUEST; + req.ndm.ndm_family = family; /* AF_BRIDGE */ + /* req.ndm.ndm_state = NUD_REACHABLE; */ + + nl_attr_put(&req.n, sizeof(req), NDA_LLADDR, mac, 6); + + br_zif = (struct zebra_if *)br_if->info; + if (IS_ZEBRA_IF_BRIDGE_VLAN_AWARE(br_zif) && vid > 0) + nl_attr_put16(&req.n, sizeof(req), NDA_VLAN, vid); + + nl_attr_put32(&req.n, sizeof(req), NDA_MASTER, br_if->ifindex); + + if (IS_ZEBRA_DEBUG_KERNEL) + zlog_debug( + "%s: Tx family %s IF %s(%u) vrf %s(%u) MAC %pEA vid %u", + __func__, nl_family_to_str(req.ndm.ndm_family), + br_if->name, br_if->ifindex, br_if->vrf->name, + br_if->vrf->vrf_id, mac, vid); + + return netlink_request(&zns->netlink_cmd, &req); +} + +int netlink_macfdb_read_specific_mac(struct zebra_ns *zns, + struct interface *br_if, + const struct ethaddr *mac, vlanid_t vid) +{ + int ret = 0; + struct zebra_dplane_info dp_info; + + zebra_dplane_info_from_zns(&dp_info, zns, true /*is_cmd*/); + + /* Get bridge FDB table for specific bridge - we do the VLAN filtering. + */ + ret = netlink_request_specific_mac_in_bridge(zns, AF_BRIDGE, + RTM_GETNEIGH, + br_if, mac, vid); + if (ret < 0) + return ret; + + ret = netlink_parse_info(netlink_macfdb_table, &zns->netlink_cmd, + &dp_info, 1, false); + + return ret; +} + +/* + * Netlink-specific handler for MAC updates using dataplane context object. 
+ */ +ssize_t netlink_macfdb_update_ctx(struct zebra_dplane_ctx *ctx, void *data, + size_t datalen) +{ + struct ipaddr vtep_ip; + vlanid_t vid; + ssize_t total; + int cmd; + uint8_t flags; + uint16_t state; + uint32_t nhg_id; + uint32_t update_flags; + bool nfy = false; + uint8_t nfy_flags = 0; + int proto = RTPROT_ZEBRA; + + if (dplane_ctx_get_type(ctx) != 0) + proto = zebra2proto(dplane_ctx_get_type(ctx)); + + cmd = dplane_ctx_get_op(ctx) == DPLANE_OP_MAC_INSTALL + ? RTM_NEWNEIGH : RTM_DELNEIGH; + + flags = NTF_MASTER; + state = NUD_REACHABLE; + + update_flags = dplane_ctx_mac_get_update_flags(ctx); + if (update_flags & DPLANE_MAC_REMOTE) { + flags |= NTF_SELF; + if (dplane_ctx_mac_is_sticky(ctx)) { + /* NUD_NOARP prevents the entry from expiring */ + state |= NUD_NOARP; + /* sticky the entry from moving */ + flags |= NTF_STICKY; + } else { + flags |= NTF_EXT_LEARNED; + } + /* if it was static-local previously we need to clear the + * notify flags on replace with remote + */ + if (update_flags & DPLANE_MAC_WAS_STATIC) + nfy = true; + } else { + /* local mac */ + if (update_flags & DPLANE_MAC_SET_STATIC) { + nfy_flags |= FDB_NOTIFY_BIT; + state |= NUD_NOARP; + } + + if (update_flags & DPLANE_MAC_SET_INACTIVE) + nfy_flags |= FDB_NOTIFY_INACTIVE_BIT; + + nfy = true; + } + + nhg_id = dplane_ctx_mac_get_nhg_id(ctx); + vtep_ip.ipaddr_v4 = *(dplane_ctx_mac_get_vtep_ip(ctx)); + SET_IPADDR_V4(&vtep_ip); + + if (IS_ZEBRA_DEBUG_KERNEL) { + char vid_buf[20]; + const struct ethaddr *mac = dplane_ctx_mac_get_addr(ctx); + + vid = dplane_ctx_mac_get_vlan(ctx); + if (vid > 0) + snprintf(vid_buf, sizeof(vid_buf), " VLAN %u", vid); + else + vid_buf[0] = '\0'; + + zlog_debug( + "Tx %s family %s IF %s(%u)%s %sMAC %pEA dst %pIA nhg %u%s%s%s%s%s", + nl_msg_type_to_str(cmd), nl_family_to_str(AF_BRIDGE), + dplane_ctx_get_ifname(ctx), dplane_ctx_get_ifindex(ctx), + vid_buf, dplane_ctx_mac_is_sticky(ctx) ? "sticky " : "", + mac, &vtep_ip, nhg_id, + (update_flags & DPLANE_MAC_REMOTE) ? 
" rem" : "", + (update_flags & DPLANE_MAC_WAS_STATIC) ? " clr_sync" + : "", + (update_flags & DPLANE_MAC_SET_STATIC) ? " static" : "", + (update_flags & DPLANE_MAC_SET_INACTIVE) ? " inactive" + : "", + nfy ? " nfy" : ""); + } + + total = netlink_neigh_update_msg_encode( + ctx, cmd, (const void *)dplane_ctx_mac_get_addr(ctx), ETH_ALEN, + &vtep_ip, true, AF_BRIDGE, 0, flags, state, nhg_id, nfy, + nfy_flags, false /*ext*/, 0 /*ext_flags*/, data, datalen, + proto); + + return total; +} + +/* + * In the event the kernel deletes ipv4 link-local neighbor entries created for + * 5549 support, re-install them. + */ +static void netlink_handle_5549(struct ndmsg *ndm, struct zebra_if *zif, + struct interface *ifp, struct ipaddr *ip, + bool handle_failed) +{ + if (ndm->ndm_family != AF_INET) + return; + + if (!zif->v6_2_v4_ll_neigh_entry) + return; + + if (ipv4_ll.s_addr != ip->ip._v4_addr.s_addr) + return; + + if (handle_failed && ndm->ndm_state & NUD_FAILED) { + zlog_info("Neighbor Entry for %s has entered a failed state, not reinstalling", + ifp->name); + return; + } + + if_nbr_ipv6ll_to_ipv4ll_neigh_update(ifp, &zif->v6_2_v4_ll_addr6, true); +} + +#define NUD_VALID \ + (NUD_PERMANENT | NUD_NOARP | NUD_REACHABLE | NUD_PROBE | NUD_STALE \ + | NUD_DELAY) +#define NUD_LOCAL_ACTIVE \ + (NUD_PERMANENT | NUD_NOARP | NUD_REACHABLE) + +static int netlink_nbr_entry_state_to_zclient(int nbr_state) +{ + /* an exact match is done between + * - netlink neighbor state values: NDM_XXX (see in linux/neighbour.h) + * - zclient neighbor state values: ZEBRA_NEIGH_STATE_XXX + * (see in lib/zclient.h) + */ + return nbr_state; +} +static int netlink_ipneigh_change(struct nlmsghdr *h, int len, ns_id_t ns_id) +{ + struct ndmsg *ndm; + struct interface *ifp; + struct zebra_if *zif; + struct rtattr *tb[NDA_MAX + 1]; + struct interface *link_if; + struct ethaddr mac; + struct ipaddr ip; + char buf[ETHER_ADDR_STRLEN]; + int mac_present = 0; + bool is_ext; + bool is_router; + bool local_inactive; + 
uint32_t ext_flags = 0; + bool dp_static = false; + int l2_len = 0; + int cmd; + + ndm = NLMSG_DATA(h); + + /* The interface should exist. */ + ifp = if_lookup_by_index_per_ns(zebra_ns_lookup(ns_id), + ndm->ndm_ifindex); + if (!ifp || !ifp->info) + return 0; + + zif = (struct zebra_if *)ifp->info; + + /* Parse attributes and extract fields of interest. */ + netlink_parse_rtattr(tb, NDA_MAX, NDA_RTA(ndm), len); + + if (!tb[NDA_DST]) { + zlog_debug("%s family %s IF %s(%u) vrf %s(%u) - no DST", + nl_msg_type_to_str(h->nlmsg_type), + nl_family_to_str(ndm->ndm_family), ifp->name, + ndm->ndm_ifindex, ifp->vrf->name, ifp->vrf->vrf_id); + return 0; + } + + memset(&ip, 0, sizeof(ip)); + ip.ipa_type = (ndm->ndm_family == AF_INET) ? IPADDR_V4 : IPADDR_V6; + memcpy(&ip.ip.addr, RTA_DATA(tb[NDA_DST]), RTA_PAYLOAD(tb[NDA_DST])); + + /* if kernel deletes our rfc5549 neighbor entry, re-install it */ + if (h->nlmsg_type == RTM_DELNEIGH && (ndm->ndm_state & NUD_PERMANENT)) { + netlink_handle_5549(ndm, zif, ifp, &ip, false); + if (IS_ZEBRA_DEBUG_KERNEL) + zlog_debug( + " Neighbor Entry Received is a 5549 entry, finished"); + return 0; + } + + /* if kernel marks our rfc5549 neighbor entry invalid, re-install it */ + if (h->nlmsg_type == RTM_NEWNEIGH && !(ndm->ndm_state & NUD_VALID)) + netlink_handle_5549(ndm, zif, ifp, &ip, true); + + /* we send link layer information to client: + * - nlmsg_type = RTM_DELNEIGH|NEWNEIGH|GETNEIGH + * - struct ipaddr ( for DEL and GET) + * - struct ethaddr mac; (for NEW) + */ + if (h->nlmsg_type == RTM_NEWNEIGH) + cmd = ZEBRA_NHRP_NEIGH_ADDED; + else if (h->nlmsg_type == RTM_GETNEIGH) + cmd = ZEBRA_NHRP_NEIGH_GET; + else if (h->nlmsg_type == RTM_DELNEIGH) + cmd = ZEBRA_NHRP_NEIGH_REMOVED; + else { + zlog_debug("%s(): unknown nlmsg type %u", __func__, + h->nlmsg_type); + return 0; + } + if (tb[NDA_LLADDR]) { + /* copy LLADDR information */ + l2_len = RTA_PAYLOAD(tb[NDA_LLADDR]); + } + if (l2_len == IPV4_MAX_BYTELEN || l2_len == 0) { + union sockunion 
link_layer_ipv4; + + if (l2_len) { + sockunion_family(&link_layer_ipv4) = AF_INET; + memcpy((void *)sockunion_get_addr(&link_layer_ipv4), + RTA_DATA(tb[NDA_LLADDR]), l2_len); + } else + sockunion_family(&link_layer_ipv4) = AF_UNSPEC; + zsend_nhrp_neighbor_notify( + cmd, ifp, &ip, + netlink_nbr_entry_state_to_zclient(ndm->ndm_state), + &link_layer_ipv4); + } + + if (h->nlmsg_type == RTM_GETNEIGH) + return 0; + + /* The neighbor is present on an SVI. From this, we locate the + * underlying + * bridge because we're only interested in neighbors on a VxLAN bridge. + * The bridge is located based on the nature of the SVI: + * (a) In the case of a VLAN-aware bridge, the SVI is a L3 VLAN + * interface + * and is linked to the bridge + * (b) In the case of a VLAN-unaware bridge, the SVI is the bridge + * interface + * itself + */ + if (IS_ZEBRA_IF_VLAN(ifp)) { + link_if = if_lookup_by_index_per_ns(zebra_ns_lookup(ns_id), + zif->link_ifindex); + if (!link_if) + return 0; + } else if (IS_ZEBRA_IF_BRIDGE(ifp)) + link_if = ifp; + else { + link_if = NULL; + if (IS_ZEBRA_DEBUG_KERNEL) + zlog_debug( + " Neighbor Entry received is not on a VLAN or a BRIDGE, ignoring"); + } + + memset(&mac, 0, sizeof(mac)); + if (h->nlmsg_type == RTM_NEWNEIGH) { + if (tb[NDA_LLADDR]) { + if (RTA_PAYLOAD(tb[NDA_LLADDR]) != ETH_ALEN) { + if (IS_ZEBRA_DEBUG_KERNEL) + zlog_debug( + "%s family %s IF %s(%u) vrf %s(%u) - LLADDR is not MAC, len %lu", + nl_msg_type_to_str( + h->nlmsg_type), + nl_family_to_str( + ndm->ndm_family), + ifp->name, ndm->ndm_ifindex, + ifp->vrf->name, + ifp->vrf->vrf_id, + (unsigned long)RTA_PAYLOAD( + tb[NDA_LLADDR])); + return 0; + } + + mac_present = 1; + memcpy(&mac, RTA_DATA(tb[NDA_LLADDR]), ETH_ALEN); + } + + is_ext = !!(ndm->ndm_flags & NTF_EXT_LEARNED); + is_router = !!(ndm->ndm_flags & NTF_ROUTER); + + if (tb[NDA_EXT_FLAGS]) { + ext_flags = *(uint32_t *)RTA_DATA(tb[NDA_EXT_FLAGS]); + if (ext_flags & NTF_E_MH_PEER_SYNC) + dp_static = true; + } + + if (IS_ZEBRA_DEBUG_KERNEL) 
+ zlog_debug( + "Rx %s family %s IF %s(%u) vrf %s(%u) IP %pIA MAC %s state 0x%x flags 0x%x ext_flags 0x%x", + nl_msg_type_to_str(h->nlmsg_type), + nl_family_to_str(ndm->ndm_family), ifp->name, + ndm->ndm_ifindex, ifp->vrf->name, + ifp->vrf->vrf_id, &ip, + mac_present + ? prefix_mac2str(&mac, buf, sizeof(buf)) + : "", + ndm->ndm_state, ndm->ndm_flags, ext_flags); + + /* If the neighbor state is valid for use, process as an add or + * update + * else process as a delete. Note that the delete handling may + * result + * in re-adding the neighbor if it is a valid "remote" neighbor. + */ + if (ndm->ndm_state & NUD_VALID) { + if (zebra_evpn_mh_do_adv_reachable_neigh_only()) + local_inactive = + !(ndm->ndm_state & NUD_LOCAL_ACTIVE); + else + /* If EVPN-MH is not enabled we treat STALE + * neighbors as locally-active and advertise + * them + */ + local_inactive = false; + + /* Add local neighbors to the l3 interface database */ + if (is_ext) + zebra_neigh_del(ifp, &ip); + else + zebra_neigh_add(ifp, &ip, &mac); + + if (link_if) + zebra_vxlan_handle_kernel_neigh_update( + ifp, link_if, &ip, &mac, ndm->ndm_state, + is_ext, is_router, local_inactive, + dp_static); + return 0; + } + + + zebra_neigh_del(ifp, &ip); + if (link_if) + zebra_vxlan_handle_kernel_neigh_del(ifp, link_if, &ip); + return 0; + } + + if (IS_ZEBRA_DEBUG_KERNEL) + zlog_debug("Rx %s family %s IF %s(%u) vrf %s(%u) IP %pIA", + nl_msg_type_to_str(h->nlmsg_type), + nl_family_to_str(ndm->ndm_family), ifp->name, + ndm->ndm_ifindex, ifp->vrf->name, ifp->vrf->vrf_id, + &ip); + + /* Process the delete - it may result in re-adding the neighbor if it is + * a valid "remote" neighbor. + */ + zebra_neigh_del(ifp, &ip); + if (link_if) + zebra_vxlan_handle_kernel_neigh_del(ifp, link_if, &ip); + + return 0; +} + +static int netlink_neigh_table(struct nlmsghdr *h, ns_id_t ns_id, int startup) +{ + int len; + struct ndmsg *ndm; + + if (h->nlmsg_type != RTM_NEWNEIGH) + return 0; + + /* Length validity. 
*/ + len = h->nlmsg_len - NLMSG_LENGTH(sizeof(struct ndmsg)); + if (len < 0) + return -1; + + /* We are interested only in AF_INET or AF_INET6 notifications. */ + ndm = NLMSG_DATA(h); + if (ndm->ndm_family != AF_INET && ndm->ndm_family != AF_INET6) + return 0; + + return netlink_neigh_change(h, len); +} + +/* Request for IP neighbor information from the kernel */ +static int netlink_request_neigh(struct nlsock *netlink_cmd, int family, + int type, ifindex_t ifindex) +{ + struct { + struct nlmsghdr n; + struct ndmsg ndm; + char buf[256]; + } req; + + /* Form the request, specifying filter (rtattr) if needed. */ + memset(&req, 0, sizeof(req)); + req.n.nlmsg_type = type; + req.n.nlmsg_flags = NLM_F_ROOT | NLM_F_MATCH | NLM_F_REQUEST; + req.n.nlmsg_len = NLMSG_LENGTH(sizeof(struct ndmsg)); + req.ndm.ndm_family = family; + if (ifindex) + nl_attr_put32(&req.n, sizeof(req), NDA_IFINDEX, ifindex); + + return netlink_request(netlink_cmd, &req); +} + +/* + * IP Neighbor table read using netlink interface. This is invoked + * at startup. + */ +int netlink_neigh_read(struct zebra_ns *zns) +{ + int ret; + struct zebra_dplane_info dp_info; + + zebra_dplane_info_from_zns(&dp_info, zns, true /*is_cmd*/); + + /* Get IP neighbor table. */ + ret = netlink_request_neigh(&zns->netlink_cmd, AF_UNSPEC, RTM_GETNEIGH, + 0); + if (ret < 0) + return ret; + ret = netlink_parse_info(netlink_neigh_table, &zns->netlink_cmd, + &dp_info, 0, true); + + return ret; +} + +/* + * IP Neighbor table read using netlink interface. This is for a specific + * VLAN device. 
 */
int netlink_neigh_read_for_vlan(struct zebra_ns *zns, struct interface *vlan_if)
{
	int ret = 0;
	struct zebra_dplane_info dp_info;

	zebra_dplane_info_from_zns(&dp_info, zns, true /*is_cmd*/);

	/* Dump only the neighbors learned on this interface's ifindex. */
	ret = netlink_request_neigh(&zns->netlink_cmd, AF_UNSPEC, RTM_GETNEIGH,
				    vlan_if->ifindex);
	if (ret < 0)
		return ret;
	/* Unlike netlink_neigh_read(), this is not a startup-time parse. */
	ret = netlink_parse_info(netlink_neigh_table, &zns->netlink_cmd,
				 &dp_info, 0, false);

	return ret;
}

/*
 * Request for a specific IP in VLAN (SVI) device from IP Neighbor table,
 * read using netlink interface.  A point query (no NLM_F_DUMP): the request
 * carries an NDA_DST attribute with the v4/v6 address and the ifindex in
 * the ndmsg header, so the kernel answers for that single entry.
 */
static int netlink_request_specific_neigh_in_vlan(struct zebra_ns *zns,
						  int type,
						  const struct ipaddr *ip,
						  ifindex_t ifindex)
{
	struct {
		struct nlmsghdr n;
		struct ndmsg ndm;
		char buf[256];
	} req;
	int ipa_len;

	/* Form the request, specifying filter (rtattr) if needed. */
	memset(&req, 0, sizeof(req));
	req.n.nlmsg_len = NLMSG_LENGTH(sizeof(struct ndmsg));
	req.n.nlmsg_flags = NLM_F_REQUEST;
	req.n.nlmsg_type = type; /* RTM_GETNEIGH */
	req.ndm.ndm_ifindex = ifindex;

	/* Address family and payload length follow the ipaddr variant. */
	if (IS_IPADDR_V4(ip)) {
		ipa_len = IPV4_MAX_BYTELEN;
		req.ndm.ndm_family = AF_INET;

	} else {
		ipa_len = IPV6_MAX_BYTELEN;
		req.ndm.ndm_family = AF_INET6;
	}

	nl_attr_put(&req.n, sizeof(req), NDA_DST, &ip->ip.addr, ipa_len);

	if (IS_ZEBRA_DEBUG_KERNEL)
		zlog_debug("%s: Tx %s family %s IF %u IP %pIA flags 0x%x",
			   __func__, nl_msg_type_to_str(type),
			   nl_family_to_str(req.ndm.ndm_family), ifindex, ip,
			   req.n.nlmsg_flags);

	return netlink_request(&zns->netlink_cmd, &req);
}

/* Query the kernel for one specific neighbor entry (ip) on an SVI and run
 * the reply through the regular neighbor-table parser.
 */
int netlink_neigh_read_specific_ip(const struct ipaddr *ip,
				   struct interface *vlan_if)
{
	int ret = 0;
	struct zebra_ns *zns;
	struct zebra_vrf *zvrf = vlan_if->vrf->info;
	struct zebra_dplane_info dp_info;

	zns = zvrf->zns;

	zebra_dplane_info_from_zns(&dp_info, zns, true /*is_cmd*/);

	if (IS_ZEBRA_DEBUG_KERNEL)
		zlog_debug("%s: neigh request IF %s(%u) IP %pIA vrf %s(%u)",
			   __func__, vlan_if->name,
			   vlan_if->ifindex, ip,
			   vlan_if->vrf->name, vlan_if->vrf->vrf_id);

	ret = netlink_request_specific_neigh_in_vlan(zns, RTM_GETNEIGH, ip,
						     vlan_if->ifindex);
	if (ret < 0)
		return ret;

	/* Parse exactly one reply (count == 1), non-startup. */
	ret = netlink_parse_info(netlink_neigh_table, &zns->netlink_cmd,
				 &dp_info, 1, false);

	return ret;
}

/* Top-level handler for kernel neighbor notifications.
 * Accepts RTM_NEWNEIGH/RTM_DELNEIGH/RTM_GETNEIGH, validates the message
 * length, then dispatches: AF_BRIDGE messages go to the MAC FDB handler,
 * AF_INET/AF_INET6 unicast messages go to the IP neighbor handler, and
 * anything else is logged and ignored.
 */
int netlink_neigh_change(struct nlmsghdr *h, ns_id_t ns_id)
{
	int len;
	struct ndmsg *ndm;

	if (!(h->nlmsg_type == RTM_NEWNEIGH || h->nlmsg_type == RTM_DELNEIGH
	      || h->nlmsg_type == RTM_GETNEIGH))
		return 0;

	/* Length validity. */
	len = h->nlmsg_len - NLMSG_LENGTH(sizeof(struct ndmsg));
	if (len < 0) {
		/* NOTE(review): nlmsg_len is unsigned (__u32); the %d
		 * conversion here relies on the value fitting in int.
		 */
		zlog_err(
			"%s: Message received from netlink is of a broken size %d %zu",
			__func__, h->nlmsg_len,
			(size_t)NLMSG_LENGTH(sizeof(struct ndmsg)));
		return -1;
	}

	/* Is this a notification for the MAC FDB or IP neighbor table? */
	ndm = NLMSG_DATA(h);
	if (ndm->ndm_family == AF_BRIDGE)
		return netlink_macfdb_change(h, len, ns_id);

	if (ndm->ndm_type != RTN_UNICAST)
		return 0;

	if (ndm->ndm_family == AF_INET || ndm->ndm_family == AF_INET6)
		return netlink_ipneigh_change(h, len, ns_id);
	else {
		flog_warn(
			EC_ZEBRA_UNKNOWN_FAMILY,
			"Invalid address family: %u received from kernel neighbor change: %s",
			ndm->ndm_family, nl_msg_type_to_str(h->nlmsg_type));
		return 0;
	}

	/* Not reached: every branch above returns. */
	return 0;
}

/*
 * Utility neighbor-update function, using info from dplane context.
+ */ +static ssize_t netlink_neigh_update_ctx(const struct zebra_dplane_ctx *ctx, + int cmd, void *buf, size_t buflen) +{ + const struct ipaddr *ip; + const struct ethaddr *mac = NULL; + const struct ipaddr *link_ip = NULL; + const void *link_ptr = NULL; + char buf2[ETHER_ADDR_STRLEN]; + + int llalen; + uint8_t flags; + uint16_t state; + uint8_t family; + uint32_t update_flags; + uint32_t ext_flags = 0; + bool ext = false; + int proto = RTPROT_ZEBRA; + + if (dplane_ctx_get_type(ctx) != 0) + proto = zebra2proto(dplane_ctx_get_type(ctx)); + + ip = dplane_ctx_neigh_get_ipaddr(ctx); + + if (dplane_ctx_get_op(ctx) == DPLANE_OP_NEIGH_IP_INSTALL + || dplane_ctx_get_op(ctx) == DPLANE_OP_NEIGH_IP_DELETE) { + link_ip = dplane_ctx_neigh_get_link_ip(ctx); + llalen = IPADDRSZ(link_ip); + link_ptr = (const void *)&(link_ip->ip.addr); + ipaddr2str(link_ip, buf2, sizeof(buf2)); + } else { + mac = dplane_ctx_neigh_get_mac(ctx); + llalen = ETH_ALEN; + link_ptr = (const void *)mac; + if (is_zero_mac(mac)) + mac = NULL; + if (mac) + prefix_mac2str(mac, buf2, sizeof(buf2)); + else + snprintf(buf2, sizeof(buf2), "null"); + } + update_flags = dplane_ctx_neigh_get_update_flags(ctx); + flags = neigh_flags_to_netlink(dplane_ctx_neigh_get_flags(ctx)); + state = neigh_state_to_netlink(dplane_ctx_neigh_get_state(ctx)); + + family = IS_IPADDR_V4(ip) ? 
AF_INET : AF_INET6; + + if (update_flags & DPLANE_NEIGH_REMOTE) { + flags |= NTF_EXT_LEARNED; + /* if it was static-local previously we need to clear the + * ext flags on replace with remote + */ + if (update_flags & DPLANE_NEIGH_WAS_STATIC) + ext = true; + } else if (!(update_flags & DPLANE_NEIGH_NO_EXTENSION)) { + ext = true; + /* local neigh */ + if (update_flags & DPLANE_NEIGH_SET_STATIC) + ext_flags |= NTF_E_MH_PEER_SYNC; + } + if (IS_ZEBRA_DEBUG_KERNEL) + zlog_debug( + "Tx %s family %s IF %s(%u) Neigh %pIA %s %s flags 0x%x state 0x%x %sext_flags 0x%x", + nl_msg_type_to_str(cmd), nl_family_to_str(family), + dplane_ctx_get_ifname(ctx), dplane_ctx_get_ifindex(ctx), + ip, link_ip ? "Link " : "MAC ", buf2, flags, state, + ext ? "ext " : "", ext_flags); + + return netlink_neigh_update_msg_encode( + ctx, cmd, link_ptr, llalen, ip, true, family, RTN_UNICAST, + flags, state, 0 /*nhg*/, false /*nfy*/, 0 /*nfy_flags*/, ext, + ext_flags, buf, buflen, proto); +} + +static int netlink_neigh_table_update_ctx(const struct zebra_dplane_ctx *ctx, + void *data, size_t datalen) +{ + struct { + struct nlmsghdr n; + struct ndtmsg ndtm; + char buf[]; + } *req = data; + struct rtattr *nest; + uint8_t family; + ifindex_t idx; + uint32_t val; + + if (datalen < sizeof(*req)) + return 0; + memset(req, 0, sizeof(*req)); + family = dplane_ctx_neightable_get_family(ctx); + idx = dplane_ctx_get_ifindex(ctx); + + req->n.nlmsg_len = NLMSG_LENGTH(sizeof(struct ndtmsg)); + req->n.nlmsg_flags = NLM_F_REQUEST | NLM_F_REPLACE; + req->n.nlmsg_type = RTM_SETNEIGHTBL; + req->ndtm.ndtm_family = family; + + nl_attr_put(&req->n, datalen, NDTA_NAME, + family == AF_INET ? 
"arp_cache" : "ndisc_cache", 10); + nest = nl_attr_nest(&req->n, datalen, NDTA_PARMS); + if (nest == NULL) + return 0; + if (!nl_attr_put(&req->n, datalen, NDTPA_IFINDEX, &idx, sizeof(idx))) + return 0; + val = dplane_ctx_neightable_get_app_probes(ctx); + if (!nl_attr_put(&req->n, datalen, NDTPA_APP_PROBES, &val, sizeof(val))) + return 0; + val = dplane_ctx_neightable_get_mcast_probes(ctx); + if (!nl_attr_put(&req->n, datalen, NDTPA_MCAST_PROBES, &val, + sizeof(val))) + return 0; + val = dplane_ctx_neightable_get_ucast_probes(ctx); + if (!nl_attr_put(&req->n, datalen, NDTPA_UCAST_PROBES, &val, + sizeof(val))) + return 0; + nl_attr_nest_end(&req->n, nest); + + return NLMSG_ALIGN(req->n.nlmsg_len); +} + +static ssize_t netlink_neigh_msg_encoder(struct zebra_dplane_ctx *ctx, + void *buf, size_t buflen) +{ + ssize_t ret; + + switch (dplane_ctx_get_op(ctx)) { + case DPLANE_OP_NEIGH_INSTALL: + case DPLANE_OP_NEIGH_UPDATE: + case DPLANE_OP_NEIGH_DISCOVER: + case DPLANE_OP_NEIGH_IP_INSTALL: + ret = netlink_neigh_update_ctx(ctx, RTM_NEWNEIGH, buf, buflen); + break; + case DPLANE_OP_NEIGH_DELETE: + case DPLANE_OP_NEIGH_IP_DELETE: + ret = netlink_neigh_update_ctx(ctx, RTM_DELNEIGH, buf, buflen); + break; + case DPLANE_OP_VTEP_ADD: + ret = netlink_vxlan_flood_update_ctx(ctx, RTM_NEWNEIGH, buf, + buflen); + break; + case DPLANE_OP_VTEP_DELETE: + ret = netlink_vxlan_flood_update_ctx(ctx, RTM_DELNEIGH, buf, + buflen); + break; + case DPLANE_OP_NEIGH_TABLE_UPDATE: + ret = netlink_neigh_table_update_ctx(ctx, buf, buflen); + break; + default: + ret = -1; + } + + return ret; +} + +/* + * Update MAC, using dataplane context object. 
+ */ + +enum netlink_msg_status netlink_put_mac_update_msg(struct nl_batch *bth, + struct zebra_dplane_ctx *ctx) +{ + return netlink_batch_add_msg(bth, ctx, netlink_macfdb_update_ctx, + false); +} + +enum netlink_msg_status +netlink_put_neigh_update_msg(struct nl_batch *bth, struct zebra_dplane_ctx *ctx) +{ + return netlink_batch_add_msg(bth, ctx, netlink_neigh_msg_encoder, + false); +} + +/* + * MPLS label forwarding table change via netlink interface, using dataplane + * context information. + */ +ssize_t netlink_mpls_multipath_msg_encode(int cmd, struct zebra_dplane_ctx *ctx, + void *buf, size_t buflen) +{ + mpls_lse_t lse; + const struct nhlfe_list_head *head; + const struct zebra_nhlfe *nhlfe; + struct nexthop *nexthop = NULL; + unsigned int nexthop_num; + const char *routedesc; + int route_type; + struct prefix p = {0}; + struct nlsock *nl = + kernel_netlink_nlsock_lookup(dplane_ctx_get_ns_sock(ctx)); + + struct { + struct nlmsghdr n; + struct rtmsg r; + char buf[0]; + } *req = buf; + + if (buflen < sizeof(*req)) + return 0; + + memset(req, 0, sizeof(*req)); + + /* + * Count # nexthops so we can decide whether to use singlepath + * or multipath case. 
+ */ + nexthop_num = 0; + head = dplane_ctx_get_nhlfe_list(ctx); + frr_each(nhlfe_list_const, head, nhlfe) { + nexthop = nhlfe->nexthop; + if (!nexthop) + continue; + if (cmd == RTM_NEWROUTE) { + /* Count all selected NHLFEs */ + if (CHECK_FLAG(nhlfe->flags, NHLFE_FLAG_SELECTED) + && CHECK_FLAG(nexthop->flags, NEXTHOP_FLAG_ACTIVE)) + nexthop_num++; + } else { /* DEL */ + /* Count all installed NHLFEs */ + if (CHECK_FLAG(nhlfe->flags, NHLFE_FLAG_INSTALLED) + && CHECK_FLAG(nexthop->flags, NEXTHOP_FLAG_FIB)) + nexthop_num++; + } + } + + if ((nexthop_num == 0) || + (!dplane_ctx_get_best_nhlfe(ctx) && (cmd != RTM_DELROUTE))) + return 0; + + req->n.nlmsg_len = NLMSG_LENGTH(sizeof(struct rtmsg)); + req->n.nlmsg_flags = NLM_F_CREATE | NLM_F_REQUEST; + req->n.nlmsg_type = cmd; + req->n.nlmsg_pid = nl->snl.nl_pid; + + req->r.rtm_family = AF_MPLS; + req->r.rtm_table = RT_TABLE_MAIN; + req->r.rtm_dst_len = MPLS_LABEL_LEN_BITS; + req->r.rtm_scope = RT_SCOPE_UNIVERSE; + req->r.rtm_type = RTN_UNICAST; + + if (cmd == RTM_NEWROUTE) { + /* We do a replace to handle update. */ + req->n.nlmsg_flags |= NLM_F_REPLACE; + + /* set the protocol value if installing */ + route_type = re_type_from_lsp_type( + dplane_ctx_get_best_nhlfe(ctx)->type); + req->r.rtm_protocol = zebra2proto(route_type); + } + + /* Fill destination */ + lse = mpls_lse_encode(dplane_ctx_get_in_label(ctx), 0, 0, 1); + if (!nl_attr_put(&req->n, buflen, RTA_DST, &lse, sizeof(mpls_lse_t))) + return 0; + + /* Fill nexthops (paths) based on single-path or multipath. The paths + * chosen depend on the operation. 
+ */ + if (nexthop_num == 1) { + routedesc = "single-path"; + _netlink_mpls_debug(cmd, dplane_ctx_get_in_label(ctx), + routedesc); + + nexthop_num = 0; + frr_each(nhlfe_list_const, head, nhlfe) { + nexthop = nhlfe->nexthop; + if (!nexthop) + continue; + + if ((cmd == RTM_NEWROUTE + && (CHECK_FLAG(nhlfe->flags, NHLFE_FLAG_SELECTED) + && CHECK_FLAG(nexthop->flags, + NEXTHOP_FLAG_ACTIVE))) + || (cmd == RTM_DELROUTE + && (CHECK_FLAG(nhlfe->flags, + NHLFE_FLAG_INSTALLED) + && CHECK_FLAG(nexthop->flags, + NEXTHOP_FLAG_FIB)))) { + /* Add the gateway */ + if (!_netlink_mpls_build_singlepath( + &p, routedesc, nhlfe, &req->n, + &req->r, buflen, cmd)) + return false; + + nexthop_num++; + break; + } + } + } else { /* Multipath case */ + struct rtattr *nest; + const union g_addr *src1 = NULL; + + nest = nl_attr_nest(&req->n, buflen, RTA_MULTIPATH); + if (!nest) + return 0; + + routedesc = "multipath"; + _netlink_mpls_debug(cmd, dplane_ctx_get_in_label(ctx), + routedesc); + + nexthop_num = 0; + frr_each(nhlfe_list_const, head, nhlfe) { + nexthop = nhlfe->nexthop; + if (!nexthop) + continue; + + if ((cmd == RTM_NEWROUTE + && (CHECK_FLAG(nhlfe->flags, NHLFE_FLAG_SELECTED) + && CHECK_FLAG(nexthop->flags, + NEXTHOP_FLAG_ACTIVE))) + || (cmd == RTM_DELROUTE + && (CHECK_FLAG(nhlfe->flags, + NHLFE_FLAG_INSTALLED) + && CHECK_FLAG(nexthop->flags, + NEXTHOP_FLAG_FIB)))) { + nexthop_num++; + + /* Build the multipath */ + if (!_netlink_mpls_build_multipath( + &p, routedesc, nhlfe, &req->n, + buflen, &req->r, &src1)) + return 0; + } + } + + /* Add the multipath */ + nl_attr_nest_end(&req->n, nest); + } + + return NLMSG_ALIGN(req->n.nlmsg_len); +} + +/**************************************************************************** +* This code was developed in a branch that didn't have dplane APIs for +* MAC updates. Hence the use of the legacy style. It will be moved to +* the new dplane style pre-merge to master. 
XXX +*/ +static int netlink_fdb_nh_update(uint32_t nh_id, struct in_addr vtep_ip) +{ + struct { + struct nlmsghdr n; + struct nhmsg nhm; + char buf[256]; + } req; + int cmd = RTM_NEWNEXTHOP; + struct zebra_vrf *zvrf; + struct zebra_ns *zns; + + zvrf = zebra_vrf_get_evpn(); + zns = zvrf->zns; + + memset(&req, 0, sizeof(req)); + + req.n.nlmsg_len = NLMSG_LENGTH(sizeof(struct nhmsg)); + req.n.nlmsg_flags = NLM_F_REQUEST; + req.n.nlmsg_flags |= (NLM_F_CREATE | NLM_F_REPLACE); + req.n.nlmsg_type = cmd; + req.nhm.nh_family = AF_INET; + + if (!nl_attr_put32(&req.n, sizeof(req), NHA_ID, nh_id)) + return -1; + if (!nl_attr_put(&req.n, sizeof(req), NHA_FDB, NULL, 0)) + return -1; + if (!nl_attr_put(&req.n, sizeof(req), NHA_GATEWAY, + &vtep_ip, IPV4_MAX_BYTELEN)) + return -1; + + if (IS_ZEBRA_DEBUG_KERNEL || IS_ZEBRA_DEBUG_EVPN_MH_NH) { + zlog_debug("Tx %s fdb-nh 0x%x %pI4", + nl_msg_type_to_str(cmd), nh_id, &vtep_ip); + } + + return netlink_talk(netlink_talk_filter, &req.n, &zns->netlink_cmd, zns, + false); +} + +static int netlink_fdb_nh_del(uint32_t nh_id) +{ + struct { + struct nlmsghdr n; + struct nhmsg nhm; + char buf[256]; + } req; + int cmd = RTM_DELNEXTHOP; + struct zebra_vrf *zvrf; + struct zebra_ns *zns; + + zvrf = zebra_vrf_get_evpn(); + zns = zvrf->zns; + + memset(&req, 0, sizeof(req)); + + req.n.nlmsg_len = NLMSG_LENGTH(sizeof(struct nhmsg)); + req.n.nlmsg_flags = NLM_F_REQUEST; + req.n.nlmsg_type = cmd; + req.nhm.nh_family = AF_UNSPEC; + + if (!nl_attr_put32(&req.n, sizeof(req), NHA_ID, nh_id)) + return -1; + + if (IS_ZEBRA_DEBUG_KERNEL || IS_ZEBRA_DEBUG_EVPN_MH_NH) { + zlog_debug("Tx %s fdb-nh 0x%x", + nl_msg_type_to_str(cmd), nh_id); + } + + return netlink_talk(netlink_talk_filter, &req.n, &zns->netlink_cmd, zns, + false); +} + +static int netlink_fdb_nhg_update(uint32_t nhg_id, uint32_t nh_cnt, + struct nh_grp *nh_ids) +{ + struct { + struct nlmsghdr n; + struct nhmsg nhm; + char buf[256]; + } req; + int cmd = RTM_NEWNEXTHOP; + struct zebra_vrf *zvrf; + 
struct zebra_ns *zns; + struct nexthop_grp grp[nh_cnt]; + uint32_t i; + + zvrf = zebra_vrf_get_evpn(); + zns = zvrf->zns; + + memset(&req, 0, sizeof(req)); + + req.n.nlmsg_len = NLMSG_LENGTH(sizeof(struct nhmsg)); + req.n.nlmsg_flags = NLM_F_REQUEST; + req.n.nlmsg_flags |= (NLM_F_CREATE | NLM_F_REPLACE); + req.n.nlmsg_type = cmd; + req.nhm.nh_family = AF_UNSPEC; + + if (!nl_attr_put32(&req.n, sizeof(req), NHA_ID, nhg_id)) + return -1; + if (!nl_attr_put(&req.n, sizeof(req), NHA_FDB, NULL, 0)) + return -1; + memset(&grp, 0, sizeof(grp)); + for (i = 0; i < nh_cnt; ++i) { + grp[i].id = nh_ids[i].id; + grp[i].weight = nh_ids[i].weight; + } + if (!nl_attr_put(&req.n, sizeof(req), NHA_GROUP, + grp, nh_cnt * sizeof(struct nexthop_grp))) + return -1; + + + if (IS_ZEBRA_DEBUG_KERNEL || IS_ZEBRA_DEBUG_EVPN_MH_NH) { + char vtep_str[ES_VTEP_LIST_STR_SZ]; + char nh_buf[16]; + + vtep_str[0] = '\0'; + for (i = 0; i < nh_cnt; ++i) { + snprintf(nh_buf, sizeof(nh_buf), "%u ", + grp[i].id); + strlcat(vtep_str, nh_buf, sizeof(vtep_str)); + } + + zlog_debug("Tx %s fdb-nhg 0x%x %s", + nl_msg_type_to_str(cmd), nhg_id, vtep_str); + } + + return netlink_talk(netlink_talk_filter, &req.n, &zns->netlink_cmd, zns, + false); +} + +static int netlink_fdb_nhg_del(uint32_t nhg_id) +{ + return netlink_fdb_nh_del(nhg_id); +} + +int kernel_upd_mac_nh(uint32_t nh_id, struct in_addr vtep_ip) +{ + return netlink_fdb_nh_update(nh_id, vtep_ip); +} + +int kernel_del_mac_nh(uint32_t nh_id) +{ + return netlink_fdb_nh_del(nh_id); +} + +int kernel_upd_mac_nhg(uint32_t nhg_id, uint32_t nh_cnt, + struct nh_grp *nh_ids) +{ + return netlink_fdb_nhg_update(nhg_id, nh_cnt, nh_ids); +} + +int kernel_del_mac_nhg(uint32_t nhg_id) +{ + return netlink_fdb_nhg_del(nhg_id); +} + +#endif /* HAVE_NETLINK */ diff --git a/zebra/rt_netlink.h b/zebra/rt_netlink.h new file mode 100644 index 0000000..b1af4b2 --- /dev/null +++ b/zebra/rt_netlink.h @@ -0,0 +1,158 @@ +/* Header file exported by rt_netlink.c to zebra. 
+ * Copyright (C) 1997, 98, 99 Kunihiro Ishiguro + * + * This file is part of GNU Zebra. + * + * GNU Zebra is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License as published by the + * Free Software Foundation; either version 2, or (at your option) any + * later version. + * + * GNU Zebra is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. + * + * You should have received a copy of the GNU General Public License along + * with this program; see the file COPYING; if not, write to the Free Software + * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA + */ + +#ifndef _ZEBRA_RT_NETLINK_H +#define _ZEBRA_RT_NETLINK_H + +#ifdef HAVE_NETLINK + +#include "zebra/zebra_mpls.h" +#include "zebra/zebra_dplane.h" + +#ifdef __cplusplus +extern "C" { +#endif + +#define NL_DEFAULT_ROUTE_METRIC 20 + +/* + * Additional protocol strings to push into routes + * If we add anything new here please make sure + * to update: + * zebra2proto Function + * proto2zebra Function + * is_selfroute Function + * tools/frr To flush the route upon exit + * + * Finally update this file to allow iproute2 to + * know about this new route. + * tools/etc/iproute2/rt_protos.d + */ +#define RTPROT_BGP 186 +#define RTPROT_ISIS 187 +#define RTPROT_OSPF 188 +#define RTPROT_RIP 189 +#define RTPROT_RIPNG 190 +#if !defined(RTPROT_BABEL) +#define RTPROT_BABEL 42 +#endif +#define RTPROT_NHRP 191 +#define RTPROT_EIGRP 192 +#define RTPROT_LDP 193 +#define RTPROT_SHARP 194 +#define RTPROT_PBR 195 +#define RTPROT_ZSTATIC 196 +#define RTPROT_OPENFABRIC 197 +#define RTPROT_SRTE 198 + +void rt_netlink_init(void); + +/* MPLS label forwarding table change, using dataplane context information. 
*/ +extern ssize_t netlink_mpls_multipath_msg_encode(int cmd, + struct zebra_dplane_ctx *ctx, + void *buf, size_t buflen); + +extern ssize_t netlink_route_multipath_msg_encode(int cmd, + struct zebra_dplane_ctx *ctx, + uint8_t *data, size_t datalen, + bool fpm, bool force_nhg); +extern ssize_t netlink_macfdb_update_ctx(struct zebra_dplane_ctx *ctx, + void *data, size_t datalen); + +extern int netlink_route_change(struct nlmsghdr *h, ns_id_t ns_id, int startup); +extern int netlink_route_read(struct zebra_ns *zns); + +extern int netlink_nexthop_change(struct nlmsghdr *h, ns_id_t ns_id, + int startup); +extern int netlink_nexthop_read(struct zebra_ns *zns); +extern ssize_t netlink_nexthop_msg_encode(uint16_t cmd, + const struct zebra_dplane_ctx *ctx, + void *buf, size_t buflen); + +extern ssize_t netlink_lsp_msg_encoder(struct zebra_dplane_ctx *ctx, void *buf, + size_t buflen); + +extern int netlink_neigh_change(struct nlmsghdr *h, ns_id_t ns_id); +extern int netlink_macfdb_read(struct zebra_ns *zns); +extern int netlink_macfdb_read_for_bridge(struct zebra_ns *zns, + struct interface *ifp, + struct interface *br_if); +extern int netlink_neigh_read(struct zebra_ns *zns); +extern int netlink_neigh_read_for_vlan(struct zebra_ns *zns, + struct interface *vlan_if); +extern int netlink_macfdb_read_specific_mac(struct zebra_ns *zns, + struct interface *br_if, + const struct ethaddr *mac, + uint16_t vid); +extern int netlink_neigh_read_specific_ip(const struct ipaddr *ip, + struct interface *vlan_if); +extern vrf_id_t vrf_lookup_by_table(uint32_t table_id, ns_id_t ns_id); + +struct nl_batch; +extern enum netlink_msg_status +netlink_put_route_update_msg(struct nl_batch *bth, + struct zebra_dplane_ctx *ctx); +extern enum netlink_msg_status +netlink_put_nexthop_update_msg(struct nl_batch *bth, + struct zebra_dplane_ctx *ctx); +extern enum netlink_msg_status +netlink_put_mac_update_msg(struct nl_batch *bth, struct zebra_dplane_ctx *ctx); +extern enum netlink_msg_status 
+netlink_put_neigh_update_msg(struct nl_batch *bth, + struct zebra_dplane_ctx *ctx); +extern enum netlink_msg_status +netlink_put_lsp_update_msg(struct nl_batch *bth, struct zebra_dplane_ctx *ctx); +extern enum netlink_msg_status +netlink_put_pw_update_msg(struct nl_batch *bth, struct zebra_dplane_ctx *ctx); + +#ifdef NETLINK_DEBUG +const char *nlmsg_type2str(uint16_t type); +const char *af_type2str(int type); +const char *ifi_type2str(int type); +const char *rta_type2str(int type); +const char *rtm_type2str(int type); +const char *ifla_pdr_type2str(int type); +const char *ifla_info_type2str(int type); +const char *rtm_protocol2str(int type); +const char *rtm_scope2str(int type); +const char *rtm_rta2str(int type); +const char *neigh_rta2str(int type); +const char *ifa_rta2str(int type); +const char *nhm_rta2str(int type); +const char *frh_rta2str(int type); +const char *frh_action2str(uint8_t action); +const char *nlmsg_flags2str(uint16_t flags, char *buf, size_t buflen); +const char *if_flags2str(uint32_t flags, char *buf, size_t buflen); +const char *rtm_flags2str(uint32_t flags, char *buf, size_t buflen); +const char *neigh_state2str(uint32_t flags, char *buf, size_t buflen); +const char *neigh_flags2str(uint32_t flags, char *buf, size_t buflen); +const char *ifa_flags2str(uint32_t flags, char *buf, size_t buflen); +const char *nh_flags2str(uint32_t flags, char *buf, size_t buflen); + +void nl_dump(void *msg, size_t msglen); +#endif /* NETLINK_DEBUG */ + +#ifdef __cplusplus +} +#endif + +#endif /* HAVE_NETLINK */ + +#endif /* _ZEBRA_RT_NETLINK_H */ diff --git a/zebra/rt_socket.c b/zebra/rt_socket.c new file mode 100644 index 0000000..1f3f66a --- /dev/null +++ b/zebra/rt_socket.c @@ -0,0 +1,435 @@ +/* + * Kernel routing table updates by routing socket. + * Copyright (C) 1997, 98 Kunihiro Ishiguro + * + * This file is part of GNU Zebra. 
+ * + * GNU Zebra is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License as published by the + * Free Software Foundation; either version 2, or (at your option) any + * later version. + * + * GNU Zebra is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. + * + * You should have received a copy of the GNU General Public License along + * with this program; see the file COPYING; if not, write to the Free Software + * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA + */ + +#include <zebra.h> + +#ifndef HAVE_NETLINK + +#ifdef __OpenBSD__ +#include <netmpls/mpls.h> +#endif + +#include "if.h" +#include "prefix.h" +#include "sockunion.h" +#include "log.h" +#include "privs.h" +#include "vxlan.h" +#include "lib_errors.h" + +#include "zebra/debug.h" +#include "zebra/rib.h" +#include "zebra/rt.h" +#include "zebra/kernel_socket.h" +#include "zebra/zebra_mpls.h" +#include "zebra/zebra_errors.h" + +extern struct zebra_privs_t zserv_privs; + +#ifdef __OpenBSD__ +static int kernel_rtm_add_labels(struct mpls_label_stack *nh_label, + struct sockaddr_mpls *smpls) +{ + if (nh_label->num_labels > 1) { + flog_warn(EC_ZEBRA_MAX_LABELS_PUSH, + "%s: can't push %u labels at once (maximum is 1)", + __func__, nh_label->num_labels); + return -1; + } + + memset(smpls, 0, sizeof(*smpls)); + smpls->smpls_len = sizeof(*smpls); + smpls->smpls_family = AF_MPLS; + smpls->smpls_label = htonl(nh_label->label[0] << MPLS_LABEL_OFFSET); + + return 0; +} +#endif + +/* Interface between zebra message and rtm message. 
*/ +static int kernel_rtm(int cmd, const struct prefix *p, + const struct nexthop_group *ng, uint32_t metric) + +{ + union sockunion sin_dest, sin_mask, sin_gate; +#ifdef __OpenBSD__ + struct sockaddr_mpls smpls; +#endif + union sockunion *smplsp = NULL; + struct nexthop *nexthop; + int nexthop_num = 0; + ifindex_t ifindex = 0; + bool gate = false; + int error; + char gate_buf[INET6_BUFSIZ]; + enum blackhole_type bh_type = BLACKHOLE_UNSPEC; + + /* + * We only have the ability to ADD or DELETE at this point + * in time. + */ + if (cmd != RTM_ADD && cmd != RTM_DELETE) { + if (IS_ZEBRA_DEBUG_KERNEL) + zlog_debug("%s: %pFX odd command %s", __func__, p, + lookup_msg(rtm_type_str, cmd, NULL)); + return 0; + } + + memset(&sin_dest, 0, sizeof(sin_dest)); + memset(&sin_gate, 0, sizeof(sin_gate)); + memset(&sin_mask, 0, sizeof(sin_mask)); + + switch (p->family) { + case AF_INET: + sin_dest.sin.sin_family = AF_INET; + sin_dest.sin.sin_addr = p->u.prefix4; + sin_gate.sin.sin_family = AF_INET; +#ifdef HAVE_STRUCT_SOCKADDR_SA_LEN + sin_dest.sin.sin_len = sizeof(struct sockaddr_in); + sin_gate.sin.sin_len = sizeof(struct sockaddr_in); +#endif /* HAVE_STRUCT_SOCKADDR_SA_LEN */ + break; + case AF_INET6: + sin_dest.sin6.sin6_family = AF_INET6; + sin_dest.sin6.sin6_addr = p->u.prefix6; + sin_gate.sin6.sin6_family = AF_INET6; +#ifdef HAVE_STRUCT_SOCKADDR_SA_LEN + sin_dest.sin6.sin6_len = sizeof(struct sockaddr_in6); + sin_gate.sin6.sin6_len = sizeof(struct sockaddr_in6); +#endif /* HAVE_STRUCT_SOCKADDR_SA_LEN */ + break; + } + + /* Make gateway. 
*/ + for (ALL_NEXTHOPS_PTR(ng, nexthop)) { + /* + * We only want to use the actual good nexthops + */ + if (CHECK_FLAG(nexthop->flags, NEXTHOP_FLAG_RECURSIVE) || + !CHECK_FLAG(nexthop->flags, NEXTHOP_FLAG_ACTIVE)) + continue; + + smplsp = NULL; + gate = false; + snprintf(gate_buf, sizeof(gate_buf), "NULL"); + + switch (nexthop->type) { + case NEXTHOP_TYPE_IPV4: + case NEXTHOP_TYPE_IPV4_IFINDEX: + sin_gate.sin.sin_addr = nexthop->gate.ipv4; + sin_gate.sin.sin_family = AF_INET; +#ifdef HAVE_STRUCT_SOCKADDR_SA_LEN + sin_gate.sin.sin_len = sizeof(struct sockaddr_in); +#endif /* HAVE_STRUCT_SOCKADDR_SA_LEN */ + ifindex = nexthop->ifindex; + gate = true; + break; + case NEXTHOP_TYPE_IPV6: + case NEXTHOP_TYPE_IPV6_IFINDEX: + sin_gate.sin6.sin6_addr = nexthop->gate.ipv6; + sin_gate.sin6.sin6_family = AF_INET6; +#ifdef HAVE_STRUCT_SOCKADDR_SA_LEN + sin_gate.sin6.sin6_len = sizeof(struct sockaddr_in6); +#endif /* HAVE_STRUCT_SOCKADDR_SA_LEN */ + ifindex = nexthop->ifindex; +/* Under kame set interface index to link local address */ +#ifdef KAME + +#define SET_IN6_LINKLOCAL_IFINDEX(a, i) \ + do { \ + (a).s6_addr[2] = ((i) >> 8) & 0xff; \ + (a).s6_addr[3] = (i)&0xff; \ + } while (0) + + if (IN6_IS_ADDR_LINKLOCAL(&sin_gate.sin6.sin6_addr)) + SET_IN6_LINKLOCAL_IFINDEX( + sin_gate.sin6.sin6_addr, + ifindex); +#endif /* KAME */ + + gate = true; + break; + case NEXTHOP_TYPE_IFINDEX: + ifindex = nexthop->ifindex; + break; + case NEXTHOP_TYPE_BLACKHOLE: + bh_type = nexthop->bh_type; + switch (p->family) { + case AF_INET: { + struct in_addr loopback; + + loopback.s_addr = htonl(INADDR_LOOPBACK); + sin_gate.sin.sin_addr = loopback; +#ifdef HAVE_STRUCT_SOCKADDR_SA_LEN + sin_gate.sin.sin_len = + sizeof(struct sockaddr_in); +#endif /* HAVE_STRUCT_SOCKADDR_SA_LEN */ + gate = true; + } break; + case AF_INET6: { + struct in6_addr loopback; + + inet_pton(AF_INET6, "::1", &loopback); + + sin_gate.sin6.sin6_addr = loopback; + sin_gate.sin6.sin6_family = AF_INET6; + +#ifdef 
HAVE_STRUCTSOCKADDR_SA_LEN + sin_gate.sin6.sin6_len = + sizeof(struct sockaddr_in6); +#endif /* HAVE_STRUCTSOCKADDR_SA_LEN */ + gate = true; + } break; + } + } + + switch (p->family) { + case AF_INET: + masklen2ip(p->prefixlen, &sin_mask.sin.sin_addr); + sin_mask.sin.sin_family = AF_INET; +#ifdef HAVE_STRUCT_SOCKADDR_SA_LEN + sin_mask.sin.sin_len = sizeof(struct sockaddr_in); +#endif /* HAVE_STRUCT_SOCKADDR_SA_LEN */ + break; + case AF_INET6: + masklen2ip6(p->prefixlen, &sin_mask.sin6.sin6_addr); + sin_mask.sin6.sin6_family = AF_INET6; +#ifdef HAVE_STRUCT_SOCKADDR_SA_LEN + sin_mask.sin6.sin6_len = sizeof(struct sockaddr_in6); +#endif /* HAVE_STRUCT_SOCKADDR_SA_LEN */ + break; + } + +#ifdef __OpenBSD__ + if (nexthop->nh_label) { + if (kernel_rtm_add_labels(nexthop->nh_label, + &smpls) != 0) + continue; + smplsp = (union sockunion *)&smpls; + } +#endif + error = rtm_write(cmd, &sin_dest, &sin_mask, + gate ? &sin_gate : NULL, smplsp, + ifindex, bh_type, metric); + + if (IS_ZEBRA_DEBUG_KERNEL) { + if (!gate) { + zlog_debug( + "%s: %pFX: attention! gate not found for re", + __func__, p); + } else { + switch (p->family) { + case AF_INET: + inet_ntop(AF_INET, + &sin_gate.sin.sin_addr, + gate_buf, sizeof(gate_buf)); + break; + + case AF_INET6: + inet_ntop(AF_INET6, + &sin_gate.sin6.sin6_addr, + gate_buf, sizeof(gate_buf)); + break; + + default: + snprintf(gate_buf, sizeof(gate_buf), + "(invalid-af)"); + break; + } + } + } + switch (error) { + /* We only flag nexthops as being in FIB if + * rtm_write() did its work. */ + case ZEBRA_ERR_NOERROR: + nexthop_num++; + if (IS_ZEBRA_DEBUG_KERNEL) + zlog_debug("%s: %pFX: successfully did NH %s", + __func__, p, gate_buf); + if (cmd == RTM_ADD) + SET_FLAG(nexthop->flags, NEXTHOP_FLAG_FIB); + break; + + /* The only valid case for this error is + * kernel's failure to install a multipath + * route, which is common for FreeBSD. This + * should be ignored silently, but logged as an error + * otherwise. 
+ */ + case ZEBRA_ERR_RTEXIST: + if (cmd != RTM_ADD) + flog_err(EC_LIB_SYSTEM_CALL, + "%s: rtm_write() returned %d for command %d", + __func__, error, cmd); + continue; + + /* Note any unexpected status returns */ + case ZEBRA_ERR_RTNOEXIST: + if (cmd != RTM_DELETE) + flog_err(EC_LIB_SYSTEM_CALL, + "%s: rtm_write() returned %d for command %d", + __func__, error, cmd); + break; + default: + flog_err( + EC_LIB_SYSTEM_CALL, + "%s: %pFX: rtm_write() unexpectedly returned %d for command %s", + __func__, p, error, + lookup_msg(rtm_type_str, cmd, NULL)); + break; + } + } /* for (ALL_NEXTHOPS(...))*/ + + /* If there was no useful nexthop, then complain. */ + if (nexthop_num == 0) { + if (IS_ZEBRA_DEBUG_KERNEL) + zlog_debug( + "%s: No useful nexthops were found in RIB prefix %pFX", + __func__, p); + return 1; + } + + return 0; /*XXX*/ +} + +/* + * Update or delete a prefix from the kernel, + * using info from a dataplane context struct. + */ +enum zebra_dplane_result kernel_route_update(struct zebra_dplane_ctx *ctx) +{ + enum zebra_dplane_result res = ZEBRA_DPLANE_REQUEST_SUCCESS; + uint32_t type, old_type; + + if (dplane_ctx_get_src(ctx) != NULL) { + zlog_err("route add: IPv6 sourcedest routes unsupported!"); + return ZEBRA_DPLANE_REQUEST_FAILURE; + } + + type = dplane_ctx_get_type(ctx); + old_type = dplane_ctx_get_old_type(ctx); + + frr_with_privs(&zserv_privs) { + + if (dplane_ctx_get_op(ctx) == DPLANE_OP_ROUTE_DELETE) { + if (!RSYSTEM_ROUTE(type)) + kernel_rtm(RTM_DELETE, dplane_ctx_get_dest(ctx), + dplane_ctx_get_ng(ctx), + dplane_ctx_get_metric(ctx)); + } else if (dplane_ctx_get_op(ctx) == DPLANE_OP_ROUTE_INSTALL) { + if (!RSYSTEM_ROUTE(type)) + kernel_rtm(RTM_ADD, dplane_ctx_get_dest(ctx), + dplane_ctx_get_ng(ctx), + dplane_ctx_get_metric(ctx)); + } else if (dplane_ctx_get_op(ctx) == DPLANE_OP_ROUTE_UPDATE) { + /* Must do delete and add separately - + * no update available + */ + if (!RSYSTEM_ROUTE(old_type)) + kernel_rtm(RTM_DELETE, dplane_ctx_get_dest(ctx), + 
dplane_ctx_get_old_ng(ctx), + dplane_ctx_get_old_metric(ctx)); + + if (!RSYSTEM_ROUTE(type)) + kernel_rtm(RTM_ADD, dplane_ctx_get_dest(ctx), + dplane_ctx_get_ng(ctx), + dplane_ctx_get_metric(ctx)); + } else { + zlog_err("Invalid routing socket update op %s (%u)", + dplane_op2str(dplane_ctx_get_op(ctx)), + dplane_ctx_get_op(ctx)); + res = ZEBRA_DPLANE_REQUEST_FAILURE; + } + } /* Elevated privs */ + + return res; +} + +enum zebra_dplane_result kernel_nexthop_update(struct zebra_dplane_ctx *ctx) +{ + return ZEBRA_DPLANE_REQUEST_SUCCESS; +} + +int kernel_neigh_register(vrf_id_t vrf_id, struct zserv *client, bool reg) +{ + /* TODO */ + return 0; +} + +int kernel_neigh_update(int add, int ifindex, void *addr, char *lla, int llalen, + ns_id_t ns_id, uint8_t family, bool permanent) +{ + /* TODO */ + return 0; +} + +/* NYI on routing-socket platforms, but we've always returned 'success'... */ +enum zebra_dplane_result kernel_neigh_update_ctx(struct zebra_dplane_ctx *ctx) +{ + return ZEBRA_DPLANE_REQUEST_SUCCESS; +} + +extern int kernel_get_ipmr_sg_stats(struct zebra_vrf *zvrf, void *mroute) +{ + return 0; +} + +/* + * Update MAC, using dataplane context object. No-op here for now. 
+ */ +enum zebra_dplane_result kernel_mac_update_ctx(struct zebra_dplane_ctx *ctx) +{ + return ZEBRA_DPLANE_REQUEST_SUCCESS; +} + +extern int kernel_interface_set_master(struct interface *master, + struct interface *slave) +{ + return 0; +} + +uint32_t kernel_get_speed(struct interface *ifp, int *error) +{ + return ifp->speed; +} + +int kernel_upd_mac_nh(uint32_t nh_id, struct in_addr vtep_ip) +{ + return 0; +} + +int kernel_del_mac_nh(uint32_t nh_id) +{ + return 0; +} + +int kernel_upd_mac_nhg(uint32_t nhg_id, uint32_t nh_cnt, + struct nh_grp *nh_ids) +{ + return 0; +} + +int kernel_del_mac_nhg(uint32_t nhg_id) +{ + return 0; +} + +#endif /* !HAVE_NETLINK */ diff --git a/zebra/rtadv.c b/zebra/rtadv.c new file mode 100644 index 0000000..93590a2 --- /dev/null +++ b/zebra/rtadv.c @@ -0,0 +1,3049 @@ +/* Router advertisement + * Copyright (C) 2016 Cumulus Networks + * Copyright (C) 2005 6WIND <jean-mickael.guerin@6wind.com> + * Copyright (C) 1999 Kunihiro Ishiguro + * + * This file is part of GNU Zebra. + * + * GNU Zebra is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License as published by the + * Free Software Foundation; either version 2, or (at your option) any + * later version. + * + * GNU Zebra is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. 
+ * + * You should have received a copy of the GNU General Public License along + * with this program; see the file COPYING; if not, write to the Free Software + * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA + */ + +#include <zebra.h> + +#include "memory.h" +#include "sockopt.h" +#include "thread.h" +#include "if.h" +#include "stream.h" +#include "log.h" +#include "prefix.h" +#include "linklist.h" +#include "command.h" +#include "privs.h" +#include "vrf.h" +#include "ns.h" +#include "lib_errors.h" + +#include "zebra/interface.h" +#include "zebra/rtadv.h" +#include "zebra/debug.h" +#include "zebra/rib.h" +#include "zebra/zapi_msg.h" +#include "zebra/zebra_vrf.h" +#include "zebra/zebra_errors.h" +#include "zebra/zebra_router.h" + +extern struct zebra_privs_t zserv_privs; + +static uint32_t interfaces_configured_for_ra_from_bgp; + +#if defined(HAVE_RTADV) + +#ifndef VTYSH_EXTRACT_PL +#include "zebra/rtadv_clippy.c" +#endif + +DEFINE_MTYPE_STATIC(ZEBRA, RTADV_PREFIX, "Router Advertisement Prefix"); +DEFINE_MTYPE_STATIC(ZEBRA, ADV_IF, "Advertised Interface"); + +#ifdef OPEN_BSD +#include <netinet/icmp6.h> +#endif + +/* If RFC2133 definition is used. 
*/ +#ifndef IPV6_JOIN_GROUP +#define IPV6_JOIN_GROUP IPV6_ADD_MEMBERSHIP +#endif +#ifndef IPV6_LEAVE_GROUP +#define IPV6_LEAVE_GROUP IPV6_DROP_MEMBERSHIP +#endif + +#define ALLNODE "ff02::1" +#define ALLROUTER "ff02::2" + +/* adv list node */ +struct adv_if { + char name[INTERFACE_NAMSIZ]; + struct adv_if_list_item list_item; +}; + +static int adv_if_cmp(const struct adv_if *a, const struct adv_if *b) +{ + return if_cmp_name_func(a->name, b->name); +} + +DECLARE_SORTLIST_UNIQ(adv_if_list, struct adv_if, list_item, adv_if_cmp); + +static int rtadv_prefix_cmp(const struct rtadv_prefix *a, + const struct rtadv_prefix *b) +{ + return prefix_cmp(&a->prefix, &b->prefix); +} + +DECLARE_RBTREE_UNIQ(rtadv_prefixes, struct rtadv_prefix, item, + rtadv_prefix_cmp); + +DEFINE_MTYPE_STATIC(ZEBRA, RTADV_RDNSS, "Router Advertisement RDNSS"); +DEFINE_MTYPE_STATIC(ZEBRA, RTADV_DNSSL, "Router Advertisement DNSSL"); + +/* Order is intentional. Matches RFC4191. This array is also used for + command matching, so only modify with care. 
*/ +static const char *const rtadv_pref_strs[] = { + "medium", "high", "INVALID", "low", 0 +}; + +enum rtadv_event { + RTADV_START, + RTADV_STOP, + RTADV_TIMER, + RTADV_TIMER_MSEC, + RTADV_READ +}; + +static void rtadv_event(struct zebra_vrf *, enum rtadv_event, int); + +static int if_join_all_router(int, struct interface *); +static int if_leave_all_router(int, struct interface *); + +static struct zebra_vrf *rtadv_interface_get_zvrf(const struct interface *ifp) +{ + /* We use the default vrf for rtadv handling except in netns */ + if (!vrf_is_backend_netns()) + return vrf_info_lookup(VRF_DEFAULT); + + return ifp->vrf->info; +} + +static int rtadv_increment_received(struct zebra_vrf *zvrf, ifindex_t *ifindex) +{ + int ret = -1; + struct interface *iface; + struct zebra_if *zif; + + iface = if_lookup_by_index(*ifindex, zvrf->vrf->vrf_id); + if (iface && iface->info) { + zif = iface->info; + zif->ra_rcvd++; + ret = 0; + } + return ret; +} + +static int rtadv_recv_packet(struct zebra_vrf *zvrf, int sock, uint8_t *buf, + int buflen, struct sockaddr_in6 *from, + ifindex_t *ifindex, int *hoplimit) +{ + int ret; + struct msghdr msg; + struct iovec iov; + struct cmsghdr *cmsgptr; + struct in6_addr dst; + + char adata[1024]; + + /* Fill in message and iovec. */ + memset(&msg, 0, sizeof(msg)); + msg.msg_name = (void *)from; + msg.msg_namelen = sizeof(struct sockaddr_in6); + msg.msg_iov = &iov; + msg.msg_iovlen = 1; + msg.msg_control = (void *)adata; + msg.msg_controllen = sizeof(adata); + iov.iov_base = buf; + iov.iov_len = buflen; + + /* If recvmsg fail return minus value. */ + ret = recvmsg(sock, &msg, 0); + if (ret < 0) + return ret; + + for (cmsgptr = CMSG_FIRSTHDR(&msg); cmsgptr != NULL; + cmsgptr = CMSG_NXTHDR(&msg, cmsgptr)) { + /* I want interface index which this packet comes from. 
*/ + if (cmsgptr->cmsg_level == IPPROTO_IPV6 + && cmsgptr->cmsg_type == IPV6_PKTINFO) { + struct in6_pktinfo *ptr; + + ptr = (struct in6_pktinfo *)CMSG_DATA(cmsgptr); + *ifindex = ptr->ipi6_ifindex; + memcpy(&dst, &ptr->ipi6_addr, sizeof(ptr->ipi6_addr)); + } + + /* Incoming packet's hop limit. */ + if (cmsgptr->cmsg_level == IPPROTO_IPV6 + && cmsgptr->cmsg_type == IPV6_HOPLIMIT) { + int *hoptr = (int *)CMSG_DATA(cmsgptr); + *hoplimit = *hoptr; + } + } + + rtadv_increment_received(zvrf, ifindex); + return ret; +} + +#define RTADV_MSG_SIZE 4096 + +/* Send router advertisement packet. */ +static void rtadv_send_packet(int sock, struct interface *ifp, + enum ipv6_nd_suppress_ra_status stop) +{ + struct msghdr msg; + struct iovec iov; + struct cmsghdr *cmsgptr; + struct in6_pktinfo *pkt; + struct sockaddr_in6 addr; + static void *adata = NULL; + unsigned char buf[RTADV_MSG_SIZE]; + struct nd_router_advert *rtadv; + int ret; + int len = 0; + struct zebra_if *zif; + struct rtadv_prefix *rprefix; + uint8_t all_nodes_addr[] = {0xff, 0x02, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 1}; + struct listnode *node; + uint16_t pkt_RouterLifetime; + + /* + * Allocate control message bufffer. This is dynamic because + * CMSG_SPACE is not guaranteed not to call a function. Note that + * the size will be different on different architectures due to + * differing alignment rules. + */ + if (adata == NULL) { + /* XXX Free on shutdown. */ + adata = calloc(1, CMSG_SPACE(sizeof(struct in6_pktinfo))); + + if (adata == NULL) { + zlog_debug("%s: can't malloc control data", __func__); + exit(-1); + } + } + + /* Logging of packet. */ + if (IS_ZEBRA_DEBUG_PACKET) + zlog_debug("%s(%s:%u): Tx RA, socket %u", ifp->name, + ifp->vrf->name, ifp->ifindex, sock); + + /* Fill in sockaddr_in6. 
*/ + memset(&addr, 0, sizeof(struct sockaddr_in6)); + addr.sin6_family = AF_INET6; +#ifdef SIN6_LEN + addr.sin6_len = sizeof(struct sockaddr_in6); +#endif /* SIN6_LEN */ + addr.sin6_port = htons(IPPROTO_ICMPV6); + IPV6_ADDR_COPY(&addr.sin6_addr, all_nodes_addr); + + /* Fetch interface information. */ + zif = ifp->info; + + /* Make router advertisement message. */ + rtadv = (struct nd_router_advert *)buf; + + rtadv->nd_ra_type = ND_ROUTER_ADVERT; + rtadv->nd_ra_code = 0; + rtadv->nd_ra_cksum = 0; + + rtadv->nd_ra_curhoplimit = zif->rtadv.AdvCurHopLimit; + + /* RFC4191: Default Router Preference is 0 if Router Lifetime is 0. */ + rtadv->nd_ra_flags_reserved = zif->rtadv.AdvDefaultLifetime == 0 + ? 0 + : zif->rtadv.DefaultPreference; + rtadv->nd_ra_flags_reserved <<= 3; + + if (zif->rtadv.AdvManagedFlag) + rtadv->nd_ra_flags_reserved |= ND_RA_FLAG_MANAGED; + if (zif->rtadv.AdvOtherConfigFlag) + rtadv->nd_ra_flags_reserved |= ND_RA_FLAG_OTHER; + if (zif->rtadv.AdvHomeAgentFlag) + rtadv->nd_ra_flags_reserved |= ND_RA_FLAG_HOME_AGENT; + /* Note that according to Neighbor Discovery (RFC 4861 [18]), + * AdvDefaultLifetime is by default based on the value of + * MaxRtrAdvInterval. AdvDefaultLifetime is used in the Router Lifetime + * field of Router Advertisements. Given that this field is expressed + * in seconds, a small MaxRtrAdvInterval value can result in a zero + * value for this field. To prevent this, routers SHOULD keep + * AdvDefaultLifetime in at least one second, even if the use of + * MaxRtrAdvInterval would result in a smaller value. -- RFC6275, 7.5 */ + pkt_RouterLifetime = + zif->rtadv.AdvDefaultLifetime != -1 + ? zif->rtadv.AdvDefaultLifetime + : MAX(1, 0.003 * zif->rtadv.MaxRtrAdvInterval); + + /* send RA lifetime of 0 before stopping. rfc4861/6.2.5 */ + rtadv->nd_ra_router_lifetime = + (stop == RA_SUPPRESS) ? 
htons(0) : htons(pkt_RouterLifetime); + rtadv->nd_ra_reachable = htonl(zif->rtadv.AdvReachableTime); + rtadv->nd_ra_retransmit = htonl(zif->rtadv.AdvRetransTimer); + + len = sizeof(struct nd_router_advert); + + /* If both the Home Agent Preference and Home Agent Lifetime are set to + * their default values specified above, this option SHOULD NOT be + * included in the Router Advertisement messages sent by this home + * agent. -- RFC6275, 7.4 */ + if (zif->rtadv.AdvHomeAgentFlag + && (zif->rtadv.HomeAgentPreference + || zif->rtadv.HomeAgentLifetime != -1)) { + struct nd_opt_homeagent_info *ndopt_hai = + (struct nd_opt_homeagent_info *)(buf + len); + ndopt_hai->nd_opt_hai_type = ND_OPT_HA_INFORMATION; + ndopt_hai->nd_opt_hai_len = 1; + ndopt_hai->nd_opt_hai_reserved = 0; + ndopt_hai->nd_opt_hai_preference = + htons(zif->rtadv.HomeAgentPreference); + /* 16-bit unsigned integer. The lifetime associated with the + * home + * agent in units of seconds. The default value is the same as + * the + * Router Lifetime, as specified in the main body of the Router + * Advertisement. The maximum value corresponds to 18.2 hours. + * A + * value of 0 MUST NOT be used. -- RFC6275, 7.5 */ + ndopt_hai->nd_opt_hai_lifetime = + htons(zif->rtadv.HomeAgentLifetime != -1 + ? zif->rtadv.HomeAgentLifetime + : MAX(1, pkt_RouterLifetime) /* 0 is OK + for RL, + but not + for HAL*/ + ); + len += sizeof(struct nd_opt_homeagent_info); + } + + if (zif->rtadv.AdvIntervalOption) { + struct nd_opt_adv_interval *ndopt_adv = + (struct nd_opt_adv_interval *)(buf + len); + ndopt_adv->nd_opt_ai_type = ND_OPT_ADV_INTERVAL; + ndopt_adv->nd_opt_ai_len = 1; + ndopt_adv->nd_opt_ai_reserved = 0; + ndopt_adv->nd_opt_ai_interval = + htonl(zif->rtadv.MaxRtrAdvInterval); + len += sizeof(struct nd_opt_adv_interval); + } + + /* Fill in prefix. 
*/ + frr_each (rtadv_prefixes, zif->rtadv.prefixes, rprefix) { + struct nd_opt_prefix_info *pinfo; + + pinfo = (struct nd_opt_prefix_info *)(buf + len); + + pinfo->nd_opt_pi_type = ND_OPT_PREFIX_INFORMATION; + pinfo->nd_opt_pi_len = 4; + pinfo->nd_opt_pi_prefix_len = rprefix->prefix.prefixlen; + + pinfo->nd_opt_pi_flags_reserved = 0; + if (rprefix->AdvOnLinkFlag) + pinfo->nd_opt_pi_flags_reserved |= + ND_OPT_PI_FLAG_ONLINK; + if (rprefix->AdvAutonomousFlag) + pinfo->nd_opt_pi_flags_reserved |= ND_OPT_PI_FLAG_AUTO; + if (rprefix->AdvRouterAddressFlag) + pinfo->nd_opt_pi_flags_reserved |= ND_OPT_PI_FLAG_RADDR; + + pinfo->nd_opt_pi_valid_time = htonl(rprefix->AdvValidLifetime); + pinfo->nd_opt_pi_preferred_time = + htonl(rprefix->AdvPreferredLifetime); + pinfo->nd_opt_pi_reserved2 = 0; + + IPV6_ADDR_COPY(&pinfo->nd_opt_pi_prefix, + &rprefix->prefix.prefix); + + len += sizeof(struct nd_opt_prefix_info); + } + + /* Hardware address. */ + if (ifp->hw_addr_len != 0) { + buf[len++] = ND_OPT_SOURCE_LINKADDR; + + /* Option length should be rounded up to next octet if + the link address does not end on an octet boundary. */ + buf[len++] = (ifp->hw_addr_len + 9) >> 3; + + memcpy(buf + len, ifp->hw_addr, ifp->hw_addr_len); + len += ifp->hw_addr_len; + + /* Pad option to end on an octet boundary. */ + memset(buf + len, 0, -(ifp->hw_addr_len + 2) & 0x7); + len += -(ifp->hw_addr_len + 2) & 0x7; + } + + /* MTU */ + if (zif->rtadv.AdvLinkMTU) { + struct nd_opt_mtu *opt = (struct nd_opt_mtu *)(buf + len); + opt->nd_opt_mtu_type = ND_OPT_MTU; + opt->nd_opt_mtu_len = 1; + opt->nd_opt_mtu_reserved = 0; + opt->nd_opt_mtu_mtu = htonl(zif->rtadv.AdvLinkMTU); + len += sizeof(struct nd_opt_mtu); + } + + /* + * There is no limit on the number of configurable recursive DNS + * servers or search list entries. We don't want the RA message + * to exceed the link's MTU (risking fragmentation) or even + * blow the stack buffer allocated for it. 
+ */ + size_t max_len = MIN(ifp->mtu6 - 40, sizeof(buf)); + + /* Recursive DNS servers */ + struct rtadv_rdnss *rdnss; + + for (ALL_LIST_ELEMENTS_RO(zif->rtadv.AdvRDNSSList, node, rdnss)) { + size_t opt_len = + sizeof(struct nd_opt_rdnss) + sizeof(struct in6_addr); + + if (len + opt_len > max_len) { + zlog_warn( + "%s(%s:%u): Tx RA: RDNSS option would exceed MTU, omitting it", + ifp->name, ifp->vrf->name, ifp->ifindex); + goto no_more_opts; + } + struct nd_opt_rdnss *opt = (struct nd_opt_rdnss *)(buf + len); + + opt->nd_opt_rdnss_type = ND_OPT_RDNSS; + opt->nd_opt_rdnss_len = opt_len / 8; + opt->nd_opt_rdnss_reserved = 0; + opt->nd_opt_rdnss_lifetime = htonl( + rdnss->lifetime_set + ? rdnss->lifetime + : MAX(1, 0.003 * zif->rtadv.MaxRtrAdvInterval)); + + len += sizeof(struct nd_opt_rdnss); + + IPV6_ADDR_COPY(buf + len, &rdnss->addr); + len += sizeof(struct in6_addr); + } + + /* DNS search list */ + struct rtadv_dnssl *dnssl; + + for (ALL_LIST_ELEMENTS_RO(zif->rtadv.AdvDNSSLList, node, dnssl)) { + size_t opt_len = sizeof(struct nd_opt_dnssl) + + ((dnssl->encoded_len + 7) & ~7); + + if (len + opt_len > max_len) { + zlog_warn( + "%s(%u): Tx RA: DNSSL option would exceed MTU, omitting it", + ifp->name, ifp->ifindex); + goto no_more_opts; + } + struct nd_opt_dnssl *opt = (struct nd_opt_dnssl *)(buf + len); + + opt->nd_opt_dnssl_type = ND_OPT_DNSSL; + opt->nd_opt_dnssl_len = opt_len / 8; + opt->nd_opt_dnssl_reserved = 0; + opt->nd_opt_dnssl_lifetime = htonl( + dnssl->lifetime_set + ? 
dnssl->lifetime + : MAX(1, 0.003 * zif->rtadv.MaxRtrAdvInterval)); + + len += sizeof(struct nd_opt_dnssl); + + memcpy(buf + len, dnssl->encoded_name, dnssl->encoded_len); + len += dnssl->encoded_len; + + /* Zero-pad to 8-octet boundary */ + while (len % 8) + buf[len++] = '\0'; + } + +no_more_opts: + + msg.msg_name = (void *)&addr; + msg.msg_namelen = sizeof(struct sockaddr_in6); + msg.msg_iov = &iov; + msg.msg_iovlen = 1; + msg.msg_control = (void *)adata; + msg.msg_controllen = CMSG_SPACE(sizeof(struct in6_pktinfo)); + msg.msg_flags = 0; + iov.iov_base = buf; + iov.iov_len = len; + + cmsgptr = CMSG_FIRSTHDR(&msg); + cmsgptr->cmsg_len = CMSG_LEN(sizeof(struct in6_pktinfo)); + cmsgptr->cmsg_level = IPPROTO_IPV6; + cmsgptr->cmsg_type = IPV6_PKTINFO; + + pkt = (struct in6_pktinfo *)CMSG_DATA(cmsgptr); + memset(&pkt->ipi6_addr, 0, sizeof(struct in6_addr)); + pkt->ipi6_ifindex = ifp->ifindex; + + ret = sendmsg(sock, &msg, 0); + if (ret < 0) { + flog_err_sys(EC_LIB_SOCKET, + "%s(%u): Tx RA failed, socket %u error %d (%s)", + ifp->name, ifp->ifindex, sock, errno, + safe_strerror(errno)); + } else + zif->ra_sent++; +} + +static void rtadv_timer(struct thread *thread) +{ + struct zebra_vrf *zvrf = THREAD_ARG(thread); + struct vrf *vrf; + struct interface *ifp; + struct zebra_if *zif; + int period; + + zvrf->rtadv.ra_timer = NULL; + if (adv_if_list_count(&zvrf->rtadv.adv_msec_if) == 0) { + period = 1000; /* 1 s */ + rtadv_event(zvrf, RTADV_TIMER, 1 /* 1 s */); + } else { + period = 10; /* 10 ms */ + rtadv_event(zvrf, RTADV_TIMER_MSEC, 10 /* 10 ms */); + } + + RB_FOREACH (vrf, vrf_id_head, &vrfs_by_id) + FOR_ALL_INTERFACES (vrf, ifp) { + if (if_is_loopback(ifp) || !if_is_operative(ifp) || + IS_ZEBRA_IF_BRIDGE_SLAVE(ifp) || + !connected_get_linklocal(ifp) || + (vrf_is_backend_netns() && + ifp->vrf->vrf_id != zvrf->vrf->vrf_id)) + continue; + + zif = ifp->info; + + if (zif->rtadv.AdvSendAdvertisements) { + if (zif->rtadv.inFastRexmit + && zif->rtadv.UseFastRexmit) { + /* We 
assume we fast rexmit every sec so + * no + * additional vars */ + if (--zif->rtadv.NumFastReXmitsRemain + <= 0) + zif->rtadv.inFastRexmit = 0; + + if (IS_ZEBRA_DEBUG_SEND) + zlog_debug( + "Fast RA Rexmit on interface %s(%s:%u)", + ifp->name, + ifp->vrf->name, + ifp->ifindex); + + rtadv_send_packet(zvrf->rtadv.sock, ifp, + RA_ENABLE); + } else { + zif->rtadv.AdvIntervalTimer -= period; + if (zif->rtadv.AdvIntervalTimer <= 0) { + /* FIXME: using + MaxRtrAdvInterval each + time isn't what section + 6.2.4 of RFC4861 tells to do. + */ + zif->rtadv.AdvIntervalTimer = + zif->rtadv + .MaxRtrAdvInterval; + rtadv_send_packet( + zvrf->rtadv.sock, ifp, + RA_ENABLE); + } + } + } + } +} + +static void rtadv_process_solicit(struct interface *ifp) +{ + struct zebra_vrf *zvrf; + struct zebra_if *zif; + + zvrf = rtadv_interface_get_zvrf(ifp); + assert(zvrf); + zif = ifp->info; + + /* + * If FastRetransmit is enabled, send the RA immediately. + * If not enabled but it has been more than MIN_DELAY_BETWEEN_RAS + * (3 seconds) since the last RA was sent, send it now and reset + * the timer to start at the max (configured) again. + * If not enabled and it is less than 3 seconds since the last + * RA packet was sent, set the timer for 3 seconds so the next + * one will be sent with a minimum of 3 seconds between RAs. + * RFC4861 sec 6.2.6 + */ + if ((zif->rtadv.UseFastRexmit) + || (zif->rtadv.AdvIntervalTimer <= + (zif->rtadv.MaxRtrAdvInterval - MIN_DELAY_BETWEEN_RAS))) { + rtadv_send_packet(zvrf->rtadv.sock, ifp, RA_ENABLE); + zif->rtadv.AdvIntervalTimer = zif->rtadv.MaxRtrAdvInterval; + } else + zif->rtadv.AdvIntervalTimer = MIN_DELAY_BETWEEN_RAS; +} + +/* + * This function processes optional attributes off of + * end of a RA packet received. At this point in + * time we only care about this in one situation + * which is when a interface does not have a LL + * v6 address. 
We still need to be able to install + * the mac address for v4 to v6 resolution + */ +static void rtadv_process_optional(uint8_t *optional, unsigned int len, + struct interface *ifp, + struct sockaddr_in6 *addr) +{ + char *mac; + + while (len > 0) { + struct nd_opt_hdr *opt_hdr = (struct nd_opt_hdr *)optional; + + switch(opt_hdr->nd_opt_type) { + case ND_OPT_SOURCE_LINKADDR: + mac = (char *)(optional+2); + if_nbr_mac_to_ipv4ll_neigh_update(ifp, mac, + &addr->sin6_addr, 1); + break; + default: + break; + } + + len -= 8 * opt_hdr->nd_opt_len; + optional += 8 * opt_hdr->nd_opt_len; + } +} + +static void rtadv_process_advert(uint8_t *msg, unsigned int len, + struct interface *ifp, + struct sockaddr_in6 *addr) +{ + struct nd_router_advert *radvert; + char addr_str[INET6_ADDRSTRLEN]; + struct zebra_if *zif; + struct prefix p; + + zif = ifp->info; + + inet_ntop(AF_INET6, &addr->sin6_addr, addr_str, INET6_ADDRSTRLEN); + + if (len < sizeof(struct nd_router_advert)) { + if (IS_ZEBRA_DEBUG_PACKET) + zlog_debug( + "%s(%s:%u): Rx RA with invalid length %d from %s", + ifp->name, ifp->vrf->name, ifp->ifindex, len, + addr_str); + return; + } + + if (!IN6_IS_ADDR_LINKLOCAL(&addr->sin6_addr)) { + rtadv_process_optional(msg + sizeof(struct nd_router_advert), + len - sizeof(struct nd_router_advert), + ifp, addr); + if (IS_ZEBRA_DEBUG_PACKET) + zlog_debug( + "%s(%s:%u): Rx RA with non-linklocal source address from %s", + ifp->name, ifp->vrf->name, ifp->ifindex, + addr_str); + return; + } + + radvert = (struct nd_router_advert *)msg; + +#define SIXHOUR2USEC (int64_t)6 * 60 * 60 * 1000000 + + if ((radvert->nd_ra_curhoplimit && zif->rtadv.AdvCurHopLimit) && + (radvert->nd_ra_curhoplimit != zif->rtadv.AdvCurHopLimit) && + (monotime_since(&zif->rtadv.lastadvcurhoplimit, NULL) > + SIXHOUR2USEC || + zif->rtadv.lastadvcurhoplimit.tv_sec == 0)) { + flog_warn( + EC_ZEBRA_RA_PARAM_MISMATCH, + "%s(%u): Rx RA - our AdvCurHopLimit doesn't agree with %s", + ifp->name, ifp->ifindex, addr_str); + 
monotime(&zif->rtadv.lastadvcurhoplimit); + } + + if ((radvert->nd_ra_flags_reserved & ND_RA_FLAG_MANAGED) && + !zif->rtadv.AdvManagedFlag && + (monotime_since(&zif->rtadv.lastadvmanagedflag, NULL) > + SIXHOUR2USEC || + zif->rtadv.lastadvmanagedflag.tv_sec == 0)) { + flog_warn( + EC_ZEBRA_RA_PARAM_MISMATCH, + "%s(%u): Rx RA - our AdvManagedFlag doesn't agree with %s", + ifp->name, ifp->ifindex, addr_str); + monotime(&zif->rtadv.lastadvmanagedflag); + } + + if ((radvert->nd_ra_flags_reserved & ND_RA_FLAG_OTHER) && + !zif->rtadv.AdvOtherConfigFlag && + (monotime_since(&zif->rtadv.lastadvotherconfigflag, NULL) > + SIXHOUR2USEC || + zif->rtadv.lastadvotherconfigflag.tv_sec == 0)) { + flog_warn( + EC_ZEBRA_RA_PARAM_MISMATCH, + "%s(%u): Rx RA - our AdvOtherConfigFlag doesn't agree with %s", + ifp->name, ifp->ifindex, addr_str); + monotime(&zif->rtadv.lastadvotherconfigflag); + } + + if ((radvert->nd_ra_reachable && zif->rtadv.AdvReachableTime) && + (ntohl(radvert->nd_ra_reachable) != zif->rtadv.AdvReachableTime) && + (monotime_since(&zif->rtadv.lastadvreachabletime, NULL) > + SIXHOUR2USEC || + zif->rtadv.lastadvreachabletime.tv_sec == 0)) { + flog_warn( + EC_ZEBRA_RA_PARAM_MISMATCH, + "%s(%u): Rx RA - our AdvReachableTime doesn't agree with %s", + ifp->name, ifp->ifindex, addr_str); + monotime(&zif->rtadv.lastadvreachabletime); + } + + if ((ntohl(radvert->nd_ra_retransmit) != + (unsigned int)zif->rtadv.AdvRetransTimer) && + (monotime_since(&zif->rtadv.lastadvretranstimer, NULL) > + SIXHOUR2USEC || + zif->rtadv.lastadvretranstimer.tv_sec == 0)) { + flog_warn( + EC_ZEBRA_RA_PARAM_MISMATCH, + "%s(%u): Rx RA - our AdvRetransTimer doesn't agree with %s", + ifp->name, ifp->ifindex, addr_str); + monotime(&zif->rtadv.lastadvretranstimer); + } + + /* Create entry for neighbor if not known. 
*/ + p.family = AF_INET6; + IPV6_ADDR_COPY(&p.u.prefix6, &addr->sin6_addr); + p.prefixlen = IPV6_MAX_BITLEN; + + if (!nbr_connected_check(ifp, &p)) + nbr_connected_add_ipv6(ifp, &addr->sin6_addr); +} + + +static void rtadv_process_packet(uint8_t *buf, unsigned int len, + ifindex_t ifindex, int hoplimit, + struct sockaddr_in6 *from, + struct zebra_vrf *zvrf) +{ + struct icmp6_hdr *icmph; + struct interface *ifp; + struct zebra_if *zif; + char addr_str[INET6_ADDRSTRLEN]; + + inet_ntop(AF_INET6, &from->sin6_addr, addr_str, INET6_ADDRSTRLEN); + + /* Interface search. */ + ifp = if_lookup_by_index(ifindex, zvrf->vrf->vrf_id); + if (ifp == NULL) { + flog_warn(EC_ZEBRA_UNKNOWN_INTERFACE, + "RA/RS received on unknown IF %u from %s", ifindex, + addr_str); + return; + } + + if (IS_ZEBRA_DEBUG_PACKET) + zlog_debug("%s(%s:%u): Rx RA/RS len %d from %s", ifp->name, + ifp->vrf->name, ifp->ifindex, len, addr_str); + + if (if_is_loopback(ifp)) + return; + + /* Check interface configuration. */ + zif = ifp->info; + if (!zif->rtadv.AdvSendAdvertisements) + return; + + /* ICMP message length check. */ + if (len < sizeof(struct icmp6_hdr)) { + zlog_debug( + "%s(%s:%u): Rx RA with Invalid ICMPV6 packet length %d", + ifp->name, ifp->vrf->name, ifp->ifindex, len); + return; + } + + icmph = (struct icmp6_hdr *)buf; + + /* ICMP message type check. */ + if (icmph->icmp6_type != ND_ROUTER_SOLICIT + && icmph->icmp6_type != ND_ROUTER_ADVERT) { + zlog_debug("%s(%s:%u): Rx RA - Unwanted ICMPV6 message type %d", + ifp->name, ifp->vrf->name, ifp->ifindex, + icmph->icmp6_type); + return; + } + + /* Hoplimit check. */ + if (hoplimit >= 0 && hoplimit != 255) { + zlog_debug("%s(%s:%u): Rx RA - Invalid hoplimit %d", ifp->name, + ifp->vrf->name, ifp->ifindex, hoplimit); + return; + } + + /* Check ICMP message type. 
*/ + if (icmph->icmp6_type == ND_ROUTER_SOLICIT) + rtadv_process_solicit(ifp); + else if (icmph->icmp6_type == ND_ROUTER_ADVERT) + rtadv_process_advert(buf, len, ifp, from); + + return; +} + +static void rtadv_read(struct thread *thread) +{ + int sock; + int len; + uint8_t buf[RTADV_MSG_SIZE]; + struct sockaddr_in6 from; + ifindex_t ifindex = 0; + int hoplimit = -1; + struct zebra_vrf *zvrf = THREAD_ARG(thread); + + sock = THREAD_FD(thread); + zvrf->rtadv.ra_read = NULL; + + /* Register myself. */ + rtadv_event(zvrf, RTADV_READ, 0); + + len = rtadv_recv_packet(zvrf, sock, buf, sizeof(buf), &from, &ifindex, + &hoplimit); + + if (len < 0) { + flog_err_sys(EC_LIB_SOCKET, + "RA/RS recv failed, socket %u error %s", sock, + safe_strerror(errno)); + return; + } + + rtadv_process_packet(buf, (unsigned)len, ifindex, hoplimit, &from, zvrf); +} + +static int rtadv_make_socket(ns_id_t ns_id) +{ + int sock = -1; + int ret = 0; + struct icmp6_filter filter; + int error; + + frr_with_privs(&zserv_privs) { + + sock = ns_socket(AF_INET6, SOCK_RAW, IPPROTO_ICMPV6, ns_id); + /* + * with privs might set errno too if it fails save + * to the side + */ + error = errno; + } + + if (sock < 0) { + zlog_warn("RTADV socket for ns: %u failure to create: %s(%u)", + ns_id, safe_strerror(error), error); + return -1; + } + + ret = setsockopt_ipv6_pktinfo(sock, 1); + if (ret < 0) { + zlog_warn("RTADV failure to set Packet Information"); + close(sock); + return ret; + } + ret = setsockopt_ipv6_multicast_loop(sock, 0); + if (ret < 0) { + zlog_warn("RTADV failure to set multicast Loop detection"); + close(sock); + return ret; + } + ret = setsockopt_ipv6_unicast_hops(sock, 255); + if (ret < 0) { + zlog_warn("RTADV failure to set maximum unicast hops"); + close(sock); + return ret; + } + ret = setsockopt_ipv6_multicast_hops(sock, 255); + if (ret < 0) { + zlog_warn("RTADV failure to set maximum multicast hops"); + close(sock); + return ret; + } + ret = setsockopt_ipv6_hoplimit(sock, 1); + if (ret < 0) { 
+ zlog_warn("RTADV failure to set maximum incoming hop limit"); + close(sock); + return ret; + } + + ICMP6_FILTER_SETBLOCKALL(&filter); + ICMP6_FILTER_SETPASS(ND_ROUTER_SOLICIT, &filter); + ICMP6_FILTER_SETPASS(ND_ROUTER_ADVERT, &filter); + + ret = setsockopt(sock, IPPROTO_ICMPV6, ICMP6_FILTER, &filter, + sizeof(struct icmp6_filter)); + if (ret < 0) { + zlog_info("ICMP6_FILTER set fail: %s", safe_strerror(errno)); + close(sock); + return ret; + } + + return sock; +} + +static struct adv_if *adv_if_new(const char *name) +{ + struct adv_if *new; + + new = XCALLOC(MTYPE_ADV_IF, sizeof(struct adv_if)); + + strlcpy(new->name, name, sizeof(new->name)); + + return new; +} + +static void adv_if_free(struct adv_if *adv_if) +{ + XFREE(MTYPE_ADV_IF, adv_if); +} + +static bool adv_if_is_empty_internal(const struct adv_if_list_head *adv_if_head) +{ + return adv_if_list_count(adv_if_head) ? false : true; +} + +static struct adv_if *adv_if_add_internal(struct adv_if_list_head *adv_if_head, + const char *name) +{ + struct adv_if adv_if_lookup = {}; + struct adv_if *adv_if = NULL; + + strlcpy(adv_if_lookup.name, name, sizeof(adv_if_lookup.name)); + adv_if = adv_if_list_find(adv_if_head, &adv_if_lookup); + + if (adv_if != NULL) + return adv_if; + + adv_if = adv_if_new(adv_if_lookup.name); + adv_if_list_add(adv_if_head, adv_if); + + return NULL; +} + +static struct adv_if *adv_if_del_internal(struct adv_if_list_head *adv_if_head, + const char *name) +{ + struct adv_if adv_if_lookup = {}; + struct adv_if *adv_if = NULL; + + strlcpy(adv_if_lookup.name, name, sizeof(adv_if_lookup.name)); + adv_if = adv_if_list_find(adv_if_head, &adv_if_lookup); + + if (adv_if == NULL) + return NULL; + + adv_if_list_del(adv_if_head, adv_if); + + return adv_if; +} + +static void adv_if_clean_internal(struct adv_if_list_head *adv_if_head) +{ + struct adv_if *node = NULL; + + if (!adv_if_is_empty_internal(adv_if_head)) { + frr_each_safe (adv_if_list, adv_if_head, node) { + adv_if_list_del(adv_if_head, 
node); + adv_if_free(node); + } + } + + adv_if_list_fini(adv_if_head); +} + + +/* + * Add to list. On Success, return NULL, otherwise return already existing + * adv_if. + */ +static struct adv_if *adv_if_add(struct zebra_vrf *zvrf, const char *name) +{ + struct adv_if *adv_if = NULL; + + adv_if = adv_if_add_internal(&zvrf->rtadv.adv_if, name); + + if (adv_if != NULL) + return adv_if; + + if (IS_ZEBRA_DEBUG_EVENT) { + struct vrf *vrf = zvrf->vrf; + + zlog_debug("%s: %s:%u IF %s count: %zu", __func__, + VRF_LOGNAME(vrf), zvrf_id(zvrf), name, + adv_if_list_count(&zvrf->rtadv.adv_if)); + } + + return NULL; +} + +/* + * Del from list. On Success, return the adv_if, otherwise return NULL. Caller + * frees. + */ +static struct adv_if *adv_if_del(struct zebra_vrf *zvrf, const char *name) +{ + struct adv_if *adv_if = NULL; + + adv_if = adv_if_del_internal(&zvrf->rtadv.adv_if, name); + + if (adv_if == NULL) + return NULL; + + if (IS_ZEBRA_DEBUG_EVENT) { + struct vrf *vrf = zvrf->vrf; + + zlog_debug("%s: %s:%u IF %s count: %zu", __func__, + VRF_LOGNAME(vrf), zvrf_id(zvrf), name, + adv_if_list_count(&zvrf->rtadv.adv_if)); + } + + return adv_if; +} + +/* + * Add to list. On Success, return NULL, otherwise return already existing + * adv_if. + */ +static struct adv_if *adv_msec_if_add(struct zebra_vrf *zvrf, const char *name) +{ + struct adv_if *adv_if = NULL; + + adv_if = adv_if_add_internal(&zvrf->rtadv.adv_msec_if, name); + + if (adv_if != NULL) + return adv_if; + + if (IS_ZEBRA_DEBUG_EVENT) { + struct vrf *vrf = zvrf->vrf; + + zlog_debug("%s: %s:%u IF %s count: %zu", __func__, + VRF_LOGNAME(vrf), zvrf_id(zvrf), name, + adv_if_list_count(&zvrf->rtadv.adv_msec_if)); + } + + return NULL; +} + +/* + * Del from list. On Success, return the adv_if, otherwise return NULL. Caller + * frees. 
+ */ +static struct adv_if *adv_msec_if_del(struct zebra_vrf *zvrf, const char *name) +{ + struct adv_if *adv_if = NULL; + + adv_if = adv_if_del_internal(&zvrf->rtadv.adv_msec_if, name); + + if (adv_if == NULL) + return NULL; + + if (IS_ZEBRA_DEBUG_EVENT) { + struct vrf *vrf = zvrf->vrf; + + zlog_debug("%s: %s:%u IF %s count: %zu", __func__, + VRF_LOGNAME(vrf), zvrf_id(zvrf), name, + adv_if_list_count(&zvrf->rtadv.adv_msec_if)); + } + + return adv_if; +} + +/* Clean adv_if list, called on vrf terminate */ +static void adv_if_clean(struct zebra_vrf *zvrf) +{ + if (IS_ZEBRA_DEBUG_EVENT) { + struct vrf *vrf = zvrf->vrf; + + zlog_debug("%s: %s:%u count: %zu -> 0", __func__, + VRF_LOGNAME(vrf), zvrf_id(zvrf), + adv_if_list_count(&zvrf->rtadv.adv_if)); + } + + adv_if_clean_internal(&zvrf->rtadv.adv_if); +} + +/* Clean adv_msec_if list, called on vrf terminate */ +static void adv_msec_if_clean(struct zebra_vrf *zvrf) +{ + if (IS_ZEBRA_DEBUG_EVENT) { + struct vrf *vrf = zvrf->vrf; + + zlog_debug("%s: %s:%u count: %zu -> 0", __func__, + VRF_LOGNAME(vrf), zvrf_id(zvrf), + adv_if_list_count(&zvrf->rtadv.adv_msec_if)); + } + + adv_if_clean_internal(&zvrf->rtadv.adv_msec_if); +} + +static struct rtadv_prefix *rtadv_prefix_new(void) +{ + return XCALLOC(MTYPE_RTADV_PREFIX, sizeof(struct rtadv_prefix)); +} + +static void rtadv_prefix_free(struct rtadv_prefix *rtadv_prefix) +{ + XFREE(MTYPE_RTADV_PREFIX, rtadv_prefix); +} + +static struct rtadv_prefix *rtadv_prefix_get(struct rtadv_prefixes_head *list, + struct prefix_ipv6 *p) +{ + struct rtadv_prefix *rprefix, ref; + + ref.prefix = *p; + + rprefix = rtadv_prefixes_find(list, &ref); + if (rprefix) + return rprefix; + + rprefix = rtadv_prefix_new(); + memcpy(&rprefix->prefix, p, sizeof(struct prefix_ipv6)); + rtadv_prefixes_add(list, rprefix); + + return rprefix; +} + +static void rtadv_prefix_set_defaults(struct rtadv_prefix *rp) +{ + rp->AdvAutonomousFlag = 1; + rp->AdvOnLinkFlag = 1; + rp->AdvRouterAddressFlag = 0; + 
rp->AdvPreferredLifetime = RTADV_PREFERRED_LIFETIME; + rp->AdvValidLifetime = RTADV_VALID_LIFETIME; +} + +static void rtadv_prefix_set(struct zebra_if *zif, struct rtadv_prefix *rp) +{ + struct rtadv_prefix *rprefix; + + rprefix = rtadv_prefix_get(zif->rtadv.prefixes, &rp->prefix); + + /* + * Set parameters based on where the prefix is created. + * If auto-created based on kernel address addition, set the + * default values. If created from a manual "ipv6 nd prefix" + * command, take the parameters from the manual command. Note + * that if the manual command exists, the default values will + * not overwrite the manual values. + */ + if (rp->AdvPrefixCreate == PREFIX_SRC_MANUAL) { + if (rprefix->AdvPrefixCreate == PREFIX_SRC_AUTO) + rprefix->AdvPrefixCreate = PREFIX_SRC_BOTH; + else + rprefix->AdvPrefixCreate = PREFIX_SRC_MANUAL; + + rprefix->AdvAutonomousFlag = rp->AdvAutonomousFlag; + rprefix->AdvOnLinkFlag = rp->AdvOnLinkFlag; + rprefix->AdvRouterAddressFlag = rp->AdvRouterAddressFlag; + rprefix->AdvPreferredLifetime = rp->AdvPreferredLifetime; + rprefix->AdvValidLifetime = rp->AdvValidLifetime; + } else if (rp->AdvPrefixCreate == PREFIX_SRC_AUTO) { + if (rprefix->AdvPrefixCreate == PREFIX_SRC_MANUAL) + rprefix->AdvPrefixCreate = PREFIX_SRC_BOTH; + else { + rprefix->AdvPrefixCreate = PREFIX_SRC_AUTO; + rtadv_prefix_set_defaults(rprefix); + } + } +} + +static int rtadv_prefix_reset(struct zebra_if *zif, struct rtadv_prefix *rp) +{ + struct rtadv_prefix *rprefix; + + rprefix = rtadv_prefixes_find(zif->rtadv.prefixes, rp); + if (rprefix != NULL) { + + /* + * When deleting an address from the list, need to take care + * it wasn't defined both automatically via kernel + * address addition as well as manually by vtysh cli. If both, + * we don't actually delete but may change the parameters + * back to default if a manually defined entry is deleted. 
+ */ + if (rp->AdvPrefixCreate == PREFIX_SRC_MANUAL) { + if (rprefix->AdvPrefixCreate == PREFIX_SRC_BOTH) { + rprefix->AdvPrefixCreate = PREFIX_SRC_AUTO; + rtadv_prefix_set_defaults(rprefix); + return 1; + } + } else if (rp->AdvPrefixCreate == PREFIX_SRC_AUTO) { + if (rprefix->AdvPrefixCreate == PREFIX_SRC_BOTH) { + rprefix->AdvPrefixCreate = PREFIX_SRC_MANUAL; + return 1; + } + } + + rtadv_prefixes_del(zif->rtadv.prefixes, rprefix); + rtadv_prefix_free(rprefix); + return 1; + } else + return 0; +} + +/* Add IPv6 prefixes learned from the kernel to the RA prefix list */ +void rtadv_add_prefix(struct zebra_if *zif, const struct prefix_ipv6 *p) +{ + struct rtadv_prefix rp; + + rp.prefix = *p; + apply_mask_ipv6(&rp.prefix); + rp.AdvPrefixCreate = PREFIX_SRC_AUTO; + rtadv_prefix_set(zif, &rp); +} + +/* Delete IPv6 prefixes removed by the kernel from the RA prefix list */ +void rtadv_delete_prefix(struct zebra_if *zif, const struct prefix *p) +{ + struct rtadv_prefix rp; + + rp.prefix = *((struct prefix_ipv6 *)p); + apply_mask_ipv6(&rp.prefix); + rp.AdvPrefixCreate = PREFIX_SRC_AUTO; + rtadv_prefix_reset(zif, &rp); +} + +static void rtadv_start_interface_events(struct zebra_vrf *zvrf, + struct zebra_if *zif) +{ + struct adv_if *adv_if = NULL; + + if (zif->ifp->ifindex == IFINDEX_INTERNAL) { + if (IS_ZEBRA_DEBUG_EVENT) + zlog_debug( + "%s(%s) has not configured an ifindex yet, delaying until we have one", + zif->ifp->name, zvrf->vrf->name); + return; + } + + adv_if = adv_if_add(zvrf, zif->ifp->name); + if (adv_if != NULL) + return; /* Already added */ + + if_join_all_router(zvrf->rtadv.sock, zif->ifp); + + if (adv_if_list_count(&zvrf->rtadv.adv_if) == 1) + rtadv_event(zvrf, RTADV_START, 0); +} + +static void ipv6_nd_suppress_ra_set(struct interface *ifp, + enum ipv6_nd_suppress_ra_status status) +{ + struct zebra_if *zif; + struct zebra_vrf *zvrf; + struct adv_if *adv_if = NULL; + + zif = ifp->info; + + zvrf = rtadv_interface_get_zvrf(ifp); + + if (status == RA_SUPPRESS) 
{ + /* RA is currently enabled */ + if (zif->rtadv.AdvSendAdvertisements) { + rtadv_send_packet(zvrf->rtadv.sock, ifp, RA_SUPPRESS); + zif->rtadv.AdvSendAdvertisements = 0; + zif->rtadv.AdvIntervalTimer = 0; + + adv_if = adv_if_del(zvrf, ifp->name); + if (adv_if == NULL) + return; /* Nothing to delete */ + + adv_if_free(adv_if); + + if_leave_all_router(zvrf->rtadv.sock, ifp); + + if (adv_if_list_count(&zvrf->rtadv.adv_if) == 0) + rtadv_event(zvrf, RTADV_STOP, 0); + } + } else { + if (!zif->rtadv.AdvSendAdvertisements) { + zif->rtadv.AdvSendAdvertisements = 1; + zif->rtadv.AdvIntervalTimer = 0; + if ((zif->rtadv.MaxRtrAdvInterval >= 1000) + && zif->rtadv.UseFastRexmit) { + /* + * Enable Fast RA only when RA interval is in + * secs and Fast RA retransmit is enabled + */ + zif->rtadv.inFastRexmit = 1; + zif->rtadv.NumFastReXmitsRemain = + RTADV_NUM_FAST_REXMITS; + } + + rtadv_start_interface_events(zvrf, zif); + } + } +} + +/* + * Handle client (BGP) message to enable or disable IPv6 RA on an interface. + * Note that while the client could request RA on an interface on which the + * operator has not enabled RA, RA won't be disabled upon client request + * if the operator has explicitly enabled RA. The enable request can also + * specify a RA interval (in seconds). + */ +static void zebra_interface_radv_set(ZAPI_HANDLER_ARGS, int enable) +{ + struct stream *s; + ifindex_t ifindex; + struct interface *ifp; + struct zebra_if *zif; + uint32_t ra_interval; + + s = msg; + + /* Get interface index and RA interval. */ + STREAM_GETL(s, ifindex); + STREAM_GETL(s, ra_interval); + + if (IS_ZEBRA_DEBUG_EVENT) { + struct vrf *vrf = zvrf->vrf; + + zlog_debug("%s:%u: IF %u RA %s from client %s, interval %ums", + VRF_LOGNAME(vrf), zvrf_id(zvrf), ifindex, + enable ? "enable" : "disable", + zebra_route_string(client->proto), ra_interval); + } + + /* Locate interface and check VRF match. 
*/ + ifp = if_lookup_by_index(ifindex, zvrf->vrf->vrf_id); + if (!ifp) { + struct vrf *vrf = zvrf->vrf; + + flog_warn(EC_ZEBRA_UNKNOWN_INTERFACE, + "%s:%u: IF %u RA %s client %s - interface unknown", + VRF_LOGNAME(vrf), zvrf_id(zvrf), ifindex, + enable ? "enable" : "disable", + zebra_route_string(client->proto)); + return; + } + if (vrf_is_backend_netns() && ifp->vrf->vrf_id != zvrf_id(zvrf)) { + zlog_debug( + "%s:%u: IF %u RA %s client %s - VRF mismatch, IF VRF %u", + ifp->vrf->name, zvrf_id(zvrf), ifindex, + enable ? "enable" : "disable", + zebra_route_string(client->proto), ifp->vrf->vrf_id); + return; + } + + zif = ifp->info; + if (enable) { + if (!CHECK_FLAG(zif->rtadv.ra_configured, BGP_RA_CONFIGURED)) + interfaces_configured_for_ra_from_bgp++; + + SET_FLAG(zif->rtadv.ra_configured, BGP_RA_CONFIGURED); + ipv6_nd_suppress_ra_set(ifp, RA_ENABLE); + if (ra_interval + && (ra_interval * 1000) < (unsigned int) zif->rtadv.MaxRtrAdvInterval + && !CHECK_FLAG(zif->rtadv.ra_configured, + VTY_RA_INTERVAL_CONFIGURED)) + zif->rtadv.MaxRtrAdvInterval = ra_interval * 1000; + } else { + if (CHECK_FLAG(zif->rtadv.ra_configured, BGP_RA_CONFIGURED)) + interfaces_configured_for_ra_from_bgp--; + + UNSET_FLAG(zif->rtadv.ra_configured, BGP_RA_CONFIGURED); + if (!CHECK_FLAG(zif->rtadv.ra_configured, + VTY_RA_INTERVAL_CONFIGURED)) + zif->rtadv.MaxRtrAdvInterval = + RTADV_MAX_RTR_ADV_INTERVAL; + if (!CHECK_FLAG(zif->rtadv.ra_configured, VTY_RA_CONFIGURED)) + ipv6_nd_suppress_ra_set(ifp, RA_SUPPRESS); + } +stream_failure: + return; +} + +/* + * send router lifetime value of zero in RAs on this interface since we're + * ceasing to advertise and want to let our neighbors know. 
+ * RFC 4861 secion 6.2.5 + */ +void rtadv_stop_ra(struct interface *ifp) +{ + struct zebra_if *zif; + struct zebra_vrf *zvrf; + + zif = ifp->info; + zvrf = rtadv_interface_get_zvrf(ifp); + + if (zif->rtadv.AdvSendAdvertisements) + rtadv_send_packet(zvrf->rtadv.sock, ifp, RA_SUPPRESS); +} + +/* + * Send router lifetime value of zero in RAs on all interfaces since we're + * ceasing to advertise globally and want to let all of our neighbors know + * RFC 4861 secion 6.2.5 + * + * Delete all ipv6 global prefixes added to the router advertisement prefix + * lists prior to ceasing. + */ +void rtadv_stop_ra_all(void) +{ + struct vrf *vrf; + struct interface *ifp; + struct zebra_if *zif; + struct rtadv_prefix *rprefix; + + RB_FOREACH (vrf, vrf_name_head, &vrfs_by_name) + FOR_ALL_INTERFACES (vrf, ifp) { + zif = ifp->info; + + frr_each_safe (rtadv_prefixes, zif->rtadv.prefixes, + rprefix) + rtadv_prefix_reset(zif, rprefix); + + rtadv_stop_ra(ifp); + } +} + +void zebra_interface_radv_disable(ZAPI_HANDLER_ARGS) +{ + zebra_interface_radv_set(client, hdr, msg, zvrf, 0); +} +void zebra_interface_radv_enable(ZAPI_HANDLER_ARGS) +{ + zebra_interface_radv_set(client, hdr, msg, zvrf, 1); +} + +static void show_zvrf_rtadv_adv_if_helper(struct vty *vty, + struct adv_if_list_head *adv_if_head) +{ + struct adv_if *node = NULL; + + if (!adv_if_is_empty_internal(adv_if_head)) { + frr_each (adv_if_list, adv_if_head, node) { + vty_out(vty, " %s\n", node->name); + } + } + + vty_out(vty, "\n"); +} + +static void show_zvrf_rtadv_helper(struct vty *vty, struct zebra_vrf *zvrf) +{ + vty_out(vty, "VRF: %s\n", zvrf_name(zvrf)); + vty_out(vty, " Interfaces:\n"); + show_zvrf_rtadv_adv_if_helper(vty, &zvrf->rtadv.adv_if); + + vty_out(vty, " Interfaces(msec):\n"); + show_zvrf_rtadv_adv_if_helper(vty, &zvrf->rtadv.adv_msec_if); +} + +DEFPY(show_ipv6_nd_ra_if, show_ipv6_nd_ra_if_cmd, + "show ipv6 nd ra-interfaces [vrf<NAME$vrf_name|all$vrf_all>]", + SHOW_STR IP6_STR + "Neighbor discovery\n" + "Route 
Advertisement Interfaces\n" VRF_FULL_CMD_HELP_STR) +{ + struct zebra_vrf *zvrf = NULL; + + if (!vrf_is_backend_netns() && (vrf_name || vrf_all)) { + vty_out(vty, + "%% VRF subcommand only applicable for netns-based vrfs.\n"); + return CMD_WARNING; + } + + if (vrf_all) { + struct vrf *vrf; + + RB_FOREACH (vrf, vrf_name_head, &vrfs_by_name) { + struct zebra_vrf *zvrf; + + zvrf = vrf->info; + if (!zvrf) + continue; + + show_zvrf_rtadv_helper(vty, zvrf); + } + + return CMD_SUCCESS; + } + + if (vrf_name) + zvrf = zebra_vrf_lookup_by_name(vrf_name); + else + zvrf = zebra_vrf_lookup_by_name(VRF_DEFAULT_NAME); + + if (!zvrf) { + vty_out(vty, "%% VRF '%s' specified does not exist\n", + vrf_name); + return CMD_WARNING; + } + + show_zvrf_rtadv_helper(vty, zvrf); + + return CMD_SUCCESS; +} + +DEFUN (ipv6_nd_ra_fast_retrans, + ipv6_nd_ra_fast_retrans_cmd, + "ipv6 nd ra-fast-retrans", + "Interface IPv6 config commands\n" + "Neighbor discovery\n" + "Fast retransmit of RA packets\n") +{ + VTY_DECLVAR_CONTEXT(interface, ifp); + struct zebra_if *zif = ifp->info; + + if (if_is_loopback(ifp)) { + vty_out(vty, + "Cannot configure IPv6 Router Advertisements on this interface\n"); + return CMD_WARNING_CONFIG_FAILED; + } + + zif->rtadv.UseFastRexmit = true; + + return CMD_SUCCESS; +} + +DEFUN (no_ipv6_nd_ra_fast_retrans, + no_ipv6_nd_ra_fast_retrans_cmd, + "no ipv6 nd ra-fast-retrans", + NO_STR + "Interface IPv6 config commands\n" + "Neighbor discovery\n" + "Fast retransmit of RA packets\n") +{ + VTY_DECLVAR_CONTEXT(interface, ifp); + struct zebra_if *zif = ifp->info; + + if (if_is_loopback(ifp)) { + vty_out(vty, + "Cannot configure IPv6 Router Advertisements on this interface\n"); + return CMD_WARNING_CONFIG_FAILED; + } + + zif->rtadv.UseFastRexmit = false; + + return CMD_SUCCESS; +} + +DEFPY (ipv6_nd_ra_hop_limit, + ipv6_nd_ra_hop_limit_cmd, + "ipv6 nd ra-hop-limit (0-255)$hopcount", + "Interface IPv6 config commands\n" + "Neighbor discovery\n" + "Advertisement Hop Limit\n" + 
"Advertisement Hop Limit in hops (default:64)\n") +{ + VTY_DECLVAR_CONTEXT(interface, ifp); + struct zebra_if *zif = ifp->info; + + if (if_is_loopback(ifp)) { + vty_out(vty, + "Cannot configure IPv6 Router Advertisements on this interface\n"); + return CMD_WARNING_CONFIG_FAILED; + } + + zif->rtadv.AdvCurHopLimit = hopcount; + + return CMD_SUCCESS; +} + +DEFPY (no_ipv6_nd_ra_hop_limit, + no_ipv6_nd_ra_hop_limit_cmd, + "no ipv6 nd ra-hop-limit [(0-255)]", + NO_STR + "Interface IPv6 config commands\n" + "Neighbor discovery\n" + "Advertisement Hop Limit\n" + "Advertisement Hop Limit in hops\n") +{ + VTY_DECLVAR_CONTEXT(interface, ifp); + struct zebra_if *zif = ifp->info; + + if (if_is_loopback(ifp)) { + vty_out(vty, + "Cannot configure IPv6 Router Advertisements on this interface\n"); + return CMD_WARNING_CONFIG_FAILED; + } + + zif->rtadv.AdvCurHopLimit = RTADV_DEFAULT_HOPLIMIT; + + return CMD_SUCCESS; +} + +DEFPY (ipv6_nd_ra_retrans_interval, + ipv6_nd_ra_retrans_interval_cmd, + "ipv6 nd ra-retrans-interval (0-4294967295)$interval", + "Interface IPv6 config commands\n" + "Neighbor discovery\n" + "Advertisement Retransmit Interval\n" + "Advertisement Retransmit Interval in msec\n") +{ + VTY_DECLVAR_CONTEXT(interface, ifp); + struct zebra_if *zif = ifp->info; + + if (if_is_loopback(ifp)) { + vty_out(vty, + "Cannot configure IPv6 Router Advertisements on loopback interface\n"); + return CMD_WARNING_CONFIG_FAILED; + } + + zif->rtadv.AdvRetransTimer = interval; + + return CMD_SUCCESS; +} + +DEFPY (no_ipv6_nd_ra_retrans_interval, + no_ipv6_nd_ra_retrans_interval_cmd, + "no ipv6 nd ra-retrans-interval [(0-4294967295)]", + NO_STR + "Interface IPv6 config commands\n" + "Neighbor discovery\n" + "Advertisement Retransmit Interval\n" + "Advertisement Retransmit Interval in msec\n") +{ + VTY_DECLVAR_CONTEXT(interface, ifp); + struct zebra_if *zif = ifp->info; + + if (if_is_loopback(ifp)) { + vty_out(vty, + "Cannot remove IPv6 Router Advertisements on loopback interface\n"); + 
return CMD_WARNING_CONFIG_FAILED; + } + + zif->rtadv.AdvRetransTimer = 0; + + return CMD_SUCCESS; +} + +DEFUN (ipv6_nd_suppress_ra, + ipv6_nd_suppress_ra_cmd, + "ipv6 nd suppress-ra", + "Interface IPv6 config commands\n" + "Neighbor discovery\n" + "Suppress Router Advertisement\n") +{ + VTY_DECLVAR_CONTEXT(interface, ifp); + struct zebra_if *zif = ifp->info; + + if (if_is_loopback(ifp)) { + vty_out(vty, + "Cannot configure IPv6 Router Advertisements on this interface\n"); + return CMD_WARNING_CONFIG_FAILED; + } + + if (!CHECK_FLAG(zif->rtadv.ra_configured, BGP_RA_CONFIGURED)) + ipv6_nd_suppress_ra_set(ifp, RA_SUPPRESS); + + UNSET_FLAG(zif->rtadv.ra_configured, VTY_RA_CONFIGURED); + return CMD_SUCCESS; +} + +DEFUN (no_ipv6_nd_suppress_ra, + no_ipv6_nd_suppress_ra_cmd, + "no ipv6 nd suppress-ra", + NO_STR + "Interface IPv6 config commands\n" + "Neighbor discovery\n" + "Suppress Router Advertisement\n") +{ + VTY_DECLVAR_CONTEXT(interface, ifp); + struct zebra_if *zif = ifp->info; + + if (if_is_loopback(ifp)) { + vty_out(vty, + "Cannot configure IPv6 Router Advertisements on this interface\n"); + return CMD_WARNING_CONFIG_FAILED; + } + + ipv6_nd_suppress_ra_set(ifp, RA_ENABLE); + SET_FLAG(zif->rtadv.ra_configured, VTY_RA_CONFIGURED); + return CMD_SUCCESS; +} + +DEFUN (ipv6_nd_ra_interval_msec, + ipv6_nd_ra_interval_msec_cmd, + "ipv6 nd ra-interval msec (70-1800000)", + "Interface IPv6 config commands\n" + "Neighbor discovery\n" + "Router Advertisement interval\n" + "Router Advertisement interval in milliseconds\n" + "Router Advertisement interval in milliseconds\n") +{ + int idx_number = 4; + VTY_DECLVAR_CONTEXT(interface, ifp); + unsigned interval; + struct zebra_if *zif = ifp->info; + struct zebra_vrf *zvrf; + struct adv_if *adv_if; + + zvrf = rtadv_interface_get_zvrf(ifp); + + interval = strtoul(argv[idx_number]->arg, NULL, 10); + if ((zif->rtadv.AdvDefaultLifetime != -1 + && interval > (unsigned)zif->rtadv.AdvDefaultLifetime * 1000)) { + vty_out(vty, + "This 
ra-interval would conflict with configured ra-lifetime!\n"); + return CMD_WARNING_CONFIG_FAILED; + } + + if (zif->rtadv.MaxRtrAdvInterval % 1000) { + adv_if = adv_msec_if_del(zvrf, ifp->name); + if (adv_if != NULL) + adv_if_free(adv_if); + } + + if (interval % 1000) + (void)adv_msec_if_add(zvrf, ifp->name); + + SET_FLAG(zif->rtadv.ra_configured, VTY_RA_INTERVAL_CONFIGURED); + zif->rtadv.MaxRtrAdvInterval = interval; + zif->rtadv.MinRtrAdvInterval = 0.33 * interval; + zif->rtadv.AdvIntervalTimer = 0; + + return CMD_SUCCESS; +} + +DEFUN (ipv6_nd_ra_interval, + ipv6_nd_ra_interval_cmd, + "ipv6 nd ra-interval (1-1800)", + "Interface IPv6 config commands\n" + "Neighbor discovery\n" + "Router Advertisement interval\n" + "Router Advertisement interval in seconds\n") +{ + int idx_number = 3; + VTY_DECLVAR_CONTEXT(interface, ifp); + unsigned interval; + struct zebra_if *zif = ifp->info; + struct zebra_vrf *zvrf; + struct adv_if *adv_if; + + zvrf = rtadv_interface_get_zvrf(ifp); + + interval = strtoul(argv[idx_number]->arg, NULL, 10); + if ((zif->rtadv.AdvDefaultLifetime != -1 + && interval > (unsigned)zif->rtadv.AdvDefaultLifetime)) { + vty_out(vty, + "This ra-interval would conflict with configured ra-lifetime!\n"); + return CMD_WARNING_CONFIG_FAILED; + } + + if (zif->rtadv.MaxRtrAdvInterval % 1000) { + adv_if = adv_msec_if_del(zvrf, ifp->name); + if (adv_if != NULL) + adv_if_free(adv_if); + } + + /* convert to milliseconds */ + interval = interval * 1000; + + SET_FLAG(zif->rtadv.ra_configured, VTY_RA_INTERVAL_CONFIGURED); + zif->rtadv.MaxRtrAdvInterval = interval; + zif->rtadv.MinRtrAdvInterval = 0.33 * interval; + zif->rtadv.AdvIntervalTimer = 0; + + return CMD_SUCCESS; +} + +DEFUN (no_ipv6_nd_ra_interval, + no_ipv6_nd_ra_interval_cmd, + "no ipv6 nd ra-interval [<(1-1800)|msec (1-1800000)>]", + NO_STR + "Interface IPv6 config commands\n" + "Neighbor discovery\n" + "Router Advertisement interval\n" + "Router Advertisement interval in seconds\n" + "Specify millisecond 
router advertisement interval\n" + "Router Advertisement interval in milliseconds\n") +{ + VTY_DECLVAR_CONTEXT(interface, ifp); + struct zebra_if *zif = ifp->info; + struct zebra_vrf *zvrf = NULL; + struct adv_if *adv_if; + + zvrf = rtadv_interface_get_zvrf(ifp); + + if (zif->rtadv.MaxRtrAdvInterval % 1000) { + adv_if = adv_msec_if_del(zvrf, ifp->name); + if (adv_if != NULL) + adv_if_free(adv_if); + } + + UNSET_FLAG(zif->rtadv.ra_configured, VTY_RA_INTERVAL_CONFIGURED); + + if (CHECK_FLAG(zif->rtadv.ra_configured, BGP_RA_CONFIGURED)) + zif->rtadv.MaxRtrAdvInterval = 10000; + else + zif->rtadv.MaxRtrAdvInterval = RTADV_MAX_RTR_ADV_INTERVAL; + + zif->rtadv.AdvIntervalTimer = zif->rtadv.MaxRtrAdvInterval; + zif->rtadv.MinRtrAdvInterval = RTADV_MIN_RTR_ADV_INTERVAL; + + return CMD_SUCCESS; +} + +DEFUN (ipv6_nd_ra_lifetime, + ipv6_nd_ra_lifetime_cmd, + "ipv6 nd ra-lifetime (0-9000)", + "Interface IPv6 config commands\n" + "Neighbor discovery\n" + "Router lifetime\n" + "Router lifetime in seconds (0 stands for a non-default gw)\n") +{ + int idx_number = 3; + VTY_DECLVAR_CONTEXT(interface, ifp); + struct zebra_if *zif = ifp->info; + int lifetime; + + lifetime = strtoul(argv[idx_number]->arg, NULL, 10); + + /* The value to be placed in the Router Lifetime field + * of Router Advertisements sent from the interface, + * in seconds. MUST be either zero or between + * MaxRtrAdvInterval and 9000 seconds. 
-- RFC4861, 6.2.1 */ + if ((lifetime != 0 && lifetime * 1000 < zif->rtadv.MaxRtrAdvInterval)) { + vty_out(vty, + "This ra-lifetime would conflict with configured ra-interval\n"); + return CMD_WARNING_CONFIG_FAILED; + } + + zif->rtadv.AdvDefaultLifetime = lifetime; + + return CMD_SUCCESS; +} + +DEFUN (no_ipv6_nd_ra_lifetime, + no_ipv6_nd_ra_lifetime_cmd, + "no ipv6 nd ra-lifetime [(0-9000)]", + NO_STR + "Interface IPv6 config commands\n" + "Neighbor discovery\n" + "Router lifetime\n" + "Router lifetime in seconds (0 stands for a non-default gw)\n") +{ + VTY_DECLVAR_CONTEXT(interface, ifp); + struct zebra_if *zif = ifp->info; + + zif->rtadv.AdvDefaultLifetime = -1; + + return CMD_SUCCESS; +} + +DEFUN (ipv6_nd_reachable_time, + ipv6_nd_reachable_time_cmd, + "ipv6 nd reachable-time (1-3600000)", + "Interface IPv6 config commands\n" + "Neighbor discovery\n" + "Reachable time\n" + "Reachable time in milliseconds\n") +{ + int idx_number = 3; + VTY_DECLVAR_CONTEXT(interface, ifp); + struct zebra_if *zif = ifp->info; + zif->rtadv.AdvReachableTime = strtoul(argv[idx_number]->arg, NULL, 10); + return CMD_SUCCESS; +} + +DEFUN (no_ipv6_nd_reachable_time, + no_ipv6_nd_reachable_time_cmd, + "no ipv6 nd reachable-time [(1-3600000)]", + NO_STR + "Interface IPv6 config commands\n" + "Neighbor discovery\n" + "Reachable time\n" + "Reachable time in milliseconds\n") +{ + VTY_DECLVAR_CONTEXT(interface, ifp); + struct zebra_if *zif = ifp->info; + + zif->rtadv.AdvReachableTime = 0; + + return CMD_SUCCESS; +} + +DEFUN (ipv6_nd_homeagent_preference, + ipv6_nd_homeagent_preference_cmd, + "ipv6 nd home-agent-preference (0-65535)", + "Interface IPv6 config commands\n" + "Neighbor discovery\n" + "Home Agent preference\n" + "preference value (default is 0, least preferred)\n") +{ + int idx_number = 3; + VTY_DECLVAR_CONTEXT(interface, ifp); + struct zebra_if *zif = ifp->info; + zif->rtadv.HomeAgentPreference = + strtoul(argv[idx_number]->arg, NULL, 10); + return CMD_SUCCESS; +} + +DEFUN 
(no_ipv6_nd_homeagent_preference, + no_ipv6_nd_homeagent_preference_cmd, + "no ipv6 nd home-agent-preference [(0-65535)]", + NO_STR + "Interface IPv6 config commands\n" + "Neighbor discovery\n" + "Home Agent preference\n" + "preference value (default is 0, least preferred)\n") +{ + VTY_DECLVAR_CONTEXT(interface, ifp); + struct zebra_if *zif = ifp->info; + + zif->rtadv.HomeAgentPreference = 0; + + return CMD_SUCCESS; +} + +DEFUN (ipv6_nd_homeagent_lifetime, + ipv6_nd_homeagent_lifetime_cmd, + "ipv6 nd home-agent-lifetime (0-65520)", + "Interface IPv6 config commands\n" + "Neighbor discovery\n" + "Home Agent lifetime\n" + "Home Agent lifetime in seconds (0 to track ra-lifetime)\n") +{ + int idx_number = 3; + VTY_DECLVAR_CONTEXT(interface, ifp); + struct zebra_if *zif = ifp->info; + zif->rtadv.HomeAgentLifetime = strtoul(argv[idx_number]->arg, NULL, 10); + return CMD_SUCCESS; +} + +DEFUN (no_ipv6_nd_homeagent_lifetime, + no_ipv6_nd_homeagent_lifetime_cmd, + "no ipv6 nd home-agent-lifetime [(0-65520)]", + NO_STR + "Interface IPv6 config commands\n" + "Neighbor discovery\n" + "Home Agent lifetime\n" + "Home Agent lifetime in seconds (0 to track ra-lifetime)\n") +{ + VTY_DECLVAR_CONTEXT(interface, ifp); + struct zebra_if *zif = ifp->info; + + zif->rtadv.HomeAgentLifetime = -1; + + return CMD_SUCCESS; +} + +DEFUN (ipv6_nd_managed_config_flag, + ipv6_nd_managed_config_flag_cmd, + "ipv6 nd managed-config-flag", + "Interface IPv6 config commands\n" + "Neighbor discovery\n" + "Managed address configuration flag\n") +{ + VTY_DECLVAR_CONTEXT(interface, ifp); + struct zebra_if *zif = ifp->info; + + zif->rtadv.AdvManagedFlag = 1; + + return CMD_SUCCESS; +} + +DEFUN (no_ipv6_nd_managed_config_flag, + no_ipv6_nd_managed_config_flag_cmd, + "no ipv6 nd managed-config-flag", + NO_STR + "Interface IPv6 config commands\n" + "Neighbor discovery\n" + "Managed address configuration flag\n") +{ + VTY_DECLVAR_CONTEXT(interface, ifp); + struct zebra_if *zif = ifp->info; + + 
zif->rtadv.AdvManagedFlag = 0; + + return CMD_SUCCESS; +} + +DEFUN (ipv6_nd_homeagent_config_flag, + ipv6_nd_homeagent_config_flag_cmd, + "ipv6 nd home-agent-config-flag", + "Interface IPv6 config commands\n" + "Neighbor discovery\n" + "Home Agent configuration flag\n") +{ + VTY_DECLVAR_CONTEXT(interface, ifp); + struct zebra_if *zif = ifp->info; + + zif->rtadv.AdvHomeAgentFlag = 1; + + return CMD_SUCCESS; +} + +DEFUN (no_ipv6_nd_homeagent_config_flag, + no_ipv6_nd_homeagent_config_flag_cmd, + "no ipv6 nd home-agent-config-flag", + NO_STR + "Interface IPv6 config commands\n" + "Neighbor discovery\n" + "Home Agent configuration flag\n") +{ + VTY_DECLVAR_CONTEXT(interface, ifp); + struct zebra_if *zif = ifp->info; + + zif->rtadv.AdvHomeAgentFlag = 0; + + return CMD_SUCCESS; +} + +DEFUN (ipv6_nd_adv_interval_config_option, + ipv6_nd_adv_interval_config_option_cmd, + "ipv6 nd adv-interval-option", + "Interface IPv6 config commands\n" + "Neighbor discovery\n" + "Advertisement Interval Option\n") +{ + VTY_DECLVAR_CONTEXT(interface, ifp); + struct zebra_if *zif = ifp->info; + + zif->rtadv.AdvIntervalOption = 1; + + return CMD_SUCCESS; +} + +DEFUN (no_ipv6_nd_adv_interval_config_option, + no_ipv6_nd_adv_interval_config_option_cmd, + "no ipv6 nd adv-interval-option", + NO_STR + "Interface IPv6 config commands\n" + "Neighbor discovery\n" + "Advertisement Interval Option\n") +{ + VTY_DECLVAR_CONTEXT(interface, ifp); + struct zebra_if *zif = ifp->info; + + zif->rtadv.AdvIntervalOption = 0; + + return CMD_SUCCESS; +} + +DEFUN (ipv6_nd_other_config_flag, + ipv6_nd_other_config_flag_cmd, + "ipv6 nd other-config-flag", + "Interface IPv6 config commands\n" + "Neighbor discovery\n" + "Other statefull configuration flag\n") +{ + VTY_DECLVAR_CONTEXT(interface, ifp); + struct zebra_if *zif = ifp->info; + + zif->rtadv.AdvOtherConfigFlag = 1; + + return CMD_SUCCESS; +} + +DEFUN (no_ipv6_nd_other_config_flag, + no_ipv6_nd_other_config_flag_cmd, + "no ipv6 nd other-config-flag", + NO_STR + 
"Interface IPv6 config commands\n" + "Neighbor discovery\n" + "Other statefull configuration flag\n") +{ + VTY_DECLVAR_CONTEXT(interface, ifp); + struct zebra_if *zif = ifp->info; + + zif->rtadv.AdvOtherConfigFlag = 0; + + return CMD_SUCCESS; +} + +DEFUN (ipv6_nd_prefix, + ipv6_nd_prefix_cmd, + "ipv6 nd prefix X:X::X:X/M [<(0-4294967295)|infinite> <(0-4294967295)|infinite>] [<router-address|off-link [no-autoconfig]|no-autoconfig [off-link]>]", + "Interface IPv6 config commands\n" + "Neighbor discovery\n" + "Prefix information\n" + "IPv6 prefix\n" + "Valid lifetime in seconds\n" + "Infinite valid lifetime\n" + "Preferred lifetime in seconds\n" + "Infinite preferred lifetime\n" + "Set Router Address flag\n" + "Do not use prefix for onlink determination\n" + "Do not use prefix for autoconfiguration\n" + "Do not use prefix for autoconfiguration\n" + "Do not use prefix for onlink determination\n") +{ + /* prelude */ + char *prefix = argv[3]->arg; + int lifetimes = (argc > 4) && (argv[4]->type == RANGE_TKN + || strmatch(argv[4]->text, "infinite")); + int routeropts = lifetimes ? argc > 6 : argc > 4; + + int idx_routeropts = routeropts ? (lifetimes ? 6 : 4) : 0; + + char *lifetime = NULL, *preflifetime = NULL; + int routeraddr = 0, offlink = 0, noautoconf = 0; + if (lifetimes) { + lifetime = argv[4]->type == RANGE_TKN ? argv[4]->arg + : argv[4]->text; + preflifetime = argv[5]->type == RANGE_TKN ? 
argv[5]->arg + : argv[5]->text; + } + if (routeropts) { + routeraddr = + strmatch(argv[idx_routeropts]->text, "router-address"); + if (!routeraddr) { + offlink = (argc > idx_routeropts + 1 + || strmatch(argv[idx_routeropts]->text, + "off-link")); + noautoconf = (argc > idx_routeropts + 1 + || strmatch(argv[idx_routeropts]->text, + "no-autoconfig")); + } + } + + /* business */ + VTY_DECLVAR_CONTEXT(interface, ifp); + struct zebra_if *zebra_if = ifp->info; + int ret; + struct rtadv_prefix rp; + + ret = str2prefix_ipv6(prefix, &rp.prefix); + if (!ret) { + vty_out(vty, "Malformed IPv6 prefix\n"); + return CMD_WARNING_CONFIG_FAILED; + } + apply_mask_ipv6(&rp.prefix); /* RFC4861 4.6.2 */ + rp.AdvOnLinkFlag = !offlink; + rp.AdvAutonomousFlag = !noautoconf; + rp.AdvRouterAddressFlag = routeraddr; + rp.AdvValidLifetime = RTADV_VALID_LIFETIME; + rp.AdvPreferredLifetime = RTADV_PREFERRED_LIFETIME; + rp.AdvPrefixCreate = PREFIX_SRC_MANUAL; + + if (lifetimes) { + rp.AdvValidLifetime = strmatch(lifetime, "infinite") + ? UINT32_MAX + : strtoll(lifetime, NULL, 10); + rp.AdvPreferredLifetime = + strmatch(preflifetime, "infinite") + ? 
UINT32_MAX + : strtoll(preflifetime, NULL, 10); + if (rp.AdvPreferredLifetime > rp.AdvValidLifetime) { + vty_out(vty, "Invalid preferred lifetime\n"); + return CMD_WARNING_CONFIG_FAILED; + } + } + + rtadv_prefix_set(zebra_if, &rp); + + return CMD_SUCCESS; +} + +DEFUN (no_ipv6_nd_prefix, + no_ipv6_nd_prefix_cmd, + "no ipv6 nd prefix X:X::X:X/M [<(0-4294967295)|infinite> <(0-4294967295)|infinite>] [<router-address|off-link [no-autoconfig]|no-autoconfig [off-link]>]", + NO_STR + "Interface IPv6 config commands\n" + "Neighbor discovery\n" + "Prefix information\n" + "IPv6 prefix\n" + "Valid lifetime in seconds\n" + "Infinite valid lifetime\n" + "Preferred lifetime in seconds\n" + "Infinite preferred lifetime\n" + "Set Router Address flag\n" + "Do not use prefix for onlink determination\n" + "Do not use prefix for autoconfiguration\n" + "Do not use prefix for autoconfiguration\n" + "Do not use prefix for onlink determination\n") +{ + VTY_DECLVAR_CONTEXT(interface, ifp); + struct zebra_if *zebra_if = ifp->info; + int ret; + struct rtadv_prefix rp; + char *prefix = argv[4]->arg; + + ret = str2prefix_ipv6(prefix, &rp.prefix); + if (!ret) { + vty_out(vty, "Malformed IPv6 prefix\n"); + return CMD_WARNING_CONFIG_FAILED; + } + apply_mask_ipv6(&rp.prefix); /* RFC4861 4.6.2 */ + rp.AdvPrefixCreate = PREFIX_SRC_MANUAL; + + ret = rtadv_prefix_reset(zebra_if, &rp); + if (!ret) { + vty_out(vty, "Non-existant IPv6 prefix\n"); + return CMD_WARNING_CONFIG_FAILED; + } + + return CMD_SUCCESS; +} + +DEFUN (ipv6_nd_router_preference, + ipv6_nd_router_preference_cmd, + "ipv6 nd router-preference <high|medium|low>", + "Interface IPv6 config commands\n" + "Neighbor discovery\n" + "Default router preference\n" + "High default router preference\n" + "Medium default router preference (default)\n" + "Low default router preference\n") +{ + int idx_high_medium_low = 3; + VTY_DECLVAR_CONTEXT(interface, ifp); + struct zebra_if *zif = ifp->info; + int i = 0; + + while (0 != rtadv_pref_strs[i]) { + if 
(strncmp(argv[idx_high_medium_low]->arg, rtadv_pref_strs[i], + 1) + == 0) { + zif->rtadv.DefaultPreference = i; + return CMD_SUCCESS; + } + i++; + } + + return CMD_ERR_NO_MATCH; +} + +DEFUN (no_ipv6_nd_router_preference, + no_ipv6_nd_router_preference_cmd, + "no ipv6 nd router-preference [<high|medium|low>]", + NO_STR + "Interface IPv6 config commands\n" + "Neighbor discovery\n" + "Default router preference\n" + "High default router preference\n" + "Medium default router preference (default)\n" + "Low default router preference\n") +{ + VTY_DECLVAR_CONTEXT(interface, ifp); + struct zebra_if *zif = ifp->info; + + zif->rtadv.DefaultPreference = + RTADV_PREF_MEDIUM; /* Default per RFC4191. */ + + return CMD_SUCCESS; +} + +DEFUN (ipv6_nd_mtu, + ipv6_nd_mtu_cmd, + "ipv6 nd mtu (1-65535)", + "Interface IPv6 config commands\n" + "Neighbor discovery\n" + "Advertised MTU\n" + "MTU in bytes\n") +{ + int idx_number = 3; + VTY_DECLVAR_CONTEXT(interface, ifp); + struct zebra_if *zif = ifp->info; + zif->rtadv.AdvLinkMTU = strtoul(argv[idx_number]->arg, NULL, 10); + return CMD_SUCCESS; +} + +DEFUN (no_ipv6_nd_mtu, + no_ipv6_nd_mtu_cmd, + "no ipv6 nd mtu [(1-65535)]", + NO_STR + "Interface IPv6 config commands\n" + "Neighbor discovery\n" + "Advertised MTU\n" + "MTU in bytes\n") +{ + VTY_DECLVAR_CONTEXT(interface, ifp); + struct zebra_if *zif = ifp->info; + zif->rtadv.AdvLinkMTU = 0; + return CMD_SUCCESS; +} + +static struct rtadv_rdnss *rtadv_rdnss_new(void) +{ + return XCALLOC(MTYPE_RTADV_RDNSS, sizeof(struct rtadv_rdnss)); +} + +static void rtadv_rdnss_free(struct rtadv_rdnss *rdnss) +{ + XFREE(MTYPE_RTADV_RDNSS, rdnss); +} + +static struct rtadv_rdnss *rtadv_rdnss_lookup(struct list *list, + struct rtadv_rdnss *rdnss) +{ + struct listnode *node; + struct rtadv_rdnss *p; + + for (ALL_LIST_ELEMENTS_RO(list, node, p)) + if (IPV6_ADDR_SAME(&p->addr, &rdnss->addr)) + return p; + return NULL; +} + +static struct rtadv_rdnss *rtadv_rdnss_get(struct list *list, + struct rtadv_rdnss 
*rdnss) +{ + struct rtadv_rdnss *p; + + p = rtadv_rdnss_lookup(list, rdnss); + if (p) + return p; + + p = rtadv_rdnss_new(); + memcpy(p, rdnss, sizeof(struct rtadv_rdnss)); + listnode_add(list, p); + + return p; +} + +static void rtadv_rdnss_set(struct zebra_if *zif, struct rtadv_rdnss *rdnss) +{ + struct rtadv_rdnss *p; + + p = rtadv_rdnss_get(zif->rtadv.AdvRDNSSList, rdnss); + p->lifetime = rdnss->lifetime; + p->lifetime_set = rdnss->lifetime_set; +} + +static int rtadv_rdnss_reset(struct zebra_if *zif, struct rtadv_rdnss *rdnss) +{ + struct rtadv_rdnss *p; + + p = rtadv_rdnss_lookup(zif->rtadv.AdvRDNSSList, rdnss); + if (p) { + listnode_delete(zif->rtadv.AdvRDNSSList, p); + rtadv_rdnss_free(p); + return 1; + } + + return 0; +} + +static struct rtadv_dnssl *rtadv_dnssl_new(void) +{ + return XCALLOC(MTYPE_RTADV_DNSSL, sizeof(struct rtadv_dnssl)); +} + +static void rtadv_dnssl_free(struct rtadv_dnssl *dnssl) +{ + XFREE(MTYPE_RTADV_DNSSL, dnssl); +} + +static struct rtadv_dnssl *rtadv_dnssl_lookup(struct list *list, + struct rtadv_dnssl *dnssl) +{ + struct listnode *node; + struct rtadv_dnssl *p; + + for (ALL_LIST_ELEMENTS_RO(list, node, p)) + if (!strcasecmp(p->name, dnssl->name)) + return p; + return NULL; +} + +static struct rtadv_dnssl *rtadv_dnssl_get(struct list *list, + struct rtadv_dnssl *dnssl) +{ + struct rtadv_dnssl *p; + + p = rtadv_dnssl_lookup(list, dnssl); + if (p) + return p; + + p = rtadv_dnssl_new(); + memcpy(p, dnssl, sizeof(struct rtadv_dnssl)); + listnode_add(list, p); + + return p; +} + +static void rtadv_dnssl_set(struct zebra_if *zif, struct rtadv_dnssl *dnssl) +{ + struct rtadv_dnssl *p; + + p = rtadv_dnssl_get(zif->rtadv.AdvDNSSLList, dnssl); + memcpy(p, dnssl, sizeof(struct rtadv_dnssl)); +} + +static int rtadv_dnssl_reset(struct zebra_if *zif, struct rtadv_dnssl *dnssl) +{ + struct rtadv_dnssl *p; + + p = rtadv_dnssl_lookup(zif->rtadv.AdvDNSSLList, dnssl); + if (p) { + listnode_delete(zif->rtadv.AdvDNSSLList, p); + rtadv_dnssl_free(p); + 
return 1; + } + + return 0; +} + +/* + * Convert dotted domain name (with or without trailing root zone dot) to + * sequence of length-prefixed labels, as described in [RFC1035 3.1]. Write up + * to strlen(in) + 2 octets to out. + * + * Returns the number of octets written to out or -1 if in does not constitute + * a valid domain name. + */ +static int rtadv_dnssl_encode(uint8_t *out, const char *in) +{ + const char *label_start, *label_end; + size_t outp; + + outp = 0; + label_start = in; + + while (*label_start) { + size_t label_len; + + label_end = strchr(label_start, '.'); + if (label_end == NULL) + label_end = label_start + strlen(label_start); + + label_len = label_end - label_start; + if (label_len >= 64) + return -1; /* labels must be 63 octets or less */ + + out[outp++] = (uint8_t)label_len; + memcpy(out + outp, label_start, label_len); + outp += label_len; + label_start += label_len; + if (*label_start == '.') + label_start++; + } + + out[outp++] = '\0'; + return outp; +} + +DEFUN(ipv6_nd_rdnss, + ipv6_nd_rdnss_cmd, + "ipv6 nd rdnss X:X::X:X [<(0-4294967295)|infinite>]", + "Interface IPv6 config commands\n" + "Neighbor discovery\n" + "Recursive DNS server information\n" + "IPv6 address\n" + "Valid lifetime in seconds\n" + "Infinite valid lifetime\n") +{ + VTY_DECLVAR_CONTEXT(interface, ifp); + struct zebra_if *zif = ifp->info; + struct rtadv_rdnss rdnss = {}; + + if (inet_pton(AF_INET6, argv[3]->arg, &rdnss.addr) != 1) { + vty_out(vty, "Malformed IPv6 address\n"); + return CMD_WARNING_CONFIG_FAILED; + } + if (argc > 4) { + char *lifetime = argv[4]->type == RANGE_TKN ? argv[4]->arg + : argv[4]->text; + rdnss.lifetime = strmatch(lifetime, "infinite") + ? 
UINT32_MAX + : strtoll(lifetime, NULL, 10); + rdnss.lifetime_set = 1; + } + + rtadv_rdnss_set(zif, &rdnss); + + return CMD_SUCCESS; +} + +DEFUN(no_ipv6_nd_rdnss, + no_ipv6_nd_rdnss_cmd, + "no ipv6 nd rdnss X:X::X:X [<(0-4294967295)|infinite>]", + NO_STR + "Interface IPv6 config commands\n" + "Neighbor discovery\n" + "Recursive DNS server information\n" + "IPv6 address\n" + "Valid lifetime in seconds\n" + "Infinite valid lifetime\n") +{ + VTY_DECLVAR_CONTEXT(interface, ifp); + struct zebra_if *zif = ifp->info; + struct rtadv_rdnss rdnss = {}; + + if (inet_pton(AF_INET6, argv[4]->arg, &rdnss.addr) != 1) { + vty_out(vty, "Malformed IPv6 address\n"); + return CMD_WARNING_CONFIG_FAILED; + } + if (rtadv_rdnss_reset(zif, &rdnss) != 1) { + vty_out(vty, "Non-existant RDNSS address\n"); + return CMD_WARNING_CONFIG_FAILED; + } + + return CMD_SUCCESS; +} + +DEFUN(ipv6_nd_dnssl, + ipv6_nd_dnssl_cmd, + "ipv6 nd dnssl SUFFIX [<(0-4294967295)|infinite>]", + "Interface IPv6 config commands\n" + "Neighbor discovery\n" + "DNS search list information\n" + "Domain name suffix\n" + "Valid lifetime in seconds\n" + "Infinite valid lifetime\n") +{ + VTY_DECLVAR_CONTEXT(interface, ifp); + struct zebra_if *zif = ifp->info; + struct rtadv_dnssl dnssl = {}; + size_t len; + int ret; + + len = strlcpy(dnssl.name, argv[3]->arg, sizeof(dnssl.name)); + if (len == 0 || len >= sizeof(dnssl.name)) { + vty_out(vty, "Malformed DNS search domain\n"); + return CMD_WARNING_CONFIG_FAILED; + } + if (dnssl.name[len - 1] == '.') { + /* + * Allow, but don't require, a trailing dot signifying the root + * zone. Canonicalize by cutting it off if present. + */ + dnssl.name[len - 1] = '\0'; + len--; + } + if (argc > 4) { + char *lifetime = argv[4]->type == RANGE_TKN ? argv[4]->arg + : argv[4]->text; + dnssl.lifetime = strmatch(lifetime, "infinite") + ? 
UINT32_MAX + : strtoll(lifetime, NULL, 10); + dnssl.lifetime_set = 1; + } + + ret = rtadv_dnssl_encode(dnssl.encoded_name, dnssl.name); + if (ret < 0) { + vty_out(vty, "Malformed DNS search domain\n"); + return CMD_WARNING_CONFIG_FAILED; + } + dnssl.encoded_len = ret; + rtadv_dnssl_set(zif, &dnssl); + + return CMD_SUCCESS; +} + +DEFUN(no_ipv6_nd_dnssl, + no_ipv6_nd_dnssl_cmd, + "no ipv6 nd dnssl SUFFIX [<(0-4294967295)|infinite>]", + NO_STR + "Interface IPv6 config commands\n" + "Neighbor discovery\n" + "DNS search list information\n" + "Domain name suffix\n" + "Valid lifetime in seconds\n" + "Infinite valid lifetime\n") +{ + VTY_DECLVAR_CONTEXT(interface, ifp); + struct zebra_if *zif = ifp->info; + struct rtadv_dnssl dnssl = {}; + size_t len; + + len = strlcpy(dnssl.name, argv[4]->arg, sizeof(dnssl.name)); + if (len == 0 || len >= sizeof(dnssl.name)) { + vty_out(vty, "Malformed DNS search domain\n"); + return CMD_WARNING_CONFIG_FAILED; + } + if (dnssl.name[len - 1] == '.') { + dnssl.name[len - 1] = '\0'; + len--; + } + if (rtadv_dnssl_reset(zif, &dnssl) != 1) { + vty_out(vty, "Non-existant DNS search domain\n"); + return CMD_WARNING_CONFIG_FAILED; + } + + return CMD_SUCCESS; +} + + +/* Dump interface ND information to vty. 
*/ +static int nd_dump_vty(struct vty *vty, struct interface *ifp) +{ + struct zebra_if *zif; + struct rtadvconf *rtadv; + int interval; + + zif = (struct zebra_if *)ifp->info; + rtadv = &zif->rtadv; + + if (rtadv->AdvSendAdvertisements) { + vty_out(vty, + " ND advertised reachable time is %d milliseconds\n", + rtadv->AdvReachableTime); + vty_out(vty, + " ND advertised retransmit interval is %u milliseconds\n", + rtadv->AdvRetransTimer); + vty_out(vty, " ND advertised hop-count limit is %d hops\n", + rtadv->AdvCurHopLimit); + vty_out(vty, " ND router advertisements sent: %d rcvd: %d\n", + zif->ra_sent, zif->ra_rcvd); + interval = rtadv->MaxRtrAdvInterval; + if (interval % 1000) + vty_out(vty, + " ND router advertisements are sent every %d milliseconds\n", + interval); + else + vty_out(vty, + " ND router advertisements are sent every %d seconds\n", + interval / 1000); + if (!rtadv->UseFastRexmit) + vty_out(vty, + " ND router advertisements do not use fast retransmit\n"); + + if (rtadv->AdvDefaultLifetime != -1) + vty_out(vty, + " ND router advertisements live for %d seconds\n", + rtadv->AdvDefaultLifetime); + else + vty_out(vty, + " ND router advertisements lifetime tracks ra-interval\n"); + vty_out(vty, + " ND router advertisement default router preference is %s\n", + rtadv_pref_strs[rtadv->DefaultPreference]); + if (rtadv->AdvManagedFlag) + vty_out(vty, + " Hosts use DHCP to obtain routable addresses.\n"); + else + vty_out(vty, + " Hosts use stateless autoconfig for addresses.\n"); + if (rtadv->AdvHomeAgentFlag) { + vty_out(vty, + " ND router advertisements with Home Agent flag bit set.\n"); + if (rtadv->HomeAgentLifetime != -1) + vty_out(vty, + " Home Agent lifetime is %u seconds\n", + rtadv->HomeAgentLifetime); + else + vty_out(vty, + " Home Agent lifetime tracks ra-lifetime\n"); + vty_out(vty, " Home Agent preference is %u\n", + rtadv->HomeAgentPreference); + } + if (rtadv->AdvIntervalOption) + vty_out(vty, + " ND router advertisements with Adv. 
Interval option.\n"); + } + return 0; +} + + +/* Write configuration about router advertisement. */ +static int rtadv_config_write(struct vty *vty, struct interface *ifp) +{ + struct zebra_if *zif; + struct listnode *node; + struct rtadv_prefix *rprefix; + struct rtadv_rdnss *rdnss; + struct rtadv_dnssl *dnssl; + int interval; + + zif = ifp->info; + + if (!if_is_loopback(ifp)) { + if (zif->rtadv.AdvSendAdvertisements + && CHECK_FLAG(zif->rtadv.ra_configured, VTY_RA_CONFIGURED)) + vty_out(vty, " no ipv6 nd suppress-ra\n"); + } + + interval = zif->rtadv.MaxRtrAdvInterval; + if (CHECK_FLAG(zif->rtadv.ra_configured, VTY_RA_INTERVAL_CONFIGURED)) { + if (interval % 1000) + vty_out(vty, " ipv6 nd ra-interval msec %d\n", + interval); + else if (interval != RTADV_MAX_RTR_ADV_INTERVAL) + vty_out(vty, " ipv6 nd ra-interval %d\n", + interval / 1000); + } + + if (zif->rtadv.AdvIntervalOption) + vty_out(vty, " ipv6 nd adv-interval-option\n"); + + if (!zif->rtadv.UseFastRexmit) + vty_out(vty, " no ipv6 nd ra-fast-retrans\n"); + + if (zif->rtadv.AdvRetransTimer != 0) + vty_out(vty, " ipv6 nd ra-retrans-interval %u\n", + zif->rtadv.AdvRetransTimer); + + if (zif->rtadv.AdvCurHopLimit != RTADV_DEFAULT_HOPLIMIT) + vty_out(vty, " ipv6 nd ra-hop-limit %d\n", + zif->rtadv.AdvCurHopLimit); + + if (zif->rtadv.AdvDefaultLifetime != -1) + vty_out(vty, " ipv6 nd ra-lifetime %d\n", + zif->rtadv.AdvDefaultLifetime); + + if (zif->rtadv.HomeAgentPreference) + vty_out(vty, " ipv6 nd home-agent-preference %u\n", + zif->rtadv.HomeAgentPreference); + + if (zif->rtadv.HomeAgentLifetime != -1) + vty_out(vty, " ipv6 nd home-agent-lifetime %u\n", + zif->rtadv.HomeAgentLifetime); + + if (zif->rtadv.AdvHomeAgentFlag) + vty_out(vty, " ipv6 nd home-agent-config-flag\n"); + + if (zif->rtadv.AdvReachableTime) + vty_out(vty, " ipv6 nd reachable-time %d\n", + zif->rtadv.AdvReachableTime); + + if (zif->rtadv.AdvManagedFlag) + vty_out(vty, " ipv6 nd managed-config-flag\n"); + + if (zif->rtadv.AdvOtherConfigFlag) + 
vty_out(vty, " ipv6 nd other-config-flag\n"); + + if (zif->rtadv.DefaultPreference != RTADV_PREF_MEDIUM) + vty_out(vty, " ipv6 nd router-preference %s\n", + rtadv_pref_strs[zif->rtadv.DefaultPreference]); + + if (zif->rtadv.AdvLinkMTU) + vty_out(vty, " ipv6 nd mtu %d\n", zif->rtadv.AdvLinkMTU); + + frr_each (rtadv_prefixes, zif->rtadv.prefixes, rprefix) { + if ((rprefix->AdvPrefixCreate == PREFIX_SRC_MANUAL) + || (rprefix->AdvPrefixCreate == PREFIX_SRC_BOTH)) { + vty_out(vty, " ipv6 nd prefix %pFX", &rprefix->prefix); + if ((rprefix->AdvValidLifetime != RTADV_VALID_LIFETIME) + || (rprefix->AdvPreferredLifetime + != RTADV_PREFERRED_LIFETIME)) { + if (rprefix->AdvValidLifetime == UINT32_MAX) + vty_out(vty, " infinite"); + else + vty_out(vty, " %u", + rprefix->AdvValidLifetime); + if (rprefix->AdvPreferredLifetime == UINT32_MAX) + vty_out(vty, " infinite"); + else + vty_out(vty, " %u", + rprefix->AdvPreferredLifetime); + } + if (!rprefix->AdvOnLinkFlag) + vty_out(vty, " off-link"); + if (!rprefix->AdvAutonomousFlag) + vty_out(vty, " no-autoconfig"); + if (rprefix->AdvRouterAddressFlag) + vty_out(vty, " router-address"); + vty_out(vty, "\n"); + } + } + + for (ALL_LIST_ELEMENTS_RO(zif->rtadv.AdvRDNSSList, node, rdnss)) { + char buf[INET6_ADDRSTRLEN]; + + vty_out(vty, " ipv6 nd rdnss %s", + inet_ntop(AF_INET6, &rdnss->addr, buf, sizeof(buf))); + if (rdnss->lifetime_set) { + if (rdnss->lifetime == UINT32_MAX) + vty_out(vty, " infinite"); + else + vty_out(vty, " %u", rdnss->lifetime); + } + vty_out(vty, "\n"); + } + for (ALL_LIST_ELEMENTS_RO(zif->rtadv.AdvDNSSLList, node, dnssl)) { + vty_out(vty, " ipv6 nd dnssl %s", dnssl->name); + if (dnssl->lifetime_set) { + if (dnssl->lifetime == UINT32_MAX) + vty_out(vty, " infinite"); + else + vty_out(vty, " %u", dnssl->lifetime); + } + vty_out(vty, "\n"); + } + return 0; +} + + +static void rtadv_event(struct zebra_vrf *zvrf, enum rtadv_event event, int val) +{ + struct rtadv *rtadv; + + if (IS_ZEBRA_DEBUG_EVENT) { + struct vrf *vrf 
= zvrf->vrf; + + zlog_debug("%s(%s) with event: %d and val: %d", __func__, + VRF_LOGNAME(vrf), event, val); + } + + rtadv = &zvrf->rtadv; + + switch (event) { + case RTADV_START: + thread_add_read(zrouter.master, rtadv_read, zvrf, rtadv->sock, + &rtadv->ra_read); + thread_add_event(zrouter.master, rtadv_timer, zvrf, 0, + &rtadv->ra_timer); + break; + case RTADV_STOP: + THREAD_OFF(rtadv->ra_timer); + THREAD_OFF(rtadv->ra_read); + break; + case RTADV_TIMER: + thread_add_timer(zrouter.master, rtadv_timer, zvrf, val, + &rtadv->ra_timer); + break; + case RTADV_TIMER_MSEC: + thread_add_timer_msec(zrouter.master, rtadv_timer, zvrf, val, + &rtadv->ra_timer); + break; + case RTADV_READ: + thread_add_read(zrouter.master, rtadv_read, zvrf, rtadv->sock, + &rtadv->ra_read); + break; + default: + break; + } + return; +} + +void rtadv_if_up(struct zebra_if *zif) +{ + struct zebra_vrf *zvrf = rtadv_interface_get_zvrf(zif->ifp); + + /* Enable fast tx of RA if enabled && RA interval is not in msecs */ + if (zif->rtadv.AdvSendAdvertisements && + (zif->rtadv.MaxRtrAdvInterval >= 1000) && + zif->rtadv.UseFastRexmit) { + zif->rtadv.inFastRexmit = 1; + zif->rtadv.NumFastReXmitsRemain = RTADV_NUM_FAST_REXMITS; + } + + /* + * startup the state machine, if it hasn't been already + * due to a delayed ifindex on startup ordering + */ + if (zif->rtadv.AdvSendAdvertisements) + rtadv_start_interface_events(zvrf, zif); +} + +void rtadv_if_init(struct zebra_if *zif) +{ + /* Set default router advertise values. 
*/ + struct rtadvconf *rtadv; + + rtadv = &zif->rtadv; + + rtadv->AdvSendAdvertisements = 0; + rtadv->MaxRtrAdvInterval = RTADV_MAX_RTR_ADV_INTERVAL; + rtadv->MinRtrAdvInterval = RTADV_MIN_RTR_ADV_INTERVAL; + rtadv->AdvIntervalTimer = 0; + rtadv->AdvManagedFlag = 0; + rtadv->AdvOtherConfigFlag = 0; + rtadv->AdvHomeAgentFlag = 0; + rtadv->AdvLinkMTU = 0; + rtadv->AdvReachableTime = 0; + rtadv->AdvRetransTimer = 0; + rtadv->AdvCurHopLimit = RTADV_DEFAULT_HOPLIMIT; + memset(&rtadv->lastadvcurhoplimit, 0, + sizeof(rtadv->lastadvcurhoplimit)); + memset(&rtadv->lastadvmanagedflag, 0, + sizeof(rtadv->lastadvmanagedflag)); + memset(&rtadv->lastadvotherconfigflag, 0, + sizeof(rtadv->lastadvotherconfigflag)); + memset(&rtadv->lastadvreachabletime, 0, + sizeof(rtadv->lastadvreachabletime)); + memset(&rtadv->lastadvretranstimer, 0, + sizeof(rtadv->lastadvretranstimer)); + rtadv->AdvDefaultLifetime = -1; /* derive from MaxRtrAdvInterval */ + rtadv->HomeAgentPreference = 0; + rtadv->HomeAgentLifetime = -1; /* derive from AdvDefaultLifetime */ + rtadv->AdvIntervalOption = 0; + rtadv->UseFastRexmit = true; + rtadv->DefaultPreference = RTADV_PREF_MEDIUM; + + rtadv_prefixes_init(rtadv->prefixes); + + rtadv->AdvRDNSSList = list_new(); + rtadv->AdvDNSSLList = list_new(); +} + +void rtadv_if_fini(struct zebra_if *zif) +{ + struct rtadvconf *rtadv; + struct rtadv_prefix *rp; + + rtadv = &zif->rtadv; + + while ((rp = rtadv_prefixes_pop(rtadv->prefixes))) + rtadv_prefix_free(rp); + + list_delete(&rtadv->AdvRDNSSList); + list_delete(&rtadv->AdvDNSSLList); +} + +void rtadv_vrf_init(struct zebra_vrf *zvrf) +{ + if (!vrf_is_backend_netns() && (zvrf_id(zvrf) != VRF_DEFAULT)) + return; + + zvrf->rtadv.sock = rtadv_make_socket(zvrf->zns->ns_id); +} + +void rtadv_vrf_terminate(struct zebra_vrf *zvrf) +{ + if (!vrf_is_backend_netns() && (zvrf_id(zvrf) != VRF_DEFAULT)) + return; + + rtadv_event(zvrf, RTADV_STOP, 0); + if (zvrf->rtadv.sock >= 0) { + close(zvrf->rtadv.sock); + zvrf->rtadv.sock = -1; 
+ } + + adv_if_clean(zvrf); + adv_msec_if_clean(zvrf); +} + +void rtadv_cmd_init(void) +{ + interfaces_configured_for_ra_from_bgp = 0; + + hook_register(zebra_if_extra_info, nd_dump_vty); + hook_register(zebra_if_config_wr, rtadv_config_write); + + install_element(VIEW_NODE, &show_ipv6_nd_ra_if_cmd); + + install_element(INTERFACE_NODE, &ipv6_nd_ra_fast_retrans_cmd); + install_element(INTERFACE_NODE, &no_ipv6_nd_ra_fast_retrans_cmd); + install_element(INTERFACE_NODE, &ipv6_nd_ra_retrans_interval_cmd); + install_element(INTERFACE_NODE, &no_ipv6_nd_ra_retrans_interval_cmd); + install_element(INTERFACE_NODE, &ipv6_nd_ra_hop_limit_cmd); + install_element(INTERFACE_NODE, &no_ipv6_nd_ra_hop_limit_cmd); + install_element(INTERFACE_NODE, &ipv6_nd_suppress_ra_cmd); + install_element(INTERFACE_NODE, &no_ipv6_nd_suppress_ra_cmd); + install_element(INTERFACE_NODE, &ipv6_nd_ra_interval_cmd); + install_element(INTERFACE_NODE, &ipv6_nd_ra_interval_msec_cmd); + install_element(INTERFACE_NODE, &no_ipv6_nd_ra_interval_cmd); + install_element(INTERFACE_NODE, &ipv6_nd_ra_lifetime_cmd); + install_element(INTERFACE_NODE, &no_ipv6_nd_ra_lifetime_cmd); + install_element(INTERFACE_NODE, &ipv6_nd_reachable_time_cmd); + install_element(INTERFACE_NODE, &no_ipv6_nd_reachable_time_cmd); + install_element(INTERFACE_NODE, &ipv6_nd_managed_config_flag_cmd); + install_element(INTERFACE_NODE, &no_ipv6_nd_managed_config_flag_cmd); + install_element(INTERFACE_NODE, &ipv6_nd_other_config_flag_cmd); + install_element(INTERFACE_NODE, &no_ipv6_nd_other_config_flag_cmd); + install_element(INTERFACE_NODE, &ipv6_nd_homeagent_config_flag_cmd); + install_element(INTERFACE_NODE, &no_ipv6_nd_homeagent_config_flag_cmd); + install_element(INTERFACE_NODE, &ipv6_nd_homeagent_preference_cmd); + install_element(INTERFACE_NODE, &no_ipv6_nd_homeagent_preference_cmd); + install_element(INTERFACE_NODE, &ipv6_nd_homeagent_lifetime_cmd); + install_element(INTERFACE_NODE, &no_ipv6_nd_homeagent_lifetime_cmd); + 
install_element(INTERFACE_NODE, + &ipv6_nd_adv_interval_config_option_cmd); + install_element(INTERFACE_NODE, + &no_ipv6_nd_adv_interval_config_option_cmd); + install_element(INTERFACE_NODE, &ipv6_nd_prefix_cmd); + install_element(INTERFACE_NODE, &no_ipv6_nd_prefix_cmd); + install_element(INTERFACE_NODE, &ipv6_nd_router_preference_cmd); + install_element(INTERFACE_NODE, &no_ipv6_nd_router_preference_cmd); + install_element(INTERFACE_NODE, &ipv6_nd_mtu_cmd); + install_element(INTERFACE_NODE, &no_ipv6_nd_mtu_cmd); + install_element(INTERFACE_NODE, &ipv6_nd_rdnss_cmd); + install_element(INTERFACE_NODE, &no_ipv6_nd_rdnss_cmd); + install_element(INTERFACE_NODE, &ipv6_nd_dnssl_cmd); + install_element(INTERFACE_NODE, &no_ipv6_nd_dnssl_cmd); +} + +static int if_join_all_router(int sock, struct interface *ifp) +{ + int ret; + + struct ipv6_mreq mreq; + + memset(&mreq, 0, sizeof(mreq)); + inet_pton(AF_INET6, ALLROUTER, &mreq.ipv6mr_multiaddr); + mreq.ipv6mr_interface = ifp->ifindex; + + ret = setsockopt(sock, IPPROTO_IPV6, IPV6_JOIN_GROUP, (char *)&mreq, + sizeof(mreq)); + if (ret < 0) + flog_err_sys(EC_LIB_SOCKET, + "%s(%u): Failed to join group, socket %u error %s", + ifp->name, ifp->ifindex, sock, + safe_strerror(errno)); + + if (IS_ZEBRA_DEBUG_EVENT) + zlog_debug( + "%s(%s:%u): Join All-Routers multicast group, socket %u", + ifp->name, ifp->vrf->name, ifp->ifindex, sock); + + return 0; +} + +static int if_leave_all_router(int sock, struct interface *ifp) +{ + int ret; + + struct ipv6_mreq mreq; + + memset(&mreq, 0, sizeof(mreq)); + inet_pton(AF_INET6, ALLROUTER, &mreq.ipv6mr_multiaddr); + mreq.ipv6mr_interface = ifp->ifindex; + + ret = setsockopt(sock, IPPROTO_IPV6, IPV6_LEAVE_GROUP, (char *)&mreq, + sizeof(mreq)); + if (ret < 0) + flog_err_sys( + EC_LIB_SOCKET, + "%s(%s:%u): Failed to leave group, socket %u error %s", + ifp->name, ifp->vrf->name, ifp->ifindex, sock, + safe_strerror(errno)); + + if (IS_ZEBRA_DEBUG_EVENT) + zlog_debug( + "%s(%s:%u): Leave All-Routers 
multicast group, socket %u", + ifp->name, ifp->vrf->name, ifp->ifindex, sock); + + return 0; +} + +bool rtadv_compiled_in(void) +{ + return true; +} + +#else /* !HAVE_RTADV */ +/* + * If the end user does not have RADV enabled we should + * handle this better + */ +void zebra_interface_radv_disable(ZAPI_HANDLER_ARGS) +{ + if (IS_ZEBRA_DEBUG_PACKET) + zlog_debug( + "Received %s command, but ZEBRA is not compiled with Router Advertisements on", + zserv_command_string(hdr->command)); + + return; +} + +void zebra_interface_radv_enable(ZAPI_HANDLER_ARGS) +{ + if (IS_ZEBRA_DEBUG_PACKET) + zlog_debug( + "Received %s command, but ZEBRA is not compiled with Router Advertisements on", + zserv_command_string(hdr->command)); + + return; +} + +bool rtadv_compiled_in(void) +{ + return false; +} + +#endif /* HAVE_RTADV */ + +uint32_t rtadv_get_interfaces_configured_from_bgp(void) +{ + return interfaces_configured_for_ra_from_bgp; +} diff --git a/zebra/rtadv.h b/zebra/rtadv.h new file mode 100644 index 0000000..26c7823 --- /dev/null +++ b/zebra/rtadv.h @@ -0,0 +1,451 @@ +/* Router advertisement + * Copyright (C) 2005 6WIND <jean-mickael.guerin@6wind.com> + * Copyright (C) 1999 Kunihiro Ishiguro + * + * This file is part of GNU Zebra. + * + * GNU Zebra is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License as published by the + * Free Software Foundation; either version 2, or (at your option) any + * later version. + * + * GNU Zebra is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. 
+ * + * You should have received a copy of the GNU General Public License along + * with this program; see the file COPYING; if not, write to the Free Software + * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA + */ + +#ifndef _ZEBRA_RTADV_H +#define _ZEBRA_RTADV_H + +#include "zebra.h" +#include "vty.h" +#include "typesafe.h" + +#include "zebra/zserv.h" + +#ifdef __cplusplus +extern "C" { +#endif + +struct interface; +struct zebra_if; + +#if defined(HAVE_RTADV) + +PREDECL_SORTLIST_UNIQ(adv_if_list); +/* Structure which hold status of router advertisement. */ +struct rtadv { + int sock; + + struct adv_if_list_head adv_if; + struct adv_if_list_head adv_msec_if; + + struct thread *ra_read; + struct thread *ra_timer; +}; + +PREDECL_RBTREE_UNIQ(rtadv_prefixes); + +/* Router advertisement parameter. From RFC4861, RFC6275 and RFC4191. */ +struct rtadvconf { + /* A flag indicating whether or not the router sends periodic Router + Advertisements and responds to Router Solicitations. + Default: false */ + int AdvSendAdvertisements; + + /* The maximum time allowed between sending unsolicited multicast + Router Advertisements from the interface, in milliseconds. + MUST be no less than 70 ms [RFC6275 7.5] and no greater + than 1800000 ms [RFC4861 6.2.1]. + + Default: 600000 milliseconds */ + int MaxRtrAdvInterval; +#define RTADV_MAX_RTR_ADV_INTERVAL 600000 + + /* The minimum time allowed between sending unsolicited multicast + Router Advertisements from the interface, in milliseconds. + MUST be no less than 30 ms [RFC6275 7.5]. + MUST be no greater than .75 * MaxRtrAdvInterval. + + Default: 0.33 * MaxRtrAdvInterval */ + int MinRtrAdvInterval; /* This field is currently unused. */ +#define RTADV_MIN_RTR_ADV_INTERVAL (0.33 * RTADV_MAX_RTR_ADV_INTERVAL) + + /* Unsolicited Router Advertisements' interval timer. 
*/ + int AdvIntervalTimer; + + /* The true/false value to be placed in the "Managed address + configuration" flag field in the Router Advertisement. See + [ADDRCONF]. + + Default: false */ + int AdvManagedFlag; + struct timeval lastadvmanagedflag; + + + /* The true/false value to be placed in the "Other stateful + configuration" flag field in the Router Advertisement. See + [ADDRCONF]. + + Default: false */ + int AdvOtherConfigFlag; + struct timeval lastadvotherconfigflag; + + /* The value to be placed in MTU options sent by the router. A + value of zero indicates that no MTU options are sent. + + Default: 0 */ + int AdvLinkMTU; + + + /* The value to be placed in the Reachable Time field in the Router + Advertisement messages sent by the router. The value zero means + unspecified (by this router). MUST be no greater than 3,600,000 + milliseconds (1 hour). + + Default: 0 */ + uint32_t AdvReachableTime; +#define RTADV_MAX_REACHABLE_TIME 3600000 + struct timeval lastadvreachabletime; + + /* The value to be placed in the Retrans Timer field in the Router + Advertisement messages sent by the router. The value zero means + unspecified (by this router). + + Default: 0 */ + int AdvRetransTimer; + struct timeval lastadvretranstimer; + + /* The default value to be placed in the Cur Hop Limit field in the + Router Advertisement messages sent by the router. The value + should be set to that current diameter of the Internet. The + value zero means unspecified (by this router). + + Default: The value specified in the "Assigned Numbers" RFC + [ASSIGNED] that was in effect at the time of implementation. */ + int AdvCurHopLimit; + struct timeval lastadvcurhoplimit; + +#define RTADV_DEFAULT_HOPLIMIT 64 /* 64 hops */ + + /* The value to be placed in the Router Lifetime field of Router + Advertisements sent from the interface, in seconds. MUST be + either zero or between MaxRtrAdvInterval and 9000 seconds. 
A + value of zero indicates that the router is not to be used as a + default router. + + Default: 3 * MaxRtrAdvInterval */ + int AdvDefaultLifetime; +#define RTADV_MAX_RTRLIFETIME 9000 /* 2.5 hours */ + + /* A list of prefixes to be placed in Prefix Information options in + Router Advertisement messages sent from the interface. + + Default: all prefixes that the router advertises via routing + protocols as being on-link for the interface from which the + advertisement is sent. The link-local prefix SHOULD NOT be + included in the list of advertised prefixes. */ + struct rtadv_prefixes_head prefixes[1]; + + /* The true/false value to be placed in the "Home agent" + flag field in the Router Advertisement. See [RFC6275 7.1]. + + Default: false */ + int AdvHomeAgentFlag; +#ifndef ND_RA_FLAG_HOME_AGENT +#define ND_RA_FLAG_HOME_AGENT 0x20 +#endif + + /* The value to be placed in Home Agent Information option if Home + Flag is set. + Default: 0 */ + int HomeAgentPreference; + + /* The value to be placed in Home Agent Information option if Home + Flag is set. Lifetime (seconds) MUST not be greater than 18.2 + hours. + The value 0 has special meaning: use of AdvDefaultLifetime value. + + Default: 0 */ + int HomeAgentLifetime; +#define RTADV_MAX_HALIFETIME 65520 /* 18.2 hours */ + + /* The true/false value to insert or not an Advertisement Interval + option. See [RFC 6275 7.3] + + Default: false */ + int AdvIntervalOption; + + /* The value to be placed in the Default Router Preference field of + a router advertisement. See [RFC 4191 2.1 & 2.2] + + Default: 0 (medium) */ + int DefaultPreference; +#define RTADV_PREF_MEDIUM 0x0 /* Per RFC4191. */ + + /* + * List of recursive DNS servers to include in the RDNSS option. + * See [RFC8106 5.1] + * + * Default: empty list; do not emit RDNSS option + */ + struct list *AdvRDNSSList; + + /* + * List of DNS search domains to include in the DNSSL option. 
+ * See [RFC8106 5.2] + * + * Default: empty list; do not emit DNSSL option + */ + struct list *AdvDNSSLList; + + /* + * rfc4861 states RAs must be sent at least 3 seconds apart. + * We allow faster retransmits to speed up convergence but can + * turn that capability off to meet the rfc if needed. + */ + bool UseFastRexmit; /* True if fast rexmits are enabled */ + + uint8_t inFastRexmit; /* True if we're rexmits faster than usual */ + + /* Track if RA was configured by BGP or by the Operator or both */ + uint8_t ra_configured; /* Was RA configured? */ +#define BGP_RA_CONFIGURED (1 << 0) /* BGP configured RA? */ +#define VTY_RA_CONFIGURED (1 << 1) /* Operator configured RA? */ +#define VTY_RA_INTERVAL_CONFIGURED \ + (1 << 2) /* Operator configured RA interval */ + int NumFastReXmitsRemain; /* Loaded first with number of fast + rexmits to do */ + +#define RTADV_FAST_REXMIT_PERIOD 1 /* 1 sec */ +#define RTADV_NUM_FAST_REXMITS 4 /* Fast Rexmit RA 4 times on certain events \ + */ +}; + +struct rtadv_rdnss { + /* Address of recursive DNS server to advertise */ + struct in6_addr addr; + + /* + * Lifetime in seconds; all-ones means infinity, zero + * stop using it. + */ + uint32_t lifetime; + + /* If lifetime not set, use a default of 3*MaxRtrAdvInterval */ + int lifetime_set; +}; + +/* + * [RFC1035 2.3.4] sets the maximum length of a domain name (a sequence of + * labels, each prefixed by a length octet) at 255 octets. + */ +#define RTADV_MAX_ENCODED_DOMAIN_NAME 255 + +struct rtadv_dnssl { + /* Domain name without trailing root zone dot (NUL-terminated) */ + char name[RTADV_MAX_ENCODED_DOMAIN_NAME - 1]; + + /* Name encoded as in [RFC1035 3.1] */ + uint8_t encoded_name[RTADV_MAX_ENCODED_DOMAIN_NAME]; + + /* Actual length of encoded_name */ + size_t encoded_len; + + /* Lifetime as for RDNSS */ + uint32_t lifetime; + int lifetime_set; +}; + +/* Router advertisement prefix. */ +struct rtadv_prefix { + struct rtadv_prefixes_item item; + + /* Prefix to be advertised. 
*/ + struct prefix_ipv6 prefix; + + /* The prefix was manually/automatically defined. */ + int AdvPrefixCreate; + + /* The value to be placed in the Valid Lifetime in the Prefix */ + uint32_t AdvValidLifetime; +#define RTADV_VALID_LIFETIME 2592000 + + /* The value to be placed in the on-link flag */ + int AdvOnLinkFlag; + + /* The value to be placed in the Preferred Lifetime in the Prefix + Information option, in seconds.*/ + uint32_t AdvPreferredLifetime; +#define RTADV_PREFERRED_LIFETIME 604800 + + /* The value to be placed in the Autonomous Flag. */ + int AdvAutonomousFlag; + + /* The value to be placed in the Router Address Flag [RFC6275 7.2]. */ + int AdvRouterAddressFlag; +#ifndef ND_OPT_PI_FLAG_RADDR +#define ND_OPT_PI_FLAG_RADDR 0x20 +#endif +}; + +/* RFC4861 minimum delay between RAs */ +#ifndef MIN_DELAY_BETWEEN_RAS +#define MIN_DELAY_BETWEEN_RAS 3000 +#endif + +/* RFC4584 Extension to Sockets API for Mobile IPv6 */ + +#ifndef ND_OPT_ADV_INTERVAL +#define ND_OPT_ADV_INTERVAL 7 /* Adv Interval Option */ +#endif +#ifndef ND_OPT_HA_INFORMATION +#define ND_OPT_HA_INFORMATION 8 /* HA Information Option */ +#endif + +#ifndef HAVE_STRUCT_ND_OPT_ADV_INTERVAL +struct nd_opt_adv_interval { /* Advertisement interval option */ + uint8_t nd_opt_ai_type; + uint8_t nd_opt_ai_len; + uint16_t nd_opt_ai_reserved; + uint32_t nd_opt_ai_interval; +} __attribute__((__packed__)); +#else +#ifndef HAVE_STRUCT_ND_OPT_ADV_INTERVAL_ND_OPT_AI_TYPE +/* fields may have to be renamed */ +#define nd_opt_ai_type nd_opt_adv_interval_type +#define nd_opt_ai_len nd_opt_adv_interval_len +#define nd_opt_ai_reserved nd_opt_adv_interval_reserved +#define nd_opt_ai_interval nd_opt_adv_interval_ival +#endif +#endif + +#ifndef HAVE_STRUCT_ND_OPT_HOMEAGENT_INFO +struct nd_opt_homeagent_info { /* Home Agent info */ + uint8_t nd_opt_hai_type; + uint8_t nd_opt_hai_len; + uint16_t nd_opt_hai_reserved; + uint16_t nd_opt_hai_preference; + uint16_t nd_opt_hai_lifetime; +} __attribute__((__packed__)); 
+#endif + +#ifndef ND_OPT_RDNSS +#define ND_OPT_RDNSS 25 +#endif +#ifndef ND_OPT_DNSSL +#define ND_OPT_DNSSL 31 +#endif + +#ifndef HAVE_STRUCT_ND_OPT_RDNSS +struct nd_opt_rdnss { /* Recursive DNS server option [RFC8106 5.1] */ + uint8_t nd_opt_rdnss_type; + uint8_t nd_opt_rdnss_len; + uint16_t nd_opt_rdnss_reserved; + uint32_t nd_opt_rdnss_lifetime; + /* Followed by one or more IPv6 addresses */ +} __attribute__((__packed__)); +#endif + +#ifndef HAVE_STRUCT_ND_OPT_DNSSL +struct nd_opt_dnssl { /* DNS search list option [RFC8106 5.2] */ + uint8_t nd_opt_dnssl_type; + uint8_t nd_opt_dnssl_len; + uint16_t nd_opt_dnssl_reserved; + uint32_t nd_opt_dnssl_lifetime; + /* + * Followed by one or more domain names encoded as in [RFC1035 3.1]. + * Multiple domain names are concatenated after encoding. In any case, + * the result is zero-padded to a multiple of 8 octets. + */ +} __attribute__((__packed__)); +#endif + +/* + * ipv6 nd prefixes can be manually defined, derived from the kernel interface + * configs or both. If both, manual flag/timer settings are used. 
+ */ +enum ipv6_nd_prefix_source { + PREFIX_SRC_NONE = 0, + PREFIX_SRC_MANUAL, + PREFIX_SRC_AUTO, + PREFIX_SRC_BOTH, +}; + +enum ipv6_nd_suppress_ra_status { + RA_ENABLE = 0, + RA_SUPPRESS, +}; + +extern void rtadv_vrf_init(struct zebra_vrf *zvrf); +extern void rtadv_vrf_terminate(struct zebra_vrf *zvrf); +extern void rtadv_stop_ra(struct interface *ifp); +extern void rtadv_stop_ra_all(void); +extern void rtadv_cmd_init(void); +extern void rtadv_if_init(struct zebra_if *zif); +extern void rtadv_if_up(struct zebra_if *zif); +extern void rtadv_if_fini(struct zebra_if *zif); +extern void rtadv_add_prefix(struct zebra_if *zif, const struct prefix_ipv6 *p); +extern void rtadv_delete_prefix(struct zebra_if *zif, const struct prefix *p); + +#else /* !HAVE_RTADV */ +struct rtadv { + /* empty structs aren't valid ISO C */ + char dummy; +}; + +struct rtadvconf { + /* same again, empty structs aren't valid ISO C */ + char dummy; +}; + +static inline void rtadv_vrf_init(struct zebra_vrf *zvrf) +{ +} +static inline void rtadv_vrf_terminate(struct zebra_vrf *zvrf) +{ +} +static inline void rtadv_cmd_init(void) +{ +} +static inline void rtadv_if_init(struct zebra_if *zif) +{ +} +static inline void rtadv_if_up(struct zebra_if *zif) +{ +} +static inline void rtadv_if_fini(struct zebra_if *zif) +{ +} +static inline void rtadv_add_prefix(struct zebra_if *zif, + const struct prefix_ipv6 *p) +{ +} +static inline void rtadv_delete_prefix(struct zebra_if *zif, + const struct prefix *p) +{ +} +static inline void rtadv_stop_ra(struct interface *ifp) +{ +} +static inline void rtadv_stop_ra_all(void) +{ +} +#endif + +extern void zebra_interface_radv_disable(ZAPI_HANDLER_ARGS); +extern void zebra_interface_radv_enable(ZAPI_HANDLER_ARGS); + +extern uint32_t rtadv_get_interfaces_configured_from_bgp(void); +extern bool rtadv_compiled_in(void); + +#ifdef __cplusplus +} +#endif + +#endif /* _ZEBRA_RTADV_H */ diff --git a/zebra/rtread_netlink.c b/zebra/rtread_netlink.c new file mode 100644 index 
0000000..f70b006 --- /dev/null +++ b/zebra/rtread_netlink.c @@ -0,0 +1,74 @@ +/* + * Kernel routing table readup by netlink + * Copyright (C) 1998 Kunihiro Ishiguro + * + * This file is part of GNU Zebra. + * + * GNU Zebra is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License as published by the + * Free Software Foundation; either version 2, or (at your option) any + * later version. + * + * GNU Zebra is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. + * + * You should have received a copy of the GNU General Public License along + * with this program; see the file COPYING; if not, write to the Free Software + * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA + */ + +#include <zebra.h> + +#ifdef GNU_LINUX + +#include "vty.h" +#include "zebra/rt.h" +#include "zebra/zebra_pbr.h" +#include "zebra/rt_netlink.h" +#include "zebra/rule_netlink.h" + +void route_read(struct zebra_ns *zns) +{ + netlink_route_read(zns); +} + +void macfdb_read(struct zebra_ns *zns) +{ + netlink_macfdb_read(zns); +} + +void macfdb_read_for_bridge(struct zebra_ns *zns, struct interface *ifp, + struct interface *br_if) +{ + netlink_macfdb_read_for_bridge(zns, ifp, br_if); +} + +void macfdb_read_specific_mac(struct zebra_ns *zns, struct interface *br_if, + const struct ethaddr *mac, vlanid_t vid) +{ + netlink_macfdb_read_specific_mac(zns, br_if, mac, vid); +} + +void neigh_read(struct zebra_ns *zns) +{ + netlink_neigh_read(zns); +} + +void neigh_read_for_vlan(struct zebra_ns *zns, struct interface *vlan_if) +{ + netlink_neigh_read_for_vlan(zns, vlan_if); +} + +void neigh_read_specific_ip(const struct ipaddr *ip, struct interface *vlan_if) +{ + netlink_neigh_read_specific_ip(ip, vlan_if); +} + +void kernel_read_pbr_rules(struct zebra_ns 
*zns) +{ + netlink_rules_read(zns); +} + +#endif /* GNU_LINUX */ diff --git a/zebra/rtread_sysctl.c b/zebra/rtread_sysctl.c new file mode 100644 index 0000000..594f7c2 --- /dev/null +++ b/zebra/rtread_sysctl.c @@ -0,0 +1,111 @@ +/* + * Kernel routing table read by sysctl function. + * Copyright (C) 1997, 98 Kunihiro Ishiguro + * + * This file is part of GNU Zebra. + * + * GNU Zebra is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License as published by the + * Free Software Foundation; either version 2, or (at your option) any + * later version. + * + * GNU Zebra is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. + * + * You should have received a copy of the GNU General Public License along + * with this program; see the file COPYING; if not, write to the Free Software + * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA + */ + +#include <zebra.h> + +#if !defined(GNU_LINUX) + +#include "memory.h" +#include "log.h" +#include "vrf.h" + +#include "zebra/rt.h" +#include "zebra/kernel_socket.h" +#include "zebra/zebra_pbr.h" +#include "zebra/zebra_errors.h" + +/* Kernel routing table read up by sysctl function. */ +void route_read(struct zebra_ns *zns) +{ + caddr_t buf, end, ref; + size_t bufsiz; + struct rt_msghdr *rtm; + +#define MIBSIZ 6 + int mib[MIBSIZ] = {CTL_NET, PF_ROUTE, 0, 0, NET_RT_DUMP, 0}; + + if (zns->ns_id != NS_DEFAULT) + return; + + /* Get buffer size. */ + if (sysctl(mib, MIBSIZ, NULL, &bufsiz, NULL, 0) < 0) { + flog_warn(EC_ZEBRA_SYSCTL_FAILED, "sysctl fail: %s", + safe_strerror(errno)); + return; + } + + /* Allocate buffer. */ + ref = buf = XMALLOC(MTYPE_TMP, bufsiz); + + /* Read routing table information by calling sysctl(). 
*/ + if (sysctl(mib, MIBSIZ, buf, &bufsiz, NULL, 0) < 0) { + flog_warn(EC_ZEBRA_SYSCTL_FAILED, "sysctl() fail by %s", + safe_strerror(errno)); + XFREE(MTYPE_TMP, ref); + return; + } + + for (end = buf + bufsiz; buf < end; buf += rtm->rtm_msglen) { + rtm = (struct rt_msghdr *)buf; + /* We must set RTF_DONE here, so rtm_read() doesn't ignore the + * message. */ + SET_FLAG(rtm->rtm_flags, RTF_DONE); + rtm_read(rtm); + } + + /* Free buffer. */ + XFREE(MTYPE_TMP, ref); + + return; +} + +/* Only implemented for the netlink method. */ +void macfdb_read(struct zebra_ns *zns) +{ +} + +void macfdb_read_for_bridge(struct zebra_ns *zns, struct interface *ifp, + struct interface *br_if) +{ +} + +void macfdb_read_specific_mac(struct zebra_ns *zns, struct interface *br_if, + const struct ethaddr *mac, vlanid_t vid) +{ +} + +void neigh_read(struct zebra_ns *zns) +{ +} + +void neigh_read_for_vlan(struct zebra_ns *zns, struct interface *vlan_if) +{ +} + +void neigh_read_specific_ip(const struct ipaddr *ip, struct interface *vlan_if) +{ +} + +void kernel_read_pbr_rules(struct zebra_ns *zns) +{ +} + +#endif /* !defined(GNU_LINUX) */ diff --git a/zebra/rule_netlink.c b/zebra/rule_netlink.c new file mode 100644 index 0000000..135f065 --- /dev/null +++ b/zebra/rule_netlink.c @@ -0,0 +1,422 @@ +/* + * Zebra Policy Based Routing (PBR) interaction with the kernel using + * netlink. + * Copyright (C) 2018 Cumulus Networks, Inc. + * + * This file is part of FRR. + * + * FRR is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License as published by the + * Free Software Foundation; either version 2, or (at your option) any + * later version. + * + * FRR is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. 
+ * + * You should have received a copy of the GNU General Public License + * along with FRR; see the file COPYING. If not, write to the Free + * Software Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA + * 02111-1307, USA. + */ + +#include <zebra.h> + +#ifdef HAVE_NETLINK + +#include "if.h" +#include "prefix.h" +#include "vrf.h" + +#include <linux/fib_rules.h> +#include "zebra/zserv.h" +#include "zebra/zebra_ns.h" +#include "zebra/zebra_vrf.h" +#include "zebra/rt.h" +#include "zebra/interface.h" +#include "zebra/debug.h" +#include "zebra/rtadv.h" +#include "zebra/kernel_netlink.h" +#include "zebra/rule_netlink.h" +#include "zebra/zebra_pbr.h" +#include "zebra/zebra_errors.h" +#include "zebra/zebra_dplane.h" +#include "zebra/zebra_trace.h" + +/* definitions */ + +/* static function declarations */ + +/* Private functions */ + + +/* + * netlink_rule_msg_encode + * + * Encodes netlink RTM_ADDRULE/RTM_DELRULE message to buffer buf of size buflen. + * + * Returns -1 on failure, 0 when the msg doesn't fit entirely in the buffer + * or the number of bytes written to buf. + */ +static ssize_t netlink_rule_msg_encode( + int cmd, const struct zebra_dplane_ctx *ctx, uint32_t filter_bm, + uint32_t priority, uint32_t table, const struct prefix *src_ip, + const struct prefix *dst_ip, uint32_t fwmark, uint8_t dsfield, + uint8_t ip_protocol, void *buf, size_t buflen) +{ + uint8_t protocol = RTPROT_ZEBRA; + int family; + int bytelen; + struct { + struct nlmsghdr n; + struct fib_rule_hdr frh; + char buf[]; + } *req = buf; + + const char *ifname = dplane_ctx_rule_get_ifname(ctx); + + if (buflen < sizeof(*req)) + return 0; + memset(req, 0, sizeof(*req)); + + /* Assume ipv4 if no src/dst set, we only support ipv4/ipv6 */ + if (PREFIX_FAMILY(src_ip)) + family = PREFIX_FAMILY(src_ip); + else if (PREFIX_FAMILY(dst_ip)) + family = PREFIX_FAMILY(dst_ip); + else + family = AF_INET; + + bytelen = (family == AF_INET ? 
4 : 16); + + req->n.nlmsg_type = cmd; + req->n.nlmsg_len = NLMSG_LENGTH(sizeof(struct rtmsg)); + req->n.nlmsg_flags = NLM_F_REQUEST; + + req->frh.family = family; + req->frh.action = FR_ACT_TO_TBL; + + if (!nl_attr_put(&req->n, buflen, FRA_PROTOCOL, &protocol, + sizeof(protocol))) + return 0; + + /* rule's pref # */ + if (!nl_attr_put32(&req->n, buflen, FRA_PRIORITY, priority)) + return 0; + + /* interface on which applied */ + if (!nl_attr_put(&req->n, buflen, FRA_IFNAME, ifname, + strlen(ifname) + 1)) + return 0; + + /* source IP, if specified */ + if (filter_bm & PBR_FILTER_SRC_IP) { + req->frh.src_len = src_ip->prefixlen; + if (!nl_attr_put(&req->n, buflen, FRA_SRC, &src_ip->u.prefix, + bytelen)) + return 0; + } + + /* destination IP, if specified */ + if (filter_bm & PBR_FILTER_DST_IP) { + req->frh.dst_len = dst_ip->prefixlen; + if (!nl_attr_put(&req->n, buflen, FRA_DST, &dst_ip->u.prefix, + bytelen)) + return 0; + } + + /* fwmark, if specified */ + if (filter_bm & PBR_FILTER_FWMARK) { + if (!nl_attr_put32(&req->n, buflen, FRA_FWMARK, fwmark)) + return 0; + } + + /* dsfield, if specified */ + if (filter_bm & PBR_FILTER_DSFIELD) + req->frh.tos = dsfield; + + /* protocol to match on */ + if (filter_bm & PBR_FILTER_IP_PROTOCOL) + nl_attr_put8(&req->n, buflen, FRA_IP_PROTO, ip_protocol); + + /* Route table to use to forward, if filter criteria matches. 
*/ + if (table < 256) + req->frh.table = table; + else { + req->frh.table = RT_TABLE_UNSPEC; + if (!nl_attr_put32(&req->n, buflen, FRA_TABLE, table)) + return 0; + } + + if (IS_ZEBRA_DEBUG_KERNEL) + zlog_debug( + "Tx %s family %s IF %s Pref %u Fwmark %u Src %pFX Dst %pFX Table %u", + nl_msg_type_to_str(cmd), nl_family_to_str(family), + ifname, priority, fwmark, src_ip, dst_ip, table); + + return NLMSG_ALIGN(req->n.nlmsg_len); +} + +static ssize_t netlink_rule_msg_encoder(struct zebra_dplane_ctx *ctx, void *buf, + size_t buflen) +{ + int cmd = RTM_NEWRULE; + + if (dplane_ctx_get_op(ctx) == DPLANE_OP_RULE_DELETE) + cmd = RTM_DELRULE; + + return netlink_rule_msg_encode( + cmd, ctx, dplane_ctx_rule_get_filter_bm(ctx), + dplane_ctx_rule_get_priority(ctx), + dplane_ctx_rule_get_table(ctx), dplane_ctx_rule_get_src_ip(ctx), + dplane_ctx_rule_get_dst_ip(ctx), + dplane_ctx_rule_get_fwmark(ctx), + dplane_ctx_rule_get_dsfield(ctx), + dplane_ctx_rule_get_ipproto(ctx), buf, buflen); +} + +static ssize_t netlink_oldrule_msg_encoder(struct zebra_dplane_ctx *ctx, + void *buf, size_t buflen) +{ + return netlink_rule_msg_encode( + RTM_DELRULE, ctx, dplane_ctx_rule_get_old_filter_bm(ctx), + dplane_ctx_rule_get_old_priority(ctx), + dplane_ctx_rule_get_old_table(ctx), + dplane_ctx_rule_get_old_src_ip(ctx), + dplane_ctx_rule_get_old_dst_ip(ctx), + dplane_ctx_rule_get_old_fwmark(ctx), + dplane_ctx_rule_get_old_dsfield(ctx), + dplane_ctx_rule_get_old_ipproto(ctx), buf, buflen); +} + +/* Public functions */ + +enum netlink_msg_status +netlink_put_rule_update_msg(struct nl_batch *bth, struct zebra_dplane_ctx *ctx) +{ + enum dplane_op_e op; + enum netlink_msg_status ret; + + op = dplane_ctx_get_op(ctx); + if (!(op == DPLANE_OP_RULE_ADD || op == DPLANE_OP_RULE_UPDATE + || op == DPLANE_OP_RULE_DELETE)) { + flog_err( + EC_ZEBRA_PBR_RULE_UPDATE, + "Context received for kernel rule update with incorrect OP code (%u)", + op); + return FRR_NETLINK_ERROR; + } + + ret = netlink_batch_add_msg(bth, ctx, 
netlink_rule_msg_encoder, false); + + /** + * Delete the old one. + * + * Don't care about this result right? + */ + if (op == DPLANE_OP_RULE_UPDATE) + netlink_batch_add_msg(bth, ctx, netlink_oldrule_msg_encoder, + true); + + return ret; +} + +/* + * Handle netlink notification informing a rule add or delete. + * Handling of an ADD is TBD. + * DELs are notified up, if other attributes indicate it may be a + * notification of interest. The expectation is that if this corresponds + * to a PBR rule added by FRR, it will be readded. + * + * If startup and we see a rule we created, delete it as its leftover + * from a previous instance and should have been removed on shutdown. + * + */ +int netlink_rule_change(struct nlmsghdr *h, ns_id_t ns_id, int startup) +{ + struct zebra_ns *zns; + struct fib_rule_hdr *frh; + struct rtattr *tb[FRA_MAX + 1]; + int len; + char *ifname; + struct zebra_pbr_rule rule = {}; + uint8_t proto = 0; + uint8_t ip_proto = 0; + + frrtrace(3, frr_zebra, netlink_rule_change, h, ns_id, startup); + + /* Basic validation followed by extracting attributes. 
*/ + if (h->nlmsg_type != RTM_NEWRULE && h->nlmsg_type != RTM_DELRULE) + return 0; + + len = h->nlmsg_len - NLMSG_LENGTH(sizeof(struct fib_rule_hdr)); + if (len < 0) { + zlog_err( + "%s: Message received from netlink is of a broken size: %d %zu", + __func__, h->nlmsg_len, + (size_t)NLMSG_LENGTH(sizeof(struct fib_rule_hdr))); + return -1; + } + + frh = NLMSG_DATA(h); + + if (frh->family != AF_INET && frh->family != AF_INET6) { + if (frh->family == RTNL_FAMILY_IPMR + || frh->family == RTNL_FAMILY_IP6MR) { + if (IS_ZEBRA_DEBUG_KERNEL) + zlog_debug( + "Received rule netlink that we are ignoring for family %u, rule change: %u", + frh->family, h->nlmsg_type); + return 0; + } + flog_warn( + EC_ZEBRA_NETLINK_INVALID_AF, + "Invalid address family: %u received from kernel rule change: %u", + frh->family, h->nlmsg_type); + return 0; + } + if (frh->action != FR_ACT_TO_TBL) + return 0; + + memset(tb, 0, sizeof(tb)); + netlink_parse_rtattr(tb, FRA_MAX, RTM_RTA(frh), len); + + if (tb[FRA_PRIORITY]) + rule.rule.priority = *(uint32_t *)RTA_DATA(tb[FRA_PRIORITY]); + + if (tb[FRA_SRC]) { + if (frh->family == AF_INET) + memcpy(&rule.rule.filter.src_ip.u.prefix4, + RTA_DATA(tb[FRA_SRC]), 4); + else + memcpy(&rule.rule.filter.src_ip.u.prefix6, + RTA_DATA(tb[FRA_SRC]), 16); + rule.rule.filter.src_ip.prefixlen = frh->src_len; + rule.rule.filter.src_ip.family = frh->family; + rule.rule.filter.filter_bm |= PBR_FILTER_SRC_IP; + } + + if (tb[FRA_DST]) { + if (frh->family == AF_INET) + memcpy(&rule.rule.filter.dst_ip.u.prefix4, + RTA_DATA(tb[FRA_DST]), 4); + else + memcpy(&rule.rule.filter.dst_ip.u.prefix6, + RTA_DATA(tb[FRA_DST]), 16); + rule.rule.filter.dst_ip.prefixlen = frh->dst_len; + rule.rule.filter.dst_ip.family = frh->family; + rule.rule.filter.filter_bm |= PBR_FILTER_DST_IP; + } + + if (tb[FRA_TABLE]) + rule.rule.action.table = *(uint32_t *)RTA_DATA(tb[FRA_TABLE]); + else + rule.rule.action.table = frh->table; + + /* TBD: We don't care about rules not specifying an IIF. 
*/ + if (tb[FRA_IFNAME] == NULL) + return 0; + + if (tb[FRA_PROTOCOL]) + proto = *(uint8_t *)RTA_DATA(tb[FRA_PROTOCOL]); + + if (tb[FRA_IP_PROTO]) + ip_proto = *(uint8_t *)RTA_DATA(tb[FRA_IP_PROTO]); + + ifname = (char *)RTA_DATA(tb[FRA_IFNAME]); + strlcpy(rule.ifname, ifname, sizeof(rule.ifname)); + + if (h->nlmsg_type == RTM_NEWRULE) { + /* + * If we see a rule at startup we created, delete it now. + * It should have been flushed on a previous shutdown. + */ + if (startup && proto == RTPROT_ZEBRA) { + enum zebra_dplane_result ret; + + ret = dplane_pbr_rule_delete(&rule); + + zlog_debug( + "%s: %s leftover rule: family %s IF %s Pref %u Src %pFX Dst %pFX Table %u ip-proto: %u", + __func__, + ((ret == ZEBRA_DPLANE_REQUEST_FAILURE) + ? "Failed to remove" + : "Removed"), + nl_family_to_str(frh->family), rule.ifname, + rule.rule.priority, &rule.rule.filter.src_ip, + &rule.rule.filter.dst_ip, + rule.rule.action.table, ip_proto); + } + + /* TBD */ + return 0; + } + + zns = zebra_ns_lookup(ns_id); + + /* If we don't know the interface, we don't care. 
*/ + if (!if_lookup_by_name_per_ns(zns, ifname)) + return 0; + + if (IS_ZEBRA_DEBUG_KERNEL) + zlog_debug( + "Rx %s family %s IF %s Pref %u Src %pFX Dst %pFX Table %u ip-proto: %u", + nl_msg_type_to_str(h->nlmsg_type), + nl_family_to_str(frh->family), rule.ifname, + rule.rule.priority, &rule.rule.filter.src_ip, + &rule.rule.filter.dst_ip, rule.rule.action.table, + ip_proto); + + return kernel_pbr_rule_del(&rule); +} + +/* + * Request rules from the kernel + */ +static int netlink_request_rules(struct zebra_ns *zns, int family, int type) +{ + struct { + struct nlmsghdr n; + struct fib_rule_hdr frh; + char buf[NL_PKT_BUF_SIZE]; + } req; + + memset(&req, 0, sizeof(req)); + req.n.nlmsg_type = type; + req.n.nlmsg_flags = NLM_F_ROOT | NLM_F_MATCH | NLM_F_REQUEST; + req.n.nlmsg_len = NLMSG_LENGTH(sizeof(struct fib_rule_hdr)); + req.frh.family = family; + + return netlink_request(&zns->netlink_cmd, &req); +} + +/* + * Get to know existing PBR rules in the kernel - typically called at startup. + */ +int netlink_rules_read(struct zebra_ns *zns) +{ + int ret; + struct zebra_dplane_info dp_info; + + zebra_dplane_info_from_zns(&dp_info, zns, true); + + ret = netlink_request_rules(zns, AF_INET, RTM_GETRULE); + if (ret < 0) + return ret; + + ret = netlink_parse_info(netlink_rule_change, &zns->netlink_cmd, + &dp_info, 0, true); + if (ret < 0) + return ret; + + ret = netlink_request_rules(zns, AF_INET6, RTM_GETRULE); + if (ret < 0) + return ret; + + ret = netlink_parse_info(netlink_rule_change, &zns->netlink_cmd, + &dp_info, 0, true); + return ret; +} + +#endif /* HAVE_NETLINK */ diff --git a/zebra/rule_netlink.h b/zebra/rule_netlink.h new file mode 100644 index 0000000..cf4d978 --- /dev/null +++ b/zebra/rule_netlink.h @@ -0,0 +1,52 @@ +/* + * Zebra Policy Based Routing (PBR) interaction with the kernel using + * netlink - public definitions and function declarations. + * Copyright (C) 2018 Cumulus Networks, Inc. + * + * This file is part of FRR. 
+ * + * FRR is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License as published by the + * Free Software Foundation; either version 2, or (at your option) any + * later version. + * + * FRR is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with FRR; see the file COPYING. If not, write to the Free + * Software Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA + * 02111-1307, USA. + */ + +#ifndef _ZEBRA_RULE_NETLINK_H +#define _ZEBRA_RULE_NETLINK_H + +#ifdef HAVE_NETLINK + +#ifdef __cplusplus +extern "C" { +#endif + +/* + * Handle netlink notification informing a rule add or delete. + */ +extern int netlink_rule_change(struct nlmsghdr *h, ns_id_t ns_id, int startup); + +/* + * Get to know existing PBR rules in the kernel - typically called at startup. + */ +extern int netlink_rules_read(struct zebra_ns *zns); + +extern enum netlink_msg_status +netlink_put_rule_update_msg(struct nl_batch *bth, struct zebra_dplane_ctx *ctx); + +#ifdef __cplusplus +} +#endif + +#endif /* HAVE_NETLINK */ + +#endif /* _ZEBRA_RULE_NETLINK_H */ diff --git a/zebra/rule_socket.c b/zebra/rule_socket.c new file mode 100644 index 0000000..e629017 --- /dev/null +++ b/zebra/rule_socket.c @@ -0,0 +1,53 @@ +/* + * Zebra Policy Based Routing (PBR) interaction with the kernel using + * netlink. + * Copyright (C) 2018 Cumulus Networks, Inc. + * Donald Sharp + * + * This file is part of FRR. + * + * FRR is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License as published by the + * Free Software Foundation; either version 2, or (at your option) any + * later version. 
+ * + * FRR is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with FRR; see the file COPYING. If not, write to the Free + * Software Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA + * 02111-1307, USA. + */ + +#include <zebra.h> + +#ifndef HAVE_NETLINK + +#include "if.h" +#include "prefix.h" +#include "vrf.h" +#include "lib_errors.h" + +#include "zebra/zserv.h" +#include "zebra/zebra_ns.h" +#include "zebra/zebra_vrf.h" +#include "zebra/rt.h" +#include "zebra/interface.h" +#include "zebra/debug.h" +#include "zebra/rtadv.h" +#include "zebra/kernel_netlink.h" +#include "zebra/rule_netlink.h" +#include "zebra/zebra_pbr.h" +#include "zebra/zebra_errors.h" + +enum zebra_dplane_result kernel_pbr_rule_update(struct zebra_dplane_ctx *ctx) +{ + flog_err(EC_LIB_UNAVAILABLE, "%s not Implemented for this platform", + __func__); + return ZEBRA_DPLANE_REQUEST_FAILURE; +} + +#endif diff --git a/zebra/sample_plugin.c b/zebra/sample_plugin.c new file mode 100644 index 0000000..e54186b --- /dev/null +++ b/zebra/sample_plugin.c @@ -0,0 +1,133 @@ +/* + * Sample plugin for the FRR zebra dataplane. + * + * Copyright (c) 2019 Volta Networks, Inc. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. 
+ * + * You should have received a copy of the GNU General Public License along + * with this program; see the file COPYING; if not, write to the Free Software + * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA + */ + +/* + * Should be possible to build this plugin using this sort of command: + * + * gcc -I ~/work/frr/ -I ~/work/frr/lib -I ~/work/frr/zebra \ + * -g -O0 -o sample_plugin.so -shared -fPIC sample_plugin.c + * + * where 'frr' is a configured and built frr sandbox. + * + * Run zebra with '-M /path/to/sample_plugin.so' to load the module. + */ + +#include "config.h" /* Include this explicitly */ +#include "lib/zebra.h" +#include "lib/libfrr.h" +#include "zebra/zebra_dplane.h" +#include "zebra/debug.h" + +static const char *plugin_name = "SAMPLE"; + +static struct zebra_dplane_provider *prov_p; + +/* + * Startup/init callback, called from the dataplane. + */ +static int sample_start(struct zebra_dplane_provider *prov) +{ + /* Nothing special to do - we don't allocate anything. */ + return 0; +} + + +/* + * Shutdown/cleanup callback, called from the dataplane pthread. + */ +static int sample_fini(struct zebra_dplane_provider *prov, bool early) +{ + /* Nothing special to do. */ + return 0; +} + +/* + * Callback from the dataplane to process incoming work; this runs in the + * dplane pthread. + */ +static int sample_process(struct zebra_dplane_provider *prov) +{ + int counter, limit; + struct zebra_dplane_ctx *ctx; + + limit = dplane_provider_get_work_limit(prov_p); + + /* Respect the configured limit on the amount of work to do in + * any one call. + */ + for (counter = 0; counter < limit; counter++) { + ctx = dplane_provider_dequeue_in_ctx(prov_p); + if (!ctx) + break; + + /* Just set 'success' status and return to the dataplane */ + dplane_ctx_set_status(ctx, ZEBRA_DPLANE_REQUEST_SUCCESS); + dplane_provider_enqueue_out_ctx(prov_p, ctx); + } + + return 0; +} + +/* + * Init entry point called during zebra startup. 
This is registered during + * module init. + */ +static int init_sample_plugin(struct thread_master *tm) +{ + int ret; + + /* Note that we don't use or store the thread_master 'tm'. We + * don't use the zebra main pthread: our plugin code will run in + * the zebra dataplane pthread context. + */ + + /* Register the plugin with the dataplane infrastructure. We + * register to be called before the kernel, and we register + * our init, process work, and shutdown callbacks. + */ + ret = dplane_provider_register(plugin_name, DPLANE_PRIO_PRE_KERNEL, + DPLANE_PROV_FLAGS_DEFAULT, + sample_start, + sample_process, + sample_fini, + NULL, + &prov_p); + + if (IS_ZEBRA_DEBUG_DPLANE) + zlog_debug("sample plugin register => %d", ret); + + return 0; +} + +/* + * Base FRR loadable module info: basic info including module entry-point. + */ +static int module_init(void) +{ + hook_register(frr_late_init, init_sample_plugin); + return 0; +} + +FRR_MODULE_SETUP( + .name = "dplane_sample", + .version = "0.0.1", + .description = "Dataplane Sample Plugin", + .init = module_init, +); diff --git a/zebra/subdir.am b/zebra/subdir.am new file mode 100644 index 0000000..298b715 --- /dev/null +++ b/zebra/subdir.am @@ -0,0 +1,275 @@ +# +# zebra +# + +if ZEBRA +sbin_PROGRAMS += zebra/zebra +vtysh_scan += \ + zebra/debug.c \ + zebra/interface.c \ + zebra/router-id.c \ + zebra/rtadv.c \ + zebra/zebra_gr.c \ + zebra/zebra_mlag_vty.c \ + zebra/zebra_evpn_mh.c \ + zebra/zebra_mpls_vty.c \ + zebra/zebra_srv6_vty.c \ + zebra/zebra_ptm.c \ + zebra/zebra_pw.c \ + zebra/zebra_routemap.c \ + zebra/zebra_vty.c \ + zebra/zserv.c \ + zebra/zebra_vrf.c \ + zebra/dpdk/zebra_dplane_dpdk_vty.c \ + # end + +# can be loaded as DSO - always include for vtysh +vtysh_scan += zebra/irdp_interface.c +vtysh_scan += zebra/zebra_fpm.c + +vtysh_daemons += zebra + +if IRDP +module_LTLIBRARIES += zebra/zebra_irdp.la +endif +if SNMP +module_LTLIBRARIES += zebra/zebra_snmp.la +endif +if FPM +module_LTLIBRARIES += 
zebra/zebra_fpm.la +endif +if LINUX +module_LTLIBRARIES += zebra/zebra_cumulus_mlag.la +endif + +# Dataplane sample plugin +if DEV_BUILD +module_LTLIBRARIES += zebra/dplane_sample_plugin.la +endif + +man8 += $(MANBUILD)/frr-zebra.8 +## endif ZEBRA +endif + +zebra_zebra_LDADD = lib/libfrr.la $(LIBCAP) $(UST_LIBS) +if HAVE_PROTOBUF3 +zebra_zebra_LDADD += mlag/libmlag_pb.la $(PROTOBUF_C_LIBS) +zebra/zebra_mlag.$(OBJEXT): mlag/mlag.pb-c.h +endif +zebra_zebra_SOURCES = \ + zebra/connected.c \ + zebra/debug.c \ + zebra/if_ioctl.c \ + zebra/if_netlink.c \ + zebra/if_socket.c \ + zebra/if_sysctl.c \ + zebra/interface.c \ + zebra/ioctl.c \ + zebra/ipforward_proc.c \ + zebra/ipforward_sysctl.c \ + zebra/kernel_netlink.c \ + zebra/kernel_socket.c \ + zebra/label_manager.c \ + zebra/main.c \ + zebra/netconf_netlink.c \ + zebra/redistribute.c \ + zebra/router-id.c \ + zebra/rt_netlink.c \ + zebra/rt_socket.c \ + zebra/rtadv.c \ + zebra/rtread_netlink.c \ + zebra/rtread_sysctl.c \ + zebra/rule_netlink.c \ + zebra/rule_socket.c \ + zebra/table_manager.c \ + zebra/tc_netlink.c \ + zebra/tc_socket.c \ + zebra/zapi_msg.c \ + zebra/zebra_dplane.c \ + zebra/zebra_errors.c \ + zebra/zebra_gr.c \ + zebra/zebra_l2.c \ + zebra/zebra_evpn.c \ + zebra/zebra_evpn_mac.c \ + zebra/zebra_evpn_neigh.c \ + zebra/zebra_mlag.c \ + zebra/zebra_mlag_vty.c \ + zebra/zebra_mpls.c \ + zebra/zebra_mpls_netlink.c \ + zebra/zebra_mpls_openbsd.c \ + zebra/zebra_mpls_null.c \ + zebra/zebra_mpls_vty.c \ + zebra/zebra_srv6.c \ + zebra/zebra_srv6_vty.c \ + zebra/zebra_mroute.c \ + zebra/zebra_nb.c \ + zebra/zebra_nb_config.c \ + zebra/zebra_nb_rpcs.c \ + zebra/zebra_nb_state.c \ + zebra/zebra_netns_id.c \ + zebra/zebra_netns_notify.c \ + zebra/zebra_nhg.c \ + zebra/zebra_ns.c \ + zebra/zebra_opaque.c \ + zebra/zebra_pbr.c \ + zebra/zebra_ptm.c \ + zebra/zebra_ptm_redistribute.c \ + zebra/zebra_pw.c \ + zebra/zebra_rib.c \ + zebra/zebra_router.c \ + zebra/zebra_rnh.c \ + zebra/zebra_routemap.c \ + 
zebra/zebra_routemap_nb.c \ + zebra/zebra_routemap_nb_config.c \ + zebra/zebra_script.c \ + zebra/zebra_srte.c \ + zebra/zebra_trace.c \ + zebra/zebra_vrf.c \ + zebra/zebra_vty.c \ + zebra/zebra_vxlan.c \ + zebra/zebra_evpn_mh.c \ + zebra/zebra_neigh.c \ + zebra/zserv.c \ + # end + +clippy_scan += \ + zebra/debug.c \ + zebra/interface.c \ + zebra/rtadv.c \ + zebra/zebra_evpn_mh.c \ + zebra/zebra_mlag_vty.c \ + zebra/zebra_routemap.c \ + zebra/zebra_vty.c \ + zebra/zebra_srv6_vty.c \ + zebra/zebra_vrf.c \ + zebra/dpdk/zebra_dplane_dpdk_vty.c \ + # end + +noinst_HEADERS += \ + zebra/connected.h \ + zebra/debug.h \ + zebra/if_netlink.h \ + zebra/interface.h \ + zebra/ioctl.h \ + zebra/ipforward.h \ + zebra/irdp.h \ + zebra/kernel_netlink.h \ + zebra/kernel_socket.h \ + zebra/label_manager.h \ + zebra/netconf_netlink.h \ + zebra/redistribute.h \ + zebra/rib.h \ + zebra/router-id.h \ + zebra/rt.h \ + zebra/rt_netlink.h \ + zebra/rtadv.h \ + zebra/rule_netlink.h \ + zebra/table_manager.h \ + zebra/tc_netlink.h \ + zebra/zapi_msg.h \ + zebra/zebra_dplane.h \ + zebra/zebra_errors.h \ + zebra/zebra_evpn.h \ + zebra/zebra_evpn_mac.h \ + zebra/zebra_evpn_neigh.h \ + zebra/zebra_evpn_vxlan.h \ + zebra/zebra_fpm_private.h \ + zebra/zebra_l2.h \ + zebra/zebra_mlag.h \ + zebra/zebra_mlag_vty.h \ + zebra/zebra_mpls.h \ + zebra/zebra_srv6.h \ + zebra/zebra_srv6_vty.h \ + zebra/zebra_mroute.h \ + zebra/zebra_nb.h \ + zebra/zebra_netns_id.h \ + zebra/zebra_netns_notify.h \ + zebra/zebra_nhg.h \ + zebra/zebra_nhg_private.h \ + zebra/zebra_ns.h \ + zebra/zebra_opaque.h \ + zebra/zebra_pbr.h \ + zebra/zebra_ptm.h \ + zebra/zebra_ptm_redistribute.h \ + zebra/zebra_pw.h \ + zebra/zebra_rnh.h \ + zebra/zebra_routemap.h \ + zebra/zebra_routemap_nb.h \ + zebra/zebra_router.h \ + zebra/zebra_script.h \ + zebra/zebra_srte.h \ + zebra/zebra_trace.h \ + zebra/zebra_vrf.h \ + zebra/zebra_vxlan.h \ + zebra/zebra_vxlan_private.h \ + zebra/zebra_evpn_mh.h \ + zebra/zebra_neigh.h \ + zebra/zserv.h \ 
+ zebra/dpdk/zebra_dplane_dpdk.h \ + zebra/dpdk/zebra_dplane_dpdk_private.h \ + # end + +zebra_zebra_irdp_la_SOURCES = \ + zebra/irdp_interface.c \ + zebra/irdp_main.c \ + zebra/irdp_packet.c \ + # end +zebra_zebra_irdp_la_LDFLAGS = $(MODULE_LDFLAGS) + +zebra_zebra_snmp_la_SOURCES = zebra/zebra_snmp.c +zebra_zebra_snmp_la_CFLAGS = $(AM_CFLAGS) $(SNMP_CFLAGS) -std=gnu11 +zebra_zebra_snmp_la_LDFLAGS = $(MODULE_LDFLAGS) +zebra_zebra_snmp_la_LIBADD = lib/libfrrsnmp.la + +zebra_zebra_fpm_la_LDFLAGS = $(MODULE_LDFLAGS) +zebra_zebra_fpm_la_LIBADD = +zebra_zebra_fpm_la_SOURCES = zebra/zebra_fpm.c +zebra_zebra_fpm_la_SOURCES += zebra/zebra_fpm_netlink.c +if HAVE_PROTOBUF +zebra_zebra_fpm_la_LIBADD += fpm/libfrrfpm_pb.la qpb/libfrr_pb.la $(PROTOBUF_C_LIBS) +zebra_zebra_fpm_la_SOURCES += zebra/zebra_fpm_protobuf.c +zebra/zebra_fpm_protobuf.lo: fpm/fpm.pb-c.h qpb/qpb.pb-c.h +if DEV_BUILD +zebra_zebra_fpm_la_SOURCES += zebra/zebra_fpm_dt.c +zebra/zebra_fpm_dt.lo: fpm/fpm.pb-c.h qpb/qpb.pb-c.h +endif +endif + +# Sample dataplane plugin +if DEV_BUILD +zebra_dplane_sample_plugin_la_SOURCES = zebra/sample_plugin.c +zebra_dplane_sample_plugin_la_LDFLAGS = $(MODULE_LDFLAGS) +endif + +nodist_zebra_zebra_SOURCES = \ + yang/frr-zebra.yang.c \ + yang/frr-zebra-route-map.yang.c \ + # end + +zebra_zebra_cumulus_mlag_la_SOURCES = zebra/zebra_mlag_private.c +zebra_zebra_cumulus_mlag_la_LDFLAGS = $(MODULE_LDFLAGS) + +if LINUX +module_LTLIBRARIES += zebra/dplane_fpm_nl.la + +zebra_dplane_fpm_nl_la_SOURCES = zebra/dplane_fpm_nl.c +zebra_dplane_fpm_nl_la_LDFLAGS = $(MODULE_LDFLAGS) +zebra_dplane_fpm_nl_la_LIBADD = + +vtysh_scan += zebra/dplane_fpm_nl.c +endif + +if NETLINK_DEBUG +zebra_zebra_SOURCES += \ + zebra/debug_nl.c \ + # end +endif + +if DP_DPDK +module_LTLIBRARIES += zebra/zebra_dplane_dpdk.la +endif + +zebra_zebra_dplane_dpdk_la_SOURCES = zebra/dpdk/zebra_dplane_dpdk.c zebra/dpdk/zebra_dplane_dpdk_vty.c +zebra_zebra_dplane_dpdk_la_LDFLAGS = -avoid-version -module -shared 
-export-dynamic -L/usr/local/lib -v +zebra_zebra_dplane_dpdk_la_CFLAGS = $(DPDK_CFLAGS) +zebra_zebra_dplane_dpdk_la_LIBADD = $(DPDK_LIBS) diff --git a/zebra/table_manager.c b/zebra/table_manager.c new file mode 100644 index 0000000..a3daca6 --- /dev/null +++ b/zebra/table_manager.c @@ -0,0 +1,344 @@ +/* zebra table Manager for routing table identifier management + * Copyright (C) 2018 6WIND + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License as published by the Free + * Software Foundation; either version 2 of the License, or (at your option) + * any later version. + * + * This program is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for + * more details. + * + * You should have received a copy of the GNU General Public License along + * with this program; see the file COPYING; if not, write to the Free Software + * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA + */ + +#include "zebra.h" + +#include <stdio.h> +#include <string.h> +#include <sys/types.h> + +#include "lib/log.h" +#include "lib/memory.h" +#include "lib/table.h" +#include "lib/network.h" +#include "lib/stream.h" +#include "lib/zclient.h" +#include "lib/libfrr.h" +#include "lib/vrf.h" + +#include "zebra/zserv.h" +#include "zebra/zebra_vrf.h" +#include "zebra/label_manager.h" /* for NO_PROTO */ +#include "zebra/table_manager.h" +#include "zebra/zebra_errors.h" + +/* routing table identifiers + * + */ +#if !defined(GNU_LINUX) +/* BSD systems + */ +#else +/* Linux Systems + */ +#define RT_TABLE_ID_LOCAL 255 +#define RT_TABLE_ID_MAIN 254 +#define RT_TABLE_ID_DEFAULT 253 +#define RT_TABLE_ID_COMPAT 252 +#define RT_TABLE_ID_UNSPEC 0 +#endif /* !def(GNU_LINUX) */ +#define RT_TABLE_ID_UNRESERVED_MIN 1 +#define RT_TABLE_ID_UNRESERVED_MAX 
0xffffffff + +DEFINE_MGROUP(TABLE_MGR, "Table Manager"); +DEFINE_MTYPE_STATIC(TABLE_MGR, TM_CHUNK, "Table Manager Chunk"); +DEFINE_MTYPE_STATIC(TABLE_MGR, TM_TABLE, "Table Manager Context"); + +static void delete_table_chunk(void *val) +{ + XFREE(MTYPE_TM_CHUNK, val); +} + +/** + * Init table manager + */ +void table_manager_enable(struct zebra_vrf *zvrf) +{ + + if (zvrf->tbl_mgr) + return; + if (!vrf_is_backend_netns() + && strcmp(zvrf_name(zvrf), VRF_DEFAULT_NAME)) { + struct zebra_vrf *def = zebra_vrf_lookup_by_id(VRF_DEFAULT); + + if (def) + zvrf->tbl_mgr = def->tbl_mgr; + return; + } + zvrf->tbl_mgr = XCALLOC(MTYPE_TM_TABLE, sizeof(struct table_manager)); + zvrf->tbl_mgr->lc_list = list_new(); + zvrf->tbl_mgr->lc_list->del = delete_table_chunk; +} + +/** + * Core function, assigns table chunks + * + * It first searches through the list to check if there's one available + * (previously released). Otherwise it creates and assigns a new one + * + * @param proto Daemon protocol of client, to identify the owner + * @param instance Instance, to identify the owner + * @para size Size of the table chunk + * @return Pointer to the assigned table chunk + */ +struct table_manager_chunk *assign_table_chunk(uint8_t proto, uint16_t instance, + uint32_t size, + struct zebra_vrf *zvrf) +{ + struct table_manager_chunk *tmc; + struct listnode *node; + uint32_t start; + bool manual_conf = false; + + if (!zvrf) + return NULL; + + /* first check if there's one available */ + for (ALL_LIST_ELEMENTS_RO(zvrf->tbl_mgr->lc_list, node, tmc)) { + if (tmc->proto == NO_PROTO + && tmc->end - tmc->start + 1 == size) { + tmc->proto = proto; + tmc->instance = instance; + return tmc; + } + } + /* otherwise create a new one */ + tmc = XCALLOC(MTYPE_TM_CHUNK, sizeof(struct table_manager_chunk)); + + if (zvrf->tbl_mgr->start || zvrf->tbl_mgr->end) + manual_conf = true; + /* table RT IDs range are [1;252] and [256;0xffffffff] + * - check if the requested range can be within the first range, + * 
otherwise elect second one + * - TODO : vrf-lites have their own table identifier. + * In that case, table_id should be removed from the table range. + */ + if (list_isempty(zvrf->tbl_mgr->lc_list)) { + if (!manual_conf) + start = RT_TABLE_ID_UNRESERVED_MIN; + else + start = zvrf->tbl_mgr->start; + } else + start = ((struct table_manager_chunk *)listgetdata( + listtail(zvrf->tbl_mgr->lc_list))) + ->end + + 1; + + if (!manual_conf) { + +#if !defined(GNU_LINUX) +/* BSD systems + */ +#else +/* Linux Systems + */ + /* if not enough room space between MIN and COMPAT, + * then begin after LOCAL + */ + if (start < RT_TABLE_ID_COMPAT + && (size > RT_TABLE_ID_COMPAT - RT_TABLE_ID_UNRESERVED_MIN)) + start = RT_TABLE_ID_LOCAL + 1; +#endif /* !def(GNU_LINUX) */ + tmc->start = start; + if (RT_TABLE_ID_UNRESERVED_MAX - size + 1 < start) { + flog_err(EC_ZEBRA_TM_EXHAUSTED_IDS, + "Reached max table id. Start/Size %u/%u", + start, size); + XFREE(MTYPE_TM_CHUNK, tmc); + return NULL; + } + } else { + tmc->start = start; + if (zvrf->tbl_mgr->end - size + 1 < start) { + flog_err(EC_ZEBRA_TM_EXHAUSTED_IDS, + "Reached max table id. 
Start/Size %u/%u", + start, size); + XFREE(MTYPE_TM_CHUNK, tmc); + return NULL; + } + } + tmc->end = tmc->start + size - 1; + tmc->proto = proto; + tmc->instance = instance; + listnode_add(zvrf->tbl_mgr->lc_list, tmc); + + return tmc; +} + +/** + * Core function, release no longer used table chunks + * + * @param proto Daemon protocol of client, to identify the owner + * @param instance Instance, to identify the owner + * @param start First table RT ID of the chunk + * @param end Last table RT ID of the chunk + * @return 0 on success, -1 otherwise + */ +int release_table_chunk(uint8_t proto, uint16_t instance, uint32_t start, + uint32_t end, struct zebra_vrf *zvrf) +{ + struct listnode *node; + struct table_manager_chunk *tmc; + int ret = -1; + struct table_manager *tbl_mgr; + + if (!zvrf) + return -1; + + tbl_mgr = zvrf->tbl_mgr; + if (!tbl_mgr) + return ret; + /* check that size matches */ + zlog_debug("Releasing table chunk: %u - %u", start, end); + /* find chunk and disown */ + for (ALL_LIST_ELEMENTS_RO(tbl_mgr->lc_list, node, tmc)) { + if (tmc->start != start) + continue; + if (tmc->end != end) + continue; + if (tmc->proto != proto || tmc->instance != instance) { + flog_err(EC_ZEBRA_TM_DAEMON_MISMATCH, + "%s: Daemon mismatch!!", __func__); + continue; + } + tmc->proto = NO_PROTO; + tmc->instance = 0; + ret = 0; + break; + } + if (ret != 0) + flog_err(EC_ZEBRA_TM_UNRELEASED_CHUNK, + "%s: Table chunk not released!!", __func__); + + return ret; +} + +/** + * Release table chunks from a client. + * + * Called on client disconnection or reconnection. It only releases chunks + * with empty keep value. 
+ * + * @param client the client to release chunks from + * @return Number of chunks released + */ +int release_daemon_table_chunks(struct zserv *client) +{ + uint8_t proto = client->proto; + uint16_t instance = client->instance; + struct listnode *node; + struct table_manager_chunk *tmc; + int count = 0; + int ret; + struct vrf *vrf; + struct zebra_vrf *zvrf; + + RB_FOREACH (vrf, vrf_name_head, &vrfs_by_name) { + zvrf = vrf->info; + + if (!zvrf) + continue; + if (!vrf_is_backend_netns() && vrf->vrf_id != VRF_DEFAULT) + continue; + for (ALL_LIST_ELEMENTS_RO(zvrf->tbl_mgr->lc_list, node, tmc)) { + if (tmc->proto == proto && tmc->instance == instance) { + ret = release_table_chunk( + tmc->proto, tmc->instance, tmc->start, + tmc->end, zvrf); + if (ret == 0) + count++; + } + } + } + zlog_debug("%s: Released %d table chunks", __func__, count); + + return count; +} + +static void table_range_add(struct zebra_vrf *zvrf, uint32_t start, + uint32_t end) +{ + if (!zvrf->tbl_mgr) + return; + zvrf->tbl_mgr->start = start; + zvrf->tbl_mgr->end = end; +} + +void table_manager_disable(struct zebra_vrf *zvrf) +{ + if (!zvrf->tbl_mgr) + return; + if (!vrf_is_backend_netns() + && strcmp(zvrf_name(zvrf), VRF_DEFAULT_NAME)) { + zvrf->tbl_mgr = NULL; + return; + } + list_delete(&zvrf->tbl_mgr->lc_list); + XFREE(MTYPE_TM_TABLE, zvrf->tbl_mgr); + zvrf->tbl_mgr = NULL; +} + +int table_manager_range(struct vty *vty, bool add, struct zebra_vrf *zvrf, + const char *start_table_str, const char *end_table_str) +{ + uint32_t start; + uint32_t end; + + if (add) { + if (!start_table_str || !end_table_str) { + vty_out(vty, "%% Labels not specified\n"); + return CMD_WARNING_CONFIG_FAILED; + } + start = atoi(start_table_str); + end = atoi(end_table_str); + if (end < start) { + vty_out(vty, "%% End table is less than Start table\n"); + return CMD_WARNING_CONFIG_FAILED; + } + +#if !defined(GNU_LINUX) +/* BSD systems + */ +#else + /* Linux Systems + */ + if ((start >= RT_TABLE_ID_COMPAT && start <= 
RT_TABLE_ID_LOCAL) + || (end >= RT_TABLE_ID_COMPAT + && end <= RT_TABLE_ID_LOCAL)) { + vty_out(vty, "%% Values forbidden in range [%u;%u]\n", + RT_TABLE_ID_COMPAT, RT_TABLE_ID_LOCAL); + return CMD_WARNING_CONFIG_FAILED; + } + if (start < RT_TABLE_ID_COMPAT && end > RT_TABLE_ID_LOCAL) { + vty_out(vty, + "%% Range overlaps range [%u;%u] forbidden\n", + RT_TABLE_ID_COMPAT, RT_TABLE_ID_LOCAL); + return CMD_WARNING_CONFIG_FAILED; + } +#endif + if (zvrf->tbl_mgr + && ((zvrf->tbl_mgr->start && zvrf->tbl_mgr->start != start) + || (zvrf->tbl_mgr->end && zvrf->tbl_mgr->end != end))) { + vty_out(vty, + "%% New range will be taken into account at restart\n"); + } + table_range_add(zvrf, start, end); + } else + table_range_add(zvrf, 0, 0); + return CMD_SUCCESS; +} diff --git a/zebra/table_manager.h b/zebra/table_manager.h new file mode 100644 index 0000000..fa13668 --- /dev/null +++ b/zebra/table_manager.h @@ -0,0 +1,79 @@ +/* zebra table Manager for routing table identifier management + * Copyright (C) 2018 6WIND + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License as published by the Free + * Software Foundation; either version 2 of the License, or (at your option) + * any later version. + * + * This program is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for + * more details. 
+ * + * You should have received a copy of the GNU General Public License along + * with this program; see the file COPYING; if not, write to the Free Software + * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA + */ + +#ifndef _TABLE_MANAGER_H +#define _TABLE_MANAGER_H + +#include <stdint.h> + +#include "lib/linklist.h" +#include "lib/thread.h" +#include "lib/ns.h" + +#include "zebra/zserv.h" + +#ifdef __cplusplus +extern "C" { +#endif + +/* + * Table chunk struct + * Client daemon which the chunk belongs to can be identified by either + * proto (daemon protocol) + instance + VRF. + * If the client then passes a non-empty value to keep field when it requests + * for chunks, the chunks won't be garbage collected and the client will be + * responsible of its release. + * Otherwise, if the keep field is not set (value 0) for the chunk, it will be + * automatically released when the client disconnects or when it reconnects + * (in case it died unexpectedly, we can know it's the same because it will have + * the same proto and instance values) + */ +struct table_manager_chunk { + vrf_id_t vrf_id; + uint8_t proto; + uint16_t instance; + uint32_t start; /* First table RT ID of the chunk */ + uint32_t end; /* Last table RT ID of the chunk */ +}; + +/* + * Main table manager struct + * Holds a linked list of table chunks. 
+ */ +struct table_manager { + struct list *lc_list; + uint32_t start; + uint32_t end; +}; + +void table_manager_enable(struct zebra_vrf *zvrf); +struct table_manager_chunk *assign_table_chunk(uint8_t proto, uint16_t instance, + uint32_t size, + struct zebra_vrf *zvrf); +int release_table_chunk(uint8_t proto, uint16_t instance, uint32_t start, + uint32_t end, struct zebra_vrf *zvrf); +int release_daemon_table_chunks(struct zserv *client); +void table_manager_disable(struct zebra_vrf *zvrf); +int table_manager_range(struct vty *vty, bool add, struct zebra_vrf *zvrf, + const char *min, const char *max); + +#ifdef __cplusplus +} +#endif + +#endif /* _TABLE_MANAGER_H */ diff --git a/zebra/tc_netlink.c b/zebra/tc_netlink.c new file mode 100644 index 0000000..afa03a4 --- /dev/null +++ b/zebra/tc_netlink.c @@ -0,0 +1,468 @@ +/* + * Zebra Traffic Control (TC) interaction with the kernel using netlink. + * + * Copyright (C) 2022 Shichu Yang + * + * This file is part of FRR. + * + * FRR is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License as published by the + * Free Software Foundation; either version 2, or (at your option) any + * later version. + * + * FRR is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with FRR; see the file COPYING. If not, write to the Free + * Software Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA + * 02111-1307, USA. 
+ */ + +#include <zebra.h> + +#ifdef HAVE_NETLINK + +#include <netinet/if_ether.h> +#include <sys/socket.h> + +#include "if.h" +#include "prefix.h" +#include "vrf.h" + +#include <linux/fib_rules.h> +#include <linux/pkt_cls.h> +#include <linux/pkt_sched.h> +#include "zebra/zserv.h" +#include "zebra/zebra_ns.h" +#include "zebra/zebra_vrf.h" +#include "zebra/rt.h" +#include "zebra/interface.h" +#include "zebra/debug.h" +#include "zebra/rtadv.h" +#include "zebra/kernel_netlink.h" +#include "zebra/tc_netlink.h" +#include "zebra/zebra_errors.h" +#include "zebra/zebra_dplane.h" +#include "zebra/zebra_trace.h" + +/* TODO: move these bitflags to zebra_tc.h */ +#define TC_FILTER_SRC_IP (1 << 0) +#define TC_FILTER_DST_IP (1 << 1) +#define TC_FILTER_IP_PROTOCOL (1 << 9) + +#define TC_FREQ_DEFAULT (100) + +#define TC_MAJOR_BASE (0x1000u) +#define TC_MINOR_NOCLASS (0xffffu) + +#define TC_FILTER_MASK (0x8000u) + +#define TIME_UNITS_PER_SEC (1000000) +#define xmittime(r, s) (TIME_UNITS_PER_SEC * ((double)(s) / (double)(r))) + +static uint32_t tc_get_freq(void) +{ + int freq = 0; + FILE *fp = fopen("/proc/net/psched", "r"); + + if (fp) { + uint32_t nom, denom; + + if (fscanf(fp, "%*08x%*08x%08x%08x", &nom, &denom) == 2) { + if (nom == 1000000) + freq = denom; + } + fclose(fp); + } + + return freq == 0 ? 
TC_FREQ_DEFAULT : freq; +} + +static inline uint32_t tc_make_handle(uint16_t major, uint16_t minor) +{ + return (major) << 16 | (minor); +} + +static inline uint32_t tc_get_handle(struct zebra_dplane_ctx *ctx, + uint16_t minor) +{ + uint16_t major = TC_MAJOR_BASE + (uint16_t)dplane_ctx_get_ifindex(ctx); + + return tc_make_handle(major, minor); +} + +static void tc_calc_rate_table(struct tc_ratespec *ratespec, uint32_t *table, + uint32_t mtu) +{ + if (mtu == 0) + mtu = 2047; + + int cell_log = -1; + + if (cell_log < 0) { + cell_log = 0; + while ((mtu >> cell_log) > 255) + cell_log++; + } + + for (int i = 0; i < 256; i++) + table[i] = xmittime(ratespec->rate, (i + 1) << cell_log); + + ratespec->cell_align = -1; + ratespec->cell_log = cell_log; + ratespec->linklayer = TC_LINKLAYER_ETHERNET; +} + +static int tc_flower_get_inet_prefix(const struct prefix *prefix, + struct inet_prefix *addr) +{ + addr->family = prefix->family; + + if (addr->family == AF_INET) { + addr->bytelen = 4; + addr->bitlen = prefix->prefixlen; + addr->flags = 0; + addr->flags |= PREFIXLEN_SPECIFIED; + addr->flags |= ADDRTYPE_INET; + memcpy(addr->data, prefix->u.val32, sizeof(prefix->u.val32)); + } else if (addr->family == AF_INET6) { + addr->bytelen = 16; + addr->bitlen = prefix->prefixlen; + addr->flags = 0; + addr->flags |= PREFIXLEN_SPECIFIED; + addr->flags |= ADDRTYPE_INET; + memcpy(addr->data, prefix->u.val, sizeof(prefix->u.val)); + } else { + return -1; + } + + return 0; +} + +static int tc_flower_get_inet_mask(const struct prefix *prefix, + struct inet_prefix *addr) +{ + addr->family = prefix->family; + + if (addr->family == AF_INET) { + addr->bytelen = 4; + addr->bitlen = prefix->prefixlen; + addr->flags = 0; + addr->flags |= PREFIXLEN_SPECIFIED; + addr->flags |= ADDRTYPE_INET; + } else if (addr->family == AF_INET6) { + addr->bytelen = 16; + addr->bitlen = prefix->prefixlen; + addr->flags = 0; + addr->flags |= PREFIXLEN_SPECIFIED; + addr->flags |= ADDRTYPE_INET; + } else { + return -1; + 
} + + memset(addr->data, 0xff, addr->bytelen); + + int rest = prefix->prefixlen; + + for (int i = 0; i < addr->bytelen / 4; i++) { + if (!rest) { + addr->data[i] = 0; + } else if (rest / 32 >= 1) { + rest -= 32; + } else { + addr->data[i] <<= 32 - rest; + addr->data[i] = htonl(addr->data[i]); + rest = 0; + } + } + + return 0; +} + +/* + * Traffic control queue discipline encoding (only "htb" supported) + */ +static ssize_t netlink_qdisc_msg_encode(int cmd, struct zebra_dplane_ctx *ctx, + void *data, size_t datalen) +{ + struct nlsock *nl; + + const char *kind = "htb"; + + struct tc_htb_glob htb_glob = { + .rate2quantum = 10, .version = 3, .defcls = TC_MINOR_NOCLASS}; + + struct rtattr *nest; + + struct { + struct nlmsghdr n; + struct tcmsg t; + char buf[0]; + } *req = (void *)data; + + if (datalen < sizeof(*req)) + return 0; + + nl = kernel_netlink_nlsock_lookup(dplane_ctx_get_ns_sock(ctx)); + + memset(req, 0, sizeof(*req)); + + req->n.nlmsg_len = NLMSG_LENGTH(sizeof(struct tcmsg)); + req->n.nlmsg_flags = NLM_F_CREATE | NLM_F_REQUEST; + + req->n.nlmsg_flags |= NLM_F_REPLACE; + + req->n.nlmsg_type = cmd; + + req->n.nlmsg_pid = nl->snl.nl_pid; + + req->t.tcm_family = AF_UNSPEC; + req->t.tcm_ifindex = dplane_ctx_get_ifindex(ctx); + req->t.tcm_handle = tc_get_handle(ctx, 0); + req->t.tcm_parent = TC_H_ROOT; + + nl_attr_put(&req->n, datalen, TCA_KIND, kind, strlen(kind) + 1); + + nest = nl_attr_nest(&req->n, datalen, TCA_OPTIONS); + + nl_attr_put(&req->n, datalen, TCA_HTB_INIT, &htb_glob, + sizeof(htb_glob)); + nl_attr_nest_end(&req->n, nest); + + return NLMSG_ALIGN(req->n.nlmsg_len); +} + +/* + * Traffic control class encoding + */ +static ssize_t netlink_tclass_msg_encode(int cmd, struct zebra_dplane_ctx *ctx, + void *data, size_t datalen) +{ + struct nlsock *nl; + struct tc_htb_opt htb_opt = {}; + + uint64_t rate, ceil; + uint64_t buffer, cbuffer; + + /* TODO: fetch mtu from interface */ + uint32_t mtu = 0; + + uint32_t rtab[256]; + uint32_t ctab[256]; + + struct 
rtattr *nest; + + struct { + struct nlmsghdr n; + struct tcmsg t; + char buf[0]; + } *req = (void *)data; + + if (datalen < sizeof(*req)) + return 0; + + nl = kernel_netlink_nlsock_lookup(dplane_ctx_get_ns_sock(ctx)); + + memset(req, 0, sizeof(*req)); + + req->n.nlmsg_len = NLMSG_LENGTH(sizeof(struct tcmsg)); + req->n.nlmsg_flags = NLM_F_CREATE | NLM_F_REQUEST; + + req->n.nlmsg_type = cmd; + + req->n.nlmsg_pid = nl->snl.nl_pid; + + req->t.tcm_family = AF_UNSPEC; + req->t.tcm_ifindex = dplane_ctx_get_ifindex(ctx); + req->t.tcm_handle = tc_get_handle(ctx, 1); + req->t.tcm_parent = tc_get_handle(ctx, 0); + + rate = dplane_ctx_tc_get_rate(ctx); + ceil = dplane_ctx_tc_get_ceil(ctx); + + ceil = ceil < rate ? rate : ceil; + + htb_opt.rate.rate = (rate >> 32 != 0) ? ~0U : rate; + htb_opt.ceil.rate = (ceil >> 32 != 0) ? ~0U : ceil; + + buffer = rate / tc_get_freq(), cbuffer = ceil / tc_get_freq(); + + htb_opt.buffer = buffer; + htb_opt.cbuffer = cbuffer; + + tc_calc_rate_table(&htb_opt.rate, rtab, mtu); + tc_calc_rate_table(&htb_opt.ceil, ctab, mtu); + + htb_opt.ceil.mpu = htb_opt.rate.mpu = 0; + htb_opt.ceil.overhead = htb_opt.rate.overhead = 0; + + nest = nl_attr_nest(&req->n, datalen, TCA_OPTIONS); + + if (rate >> 32 != 0) { + nl_attr_put(&req->n, datalen, TCA_HTB_CEIL64, &rate, + sizeof(rate)); + } + + if (ceil >> 32 != 0) { + nl_attr_put(&req->n, datalen, TCA_HTB_CEIL64, &ceil, + sizeof(ceil)); + } + + nl_attr_put(&req->n, datalen, TCA_HTB_PARMS, &htb_opt, sizeof(htb_opt)); + + nl_attr_put(&req->n, datalen, TCA_HTB_RTAB, rtab, sizeof(rtab)); + nl_attr_put(&req->n, datalen, TCA_HTB_CTAB, ctab, sizeof(ctab)); + nl_attr_nest_end(&req->n, nest); + + return NLMSG_ALIGN(req->n.nlmsg_len); +} + +/* + * Traffic control filter encoding (only "flower" supported) + */ +static ssize_t netlink_tfilter_msg_encode(int cmd, struct zebra_dplane_ctx *ctx, + void *data, size_t datalen) +{ + struct nlsock *nl; + struct rtattr *nest; + + const char *kind = "flower"; + + uint16_t priority; 
+ uint16_t protocol; + uint32_t classid; + uint32_t filter_bm; + uint32_t flags = 0; + + struct inet_prefix addr; + + struct { + struct nlmsghdr n; + struct tcmsg t; + char buf[0]; + } *req = (void *)data; + + if (datalen < sizeof(*req)) + return 0; + + nl = kernel_netlink_nlsock_lookup(dplane_ctx_get_ns_sock(ctx)); + + memset(req, 0, sizeof(*req)); + + req->n.nlmsg_len = NLMSG_LENGTH(sizeof(struct tcmsg)); + req->n.nlmsg_flags = NLM_F_CREATE | NLM_F_REQUEST; + + req->n.nlmsg_flags |= NLM_F_EXCL; + + req->n.nlmsg_type = cmd; + + req->n.nlmsg_pid = nl->snl.nl_pid; + + req->t.tcm_family = AF_UNSPEC; + req->t.tcm_ifindex = dplane_ctx_get_ifindex(ctx); + + /* TODO: priority and layer-3 protocol support */ + priority = 0; + protocol = htons(ETH_P_IP); + classid = tc_get_handle(ctx, 1); + filter_bm = dplane_ctx_tc_get_filter_bm(ctx); + + req->t.tcm_info = tc_make_handle(priority, protocol); + + req->t.tcm_handle = 1; + req->t.tcm_parent = tc_get_handle(ctx, 0); + + nl_attr_put(&req->n, datalen, TCA_KIND, kind, strlen(kind) + 1); + nest = nl_attr_nest(&req->n, datalen, TCA_OPTIONS); + + nl_attr_put(&req->n, datalen, TCA_FLOWER_CLASSID, &classid, + sizeof(classid)); + + if (filter_bm & TC_FILTER_SRC_IP) { + const struct prefix *src_p = dplane_ctx_tc_get_src_ip(ctx); + + if (tc_flower_get_inet_prefix(src_p, &addr) != 0) + return 0; + + nl_attr_put(&req->n, datalen, + (addr.family == AF_INET) ? TCA_FLOWER_KEY_IPV4_SRC + : TCA_FLOWER_KEY_IPV6_SRC, + addr.data, addr.bytelen); + + if (tc_flower_get_inet_mask(src_p, &addr) != 0) + return 0; + + nl_attr_put(&req->n, datalen, + (addr.family == AF_INET) + ? TCA_FLOWER_KEY_IPV4_SRC_MASK + : TCA_FLOWER_KEY_IPV6_SRC_MASK, + addr.data, addr.bytelen); + } + + if (filter_bm & TC_FILTER_DST_IP) { + const struct prefix *dst_p = dplane_ctx_tc_get_dst_ip(ctx); + + if (tc_flower_get_inet_prefix(dst_p, &addr) != 0) + return 0; + + nl_attr_put(&req->n, datalen, + (addr.family == AF_INET) ? 
TCA_FLOWER_KEY_IPV4_DST + : TCA_FLOWER_KEY_IPV6_DST, + addr.data, addr.bytelen); + + if (tc_flower_get_inet_mask(dst_p, &addr) != 0) + return 0; + + nl_attr_put(&req->n, datalen, + (addr.family == AF_INET) + ? TCA_FLOWER_KEY_IPV4_DST_MASK + : TCA_FLOWER_KEY_IPV6_DST_MASK, + addr.data, addr.bytelen); + } + + if (filter_bm & TC_FILTER_IP_PROTOCOL) { + nl_attr_put8(&req->n, datalen, TCA_FLOWER_KEY_IP_PROTO, + dplane_ctx_tc_get_ip_proto(ctx)); + } + + nl_attr_put32(&req->n, datalen, TCA_FLOWER_FLAGS, flags); + + nl_attr_put16(&req->n, datalen, TCA_FLOWER_KEY_ETH_TYPE, protocol); + nl_attr_nest_end(&req->n, nest); + + return NLMSG_ALIGN(req->n.nlmsg_len); +} + +static ssize_t netlink_newqdisc_msg_encoder(struct zebra_dplane_ctx *ctx, + void *buf, size_t buflen) +{ + return netlink_qdisc_msg_encode(RTM_NEWQDISC, ctx, buf, buflen); +} + +static ssize_t netlink_newtclass_msg_encoder(struct zebra_dplane_ctx *ctx, + void *buf, size_t buflen) +{ + return netlink_tclass_msg_encode(RTM_NEWTCLASS, ctx, buf, buflen); +} + +static ssize_t netlink_newtfilter_msg_encoder(struct zebra_dplane_ctx *ctx, + void *buf, size_t buflen) +{ + return netlink_tfilter_msg_encode(RTM_NEWTFILTER, ctx, buf, buflen); +} + +enum netlink_msg_status netlink_put_tc_update_msg(struct nl_batch *bth, + struct zebra_dplane_ctx *ctx) +{ + /* TODO: error handling and other actions (delete, replace, ...) */ + + netlink_batch_add_msg(bth, ctx, netlink_newqdisc_msg_encoder, false); + netlink_batch_add_msg(bth, ctx, netlink_newtclass_msg_encoder, false); + return netlink_batch_add_msg(bth, ctx, netlink_newtfilter_msg_encoder, + false); +} + +#endif /* HAVE_NETLINK */ diff --git a/zebra/tc_netlink.h b/zebra/tc_netlink.h new file mode 100644 index 0000000..2190bca --- /dev/null +++ b/zebra/tc_netlink.h @@ -0,0 +1,62 @@ +/* + * Zebra Traffic Control (TC) interaction with the kernel using netlink. + * + * Copyright (C) 2022 Shichu Yang + * + * This file is part of FRR. 
+ * + * FRR is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License as published by the + * Free Software Foundation; either version 2, or (at your option) any + * later version. + * + * FRR is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with FRR; see the file COPYING. If not, write to the Free + * Software Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA + * 02111-1307, USA. + */ + +#ifndef _ZEBRA_TC_NETLINK_H +#define _ZEBRA_TC_NETLINK_H + +#ifdef HAVE_NETLINK + +#ifdef __cplusplus +extern "C" { +#endif + +/* Represent a prefixed address in flower filter */ + +struct inet_prefix { + uint16_t flags; + uint16_t bytelen; + uint16_t bitlen; + uint16_t family; + uint32_t data[64]; +}; + +enum { + PREFIXLEN_SPECIFIED = (1 << 0), + ADDRTYPE_INET = (1 << 1), + ADDRTYPE_UNSPEC = (1 << 2), + ADDRTYPE_MULTI = (1 << 3), + + ADDRTYPE_INET_UNSPEC = ADDRTYPE_INET | ADDRTYPE_UNSPEC, + ADDRTYPE_INET_MULTI = ADDRTYPE_INET | ADDRTYPE_MULTI +}; + +extern enum netlink_msg_status +netlink_put_tc_update_msg(struct nl_batch *bth, struct zebra_dplane_ctx *ctx); + +#ifdef __cplusplus +} +#endif + +#endif /* HAVE_NETLINK */ + +#endif /* _ZEBRA_TC_NETLINK_H */ diff --git a/zebra/tc_socket.c b/zebra/tc_socket.c new file mode 100644 index 0000000..0bf9e48 --- /dev/null +++ b/zebra/tc_socket.c @@ -0,0 +1,41 @@ +/* + * Zebra Traffic Control (TC) interaction with the kernel using socket. + * + * Copyright (C) 2022 Shichu Yang + * + * This file is part of FRR. 
+ * + * FRR is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License as published by the + * Free Software Foundation; either version 2, or (at your option) any + * later version. + * + * FRR is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with FRR; see the file COPYING. If not, write to the Free + * Software Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA + * 02111-1307, USA. + */ + +#include <zebra.h> + +#ifndef HAVE_NETLINK + +#include "lib_errors.h" + +#include "zebra/rt.h" +#include "zebra/zebra_dplane.h" +#include "zebra/zebra_errors.h" + +enum zebra_dplane_result kernel_tc_update(struct zebra_dplane_ctx *ctx) +{ + flog_err(EC_LIB_UNAVAILABLE, "%s not Implemented for this platform", + __func__); + return ZEBRA_DPLANE_REQUEST_FAILURE; +} + +#endif /* !HAVE_NETLINK */ diff --git a/zebra/testrib.conf b/zebra/testrib.conf new file mode 100644 index 0000000..0df7dc2 --- /dev/null +++ b/zebra/testrib.conf @@ -0,0 +1,76 @@ +! +! Zebra configuration saved from vty +! 2007/04/01 17:46:48 +! +password foo +log stdout +service advanced-vty +! +debug zebra rib +debug zebra kernel +! +interface eth0 + ip address 10.0.0.1/24 + ipv6 address 1::0:1/64 + state up +! +interface eth1 + ip address 10.0.1.1/24 + ipv6 address 1::1:1/64 +! +interface eth2 + ip address 10.0.2.1/24 + ipv6 address 1::2:1/64 +! +! Unnumbered +interface foo1 + ip address 192.168.1.1/32 + ipv6 address 2::1:1/128 +! +interface foo0 + ip address 192.168.1.1/32 + ip address 192.168.1.1/24 label foo + ipv6 address 2::1:1/128 + state up +! + +! statics that should be subsumed by connected routes, according to interface +! 
state +ip route 10.0.0.0/24 10.0.1.254 +ip route 10.0.1.0/24 10.0.2.254 +ip route 10.0.2.0/24 10.0.0.254 +ipv6 route 1::0:0/64 1::1:f +ipv6 route 1::1:0/64 1::2:f +ipv6 route 1::2:0/64 1::0:f + +! null route +ip route 10.1.0.1/32 null0 +ipv6 route 100::1:1/128 null0 + +! normalish routes +ip route 1.1.2.0/24 10.0.0.2 +ipv6 route 80::/64 1::0:e + +! different admin distances +ip route 1.1.0.2/32 10.0.0.3 10 +ip route 1.1.0.2/32 10.0.0.4 20 +ip route 1.1.0.2/32 10.0.1.3 30 + +ipv6 route 90::1/128 1::0:a 10 +ipv6 route 90::1/128 1::0:b 20 +ipv6 route 90::1/128 1::1:c 30 + +! multiple-nexthop + distance +ip route 1.1.0.2/32 10.0.0.5 10 +ipv6 route 90::1/128 1::0:d 10 + +! a recursive route, potentially. +ip route 1.1.3.0/24 10.0.0.2 +! double recursive, potentially +ip route 1.1.0.1/32 1.1.3.1 +! +ip route 1.1.1.0/24 1.1.2.2 + +line vty + exec-timeout 0 0 +! diff --git a/zebra/zapi_msg.c b/zebra/zapi_msg.c new file mode 100644 index 0000000..a32a82d --- /dev/null +++ b/zebra/zapi_msg.c @@ -0,0 +1,3850 @@ +/* + * Zebra API message creation & consumption. + * Portions: + * Copyright (C) 1997-1999 Kunihiro Ishiguro + * Copyright (C) 2015-2018 Cumulus Networks, Inc. + * et al. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License as published by the Free + * Software Foundation; either version 2 of the License, or (at your option) + * any later version. + * + * This program is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for + * more details. 
+ * + * You should have received a copy of the GNU General Public License along + * with this program; see the file COPYING; if not, write to the Free Software + * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA + */ + +#include <zebra.h> +#include <libgen.h> + +#include "lib/prefix.h" +#include "lib/stream.h" +#include "lib/memory.h" +#include "lib/table.h" +#include "lib/network.h" +#include "lib/log.h" +#include "lib/zclient.h" +#include "lib/privs.h" +#include "lib/nexthop.h" +#include "lib/vrf.h" +#include "lib/libfrr.h" +#include "lib/lib_errors.h" + +#include "zebra/zebra_router.h" +#include "zebra/rib.h" +#include "zebra/zebra_ns.h" +#include "zebra/zebra_vrf.h" +#include "zebra/router-id.h" +#include "zebra/redistribute.h" +#include "zebra/debug.h" +#include "zebra/zebra_rnh.h" +#include "zebra/interface.h" +#include "zebra/zebra_ptm.h" +#include "zebra/rtadv.h" +#include "zebra/zebra_mpls.h" +#include "zebra/zebra_mroute.h" +#include "zebra/zebra_vxlan.h" +#include "zebra/zebra_evpn_mh.h" +#include "zebra/rt.h" +#include "zebra/zebra_pbr.h" +#include "zebra/table_manager.h" +#include "zebra/zapi_msg.h" +#include "zebra/zebra_errors.h" +#include "zebra/zebra_mlag.h" +#include "zebra/connected.h" +#include "zebra/zebra_opaque.h" +#include "zebra/zebra_srte.h" +#include "zebra/zebra_srv6.h" + +DEFINE_MTYPE_STATIC(ZEBRA, RE_OPAQUE, "Route Opaque Data"); + +static int zapi_nhg_decode(struct stream *s, int cmd, struct zapi_nhg *api_nhg); + +/* Encoding helpers -------------------------------------------------------- */ + +static void zserv_encode_interface(struct stream *s, struct interface *ifp) +{ + /* Interface information. 
*/ + struct zebra_if *zif = ifp->info; + + stream_put(s, ifp->name, INTERFACE_NAMSIZ); + stream_putl(s, ifp->ifindex); + stream_putc(s, ifp->status); + stream_putq(s, ifp->flags); + stream_putc(s, ifp->ptm_enable); + stream_putc(s, ifp->ptm_status); + stream_putl(s, ifp->metric); + stream_putl(s, ifp->speed); + stream_putl(s, ifp->mtu); + stream_putl(s, ifp->mtu6); + stream_putl(s, ifp->bandwidth); + stream_putl(s, zif->link_ifindex); + stream_putl(s, ifp->ll_type); + stream_putl(s, ifp->hw_addr_len); + if (ifp->hw_addr_len) + stream_put(s, ifp->hw_addr, ifp->hw_addr_len); + + /* Then, Traffic Engineering parameters if any */ + if (HAS_LINK_PARAMS(ifp) && IS_LINK_PARAMS_SET(ifp->link_params)) { + stream_putc(s, 1); + zebra_interface_link_params_write(s, ifp); + } else + stream_putc(s, 0); + + /* Write packet size. */ + stream_putw_at(s, 0, stream_get_endp(s)); +} + +static void zserv_encode_vrf(struct stream *s, struct zebra_vrf *zvrf) +{ + struct vrf_data data; + const char *netns_name = zvrf_ns_name(zvrf); + + memset(&data, 0, sizeof(data)); + data.l.table_id = zvrf->table_id; + + if (netns_name) + strlcpy(data.l.netns_name, basename((char *)netns_name), + NS_NAMSIZ); + else + memset(data.l.netns_name, 0, NS_NAMSIZ); + /* Pass the tableid and the netns NAME */ + stream_put(s, &data, sizeof(struct vrf_data)); + /* Interface information. */ + stream_put(s, zvrf_name(zvrf), VRF_NAMSIZ); + /* Write packet size. 
*/ + stream_putw_at(s, 0, stream_get_endp(s)); +} + +static int zserv_encode_nexthop(struct stream *s, struct nexthop *nexthop) +{ + stream_putl(s, nexthop->vrf_id); + stream_putc(s, nexthop->type); + switch (nexthop->type) { + case NEXTHOP_TYPE_IPV4: + case NEXTHOP_TYPE_IPV4_IFINDEX: + stream_put_in_addr(s, &nexthop->gate.ipv4); + stream_putl(s, nexthop->ifindex); + break; + case NEXTHOP_TYPE_IPV6: + stream_put(s, &nexthop->gate.ipv6, 16); + break; + case NEXTHOP_TYPE_IPV6_IFINDEX: + stream_put(s, &nexthop->gate.ipv6, 16); + stream_putl(s, nexthop->ifindex); + break; + case NEXTHOP_TYPE_IFINDEX: + stream_putl(s, nexthop->ifindex); + break; + default: + /* do nothing */ + break; + } + return 1; +} + +/* + * Zebra error addition adds error type. + * + * + * 0 1 + * 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 + * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ + * | enum zebra_error_types | + * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ + * + */ +static void zserv_encode_error(struct stream *s, enum zebra_error_types error) +{ + stream_put(s, &error, sizeof(error)); + + /* Write packet size. */ + stream_putw_at(s, 0, stream_get_endp(s)); +} + +/* Send handlers ----------------------------------------------------------- */ + +/* Interface is added. Send ZEBRA_INTERFACE_ADD to client. */ +/* + * This function is called in the following situations: + * - in response to a 3-byte ZEBRA_INTERFACE_ADD request + * from the client. 
+ * - at startup, when zebra figures out the available interfaces + * - when an interface is added (where support for + * RTM_IFANNOUNCE or AF_NETLINK sockets is available), or when + * an interface is marked IFF_UP (i.e., an RTM_IFINFO message is + * received) + */ +int zsend_interface_add(struct zserv *client, struct interface *ifp) +{ + struct stream *s = stream_new(ZEBRA_MAX_PACKET_SIZ); + + zclient_create_header(s, ZEBRA_INTERFACE_ADD, ifp->vrf->vrf_id); + zserv_encode_interface(s, ifp); + + client->ifadd_cnt++; + return zserv_send_message(client, s); +} + +/* Interface deletion from zebra daemon. */ +int zsend_interface_delete(struct zserv *client, struct interface *ifp) +{ + struct stream *s = stream_new(ZEBRA_MAX_PACKET_SIZ); + + zclient_create_header(s, ZEBRA_INTERFACE_DELETE, ifp->vrf->vrf_id); + zserv_encode_interface(s, ifp); + + client->ifdel_cnt++; + return zserv_send_message(client, s); +} + +int zsend_vrf_add(struct zserv *client, struct zebra_vrf *zvrf) +{ + struct stream *s = stream_new(ZEBRA_MAX_PACKET_SIZ); + + zclient_create_header(s, ZEBRA_VRF_ADD, zvrf_id(zvrf)); + zserv_encode_vrf(s, zvrf); + + client->vrfadd_cnt++; + return zserv_send_message(client, s); +} + +/* VRF deletion from zebra daemon. 
 */
int zsend_vrf_delete(struct zserv *client, struct zebra_vrf *zvrf)

{
	struct stream *s = stream_new(ZEBRA_MAX_PACKET_SIZ);

	zclient_create_header(s, ZEBRA_VRF_DELETE, zvrf_id(zvrf));
	zserv_encode_vrf(s, zvrf);

	client->vrfdel_cnt++;
	return zserv_send_message(client, s);
}

/* Send traffic-engineering link parameters for one interface to one client.
 * Returns 0 (nothing to send / encode declined) or the send result.
 */
int zsend_interface_link_params(struct zserv *client, struct interface *ifp)
{
	struct stream *s = stream_new(ZEBRA_MAX_PACKET_SIZ);

	/* No link parameters configured: nothing to report. */
	if (!ifp->link_params) {
		stream_free(s);
		return 0;
	}

	zclient_create_header(s, ZEBRA_INTERFACE_LINK_PARAMS, ifp->vrf->vrf_id);

	/* Add Interface Index */
	stream_putl(s, ifp->ifindex);

	/* Then TE Link Parameters */
	if (zebra_interface_link_params_write(s, ifp) == 0) {
		stream_free(s);
		return 0;
	}

	/* Write packet size. */
	stream_putw_at(s, 0, stream_get_endp(s));

	return zserv_send_message(client, s);
}

/* Interface address is added/deleted. Send ZEBRA_INTERFACE_ADDRESS_ADD or
 * ZEBRA_INTERFACE_ADDRESS_DELETE to the client.
 *
 * A ZEBRA_INTERFACE_ADDRESS_ADD is sent in the following situations:
 * - in response to a 3-byte ZEBRA_INTERFACE_ADD request
 *   from the client, after the ZEBRA_INTERFACE_ADD has been
 *   sent from zebra to the client
 * - redistribute new address info to all clients in the following situations
 *    - at startup, when zebra figures out the available interfaces
 *    - when an interface is added (where support for
 *      RTM_IFANNOUNCE or AF_NETLINK sockets is available), or when
 *      an interface is marked IFF_UP (i.e., an RTM_IFINFO message is
 *      received)
 *    - for the vty commands "ip address A.B.C.D/M [<label LINE>]"
 *      and "no bandwidth <1-10000000>", "ipv6 address X:X::X:X/M"
 *    - when an RTM_NEWADDR message is received from the kernel,
 *
 * The call tree that triggers ZEBRA_INTERFACE_ADDRESS_DELETE:
 *
 *                   zsend_interface_address(DELETE)
 *                           ^
 *                           |
 *          zebra_interface_address_delete_update
 *             ^                        ^      ^
 *             |                        |      if_delete_update
 *             |                        |
 *         ip_address_uninstall        connected_delete_ipv4
 *         [ipv6_addresss_uninstall]   [connected_delete_ipv6]
 *             ^                        ^
 *             |                        |
 *             |                  RTM_NEWADDR on routing/netlink socket
 *             |
 *         vty commands:
 *     "no ip address A.B.C.D/M [label LINE]"
 *     "no ip address A.B.C.D/M"
 *     ["no ipv6 address X:X::X:X/M"]
 *
 */
int zsend_interface_address(int cmd, struct zserv *client,
			    struct interface *ifp, struct connected *ifc)
{
	int blen;
	struct prefix *p;
	struct stream *s = stream_new(ZEBRA_MAX_PACKET_SIZ);

	zclient_create_header(s, cmd, ifp->vrf->vrf_id);
	stream_putl(s, ifp->ifindex);

	/* Interface address flag. */
	stream_putc(s, ifc->flags);

	/* Prefix information. */
	p = ifc->address;
	stream_putc(s, p->family);
	blen = prefix_blen(p);
	stream_put(s, &p->u.prefix, blen);

	/*
	 * XXX gnu version does not send prefixlen for
	 * ZEBRA_INTERFACE_ADDRESS_DELETE
	 * but zebra_interface_address_delete_read() in the gnu version
	 * expects to find it
	 */
	stream_putc(s, p->prefixlen);

	/* Destination.  Always written (blen bytes) to keep a fixed layout;
	 * NOTE(review): with a NULL destination stream_put() is given a NULL
	 * source — presumably it zero-fills in that case; confirm.
	 */
	p = ifc->destination;
	if (p)
		stream_put(s, &p->u.prefix, blen);
	else
		stream_put(s, NULL, blen);

	/* Write packet size. */
	stream_putw_at(s, 0, stream_get_endp(s));

	/* NOTE(review): the "add" counter is bumped even when cmd is
	 * ZEBRA_INTERFACE_ADDRESS_DELETE — looks like a stats-only quirk;
	 * confirm whether connected_rt_del_cnt should be used for deletes.
	 */
	client->connected_rt_add_cnt++;
	return zserv_send_message(client, s);
}

/* Encode and send one neighbor-connected address (ADD or DELETE cmd). */
static int zsend_interface_nbr_address(int cmd, struct zserv *client,
				       struct interface *ifp,
				       struct nbr_connected *ifc)
{
	int blen;
	struct stream *s = stream_new(ZEBRA_MAX_PACKET_SIZ);
	struct prefix *p;

	zclient_create_header(s, cmd, ifp->vrf->vrf_id);
	stream_putl(s, ifp->ifindex);

	/* Prefix information. */
	p = ifc->address;
	stream_putc(s, p->family);
	blen = prefix_blen(p);
	stream_put(s, &p->u.prefix, blen);

	/*
	 * XXX gnu version does not send prefixlen for
	 * ZEBRA_INTERFACE_ADDRESS_DELETE
	 * but zebra_interface_address_delete_read() in the gnu version
	 * expects to find it
	 */
	stream_putc(s, p->prefixlen);

	/* Write packet size. */
	stream_putw_at(s, 0, stream_get_endp(s));

	return zserv_send_message(client, s);
}

/* Interface address addition. */
static void zebra_interface_nbr_address_add_update(struct interface *ifp,
						   struct nbr_connected *ifc)
{
	struct listnode *node, *nnode;
	struct zserv *client;
	struct prefix *p;

	if (IS_ZEBRA_DEBUG_EVENT) {
		char buf[INET6_ADDRSTRLEN];

		p = ifc->address;
		zlog_debug(
			"MESSAGE: ZEBRA_INTERFACE_NBR_ADDRESS_ADD %s/%d on %s",
			inet_ntop(p->family, &p->u.prefix, buf,
				  INET6_ADDRSTRLEN),
			p->prefixlen, ifc->ifp->name);
	}

	/* Broadcast to every connected client. */
	for (ALL_LIST_ELEMENTS(zrouter.client_list, node, nnode, client)) {
		/* Do not send unsolicited messages to synchronous clients. */
		if (client->synchronous)
			continue;

		zsend_interface_nbr_address(ZEBRA_INTERFACE_NBR_ADDRESS_ADD,
					    client, ifp, ifc);
	}
}

/* Interface address deletion.
 */
static void zebra_interface_nbr_address_delete_update(struct interface *ifp,
						      struct nbr_connected *ifc)
{
	struct listnode *node, *nnode;
	struct zserv *client;
	struct prefix *p;

	if (IS_ZEBRA_DEBUG_EVENT) {
		char buf[INET6_ADDRSTRLEN];

		p = ifc->address;
		zlog_debug(
			"MESSAGE: ZEBRA_INTERFACE_NBR_ADDRESS_DELETE %s/%d on %s",
			inet_ntop(p->family, &p->u.prefix, buf,
				  INET6_ADDRSTRLEN),
			p->prefixlen, ifc->ifp->name);
	}

	for (ALL_LIST_ELEMENTS(zrouter.client_list, node, nnode, client)) {
		/* Do not send unsolicited messages to synchronous clients. */
		if (client->synchronous)
			continue;

		zsend_interface_nbr_address(ZEBRA_INTERFACE_NBR_ADDRESS_DELETE,
					    client, ifp, ifc);
	}
}

/* Send addresses on interface to client */
/* Returns 0 on success, -1 on the first failed send. */
int zsend_interface_addresses(struct zserv *client, struct interface *ifp)
{
	struct listnode *cnode, *cnnode;
	struct connected *c;
	struct nbr_connected *nc;

	/* Send interface addresses. */
	for (ALL_LIST_ELEMENTS(ifp->connected, cnode, cnnode, c)) {
		/* Only addresses actually present in the kernel. */
		if (!CHECK_FLAG(c->conf, ZEBRA_IFC_REAL))
			continue;

		if (zsend_interface_address(ZEBRA_INTERFACE_ADDRESS_ADD, client,
					    ifp, c)
		    < 0)
			return -1;
	}

	/* Send interface neighbors. */
	for (ALL_LIST_ELEMENTS(ifp->nbr_connected, cnode, cnnode, nc)) {
		if (zsend_interface_nbr_address(ZEBRA_INTERFACE_NBR_ADDRESS_ADD,
						client, ifp, nc)
		    < 0)
			return -1;
	}

	return 0;
}

/* Notify client about interface moving from one VRF to another.
 * Whether client is interested in old and new VRF is checked by caller.
 */
int zsend_interface_vrf_update(struct zserv *client, struct interface *ifp,
			       vrf_id_t vrf_id)
{
	struct stream *s = stream_new(ZEBRA_MAX_PACKET_SIZ);

	/* Header carries the OLD vrf id; body carries the new one. */
	zclient_create_header(s, ZEBRA_INTERFACE_VRF_UPDATE, ifp->vrf->vrf_id);

	/* Fill in the name of the interface and its new VRF (id) */
	stream_put(s, ifp->name, INTERFACE_NAMSIZ);
	stream_putl(s, vrf_id);

	/* Write packet size. */
	stream_putw_at(s, 0, stream_get_endp(s));

	client->if_vrfchg_cnt++;
	return zserv_send_message(client, s);
}

/* Add new nbr connected IPv6 address */
void nbr_connected_add_ipv6(struct interface *ifp, struct in6_addr *address)
{
	struct nbr_connected *ifc;
	struct prefix p;

	p.family = AF_INET6;
	IPV6_ADDR_COPY(&p.u.prefix6, address);
	p.prefixlen = IPV6_MAX_BITLEN;

	/* Reuse the head entry if one exists; only one neighbor address is
	 * kept per interface here.
	 */
	ifc = listnode_head(ifp->nbr_connected);
	if (!ifc) {
		/* new addition */
		ifc = nbr_connected_new();
		ifc->address = prefix_new();
		ifc->ifp = ifp;
		listnode_add(ifp->nbr_connected, ifc);
	}

	prefix_copy(ifc->address, &p);

	zebra_interface_nbr_address_add_update(ifp, ifc);

	if_nbr_ipv6ll_to_ipv4ll_neigh_update(ifp, address, 1);
}

/* Remove a nbr connected IPv6 address; no-op if it is not present. */
void nbr_connected_delete_ipv6(struct interface *ifp, struct in6_addr *address)
{
	struct nbr_connected *ifc;
	struct prefix p;

	p.family = AF_INET6;
	IPV6_ADDR_COPY(&p.u.prefix6, address);
	p.prefixlen = IPV6_MAX_BITLEN;

	ifc = nbr_connected_check(ifp, &p);
	if (!ifc)
		return;

	listnode_delete(ifp->nbr_connected, ifc);

	zebra_interface_nbr_address_delete_update(ifp, ifc);

	if_nbr_ipv6ll_to_ipv4ll_neigh_update(ifp, address, 0);

	nbr_connected_free(ifc);
}

/*
 * The cmd passed to zsend_interface_update may be ZEBRA_INTERFACE_UP or
 * ZEBRA_INTERFACE_DOWN.
 *
 * The ZEBRA_INTERFACE_UP message is sent from the zebra server to
 * the clients in one of 2 situations:
 *   - an if_up is detected e.g., as a result of an RTM_IFINFO message
 *   - a vty command modifying the bandwidth of an interface is received.
 * The ZEBRA_INTERFACE_DOWN message is sent when an if_down is detected.
 */
int zsend_interface_update(int cmd, struct zserv *client, struct interface *ifp)
{
	struct stream *s = stream_new(ZEBRA_MAX_PACKET_SIZ);

	zclient_create_header(s, cmd, ifp->vrf->vrf_id);
	zserv_encode_interface(s, ifp);

	if (cmd == ZEBRA_INTERFACE_UP)
		client->ifup_cnt++;
	else
		client->ifdown_cnt++;

	return zserv_send_message(client, s);
}

/* Build a zapi_route from a RIB entry and send it to one client as a
 * redistribute ADD/DEL.  Returns -1 on encode failure, else the send result.
 */
int zsend_redistribute_route(int cmd, struct zserv *client,
			     const struct route_node *rn,
			     const struct route_entry *re)
{
	struct zapi_route api;
	struct zapi_nexthop *api_nh;
	struct nexthop *nexthop;
	const struct prefix *p, *src_p;
	uint8_t count = 0;
	afi_t afi;
	size_t stream_size =
		MAX(ZEBRA_MAX_PACKET_SIZ, sizeof(struct zapi_route));

	srcdest_rnode_prefixes(rn, &p, &src_p);
	memset(&api, 0, sizeof(api));
	api.vrf_id = re->vrf_id;
	api.type = re->type;
	api.safi = SAFI_UNICAST;
	api.instance = re->instance;
	api.flags = re->flags;

	/* Per-AFI add/delete statistics. */
	afi = family2afi(p->family);
	switch (afi) {
	case AFI_IP:
		if (cmd == ZEBRA_REDISTRIBUTE_ROUTE_ADD)
			client->redist_v4_add_cnt++;
		else
			client->redist_v4_del_cnt++;
		break;
	case AFI_IP6:
		if (cmd == ZEBRA_REDISTRIBUTE_ROUTE_ADD)
			client->redist_v6_add_cnt++;
		else
			client->redist_v6_del_cnt++;
		break;
	default:
		break;
	}

	/* Prefix. */
	api.prefix = *p;
	if (src_p) {
		SET_FLAG(api.message, ZAPI_MESSAGE_SRCPFX);
		memcpy(&api.src_prefix, src_p, sizeof(api.src_prefix));
	}

	/* Copy only ACTIVE nexthops into the zapi structure.
	 * NOTE(review): count is not bounded against the api.nexthops array
	 * size here — presumably the RIB never holds more than the multipath
	 * limit; confirm.
	 */
	for (nexthop = re->nhe->nhg.nexthop;
	     nexthop; nexthop = nexthop->next) {
		if (!CHECK_FLAG(nexthop->flags, NEXTHOP_FLAG_ACTIVE))
			continue;

		api_nh = &api.nexthops[count];
		api_nh->vrf_id = nexthop->vrf_id;
		api_nh->type = nexthop->type;
		api_nh->weight = nexthop->weight;
		switch (nexthop->type) {
		case NEXTHOP_TYPE_BLACKHOLE:
			api_nh->bh_type = nexthop->bh_type;
			break;
		case NEXTHOP_TYPE_IPV4:
		case NEXTHOP_TYPE_IPV4_IFINDEX:
			api_nh->gate.ipv4 = nexthop->gate.ipv4;
			api_nh->ifindex = nexthop->ifindex;
			break;
		case NEXTHOP_TYPE_IFINDEX:
			api_nh->ifindex = nexthop->ifindex;
			break;
		case NEXTHOP_TYPE_IPV6:
		case NEXTHOP_TYPE_IPV6_IFINDEX:
			api_nh->gate.ipv6 = nexthop->gate.ipv6;
			api_nh->ifindex = nexthop->ifindex;
		}
		count++;
	}

	/* Nexthops. */
	if (count) {
		SET_FLAG(api.message, ZAPI_MESSAGE_NEXTHOP);
		api.nexthop_num = count;
	}

	/* Attributes. */
	SET_FLAG(api.message, ZAPI_MESSAGE_DISTANCE);
	api.distance = re->distance;
	SET_FLAG(api.message, ZAPI_MESSAGE_METRIC);
	api.metric = re->metric;
	if (re->tag) {
		SET_FLAG(api.message, ZAPI_MESSAGE_TAG);
		api.tag = re->tag;
	}
	SET_FLAG(api.message, ZAPI_MESSAGE_MTU);
	api.mtu = re->mtu;

	struct stream *s = stream_new(stream_size);

	/* Encode route and send. */
	if (zapi_route_encode(cmd, s, &api) < 0) {
		stream_free(s);
		return -1;
	}

	if (IS_ZEBRA_DEBUG_SEND)
		zlog_debug("%s: %s to client %s: type %s, vrf_id %d, p %pFX",
			   __func__, zserv_command_string(cmd),
			   zebra_route_string(client->proto),
			   zebra_route_string(api.type), api.vrf_id,
			   &api.prefix);
	return zserv_send_message(client, s);
}

/*
 * Modified version of zsend_ipv4_nexthop_lookup(): Query unicast rib if
 * nexthop is not found on mrib. Returns both route metric and protocol
 * distance.
 *
 * *XXX* this ZAPI call is slated to be removed at some point in the future
 * since MRIB support in PIM is hopelessly broken in its interactions with NHT.
 * The plan is to make pimd use NHT to receive URIB and MRIB in parallel and
 * make the decision there, which will obsolete this ZAPI op.
 * (Otherwise we would need to implement sending NHT updates for the result of
 * this "URIB-MRIB-combined" table, but we only decide that here on the fly,
 * so it'd be rather complex to do NHT for.)
 */
static int zsend_nexthop_lookup_mrib(struct zserv *client, struct ipaddr *addr,
				     struct route_entry *re,
				     struct zebra_vrf *zvrf)
{
	struct stream *s;
	unsigned long nump;
	uint8_t num;
	struct nexthop *nexthop;

	/* Get output stream. */
	s = stream_new(ZEBRA_MAX_PACKET_SIZ);
	stream_reset(s);

	/* Fill in result. */
	zclient_create_header(s, ZEBRA_NEXTHOP_LOOKUP_MRIB, zvrf_id(zvrf));
	stream_put_ipaddr(s, addr);

	if (re) {
		struct nexthop_group *nhg;

		stream_putc(s, re->distance);
		stream_putl(s, re->metric);
		num = 0;
		/* remember position for nexthop_num */
		nump = stream_get_endp(s);
		/* reserve room for nexthop_num */
		stream_putc(s, 0);
		nhg = rib_get_fib_nhg(re);
		for (ALL_NEXTHOPS_PTR(nhg, nexthop)) {
			if (rnh_nexthop_valid(re, nexthop))
				num += zserv_encode_nexthop(s, nexthop);
		}

		/* store nexthop_num */
		stream_putc_at(s, nump, num);
	} else {
		/* No route: everything zeroed. */
		stream_putc(s, 0); /* distance */
		stream_putl(s, 0); /* metric */
		stream_putc(s, 0); /* nexthop_num */
	}

	stream_putw_at(s, 0, stream_get_endp(s));

	return zserv_send_message(client, s);
}

/* Notify the owning client session about the fate of a nexthop group.
 * Returns 0 if no matching session exists, else the send result.
 */
int zsend_nhg_notify(uint16_t type, uint16_t instance, uint32_t session_id,
		     uint32_t id, enum zapi_nhg_notify_owner note)
{
	struct zserv *client;
	struct stream *s;

	client = zserv_find_client_session(type, instance, session_id);
	if (!client) {
		if (IS_ZEBRA_DEBUG_PACKET) {
			zlog_debug("Not Notifying Owner: %u(%u) about %u(%d)",
				   type, instance, id, note);
+ } + return 0; + } + + if (IS_ZEBRA_DEBUG_SEND) + zlog_debug("%s: type %d, id %d, note %s", + __func__, type, id, zapi_nhg_notify_owner2str(note)); + + s = stream_new(ZEBRA_MAX_PACKET_SIZ); + stream_reset(s); + + zclient_create_header(s, ZEBRA_NHG_NOTIFY_OWNER, VRF_DEFAULT); + + stream_put(s, ¬e, sizeof(note)); + stream_putl(s, id); + + stream_putw_at(s, 0, stream_get_endp(s)); + + return zserv_send_message(client, s); +} + +/* + * Common utility send route notification, called from a path using a + * route_entry and from a path using a dataplane context. + */ +static int route_notify_internal(const struct route_node *rn, int type, + uint16_t instance, vrf_id_t vrf_id, + uint32_t table_id, + enum zapi_route_notify_owner note, afi_t afi, + safi_t safi) +{ + struct zserv *client; + struct stream *s; + uint8_t blen; + + client = zserv_find_client(type, instance); + if (!client || !client->notify_owner) { + if (IS_ZEBRA_DEBUG_PACKET) + zlog_debug( + "Not Notifying Owner: %s about prefix %pRN(%u) %d vrf: %u", + zebra_route_string(type), rn, table_id, note, + vrf_id); + return 0; + } + + if (IS_ZEBRA_DEBUG_PACKET) + zlog_debug( + "Notifying Owner: %s about prefix %pRN(%u) %d vrf: %u", + zebra_route_string(type), rn, table_id, note, vrf_id); + + /* We're just allocating a small-ish buffer here, since we only + * encode a small amount of data. 
+ */ + s = stream_new(ZEBRA_SMALL_PACKET_SIZE); + + stream_reset(s); + + zclient_create_header(s, ZEBRA_ROUTE_NOTIFY_OWNER, vrf_id); + + stream_put(s, ¬e, sizeof(note)); + + stream_putc(s, rn->p.family); + + blen = prefix_blen(&rn->p); + stream_putc(s, rn->p.prefixlen); + stream_put(s, &rn->p.u.prefix, blen); + + stream_putl(s, table_id); + + /* Encode AFI, SAFI in the message */ + stream_putc(s, afi); + stream_putc(s, safi); + + stream_putw_at(s, 0, stream_get_endp(s)); + + return zserv_send_message(client, s); +} + +int zsend_route_notify_owner(const struct route_node *rn, + struct route_entry *re, + enum zapi_route_notify_owner note, afi_t afi, + safi_t safi) +{ + return (route_notify_internal(rn, re->type, re->instance, re->vrf_id, + re->table, note, afi, safi)); +} + +/* + * Route-owner notification using info from dataplane update context. + */ +int zsend_route_notify_owner_ctx(const struct zebra_dplane_ctx *ctx, + enum zapi_route_notify_owner note) +{ + int result; + struct route_node *rn = rib_find_rn_from_ctx(ctx); + + result = route_notify_internal( + rn, dplane_ctx_get_type(ctx), dplane_ctx_get_instance(ctx), + dplane_ctx_get_vrf(ctx), dplane_ctx_get_table(ctx), note, + dplane_ctx_get_afi(ctx), dplane_ctx_get_safi(ctx)); + + route_unlock_node(rn); + + return result; +} + +static void zread_route_notify_request(ZAPI_HANDLER_ARGS) +{ + uint8_t notify; + + STREAM_GETC(msg, notify); + client->notify_owner = notify; +stream_failure: + return; +} + +void zsend_rule_notify_owner(const struct zebra_dplane_ctx *ctx, + enum zapi_rule_notify_owner note) +{ + struct listnode *node; + struct zserv *client; + struct stream *s; + + if (IS_ZEBRA_DEBUG_PACKET) + zlog_debug("%s: Notifying %u", __func__, + dplane_ctx_rule_get_unique(ctx)); + + for (ALL_LIST_ELEMENTS_RO(zrouter.client_list, node, client)) { + if (dplane_ctx_rule_get_sock(ctx) == client->sock) + break; + } + + if (!client) + return; + + s = stream_new(ZEBRA_MAX_PACKET_SIZ); + + zclient_create_header(s, 
ZEBRA_RULE_NOTIFY_OWNER, VRF_DEFAULT); + stream_put(s, ¬e, sizeof(note)); + stream_putl(s, dplane_ctx_rule_get_seq(ctx)); + stream_putl(s, dplane_ctx_rule_get_priority(ctx)); + stream_putl(s, dplane_ctx_rule_get_unique(ctx)); + stream_put(s, dplane_ctx_rule_get_ifname(ctx), INTERFACE_NAMSIZ); + + stream_putw_at(s, 0, stream_get_endp(s)); + + zserv_send_message(client, s); +} + +void zsend_iptable_notify_owner(const struct zebra_dplane_ctx *ctx, + enum zapi_iptable_notify_owner note) +{ + struct listnode *node; + struct zserv *client; + struct stream *s; + struct zebra_pbr_iptable ipt; + uint16_t cmd = ZEBRA_IPTABLE_NOTIFY_OWNER; + struct zebra_pbr_iptable *ipt_hash; + enum dplane_op_e op = dplane_ctx_get_op(ctx); + + dplane_ctx_get_pbr_iptable(ctx, &ipt); + + ipt_hash = hash_lookup(zrouter.iptable_hash, &ipt); + if (ipt_hash) { + if (op == DPLANE_OP_IPTABLE_ADD && + CHECK_FLAG(ipt_hash->internal_flags, + IPTABLE_INSTALL_QUEUED)) + UNSET_FLAG(ipt_hash->internal_flags, + IPTABLE_INSTALL_QUEUED); + else if (op == DPLANE_OP_IPTABLE_DELETE && + CHECK_FLAG(ipt_hash->internal_flags, + IPTABLE_UNINSTALL_QUEUED)) + UNSET_FLAG(ipt_hash->internal_flags, + IPTABLE_UNINSTALL_QUEUED); + } + if (IS_ZEBRA_DEBUG_PACKET) + zlog_debug("%s: Notifying %s id %u note %u", __func__, + zserv_command_string(cmd), ipt.unique, note); + + for (ALL_LIST_ELEMENTS_RO(zrouter.client_list, node, client)) { + if (ipt.sock == client->sock) + break; + } + + if (!client) + return; + + s = stream_new(ZEBRA_MAX_PACKET_SIZ); + + zclient_create_header(s, cmd, VRF_DEFAULT); + stream_putw(s, note); + stream_putl(s, ipt.unique); + stream_put(s, ipt.ipset_name, ZEBRA_IPSET_NAME_SIZE); + stream_putw_at(s, 0, stream_get_endp(s)); + + zserv_send_message(client, s); +} + +void zsend_ipset_notify_owner(const struct zebra_dplane_ctx *ctx, + enum zapi_ipset_notify_owner note) +{ + struct listnode *node; + struct zserv *client; + struct stream *s; + struct zebra_pbr_ipset ipset; + uint16_t cmd = 
ZEBRA_IPSET_NOTIFY_OWNER;

	dplane_ctx_get_pbr_ipset(ctx, &ipset);

	if (IS_ZEBRA_DEBUG_PACKET)
		zlog_debug("%s: Notifying %s id %u note %u", __func__,
			   zserv_command_string(cmd), ipset.unique, note);

	/* Owner is matched by its zserv socket fd. */
	for (ALL_LIST_ELEMENTS_RO(zrouter.client_list, node, client)) {
		if (ipset.sock == client->sock)
			break;
	}

	if (!client)
		return;

	s = stream_new(ZEBRA_MAX_PACKET_SIZ);

	zclient_create_header(s, cmd, VRF_DEFAULT);
	stream_putw(s, note);
	stream_putl(s, ipset.unique);
	stream_put(s, ipset.ipset_name, ZEBRA_IPSET_NAME_SIZE);
	stream_putw_at(s, 0, stream_get_endp(s));

	zserv_send_message(client, s);
}

/* Notify the owner of an ipset entry about its fate. */
void zsend_ipset_entry_notify_owner(const struct zebra_dplane_ctx *ctx,
				    enum zapi_ipset_entry_notify_owner note)
{
	struct listnode *node;
	struct zserv *client;
	struct stream *s;
	struct zebra_pbr_ipset_entry ipent;
	struct zebra_pbr_ipset ipset;
	uint16_t cmd = ZEBRA_IPSET_ENTRY_NOTIFY_OWNER;

	dplane_ctx_get_pbr_ipset_entry(ctx, &ipent);
	dplane_ctx_get_pbr_ipset(ctx, &ipset);

	if (IS_ZEBRA_DEBUG_PACKET)
		zlog_debug("%s: Notifying %s id %u note %u", __func__,
			   zserv_command_string(cmd), ipent.unique, note);

	for (ALL_LIST_ELEMENTS_RO(zrouter.client_list, node, client)) {
		if (ipent.sock == client->sock)
			break;
	}

	if (!client)
		return;

	s = stream_new(ZEBRA_MAX_PACKET_SIZ);

	zclient_create_header(s, cmd, VRF_DEFAULT);
	stream_putw(s, note);
	stream_putl(s, ipent.unique);
	stream_put(s, ipset.ipset_name, ZEBRA_IPSET_NAME_SIZE);
	stream_putw_at(s, 0, stream_get_endp(s));

	zserv_send_message(client, s);
}

/* Broadcast an NHRP neighbor-table event to every client that registered
 * interest in the neighbor's AFI within the interface's VRF.
 */
void zsend_nhrp_neighbor_notify(int cmd, struct interface *ifp,
				struct ipaddr *ipaddr, int ndm_state,
				union sockunion *link_layer_ipv4)
{
	struct stream *s;
	struct listnode *node, *nnode;
	struct zserv *client;
	afi_t afi;
	union sockunion ip;

	if (IS_ZEBRA_DEBUG_PACKET)
		zlog_debug("%s: Notifying Neighbor entry (%u)", __func__, cmd);

	/* Convert struct ipaddr into a sockunion for the encoder. */
	sockunion_family(&ip) = ipaddr_family(ipaddr);
	afi = family2afi(sockunion_family(&ip));
	memcpy((char *)sockunion_get_addr(&ip), &ipaddr->ip.addr,
	       family2addrsize(sockunion_family(&ip)));

	for (ALL_LIST_ELEMENTS(zrouter.client_list, node, nnode, client)) {
		if (!vrf_bitmap_check(client->nhrp_neighinfo[afi],
				      ifp->vrf->vrf_id))
			continue;

		s = stream_new(ZEBRA_MAX_PACKET_SIZ);
		zclient_neigh_ip_encode(s, cmd, &ip, link_layer_ipv4, ifp,
					ndm_state);
		stream_putw_at(s, 0, stream_get_endp(s));
		zserv_send_message(client, s);
	}
}


/* Router-id is updated. Send ZEBRA_ROUTER_ID_UPDATE to client. */
int zsend_router_id_update(struct zserv *client, afi_t afi, struct prefix *p,
			   vrf_id_t vrf_id)
{
	int blen;
	struct stream *s;

	/* Check this client need interface information. */
	if (!vrf_bitmap_check(client->ridinfo[afi], vrf_id))
		return 0;

	s = stream_new(ZEBRA_MAX_PACKET_SIZ);

	/* Message type. */
	zclient_create_header(s, ZEBRA_ROUTER_ID_UPDATE, vrf_id);

	/* Prefix information. */
	stream_putc(s, p->family);
	blen = prefix_blen(p);
	stream_put(s, &p->u.prefix, blen);
	stream_putc(s, p->prefixlen);

	/* Write packet size. */
	stream_putw_at(s, 0, stream_get_endp(s));

	return zserv_send_message(client, s);
}

/*
 * Function used by Zebra to send a PW status update to LDP daemon
 */
int zsend_pw_update(struct zserv *client, struct zebra_pw *pw)
{
	struct stream *s = stream_new(ZEBRA_MAX_PACKET_SIZ);

	zclient_create_header(s, ZEBRA_PW_STATUS_UPDATE, pw->vrf_id);
	stream_write(s, pw->ifname, INTERFACE_NAMSIZ);
	stream_putl(s, pw->ifindex);
	stream_putl(s, pw->status);

	/* Put length at the first point of the stream.
 */
	stream_putw_at(s, 0, stream_get_endp(s));

	return zserv_send_message(client, s);
}

/* Send response to a get label chunk request to client */
int zsend_assign_label_chunk_response(struct zserv *client, vrf_id_t vrf_id,
				      struct label_manager_chunk *lmc)
{
	struct stream *s = stream_new(ZEBRA_MAX_PACKET_SIZ);

	zclient_create_header(s, ZEBRA_GET_LABEL_CHUNK, vrf_id);
	/* proto */
	stream_putc(s, client->proto);
	/* instance */
	stream_putw(s, client->instance);

	/* A NULL lmc means the request failed: no chunk fields follow. */
	if (lmc) {
		/* keep */
		stream_putc(s, lmc->keep);
		/* start and end labels */
		stream_putl(s, lmc->start);
		stream_putl(s, lmc->end);
	}

	/* Write packet size. */
	stream_putw_at(s, 0, stream_get_endp(s));

	return zserv_send_message(client, s);
}

/* Send response to a label manager connect request to client */
int zsend_label_manager_connect_response(struct zserv *client, vrf_id_t vrf_id,
					 unsigned short result)
{
	struct stream *s = stream_new(ZEBRA_MAX_PACKET_SIZ);

	zclient_create_header(s, ZEBRA_LABEL_MANAGER_CONNECT, vrf_id);

	/* proto */
	stream_putc(s, client->proto);

	/* instance */
	stream_putw(s, client->instance);

	/* result */
	stream_putc(s, result);

	/* Write packet size. */
	stream_putw_at(s, 0, stream_get_endp(s));

	return zserv_send_message(client, s);
}

/* Send response to a get table chunk request to client */
static int zsend_assign_table_chunk_response(struct zserv *client,
					     vrf_id_t vrf_id,
					     struct table_manager_chunk *tmc)
{
	struct stream *s = stream_new(ZEBRA_MAX_PACKET_SIZ);

	zclient_create_header(s, ZEBRA_GET_TABLE_CHUNK, vrf_id);

	/* A NULL tmc means the request failed: no chunk fields follow. */
	if (tmc) {
		/* start and end labels */
		stream_putl(s, tmc->start);
		stream_putl(s, tmc->end);
	}

	/* Write packet size. */
	stream_putw_at(s, 0, stream_get_endp(s));

	return zserv_send_message(client, s);
}

/* Send response to a table manager connect request to client. */
static int zsend_table_manager_connect_response(struct zserv *client,
						vrf_id_t vrf_id,
						uint16_t result)
{
	struct stream *s = stream_new(ZEBRA_MAX_PACKET_SIZ);

	zclient_create_header(s, ZEBRA_TABLE_MANAGER_CONNECT, vrf_id);

	/* result */
	stream_putc(s, result);

	stream_putw_at(s, 0, stream_get_endp(s));

	return zserv_send_message(client, s);
}

/* SRv6 locator add notification from zebra daemon. */
int zsend_zebra_srv6_locator_add(struct zserv *client, struct srv6_locator *loc)
{
	struct stream *s = stream_new(ZEBRA_MAX_PACKET_SIZ);

	zclient_create_header(s, ZEBRA_SRV6_LOCATOR_ADD, VRF_DEFAULT);
	zapi_srv6_locator_encode(s, loc);
	stream_putw_at(s, 0, stream_get_endp(s));

	return zserv_send_message(client, s);
}

/* SRv6 locator delete notification from zebra daemon. */
int zsend_zebra_srv6_locator_delete(struct zserv *client,
				    struct srv6_locator *loc)
{
	struct stream *s = stream_new(ZEBRA_MAX_PACKET_SIZ);

	zclient_create_header(s, ZEBRA_SRV6_LOCATOR_DELETE, VRF_DEFAULT);
	zapi_srv6_locator_encode(s, loc);
	stream_putw_at(s, 0, stream_get_endp(s));

	return zserv_send_message(client, s);
}

/* Inbound message handling ------------------------------------------------ */

/* Nexthop register */
static void zread_rnh_register(ZAPI_HANDLER_ARGS)
{
	struct rnh *rnh;
	struct stream *s;
	struct prefix p;
	unsigned short l = 0;	/* bytes of the payload consumed so far */
	uint8_t connected = 0;
	uint8_t resolve_via_default;
	bool exist;
	bool flag_changed = false;
	uint8_t orig_flags;
	safi_t safi;

	if (IS_ZEBRA_DEBUG_NHT)
		zlog_debug(
			"rnh_register msg from client %s: hdr->length=%d vrf=%u",
			zebra_route_string(client->proto), hdr->length,
			zvrf->vrf->vrf_id);

	s = msg;

	if (!client->nh_reg_time)
		client->nh_reg_time = monotime(NULL);

	/* The message may carry several registrations back to back. */
	while (l < hdr->length) {
		STREAM_GETC(s, connected);
		STREAM_GETC(s, resolve_via_default);
		STREAM_GETW(s, safi);
		STREAM_GETW(s, p.family);
		STREAM_GETC(s, p.prefixlen);
		l += 7;
		if (p.family == AF_INET) {
			client->v4_nh_watch_add_cnt++;
			if (p.prefixlen > IPV4_MAX_BITLEN) {
				zlog_debug(
					"%s: Specified prefix hdr->length %d is too large for a v4 address",
					__func__, p.prefixlen);
				return;
			}
			STREAM_GET(&p.u.prefix4.s_addr, s, IPV4_MAX_BYTELEN);
			l += IPV4_MAX_BYTELEN;
		} else if (p.family == AF_INET6) {
			client->v6_nh_watch_add_cnt++;
			if (p.prefixlen > IPV6_MAX_BITLEN) {
				zlog_debug(
					"%s: Specified prefix hdr->length %d is to large for a v6 address",
					__func__, p.prefixlen);
				return;
			}
			STREAM_GET(&p.u.prefix6, s, IPV6_MAX_BYTELEN);
			l += IPV6_MAX_BYTELEN;
		} else {
			flog_err(
				EC_ZEBRA_UNKNOWN_FAMILY,
				"rnh_register: Received unknown family type %d",
				p.family);
			return;
		}
		rnh = zebra_add_rnh(&p, zvrf_id(zvrf), safi, &exist);
		if (!rnh)
			return;

		/* Reconcile the CONNECTED flag with the client's request and
		 * remember whether anything changed.
		 */
		orig_flags = rnh->flags;
		if (connected && !CHECK_FLAG(rnh->flags, ZEBRA_NHT_CONNECTED))
			SET_FLAG(rnh->flags, ZEBRA_NHT_CONNECTED);
		else if (!connected
			 && CHECK_FLAG(rnh->flags, ZEBRA_NHT_CONNECTED))
			UNSET_FLAG(rnh->flags, ZEBRA_NHT_CONNECTED);

		if (resolve_via_default)
			SET_FLAG(rnh->flags, ZEBRA_NHT_RESOLVE_VIA_DEFAULT);

		if (orig_flags != rnh->flags)
			flag_changed = true;

		/* Anything not AF_INET/INET6 has been filtered out above */
		if (!exist || flag_changed)
			zebra_evaluate_rnh(zvrf, family2afi(p.family), 1, &p,
					   safi);

		zebra_add_rnh_client(rnh, client, zvrf_id(zvrf));
	}

stream_failure:
	return;
}

/* Nexthop register */
/* (Actually the unregister handler: removes this client from each rnh.) */
static void zread_rnh_unregister(ZAPI_HANDLER_ARGS)
{
	struct rnh *rnh;
	struct stream *s;
	struct prefix p;
	unsigned short l = 0;
	safi_t safi;

	if (IS_ZEBRA_DEBUG_NHT)
		zlog_debug(
			"rnh_unregister msg from client %s: hdr->length=%d vrf: %u",
			zebra_route_string(client->proto), hdr->length,
			zvrf->vrf->vrf_id);

	s = msg;

	while (l < hdr->length) {
		uint8_t ignore;

		/* The two leading bytes (connected/resolve flags in the
		 * register message) must be zero here.
		 */
		STREAM_GETC(s, ignore);
		if (ignore != 0)
			goto stream_failure;
		STREAM_GETC(s, ignore);
		if (ignore != 0)
			goto stream_failure;

		STREAM_GETW(s, safi);
		STREAM_GETW(s, p.family);
		STREAM_GETC(s, p.prefixlen);
		l += 7;
		if (p.family == AF_INET) {
			client->v4_nh_watch_rem_cnt++;
			if (p.prefixlen > IPV4_MAX_BITLEN) {
				zlog_debug(
					"%s: Specified prefix hdr->length %d is to large for a v4 address",
					__func__, p.prefixlen);
				return;
			}
			STREAM_GET(&p.u.prefix4.s_addr, s, IPV4_MAX_BYTELEN);
			l += IPV4_MAX_BYTELEN;
		} else if (p.family == AF_INET6) {
			client->v6_nh_watch_rem_cnt++;
			if (p.prefixlen > IPV6_MAX_BITLEN) {
				zlog_debug(
					"%s: Specified prefix hdr->length %d is to large for a v6 address",
					__func__, p.prefixlen);
				return;
			}
			STREAM_GET(&p.u.prefix6, s, IPV6_MAX_BYTELEN);
			l += IPV6_MAX_BYTELEN;
		} else {
			/* NOTE(review): error text says "rnh_register:" —
			 * copy-paste from the register handler; the string is
			 * runtime output so it is left untouched here.
			 */
			flog_err(
				EC_ZEBRA_UNKNOWN_FAMILY,
				"rnh_register: Received unknown family type %d",
				p.family);
			return;
		}
		rnh = zebra_lookup_rnh(&p, zvrf_id(zvrf), safi);
		if (rnh) {
			client->nh_dereg_time = monotime(NULL);
			zebra_remove_rnh_client(rnh, client);
		}
	}
stream_failure:
	return;
}

#define ZEBRA_MIN_FEC_LENGTH 5

/* FEC register */
static void zread_fec_register(ZAPI_HANDLER_ARGS)
{
	struct stream *s;
	unsigned short l = 0;
	struct prefix p;
	uint16_t flags;
	uint32_t label = MPLS_INVALID_LABEL;
	uint32_t label_index = MPLS_INVALID_LABEL_INDEX;

	s = msg;
	/* FEC registration is always handled in the default VRF. */
	zvrf = vrf_info_lookup(VRF_DEFAULT);
	if (!zvrf)
		return;

	/*
	 * The minimum amount of data that can be sent for one fec
	 * registration
	 */
	if (hdr->length < ZEBRA_MIN_FEC_LENGTH) {
		flog_err(
			EC_ZEBRA_IRDP_LEN_MISMATCH,
			"fec_register: Received a fec register of hdr->length %d, it is of insufficient size to properly decode",
			hdr->length);
		return;
	}

	while (l < hdr->length) {
		STREAM_GETW(s, flags);
		memset(&p, 0, sizeof(p));
		STREAM_GETW(s, p.family);
		if (p.family != AF_INET && p.family != AF_INET6) {
			flog_err(
				EC_ZEBRA_UNKNOWN_FAMILY,
				"fec_register: Received unknown family type %d",
				p.family);
			return;
		}
		STREAM_GETC(s, p.prefixlen);
		if ((p.family == AF_INET && p.prefixlen > IPV4_MAX_BITLEN)
		    || (p.family == AF_INET6
			&& p.prefixlen > IPV6_MAX_BITLEN)) {
			zlog_debug(
				"%s: Specified prefix hdr->length: %d is to long for %d",
				__func__, p.prefixlen, p.family);
			return;
		}
		l += 5;
		STREAM_GET(&p.u.prefix, s, PSIZE(p.prefixlen));
		l += PSIZE(p.prefixlen);
		/* Label and label-index are mutually exclusive options. */
		if (flags & ZEBRA_FEC_REGISTER_LABEL) {
			STREAM_GETL(s, label);
			l += 4;
		} else if (flags & ZEBRA_FEC_REGISTER_LABEL_INDEX) {
			STREAM_GETL(s, label_index);
			l += 4;
		}

		zebra_mpls_fec_register(zvrf, &p, label, label_index, client);
	}

stream_failure:
	return;
}

/* FEC unregister */
static void zread_fec_unregister(ZAPI_HANDLER_ARGS)
{
	struct stream *s;
	unsigned short l = 0;
	struct prefix p;
	uint16_t flags;

	s = msg;
	zvrf = vrf_info_lookup(VRF_DEFAULT);
	if (!zvrf)
		return;

	/*
	 * The minimum amount of data that can be sent for one
	 * fec unregistration
	 */
	if (hdr->length < ZEBRA_MIN_FEC_LENGTH) {
		flog_err(
			EC_ZEBRA_IRDP_LEN_MISMATCH,
			"fec_unregister: Received a fec unregister of hdr->length %d, it is of insufficient size to properly decode",
			hdr->length);
		return;
	}

	while (l < hdr->length) {
		STREAM_GETW(s, flags);
		if (flags != 0)
			goto stream_failure;

		memset(&p, 0, sizeof(p));
		STREAM_GETW(s, p.family);
		if (p.family != AF_INET && p.family != AF_INET6) {
			flog_err(
				EC_ZEBRA_UNKNOWN_FAMILY,
				"fec_unregister: Received unknown family type %d",
				p.family);
			return;
		}
		STREAM_GETC(s, p.prefixlen);
		if ((p.family == AF_INET && p.prefixlen > IPV4_MAX_BITLEN)
		    || (p.family == AF_INET6
			&& p.prefixlen > IPV6_MAX_BITLEN)) {
			zlog_debug(
				"%s: Received prefix hdr->length %d which is greater than %d can support",
				__func__, p.prefixlen, p.family);
			return;
		}
		l += 5;
		STREAM_GET(&p.u.prefix, s, PSIZE(p.prefixlen));
		l +=
PSIZE(p.prefixlen);
		zebra_mpls_fec_unregister(zvrf, &p, client);
	}

stream_failure:
	return;
}


/*
 * Register zebra server interface information.
 * Send current all interface and address information.
 */
static void zread_interface_add(ZAPI_HANDLER_ARGS)
{
	struct vrf *vrf;
	struct interface *ifp;

	vrf_id_t vrf_id = zvrf_id(zvrf);
	/* Non-default, known VRF: dump only that VRF's interfaces. */
	if (vrf_id != VRF_DEFAULT && vrf_id != VRF_UNKNOWN) {
		FOR_ALL_INTERFACES (zvrf->vrf, ifp) {
			/* Skip pseudo interface. */
			if (!CHECK_FLAG(ifp->status, ZEBRA_INTERFACE_ACTIVE))
				continue;

			zsend_interface_add(client, ifp);
			zsend_interface_link_params(client, ifp);
			zsend_interface_addresses(client, ifp);
		}
		return;
	}

	/* Default/unknown VRF: dump interfaces of every VRF. */
	RB_FOREACH (vrf, vrf_id_head, &vrfs_by_id) {
		FOR_ALL_INTERFACES (vrf, ifp) {
			/* Skip pseudo interface. */
			if (!CHECK_FLAG(ifp->status, ZEBRA_INTERFACE_ACTIVE))
				continue;

			zsend_interface_add(client, ifp);
			zsend_interface_link_params(client, ifp);
			zsend_interface_addresses(client, ifp);
		}
	}
}

/* Unregister zebra server interface information. */
/* Intentionally empty: nothing to tear down server-side. */
static void zread_interface_delete(ZAPI_HANDLER_ARGS)
{
}

/*
 * Handle message requesting interface be set up or down.
 */
static void zread_interface_set_protodown(ZAPI_HANDLER_ARGS)
{
	ifindex_t ifindex;
	struct interface *ifp;
	char down;
	enum protodown_reasons reason;

	STREAM_GETL(msg, ifindex);
	STREAM_GETC(msg, down);

	/* set ifdown */
	ifp = if_lookup_by_index_per_ns(zebra_ns_lookup(NS_DEFAULT), ifindex);

	if (!ifp) {
		zlog_warn(
			"Cannot set protodown %s for interface %u; does not exist",
			down ?
"on" : "off", ifindex); + + return; + } + + switch (client->proto) { + case ZEBRA_ROUTE_VRRP: + reason = ZEBRA_PROTODOWN_VRRP; + break; + case ZEBRA_ROUTE_SHARP: + reason = ZEBRA_PROTODOWN_SHARP; + break; + default: + reason = 0; + break; + } + + zebra_if_set_protodown(ifp, down, reason); + +stream_failure: + return; +} + +bool zserv_nexthop_num_warn(const char *caller, const struct prefix *p, + const unsigned int nexthop_num) +{ + if (nexthop_num > zrouter.multipath_num) { + char buff[PREFIX2STR_BUFFER]; + + if (p) + prefix2str(p, buff, sizeof(buff)); + + flog_warn( + EC_ZEBRA_MORE_NH_THAN_MULTIPATH, + "%s: Prefix %s has %d nexthops, but we can only use the first %d", + caller, (p ? buff : "(NULL)"), nexthop_num, + zrouter.multipath_num); + return true; + } + + return false; +} + +/* + * Create a new nexthop based on a zapi nexthop. + */ +static struct nexthop *nexthop_from_zapi(const struct zapi_nexthop *api_nh, + uint32_t flags, struct prefix *p, + uint16_t backup_nexthop_num) +{ + struct nexthop *nexthop = NULL; + struct ipaddr vtep_ip; + struct interface *ifp; + int i; + char nhbuf[INET6_ADDRSTRLEN] = ""; + + switch (api_nh->type) { + case NEXTHOP_TYPE_IFINDEX: + nexthop = nexthop_from_ifindex(api_nh->ifindex, api_nh->vrf_id); + break; + case NEXTHOP_TYPE_IPV4: + if (IS_ZEBRA_DEBUG_RECV) { + inet_ntop(AF_INET, &api_nh->gate.ipv4, nhbuf, + sizeof(nhbuf)); + zlog_debug("%s: nh=%s, vrf_id=%d", __func__, + nhbuf, api_nh->vrf_id); + } + nexthop = nexthop_from_ipv4(&api_nh->gate.ipv4, NULL, + api_nh->vrf_id); + break; + case NEXTHOP_TYPE_IPV4_IFINDEX: + if (IS_ZEBRA_DEBUG_RECV) { + inet_ntop(AF_INET, &api_nh->gate.ipv4, nhbuf, + sizeof(nhbuf)); + zlog_debug("%s: nh=%s, vrf_id=%d, ifindex=%d", + __func__, nhbuf, api_nh->vrf_id, + api_nh->ifindex); + } + + nexthop = nexthop_from_ipv4_ifindex( + &api_nh->gate.ipv4, NULL, api_nh->ifindex, + api_nh->vrf_id); + + /* Special handling for IPv4 routes sourced from EVPN: + * the nexthop and associated MAC need to be 
installed. + */ + if (CHECK_FLAG(api_nh->flags, ZAPI_NEXTHOP_FLAG_EVPN)) { + memset(&vtep_ip, 0, sizeof(vtep_ip)); + vtep_ip.ipa_type = IPADDR_V4; + memcpy(&(vtep_ip.ipaddr_v4), &(api_nh->gate.ipv4), + sizeof(struct in_addr)); + zebra_rib_queue_evpn_route_add( + api_nh->vrf_id, &api_nh->rmac, &vtep_ip, p); + SET_FLAG(nexthop->flags, NEXTHOP_FLAG_EVPN); + } + break; + case NEXTHOP_TYPE_IPV6: + if (IS_ZEBRA_DEBUG_RECV) { + inet_ntop(AF_INET6, &api_nh->gate.ipv6, nhbuf, + sizeof(nhbuf)); + zlog_debug("%s: nh=%s, vrf_id=%d", __func__, + nhbuf, api_nh->vrf_id); + } + nexthop = nexthop_from_ipv6(&api_nh->gate.ipv6, api_nh->vrf_id); + break; + case NEXTHOP_TYPE_IPV6_IFINDEX: + if (IS_ZEBRA_DEBUG_RECV) { + inet_ntop(AF_INET6, &api_nh->gate.ipv6, nhbuf, + sizeof(nhbuf)); + zlog_debug("%s: nh=%s, vrf_id=%d, ifindex=%d", + __func__, nhbuf, api_nh->vrf_id, + api_nh->ifindex); + } + nexthop = nexthop_from_ipv6_ifindex(&api_nh->gate.ipv6, + api_nh->ifindex, + api_nh->vrf_id); + + /* Special handling for IPv6 routes sourced from EVPN: + * the nexthop and associated MAC need to be installed. + */ + if (CHECK_FLAG(api_nh->flags, ZAPI_NEXTHOP_FLAG_EVPN)) { + memset(&vtep_ip, 0, sizeof(vtep_ip)); + vtep_ip.ipa_type = IPADDR_V6; + memcpy(&vtep_ip.ipaddr_v6, &(api_nh->gate.ipv6), + sizeof(struct in6_addr)); + zebra_rib_queue_evpn_route_add( + api_nh->vrf_id, &api_nh->rmac, &vtep_ip, p); + SET_FLAG(nexthop->flags, NEXTHOP_FLAG_EVPN); + } + break; + case NEXTHOP_TYPE_BLACKHOLE: + if (IS_ZEBRA_DEBUG_RECV) + zlog_debug("%s: nh blackhole %d", + __func__, api_nh->bh_type); + + nexthop = + nexthop_from_blackhole(api_nh->bh_type, api_nh->vrf_id); + break; + } + + /* Return early if we couldn't process the zapi nexthop */ + if (nexthop == NULL) { + goto done; + } + + /* Mark nexthop as onlink either if client has explicitly told us + * to or if the nexthop is on an 'unnumbered' interface. 
+ */ + if (CHECK_FLAG(api_nh->flags, ZAPI_NEXTHOP_FLAG_ONLINK)) + SET_FLAG(nexthop->flags, NEXTHOP_FLAG_ONLINK); + else if (api_nh->type == NEXTHOP_TYPE_IPV4_IFINDEX) { + ifp = if_lookup_by_index(api_nh->ifindex, api_nh->vrf_id); + if (ifp && connected_is_unnumbered(ifp)) + SET_FLAG(nexthop->flags, NEXTHOP_FLAG_ONLINK); + } + + if (CHECK_FLAG(api_nh->flags, ZAPI_NEXTHOP_FLAG_WEIGHT)) + nexthop->weight = api_nh->weight; + + if (CHECK_FLAG(api_nh->flags, ZAPI_NEXTHOP_FLAG_HAS_BACKUP)) { + /* Validate count */ + if (api_nh->backup_num > NEXTHOP_MAX_BACKUPS) { + if (IS_ZEBRA_DEBUG_RECV || IS_ZEBRA_DEBUG_EVENT) + zlog_debug("%s: invalid backup nh count %d", + __func__, api_nh->backup_num); + nexthop_free(nexthop); + nexthop = NULL; + goto done; + } + + /* Copy backup info */ + SET_FLAG(nexthop->flags, NEXTHOP_FLAG_HAS_BACKUP); + nexthop->backup_num = api_nh->backup_num; + + for (i = 0; i < api_nh->backup_num; i++) { + /* Validate backup index */ + if (api_nh->backup_idx[i] < backup_nexthop_num) { + nexthop->backup_idx[i] = api_nh->backup_idx[i]; + } else { + if (IS_ZEBRA_DEBUG_RECV || IS_ZEBRA_DEBUG_EVENT) + zlog_debug("%s: invalid backup nh idx %d", + __func__, + api_nh->backup_idx[i]); + nexthop_free(nexthop); + nexthop = NULL; + goto done; + } + } + } + +done: + return nexthop; +} + +static bool zapi_read_nexthops(struct zserv *client, struct prefix *p, + struct zapi_nexthop *nhops, uint32_t flags, + uint32_t message, uint16_t nexthop_num, + uint16_t backup_nh_num, + struct nexthop_group **png, + struct nhg_backup_info **pbnhg) +{ + struct nexthop_group *ng = NULL; + struct nhg_backup_info *bnhg = NULL; + uint16_t i; + struct nexthop *last_nh = NULL; + + assert(!(png && pbnhg)); + + if (png) + ng = nexthop_group_new(); + + if (pbnhg && backup_nh_num > 0) { + if (IS_ZEBRA_DEBUG_RECV) + zlog_debug("%s: adding %d backup nexthops", __func__, + backup_nh_num); + + bnhg = zebra_nhg_backup_alloc(); + } + + /* + * TBD should _all_ of the nexthop add operations use + * 
api_nh->vrf_id instead of re->vrf_id ? I only changed + * for cases NEXTHOP_TYPE_IPV4 and NEXTHOP_TYPE_IPV6. + */ + for (i = 0; i < nexthop_num; i++) { + struct nexthop *nexthop; + enum lsp_types_t label_type; + char nhbuf[NEXTHOP_STRLEN]; + char labelbuf[MPLS_LABEL_STRLEN]; + struct zapi_nexthop *api_nh = &nhops[i]; + + /* Convert zapi nexthop */ + nexthop = nexthop_from_zapi(api_nh, flags, p, backup_nh_num); + if (!nexthop) { + flog_warn( + EC_ZEBRA_NEXTHOP_CREATION_FAILED, + "%s: Nexthops Specified: %u(%u) but we failed to properly create one", + __func__, nexthop_num, i); + if (ng) + nexthop_group_delete(&ng); + if (bnhg) + zebra_nhg_backup_free(&bnhg); + return false; + } + + if (bnhg + && CHECK_FLAG(nexthop->flags, NEXTHOP_FLAG_HAS_BACKUP)) { + if (IS_ZEBRA_DEBUG_RECV) { + nexthop2str(nexthop, nhbuf, sizeof(nhbuf)); + zlog_debug("%s: backup nh %s with BACKUP flag!", + __func__, nhbuf); + } + UNSET_FLAG(nexthop->flags, NEXTHOP_FLAG_HAS_BACKUP); + nexthop->backup_num = 0; + } + + if (CHECK_FLAG(message, ZAPI_MESSAGE_SRTE)) { + SET_FLAG(nexthop->flags, NEXTHOP_FLAG_SRTE); + nexthop->srte_color = api_nh->srte_color; + } + + /* MPLS labels for BGP-LU or Segment Routing */ + if (CHECK_FLAG(api_nh->flags, ZAPI_NEXTHOP_FLAG_LABEL) + && api_nh->type != NEXTHOP_TYPE_IFINDEX + && api_nh->type != NEXTHOP_TYPE_BLACKHOLE + && api_nh->label_num > 0) { + + label_type = lsp_type_from_re_type(client->proto); + nexthop_add_labels(nexthop, label_type, + api_nh->label_num, + &api_nh->labels[0]); + } + + if (CHECK_FLAG(api_nh->flags, ZAPI_NEXTHOP_FLAG_SEG6LOCAL) + && api_nh->type != NEXTHOP_TYPE_BLACKHOLE) { + if (IS_ZEBRA_DEBUG_RECV) + zlog_debug("%s: adding seg6local action %s", + __func__, + seg6local_action2str( + api_nh->seg6local_action)); + + nexthop_add_srv6_seg6local(nexthop, + api_nh->seg6local_action, + &api_nh->seg6local_ctx); + } + + if (CHECK_FLAG(api_nh->flags, ZAPI_NEXTHOP_FLAG_SEG6) + && api_nh->type != NEXTHOP_TYPE_BLACKHOLE) { + if (IS_ZEBRA_DEBUG_RECV) + 
zlog_debug("%s: adding seg6", __func__); + + nexthop_add_srv6_seg6(nexthop, &api_nh->seg6_segs); + } + + if (IS_ZEBRA_DEBUG_RECV) { + labelbuf[0] = '\0'; + nhbuf[0] = '\0'; + + nexthop2str(nexthop, nhbuf, sizeof(nhbuf)); + + if (nexthop->nh_label && + nexthop->nh_label->num_labels > 0) { + mpls_label2str(nexthop->nh_label->num_labels, + nexthop->nh_label->label, + labelbuf, sizeof(labelbuf), + false); + } + + zlog_debug("%s: nh=%s, vrf_id=%d %s", + __func__, nhbuf, api_nh->vrf_id, labelbuf); + } + + if (ng) { + /* Add new nexthop to temporary list. This list is + * canonicalized - sorted - so that it can be hashed + * later in route processing. We expect that the sender + * has sent the list sorted, and the zapi client api + * attempts to enforce that, so this should be + * inexpensive - but it is necessary to support shared + * nexthop-groups. + */ + nexthop_group_add_sorted(ng, nexthop); + } + if (bnhg) { + /* Note that the order of the backup nexthops is + * significant, so we don't sort this list as we do the + * primary nexthops, we just append. 
 */
			if (last_nh)
				NEXTHOP_APPEND(last_nh, nexthop);
			else
				bnhg->nhe->nhg.nexthop = nexthop;

			last_nh = nexthop;
		}
	}


	/* successfully read, set caller pointers now */
	if (png)
		*png = ng;

	if (pbnhg)
		*pbnhg = bnhg;

	return true;
}

/*
 * Decode a zapi nexthop-group message into 'api_nhg'.
 *
 * Reads the owning proto and NHG id; for ZEBRA_NHG_DEL nothing else is
 * present on the wire.  For add/replace, also reads the primary and backup
 * nexthop arrays, rejecting counts above the configured multipath limit and
 * an empty primary list.
 *
 * Returns 0 on success, -1 on any decode/validation failure.
 */
static int zapi_nhg_decode(struct stream *s, int cmd, struct zapi_nhg *api_nhg)
{
	uint16_t i;
	struct zapi_nexthop *znh;

	STREAM_GETW(s, api_nhg->proto);
	STREAM_GETL(s, api_nhg->id);

	/* A delete carries only proto + id; skip the nexthop payload. */
	if (cmd == ZEBRA_NHG_DEL)
		goto done;

	/* Nexthops */
	STREAM_GETW(s, api_nhg->nexthop_num);

	if (zserv_nexthop_num_warn(__func__, NULL, api_nhg->nexthop_num))
		return -1;

	/* An add with zero nexthops is malformed. */
	if (api_nhg->nexthop_num <= 0) {
		flog_warn(EC_ZEBRA_NEXTHOP_CREATION_FAILED,
			  "%s: No nexthops sent", __func__);
		return -1;
	}

	for (i = 0; i < api_nhg->nexthop_num; i++) {
		znh = &((api_nhg->nexthops)[i]);

		if (zapi_nexthop_decode(s, znh, 0, 0) != 0) {
			flog_warn(EC_ZEBRA_NEXTHOP_CREATION_FAILED,
				  "%s: Nexthop creation failed", __func__);
			return -1;
		}
	}

	/* Backup Nexthops (zero is a valid count here) */
	STREAM_GETW(s, api_nhg->backup_nexthop_num);

	if (zserv_nexthop_num_warn(__func__, NULL, api_nhg->backup_nexthop_num))
		return -1;

	for (i = 0; i < api_nhg->backup_nexthop_num; i++) {
		znh = &((api_nhg->backup_nexthops)[i]);

		if (zapi_nexthop_decode(s, znh, 0, 0) != 0) {
			flog_warn(EC_ZEBRA_NEXTHOP_CREATION_FAILED,
				  "%s: Backup Nexthop creation failed",
				  __func__);
			return -1;
		}
	}

done:
	return 0;

stream_failure:
	flog_warn(
		EC_ZEBRA_NEXTHOP_CREATION_FAILED,
		"%s: Nexthop Group decode failed with some sort of stream read failure",
		__func__);
	return -1;
}

/* Handle a ZEBRA_NHG_DEL request from a client. */
static void zread_nhg_del(ZAPI_HANDLER_ARGS)
{
	struct stream *s;
	struct zapi_nhg api_nhg = {};
	struct nhg_hash_entry *nhe;

	s = msg;
	if (zapi_nhg_decode(s, hdr->command, &api_nhg) < 0) {
		if (IS_ZEBRA_DEBUG_RECV)
			zlog_debug("%s: Unable to decode zapi_nhg sent",
				   __func__);
		return;
	}

	/*
	 * Delete the received nhg id
	 */
nhe = zebra_nhg_proto_del(api_nhg.id, api_nhg.proto); + + if (nhe) { + zebra_nhg_decrement_ref(nhe); + zsend_nhg_notify(api_nhg.proto, client->instance, + client->session_id, api_nhg.id, + ZAPI_NHG_REMOVED); + } else + zsend_nhg_notify(api_nhg.proto, client->instance, + client->session_id, api_nhg.id, + ZAPI_NHG_REMOVE_FAIL); +} + +static void zread_nhg_add(ZAPI_HANDLER_ARGS) +{ + struct stream *s; + struct zapi_nhg api_nhg = {}; + struct nexthop_group *nhg = NULL; + struct nhg_backup_info *bnhg = NULL; + struct nhg_hash_entry *nhe; + + s = msg; + if (zapi_nhg_decode(s, hdr->command, &api_nhg) < 0) { + if (IS_ZEBRA_DEBUG_RECV) + zlog_debug("%s: Unable to decode zapi_nhg sent", + __func__); + return; + } + + if ((!zapi_read_nexthops(client, NULL, api_nhg.nexthops, 0, 0, + api_nhg.nexthop_num, + api_nhg.backup_nexthop_num, &nhg, NULL)) + || (!zapi_read_nexthops(client, NULL, api_nhg.backup_nexthops, 0, 0, + api_nhg.backup_nexthop_num, + api_nhg.backup_nexthop_num, NULL, &bnhg))) { + + flog_warn(EC_ZEBRA_NEXTHOP_CREATION_FAILED, + "%s: Nexthop Group Creation failed", __func__); + + /* Free any local allocations */ + nexthop_group_delete(&nhg); + zebra_nhg_backup_free(&bnhg); + + return; + } + + /* Create a temporary nhe */ + nhe = zebra_nhg_alloc(); + nhe->id = api_nhg.id; + nhe->type = api_nhg.proto; + nhe->zapi_instance = client->instance; + nhe->zapi_session = client->session_id; + + /* Take over the list(s) of nexthops */ + nhe->nhg.nexthop = nhg->nexthop; + nhg->nexthop = NULL; + + if (bnhg) { + nhe->backup_info = bnhg; + bnhg = NULL; + } + + /* + * TODO: + * Assume fully resolved for now and install. + * Resolution is going to need some more work. 
+ */ + + /* Enqueue to workqueue for processing */ + rib_queue_nhe_add(nhe); + + /* Free any local allocations */ + nexthop_group_delete(&nhg); + zebra_nhg_backup_free(&bnhg); + +} + +static void zread_route_add(ZAPI_HANDLER_ARGS) +{ + struct stream *s; + struct zapi_route api; + afi_t afi; + struct prefix_ipv6 *src_p = NULL; + struct route_entry *re; + struct nexthop_group *ng = NULL; + struct nhg_backup_info *bnhg = NULL; + int ret; + vrf_id_t vrf_id; + struct nhg_hash_entry nhe, *n = NULL; + + s = msg; + if (zapi_route_decode(s, &api) < 0) { + if (IS_ZEBRA_DEBUG_RECV) + zlog_debug("%s: Unable to decode zapi_route sent", + __func__); + return; + } + + vrf_id = zvrf_id(zvrf); + + if (IS_ZEBRA_DEBUG_RECV) + zlog_debug("%s: p=(%u:%u)%pFX, msg flags=0x%x, flags=0x%x", + __func__, vrf_id, api.tableid, &api.prefix, + (int)api.message, api.flags); + + /* Allocate new route. */ + re = zebra_rib_route_entry_new( + vrf_id, api.type, api.instance, api.flags, api.nhgid, + api.tableid ? api.tableid : zvrf->table_id, api.metric, api.mtu, + api.distance, api.tag); + + if (!CHECK_FLAG(api.message, ZAPI_MESSAGE_NHG) + && (!CHECK_FLAG(api.message, ZAPI_MESSAGE_NEXTHOP) + || api.nexthop_num == 0)) { + flog_warn( + EC_ZEBRA_RX_ROUTE_NO_NEXTHOPS, + "%s: received a route without nexthops for prefix %pFX from client %s", + __func__, &api.prefix, + zebra_route_string(client->proto)); + + XFREE(MTYPE_RE, re); + return; + } + + /* Report misuse of the backup flag */ + if (CHECK_FLAG(api.message, ZAPI_MESSAGE_BACKUP_NEXTHOPS) + && api.backup_nexthop_num == 0) { + if (IS_ZEBRA_DEBUG_RECV || IS_ZEBRA_DEBUG_EVENT) + zlog_debug( + "%s: client %s: BACKUP flag set but no backup nexthops, prefix %pFX", + __func__, zebra_route_string(client->proto), + &api.prefix); + } + + if (!re->nhe_id + && (!zapi_read_nexthops(client, &api.prefix, api.nexthops, + api.flags, api.message, api.nexthop_num, + api.backup_nexthop_num, &ng, NULL) + || !zapi_read_nexthops(client, &api.prefix, api.backup_nexthops, + 
api.flags, api.message, + api.backup_nexthop_num, + api.backup_nexthop_num, NULL, &bnhg))) { + + nexthop_group_delete(&ng); + zebra_nhg_backup_free(&bnhg); + XFREE(MTYPE_RE, re); + return; + } + + if (CHECK_FLAG(api.message, ZAPI_MESSAGE_OPAQUE)) { + re->opaque = + XMALLOC(MTYPE_RE_OPAQUE, + sizeof(struct re_opaque) + api.opaque.length); + re->opaque->length = api.opaque.length; + memcpy(re->opaque->data, api.opaque.data, re->opaque->length); + } + + afi = family2afi(api.prefix.family); + if (afi != AFI_IP6 && CHECK_FLAG(api.message, ZAPI_MESSAGE_SRCPFX)) { + flog_warn(EC_ZEBRA_RX_SRCDEST_WRONG_AFI, + "%s: Received SRC Prefix but afi is not v6", + __func__); + nexthop_group_delete(&ng); + zebra_nhg_backup_free(&bnhg); + XFREE(MTYPE_RE_OPAQUE, re->opaque); + XFREE(MTYPE_RE, re); + return; + } + if (CHECK_FLAG(api.message, ZAPI_MESSAGE_SRCPFX)) + src_p = &api.src_prefix; + + if (api.safi != SAFI_UNICAST && api.safi != SAFI_MULTICAST) { + flog_warn(EC_LIB_ZAPI_MISSMATCH, + "%s: Received safi: %d but we can only accept UNICAST or MULTICAST", + __func__, api.safi); + nexthop_group_delete(&ng); + zebra_nhg_backup_free(&bnhg); + XFREE(MTYPE_RE_OPAQUE, re->opaque); + XFREE(MTYPE_RE, re); + return; + } + + /* + * If we have an ID, this proto owns the NHG it sent along with the + * route, so we just send the ID into rib code with it. + * + * Havent figured out how to handle backup NHs with this yet, so lets + * keep that separate. + * Include backup info with the route. We use a temporary nhe here; + * if this is a new/unknown nhe, a new copy will be allocated + * and stored. 
+ */ + if (!re->nhe_id) { + zebra_nhe_init(&nhe, afi, ng->nexthop); + nhe.nhg.nexthop = ng->nexthop; + nhe.backup_info = bnhg; + n = zebra_nhe_copy(&nhe, 0); + } + ret = rib_add_multipath_nhe(afi, api.safi, &api.prefix, src_p, re, n, + false); + + /* + * rib_add_multipath_nhe only fails in a couple spots + * and in those spots we have not freed memory + */ + if (ret == -1) { + client->error_cnt++; + XFREE(MTYPE_RE_OPAQUE, re->opaque); + XFREE(MTYPE_RE, re); + } + + /* At this point, these allocations are not needed: 're' has been + * retained or freed, and if 're' still exists, it is using + * a reference to a shared group object. + */ + nexthop_group_delete(&ng); + if (bnhg) + zebra_nhg_backup_free(&bnhg); + + /* Stats */ + switch (api.prefix.family) { + case AF_INET: + if (ret == 0) + client->v4_route_add_cnt++; + else if (ret == 1) + client->v4_route_upd8_cnt++; + break; + case AF_INET6: + if (ret == 0) + client->v6_route_add_cnt++; + else if (ret == 1) + client->v6_route_upd8_cnt++; + break; + } +} + +void zapi_re_opaque_free(struct re_opaque *opaque) +{ + XFREE(MTYPE_RE_OPAQUE, opaque); +} + +static void zread_route_del(ZAPI_HANDLER_ARGS) +{ + struct stream *s; + struct zapi_route api; + afi_t afi; + struct prefix_ipv6 *src_p = NULL; + uint32_t table_id; + + s = msg; + if (zapi_route_decode(s, &api) < 0) + return; + + afi = family2afi(api.prefix.family); + if (afi != AFI_IP6 && CHECK_FLAG(api.message, ZAPI_MESSAGE_SRCPFX)) { + flog_warn(EC_ZEBRA_RX_SRCDEST_WRONG_AFI, + "%s: Received a src prefix while afi is not v6", + __func__); + return; + } + if (CHECK_FLAG(api.message, ZAPI_MESSAGE_SRCPFX)) + src_p = &api.src_prefix; + + if (api.tableid) + table_id = api.tableid; + else + table_id = zvrf->table_id; + + if (IS_ZEBRA_DEBUG_RECV) + zlog_debug("%s: p=(%u:%u)%pFX, msg flags=0x%x, flags=0x%x", + __func__, zvrf_id(zvrf), table_id, &api.prefix, + (int)api.message, api.flags); + + rib_delete(afi, api.safi, zvrf_id(zvrf), api.type, api.instance, + api.flags, 
&api.prefix, src_p, NULL, 0, table_id, api.metric, + api.distance, false); + + /* Stats */ + switch (api.prefix.family) { + case AF_INET: + client->v4_route_del_cnt++; + break; + case AF_INET6: + client->v6_route_del_cnt++; + break; + } +} + +/* MRIB Nexthop lookup for IPv4. */ +static void zread_nexthop_lookup_mrib(ZAPI_HANDLER_ARGS) +{ + struct ipaddr addr; + struct route_entry *re = NULL; + + STREAM_GET_IPADDR(msg, &addr); + + switch (addr.ipa_type) { + case IPADDR_V4: + re = rib_match_ipv4_multicast(zvrf_id(zvrf), addr.ipaddr_v4, + NULL); + break; + case IPADDR_V6: + re = rib_match_ipv6_multicast(zvrf_id(zvrf), addr.ipaddr_v6, + NULL); + break; + case IPADDR_NONE: + /* ??? */ + goto stream_failure; + } + + zsend_nexthop_lookup_mrib(client, &addr, re, zvrf); + +stream_failure: + return; +} + +/* Register zebra server router-id information. Send current router-id */ +static void zread_router_id_add(ZAPI_HANDLER_ARGS) +{ + afi_t afi; + struct prefix p; + struct prefix zero; + + STREAM_GETW(msg, afi); + + if (afi <= AFI_UNSPEC || afi >= AFI_MAX) { + zlog_warn( + "Invalid AFI %u while registering for router ID notifications", + afi); + goto stream_failure; + } + + /* Router-id information is needed. */ + vrf_bitmap_set(client->ridinfo[afi], zvrf_id(zvrf)); + + router_id_get(afi, &p, zvrf); + + /* + * If we have not officially setup a router-id let's not + * tell the upper level protocol about it yet. + */ + memset(&zero, 0, sizeof(zero)); + if ((p.family == AF_INET && p.u.prefix4.s_addr == INADDR_ANY) + || (p.family == AF_INET6 + && memcmp(&p.u.prefix6, &zero.u.prefix6, + sizeof(struct in6_addr)) + == 0)) + return; + + zsend_router_id_update(client, afi, &p, zvrf_id(zvrf)); + +stream_failure: + return; +} + +/* Unregister zebra server router-id information. 
*/ +static void zread_router_id_delete(ZAPI_HANDLER_ARGS) +{ + afi_t afi; + + STREAM_GETW(msg, afi); + + if (afi <= AFI_UNSPEC || afi >= AFI_MAX) { + zlog_warn( + "Invalid AFI %u while unregistering from router ID notifications", + afi); + goto stream_failure; + } + + vrf_bitmap_unset(client->ridinfo[afi], zvrf_id(zvrf)); + +stream_failure: + return; +} + +static void zsend_capabilities(struct zserv *client, struct zebra_vrf *zvrf) +{ + struct stream *s = stream_new(ZEBRA_MAX_PACKET_SIZ); + + zclient_create_header(s, ZEBRA_CAPABILITIES, zvrf->vrf->vrf_id); + stream_putl(s, vrf_get_backend()); + stream_putc(s, mpls_enabled); + stream_putl(s, zrouter.multipath_num); + stream_putc(s, zebra_mlag_get_role()); + + stream_putw_at(s, 0, stream_get_endp(s)); + zserv_send_message(client, s); +} + +void zsend_capabilities_all_clients(void) +{ + struct listnode *node, *nnode; + struct zebra_vrf *zvrf; + struct zserv *client; + + zvrf = vrf_info_lookup(VRF_DEFAULT); + for (ALL_LIST_ELEMENTS(zrouter.client_list, node, nnode, client)) { + /* Do not send unsolicited messages to synchronous clients. 
 */
		if (client->synchronous)
			continue;

		zsend_capabilities(client, zvrf);
	}
}

/* Tie up route-type and client->sock */
static void zread_hello(ZAPI_HANDLER_ARGS)
{
	/* type of protocol (lib/zebra.h) */
	uint8_t proto;
	unsigned short instance;
	uint8_t notify;		/* client wants route-owner notifications */
	uint8_t synchronous;	/* client runs in synchronous mode */
	uint32_t session_id;

	STREAM_GETC(msg, proto);
	STREAM_GETW(msg, instance);
	STREAM_GETL(msg, session_id);
	STREAM_GETC(msg, notify);
	STREAM_GETC(msg, synchronous);
	if (notify)
		client->notify_owner = true;

	if (synchronous)
		client->synchronous = true;

	/* accept only dynamic routing protocols */
	if ((proto < ZEBRA_ROUTE_MAX) && (proto > ZEBRA_ROUTE_CONNECT)) {
		zlog_notice(
			"client %d says hello and bids fair to announce only %s routes vrf=%u",
			client->sock, zebra_route_string(proto),
			zvrf->vrf->vrf_id);
		if (instance)
			zlog_notice("client protocol instance %d", instance);

		client->proto = proto;
		client->instance = instance;
		client->session_id = session_id;

		/* Graceful restart processing for client connect */
		zebra_gr_client_reconnect(client);
	}

	/* Sync clients get no unsolicited pushes; skip the initial dump. */
	if (!client->synchronous) {
		zsend_capabilities(client, zvrf);
		zebra_vrf_update_all(client);
	}
stream_failure:
	return;
}

/* Unregister all information in a VRF.
*/ +static void zread_vrf_unregister(ZAPI_HANDLER_ARGS) +{ + int i; + afi_t afi; + + for (afi = AFI_IP; afi < AFI_MAX; afi++) { + for (i = 0; i < ZEBRA_ROUTE_MAX; i++) + vrf_bitmap_unset(client->redist[afi][i], zvrf_id(zvrf)); + vrf_bitmap_unset(client->redist_default[afi], zvrf_id(zvrf)); + vrf_bitmap_unset(client->ridinfo[afi], zvrf_id(zvrf)); + vrf_bitmap_unset(client->nhrp_neighinfo[afi], zvrf_id(zvrf)); + } +} + +/* + * Validate incoming zapi mpls lsp / labels message + */ +static int zapi_labels_validate(const struct zapi_labels *zl) +{ + int ret = -1; + int i, j, idx; + uint32_t bits[8]; + uint32_t ival; + const struct zapi_nexthop *znh; + + /* Validate backup info: no duplicates for a single primary */ + if (zl->backup_nexthop_num == 0) { + ret = 0; + goto done; + } + + for (j = 0; j < zl->nexthop_num; j++) { + znh = &zl->nexthops[j]; + + memset(bits, 0, sizeof(bits)); + + for (i = 0; i < znh->backup_num; i++) { + idx = znh->backup_idx[i] / 32; + + ival = 1 << znh->backup_idx[i] % 32; + + /* Check whether value is already used */ + if (ival & bits[idx]) { + /* Fail */ + + if (IS_ZEBRA_DEBUG_RECV) + zlog_debug("%s: invalid zapi mpls message: duplicate backup nexthop index %d", + __func__, + znh->backup_idx[i]); + goto done; + } + + /* Mark index value */ + bits[idx] |= ival; + } + } + + ret = 0; + +done: + + return ret; +} + +/* + * Handle request to create an MPLS LSP. + * + * A single message can fully specify an LSP with multiple nexthops. + * + * When the optional ZAPI_LABELS_FTN flag is set, the specified FEC (route) is + * updated to use the received label(s). + */ +static void zread_mpls_labels_add(ZAPI_HANDLER_ARGS) +{ + struct stream *s; + struct zapi_labels zl; + + /* Get input stream. 
*/ + s = msg; + if (zapi_labels_decode(s, &zl) < 0) { + if (IS_ZEBRA_DEBUG_RECV) + zlog_debug("%s: Unable to decode zapi_labels sent", + __func__); + return; + } + + if (!mpls_enabled) + return; + + /* Validate; will debug on failure */ + if (zapi_labels_validate(&zl) < 0) + return; + + mpls_zapi_labels_process(true, zvrf, &zl); +} + +/* + * Handle request to delete an MPLS LSP. + * + * An LSP is identified by its type and local label. When the received message + * doesn't contain any nexthop, the whole LSP is deleted. Otherwise, only the + * listed LSP nexthops (aka NHLFEs) are deleted. + * + * When the optional ZAPI_LABELS_FTN flag is set, the labels of the specified + * FEC (route) nexthops are deleted. + */ +static void zread_mpls_labels_delete(ZAPI_HANDLER_ARGS) +{ + struct stream *s; + struct zapi_labels zl; + + /* Get input stream. */ + s = msg; + if (zapi_labels_decode(s, &zl) < 0) { + if (IS_ZEBRA_DEBUG_RECV) + zlog_debug("%s: Unable to decode zapi_labels sent", + __func__); + return; + } + + if (!mpls_enabled) + return; + + if (zl.nexthop_num > 0) { + mpls_zapi_labels_process(false /*delete*/, zvrf, &zl); + } else { + mpls_lsp_uninstall_all_vrf(zvrf, zl.type, zl.local_label); + + if (CHECK_FLAG(zl.message, ZAPI_LABELS_FTN)) + mpls_ftn_uninstall(zvrf, zl.type, &zl.route.prefix, + zl.route.type, zl.route.instance); + } +} + +/* + * Handle request to add an MPLS LSP or change an existing one. + * + * A single message can fully specify an LSP with multiple nexthops. + * + * When the optional ZAPI_LABELS_FTN flag is set, the specified FEC (route) is + * updated to use the received label(s). + * + * NOTE: zebra will use route replace semantics (make-before-break) to update + * the LSP in the forwarding plane if that's supported by the underlying + * platform. + */ +static void zread_mpls_labels_replace(ZAPI_HANDLER_ARGS) +{ + struct stream *s; + struct zapi_labels zl; + + /* Get input stream. 
*/ + s = msg; + if (zapi_labels_decode(s, &zl) < 0) { + if (IS_ZEBRA_DEBUG_RECV) + zlog_debug("%s: Unable to decode zapi_labels sent", + __func__); + return; + } + + if (!mpls_enabled) + return; + + /* Validate; will debug on failure */ + if (zapi_labels_validate(&zl) < 0) + return; + + /* This removes everything, then re-adds from the client's + * zapi message. Since the LSP will be processed later, on this + * this same pthread, all of the changes will 'appear' at once. + */ + mpls_lsp_uninstall_all_vrf(zvrf, zl.type, zl.local_label); + if (CHECK_FLAG(zl.message, ZAPI_LABELS_FTN)) + mpls_ftn_uninstall(zvrf, zl.type, &zl.route.prefix, + zl.route.type, zl.route.instance); + + mpls_zapi_labels_process(true, zvrf, &zl); +} + +static void zread_sr_policy_set(ZAPI_HANDLER_ARGS) +{ + struct stream *s; + struct zapi_sr_policy zp; + struct zapi_srte_tunnel *zt; + struct zebra_sr_policy *policy; + + /* Get input stream. */ + s = msg; + if (zapi_sr_policy_decode(s, &zp) < 0) { + if (IS_ZEBRA_DEBUG_RECV) + zlog_debug("%s: Unable to decode zapi_sr_policy sent", + __func__); + return; + } + zt = &zp.segment_list; + if (zt->label_num < 1) { + if (IS_ZEBRA_DEBUG_RECV) + zlog_debug( + "%s: SR-TE tunnel must contain at least one label", + __func__); + return; + } + + if (!mpls_enabled) + return; + + policy = zebra_sr_policy_find(zp.color, &zp.endpoint); + if (!policy) + policy = zebra_sr_policy_add(zp.color, &zp.endpoint, zp.name); + /* TODO: per-VRF list of SR-TE policies. */ + policy->zvrf = zvrf; + + zebra_sr_policy_validate(policy, &zp.segment_list); +} + +static void zread_sr_policy_delete(ZAPI_HANDLER_ARGS) +{ + struct stream *s; + struct zapi_sr_policy zp; + struct zebra_sr_policy *policy; + + /* Get input stream. 
*/ + s = msg; + if (zapi_sr_policy_decode(s, &zp) < 0) { + if (IS_ZEBRA_DEBUG_RECV) + zlog_debug("%s: Unable to decode zapi_sr_policy sent", + __func__); + return; + } + + if (!mpls_enabled) + return; + + policy = zebra_sr_policy_find(zp.color, &zp.endpoint); + if (!policy) { + if (IS_ZEBRA_DEBUG_RECV) + zlog_debug("%s: Unable to find SR-TE policy", __func__); + return; + } + + zebra_sr_policy_del(policy); +} + +int zsend_sr_policy_notify_status(uint32_t color, struct ipaddr *endpoint, + char *name, int status) +{ + struct zserv *client; + struct stream *s; + + client = zserv_find_client(ZEBRA_ROUTE_SRTE, 0); + if (!client) { + if (IS_ZEBRA_DEBUG_PACKET) + zlog_debug( + "Not notifying pathd about policy %s" + " status change to %d", + name, status); + return 0; + } + + if (IS_ZEBRA_DEBUG_PACKET) + zlog_debug( + "Notifying pathd about policy %s status change" + " to %d", + name, status); + + s = stream_new(ZEBRA_MAX_PACKET_SIZ); + stream_reset(s); + + zclient_create_header(s, ZEBRA_SR_POLICY_NOTIFY_STATUS, VRF_DEFAULT); + stream_putl(s, color); + stream_put_ipaddr(s, endpoint); + stream_write(s, name, SRTE_POLICY_NAME_MAX_LENGTH); + stream_putl(s, status); + + stream_putw_at(s, 0, stream_get_endp(s)); + + return zserv_send_message(client, s); +} + +/* Send client close notify to client */ +int zsend_client_close_notify(struct zserv *client, struct zserv *closed_client) +{ + struct stream *s = stream_new(ZEBRA_MAX_PACKET_SIZ); + + zclient_create_header(s, ZEBRA_CLIENT_CLOSE_NOTIFY, VRF_DEFAULT); + + stream_putc(s, closed_client->proto); + stream_putw(s, closed_client->instance); + stream_putl(s, closed_client->session_id); + + stream_putw_at(s, 0, stream_get_endp(s)); + + return zserv_send_message(client, s); +} + +int zsend_srv6_manager_get_locator_chunk_response(struct zserv *client, + vrf_id_t vrf_id, + struct srv6_locator *loc) +{ + struct srv6_locator_chunk chunk = {}; + struct stream *s = stream_new(ZEBRA_MAX_PACKET_SIZ); + + strlcpy(chunk.locator_name, 
loc->name, sizeof(chunk.locator_name)); + chunk.prefix = loc->prefix; + chunk.block_bits_length = loc->block_bits_length; + chunk.node_bits_length = loc->node_bits_length; + chunk.function_bits_length = loc->function_bits_length; + chunk.argument_bits_length = loc->argument_bits_length; + chunk.keep = 0; + chunk.proto = client->proto; + chunk.instance = client->instance; + + zclient_create_header(s, ZEBRA_SRV6_MANAGER_GET_LOCATOR_CHUNK, vrf_id); + zapi_srv6_locator_chunk_encode(s, &chunk); + stream_putw_at(s, 0, stream_get_endp(s)); + return zserv_send_message(client, s); +} + +/* Send response to a table manager connect request to client */ +static void zread_table_manager_connect(struct zserv *client, + struct stream *msg, vrf_id_t vrf_id) +{ + struct stream *s; + uint8_t proto; + uint16_t instance; + struct vrf *vrf = vrf_lookup_by_id(vrf_id); + + s = msg; + + /* Get data. */ + STREAM_GETC(s, proto); + STREAM_GETW(s, instance); + + /* accept only dynamic routing protocols */ + if ((proto >= ZEBRA_ROUTE_MAX) || (proto <= ZEBRA_ROUTE_STATIC)) { + flog_err(EC_ZEBRA_TM_WRONG_PROTO, + "client %d has wrong protocol %s", client->sock, + zebra_route_string(proto)); + zsend_table_manager_connect_response(client, vrf_id, 1); + return; + } + zlog_notice("client %d with vrf %s(%u) instance %u connected as %s", + client->sock, VRF_LOGNAME(vrf), vrf_id, instance, + zebra_route_string(proto)); + client->proto = proto; + client->instance = instance; + + /* + * Release previous labels of same protocol and instance. + * This is done in case it restarted from an unexpected shutdown. + */ + release_daemon_table_chunks(client); + + zsend_table_manager_connect_response(client, vrf_id, 0); + +stream_failure: + return; +} + +static void zread_label_manager_connect(struct zserv *client, + struct stream *msg, vrf_id_t vrf_id) +{ + struct stream *s; + /* type of protocol (lib/zebra.h) */ + uint8_t proto; + unsigned short instance; + + /* Get input stream. */ + s = msg; + + /* Get data. 
 */
	STREAM_GETC(s, proto);
	STREAM_GETW(s, instance);

	/* accept only dynamic routing protocols */
	if ((proto >= ZEBRA_ROUTE_MAX) || (proto <= ZEBRA_ROUTE_STATIC)) {
		flog_err(EC_ZEBRA_TM_WRONG_PROTO,
			 "client %d has wrong protocol %s", client->sock,
			 zebra_route_string(proto));
		zsend_label_manager_connect_response(client, vrf_id, 1);
		return;
	}

	/* recall proto and instance in this socket */
	client->proto = proto;
	client->instance = instance;

	/* call hook for connection using wrapper */
	lm_client_connect_call(client, vrf_id);

stream_failure:
	return;
}

/*
 * Handle a label-chunk request: decode proto/instance/keep/size/base and
 * forward to the label-manager hook, which sends the response.
 */
static void zread_get_label_chunk(struct zserv *client, struct stream *msg,
				  vrf_id_t vrf_id)
{
	struct stream *s;
	uint8_t keep;		/* keep chunk across client restart? */
	uint32_t size, base;	/* requested chunk size and preferred base */
	struct label_manager_chunk *lmc = NULL;
	uint8_t proto;
	unsigned short instance;

	/* Get input stream. */
	s = msg;

	/* Get data. */
	STREAM_GETC(s, proto);
	STREAM_GETW(s, instance);
	STREAM_GETC(s, keep);
	STREAM_GETL(s, size);
	STREAM_GETL(s, base);

	/* NOTE(review): these values come off the wire, so a misbehaving
	 * client can crash the daemon via this assert; consider rejecting
	 * the request instead -- confirm intended policy.
	 */
	assert(proto == client->proto && instance == client->instance);

	/* call hook to get a chunk using wrapper */
	lm_get_chunk_call(&lmc, client, keep, size, base, vrf_id);

stream_failure:
	return;
}

/* Release a previously assigned label chunk back to the label manager. */
static void zread_release_label_chunk(struct zserv *client, struct stream *msg)
{
	struct stream *s;
	uint32_t start, end;	/* inclusive label range to release */
	uint8_t proto;
	unsigned short instance;

	/* Get input stream. */
	s = msg;

	/* Get data.
*/ + STREAM_GETC(s, proto); + STREAM_GETW(s, instance); + STREAM_GETL(s, start); + STREAM_GETL(s, end); + + assert(proto == client->proto && instance == client->instance); + + /* call hook to release a chunk using wrapper */ + lm_release_chunk_call(client, start, end); + +stream_failure: + return; +} + +static void zread_label_manager_request(ZAPI_HANDLER_ARGS) +{ + if (hdr->command == ZEBRA_LABEL_MANAGER_CONNECT + || hdr->command == ZEBRA_LABEL_MANAGER_CONNECT_ASYNC) + zread_label_manager_connect(client, msg, zvrf_id(zvrf)); + else { + if (hdr->command == ZEBRA_GET_LABEL_CHUNK) + zread_get_label_chunk(client, msg, zvrf_id(zvrf)); + else if (hdr->command == ZEBRA_RELEASE_LABEL_CHUNK) + zread_release_label_chunk(client, msg); + } +} + +static void zread_get_table_chunk(struct zserv *client, struct stream *msg, + struct zebra_vrf *zvrf) +{ + struct stream *s; + uint32_t size; + struct table_manager_chunk *tmc; + + /* Get input stream. */ + s = msg; + + /* Get data. */ + STREAM_GETL(s, size); + + tmc = assign_table_chunk(client->proto, client->instance, size, zvrf); + if (!tmc) + flog_err(EC_ZEBRA_TM_CANNOT_ASSIGN_CHUNK, + "%s: Unable to assign Table Chunk of size %u", + __func__, size); + else + zlog_debug("Assigned Table Chunk %u - %u", tmc->start, + tmc->end); + /* send response back */ + zsend_assign_table_chunk_response(client, zvrf_id(zvrf), tmc); + +stream_failure: + return; +} + +static void zread_release_table_chunk(struct zserv *client, struct stream *msg, + struct zebra_vrf *zvrf) +{ + struct stream *s; + uint32_t start, end; + + /* Get input stream. */ + s = msg; + + /* Get data. 
*/ + STREAM_GETL(s, start); + STREAM_GETL(s, end); + + release_table_chunk(client->proto, client->instance, start, end, zvrf); + +stream_failure: + return; +} + +static void zread_table_manager_request(ZAPI_HANDLER_ARGS) +{ + /* to avoid sending other messages like ZEBRA_INTERFACE_UP */ + if (hdr->command == ZEBRA_TABLE_MANAGER_CONNECT) + zread_table_manager_connect(client, msg, zvrf_id(zvrf)); + else { + /* Sanity: don't allow 'unidentified' requests */ + if (!client->proto) { + flog_err( + EC_ZEBRA_TM_ALIENS, + "Got SRv6 request from an unidentified client"); + return; + } + if (hdr->command == ZEBRA_GET_TABLE_CHUNK) + zread_get_table_chunk(client, msg, zvrf); + else if (hdr->command == ZEBRA_RELEASE_TABLE_CHUNK) + zread_release_table_chunk(client, msg, zvrf); + } +} + +static void zread_srv6_manager_get_locator_chunk(struct zserv *client, + struct stream *msg, + vrf_id_t vrf_id) +{ + struct stream *s = msg; + uint16_t len; + char locator_name[SRV6_LOCNAME_SIZE] = {0}; + + /* Get data. */ + STREAM_GETW(s, len); + STREAM_GET(locator_name, s, len); + + /* call hook to get a chunk using wrapper */ + struct srv6_locator *loc = NULL; + srv6_manager_get_locator_chunk_call(&loc, client, locator_name, vrf_id); + +stream_failure: + return; +} + +static void zread_srv6_manager_release_locator_chunk(struct zserv *client, + struct stream *msg, + vrf_id_t vrf_id) +{ + struct stream *s = msg; + uint16_t len; + char locator_name[SRV6_LOCNAME_SIZE] = {0}; + + /* Get data. 
*/ + STREAM_GETW(s, len); + STREAM_GET(locator_name, s, len); + + /* call hook to release a chunk using wrapper */ + srv6_manager_release_locator_chunk_call(client, locator_name, vrf_id); + +stream_failure: + return; +} + +static void zread_srv6_manager_request(ZAPI_HANDLER_ARGS) +{ + switch (hdr->command) { + case ZEBRA_SRV6_MANAGER_GET_LOCATOR_CHUNK: + zread_srv6_manager_get_locator_chunk(client, msg, + zvrf_id(zvrf)); + break; + case ZEBRA_SRV6_MANAGER_RELEASE_LOCATOR_CHUNK: + zread_srv6_manager_release_locator_chunk(client, msg, + zvrf_id(zvrf)); + break; + default: + zlog_err("%s: unknown SRv6 Manager command", __func__); + break; + } +} + +static void zread_pseudowire(ZAPI_HANDLER_ARGS) +{ + struct stream *s; + char ifname[INTERFACE_NAMSIZ]; + ifindex_t ifindex; + int type; + int af; + union g_addr nexthop; + uint32_t local_label; + uint32_t remote_label; + uint8_t flags; + union pw_protocol_fields data; + uint8_t protocol; + struct zebra_pw *pw; + + /* Get input stream. */ + s = msg; + + /* Get data. 
	 */
	STREAM_GET(ifname, s, INTERFACE_NAMSIZ);
	/* Defensive NUL termination of the client-supplied name. */
	ifname[INTERFACE_NAMSIZ - 1] = '\0';
	STREAM_GETL(s, ifindex);
	STREAM_GETL(s, type);
	STREAM_GETL(s, af);
	switch (af) {
	case AF_INET:
		STREAM_GET(&nexthop.ipv4.s_addr, s, IPV4_MAX_BYTELEN);
		break;
	case AF_INET6:
		STREAM_GET(&nexthop.ipv6, s, 16);
		break;
	default:
		/* Unknown address family: drop the message. */
		return;
	}
	STREAM_GETL(s, local_label);
	STREAM_GETL(s, remote_label);
	STREAM_GETC(s, flags);
	STREAM_GET(&data, s, sizeof(data));
	protocol = client->proto;

	pw = zebra_pw_find(zvrf, ifname);
	switch (hdr->command) {
	case ZEBRA_PW_ADD:
		if (pw) {
			flog_warn(EC_ZEBRA_PSEUDOWIRE_EXISTS,
				  "%s: pseudowire %s already exists [%s]",
				  __func__, ifname,
				  zserv_command_string(hdr->command));
			return;
		}

		zebra_pw_add(zvrf, ifname, protocol, client);
		break;
	case ZEBRA_PW_DELETE:
		if (!pw) {
			flog_warn(EC_ZEBRA_PSEUDOWIRE_NONEXISTENT,
				  "%s: pseudowire %s not found [%s]", __func__,
				  ifname, zserv_command_string(hdr->command));
			return;
		}

		zebra_pw_del(zvrf, pw);
		break;
	case ZEBRA_PW_SET:
	case ZEBRA_PW_UNSET:
		if (!pw) {
			flog_warn(EC_ZEBRA_PSEUDOWIRE_NONEXISTENT,
				  "%s: pseudowire %s not found [%s]", __func__,
				  ifname, zserv_command_string(hdr->command));
			return;
		}

		switch (hdr->command) {
		case ZEBRA_PW_SET:
			pw->enabled = 1;
			break;
		case ZEBRA_PW_UNSET:
			pw->enabled = 0;
			break;
		}

		zebra_pw_change(pw, ifindex, type, af, &nexthop, local_label,
				remote_label, flags, &data);
		break;
	}

stream_failure:
	return;
}

/*
 * Enslave one interface to another: decode two (vrf_id, ifindex) pairs
 * (master first, then slave) and hand the pair to the kernel dataplane.
 * Silently ignores lookups that fail.
 */
static void zread_interface_set_master(ZAPI_HANDLER_ARGS)
{
	struct interface *master;
	struct interface *slave;
	struct stream *s = msg;
	int ifindex;
	vrf_id_t vrf_id;

	STREAM_GETL(s, vrf_id);
	STREAM_GETL(s, ifindex);
	master = if_lookup_by_index(ifindex, vrf_id);

	STREAM_GETL(s, vrf_id);
	STREAM_GETL(s, ifindex);
	slave = if_lookup_by_index(ifindex, vrf_id);

	if (!master || !slave)
		return;

	kernel_interface_set_master(master, slave);

stream_failure:
	return;
}


/*
 * Install/replace the MPLS label associated with a VRF for one AFI.
 *
 * Uninstalls the old label's LSP from the default VRF unless another
 * AFI of this VRF still shares that label, then installs the new label
 * pointing at the VRF device (or "lo" for the default VRF).
 *
 * Note: when the new label equals the currently installed one the
 * function returns before reading the lsp type byte from the stream;
 * that is safe because each ZAPI message is decoded independently.
 */
static void zread_vrf_label(ZAPI_HANDLER_ARGS)
{
	struct interface *ifp;
	mpls_label_t nlabel;
	afi_t afi;
	struct stream *s;
	struct zebra_vrf *def_zvrf;
	enum lsp_types_t ltype;

	s = msg;
	STREAM_GETL(s, nlabel);
	STREAM_GETC(s, afi);

	if (!(IS_VALID_AFI(afi))) {
		zlog_warn("Invalid AFI for VRF label: %u", afi);
		return;
	}

	if (nlabel == zvrf->label[afi]) {
		/*
		 * Nothing to do here move along
		 */
		return;
	}

	STREAM_GETC(s, ltype);

	if (zvrf->vrf->vrf_id != VRF_DEFAULT)
		ifp = if_lookup_by_name(zvrf->vrf->name, zvrf->vrf->vrf_id);
	else
		ifp = if_lookup_by_name("lo", VRF_DEFAULT);

	if (!ifp) {
		zlog_debug("Unable to find specified Interface for %s",
			   zvrf->vrf->name);
		return;
	}

	def_zvrf = zebra_vrf_lookup_by_id(VRF_DEFAULT);

	if (zvrf->label[afi] != MPLS_LABEL_NONE) {
		afi_t scrubber;
		bool really_remove;

		/* Keep the LSP if another AFI still uses the same label. */
		really_remove = true;
		for (scrubber = AFI_IP; scrubber < AFI_MAX; scrubber++) {
			if (scrubber == afi)
				continue;

			if (zvrf->label[scrubber] == MPLS_LABEL_NONE)
				continue;

			if (zvrf->label[afi] == zvrf->label[scrubber]) {
				really_remove = false;
				break;
			}
		}

		if (really_remove)
			mpls_lsp_uninstall(def_zvrf, ltype, zvrf->label[afi],
					   NEXTHOP_TYPE_IFINDEX, NULL,
					   ifp->ifindex, false /*backup*/);
	}

	if (nlabel != MPLS_LABEL_NONE) {
		mpls_label_t out_label = MPLS_LABEL_IMPLICIT_NULL;

		mpls_lsp_install(def_zvrf, ltype, nlabel, 1, &out_label,
				 NEXTHOP_TYPE_IFINDEX, NULL, ifp->ifindex);
	}

	zvrf->label[afi] = nlabel;
	zvrf->label_proto[afi] = client->proto;

stream_failure:
	return;
}

/*
 * Decode a batch of PBR rules and add or delete each one, deriving the
 * filter bitmap from which fields were actually supplied.
 */
static inline void zread_rule(ZAPI_HANDLER_ARGS)
{
	struct zebra_pbr_rule zpr;
	struct stream *s;
	uint32_t total, i;
	char ifname[INTERFACE_NAMSIZ + 1] = {};

	s = msg;
	STREAM_GETL(s, total);

	for (i = 0; i < total; i++) {
		memset(&zpr, 0, sizeof(zpr));

		zpr.sock = client->sock;
		zpr.rule.vrf_id = hdr->vrf_id;
		STREAM_GETL(s, zpr.rule.seq);
		STREAM_GETL(s, zpr.rule.priority);
		STREAM_GETL(s, zpr.rule.unique);
		STREAM_GETC(s, zpr.rule.filter.ip_proto);
		STREAM_GETC(s, zpr.rule.filter.src_ip.family);
		STREAM_GETC(s, zpr.rule.filter.src_ip.prefixlen);
		STREAM_GET(&zpr.rule.filter.src_ip.u.prefix, s,
			   prefix_blen(&zpr.rule.filter.src_ip));
		STREAM_GETW(s, zpr.rule.filter.src_port);
		STREAM_GETC(s, zpr.rule.filter.dst_ip.family);
		STREAM_GETC(s, zpr.rule.filter.dst_ip.prefixlen);
		STREAM_GET(&zpr.rule.filter.dst_ip.u.prefix, s,
			   prefix_blen(&zpr.rule.filter.dst_ip));
		STREAM_GETW(s, zpr.rule.filter.dst_port);
		STREAM_GETC(s, zpr.rule.filter.dsfield);
		STREAM_GETL(s, zpr.rule.filter.fwmark);

		STREAM_GETL(s, zpr.rule.action.queue_id);
		STREAM_GETW(s, zpr.rule.action.vlan_id);
		STREAM_GETW(s, zpr.rule.action.vlan_flags);
		STREAM_GETW(s, zpr.rule.action.pcp);

		STREAM_GETL(s, zpr.rule.action.table);
		STREAM_GET(ifname, s, INTERFACE_NAMSIZ);

		strlcpy(zpr.ifname, ifname, sizeof(zpr.ifname));
		strlcpy(zpr.rule.ifname, ifname, sizeof(zpr.rule.ifname));

		/* Only set a filter bit when the field was supplied. */
		if (!is_default_prefix(&zpr.rule.filter.src_ip))
			zpr.rule.filter.filter_bm |= PBR_FILTER_SRC_IP;

		if (!is_default_prefix(&zpr.rule.filter.dst_ip))
			zpr.rule.filter.filter_bm |= PBR_FILTER_DST_IP;

		if (zpr.rule.filter.src_port)
			zpr.rule.filter.filter_bm |= PBR_FILTER_SRC_PORT;

		if (zpr.rule.filter.dst_port)
			zpr.rule.filter.filter_bm |= PBR_FILTER_DST_PORT;

		if (zpr.rule.filter.dsfield)
			zpr.rule.filter.filter_bm |= PBR_FILTER_DSFIELD;

		if (zpr.rule.filter.ip_proto)
			zpr.rule.filter.filter_bm |= PBR_FILTER_IP_PROTOCOL;

		if (zpr.rule.filter.fwmark)
			zpr.rule.filter.filter_bm |= PBR_FILTER_FWMARK;

		if (!(zpr.rule.filter.src_ip.family == AF_INET
		      || zpr.rule.filter.src_ip.family == AF_INET6)) {
			zlog_warn(
				"Unsupported PBR source IP family: %s (%hhu)",
				family2str(zpr.rule.filter.src_ip.family),
				zpr.rule.filter.src_ip.family);
			return;
		}
		if (!(zpr.rule.filter.dst_ip.family ==
AF_INET + || zpr.rule.filter.dst_ip.family == AF_INET6)) { + zlog_warn( + "Unsupported PBR destination IP family: %s (%hhu)", + family2str(zpr.rule.filter.dst_ip.family), + zpr.rule.filter.dst_ip.family); + return; + } + + + zpr.vrf_id = zvrf->vrf->vrf_id; + if (hdr->command == ZEBRA_RULE_ADD) + zebra_pbr_add_rule(&zpr); + else + zebra_pbr_del_rule(&zpr); + } + +stream_failure: + return; +} + +static inline void zread_ipset(ZAPI_HANDLER_ARGS) +{ + struct zebra_pbr_ipset zpi; + struct stream *s; + uint32_t total, i; + + s = msg; + STREAM_GETL(s, total); + + for (i = 0; i < total; i++) { + memset(&zpi, 0, sizeof(zpi)); + + zpi.sock = client->sock; + zpi.vrf_id = zvrf->vrf->vrf_id; + STREAM_GETL(s, zpi.unique); + STREAM_GETL(s, zpi.type); + STREAM_GETC(s, zpi.family); + STREAM_GET(&zpi.ipset_name, s, ZEBRA_IPSET_NAME_SIZE); + + if (hdr->command == ZEBRA_IPSET_CREATE) + zebra_pbr_create_ipset(&zpi); + else + zebra_pbr_destroy_ipset(&zpi); + } + +stream_failure: + return; +} + +static inline void zread_ipset_entry(ZAPI_HANDLER_ARGS) +{ + struct zebra_pbr_ipset_entry zpi; + struct zebra_pbr_ipset ipset; + struct stream *s; + uint32_t total, i; + + s = msg; + STREAM_GETL(s, total); + + for (i = 0; i < total; i++) { + memset(&zpi, 0, sizeof(zpi)); + memset(&ipset, 0, sizeof(ipset)); + + zpi.sock = client->sock; + STREAM_GETL(s, zpi.unique); + STREAM_GET(&ipset.ipset_name, s, ZEBRA_IPSET_NAME_SIZE); + ipset.ipset_name[ZEBRA_IPSET_NAME_SIZE - 1] = '\0'; + STREAM_GETC(s, zpi.src.family); + STREAM_GETC(s, zpi.src.prefixlen); + STREAM_GET(&zpi.src.u.prefix, s, prefix_blen(&zpi.src)); + STREAM_GETC(s, zpi.dst.family); + STREAM_GETC(s, zpi.dst.prefixlen); + STREAM_GET(&zpi.dst.u.prefix, s, prefix_blen(&zpi.dst)); + + STREAM_GETW(s, zpi.src_port_min); + STREAM_GETW(s, zpi.src_port_max); + STREAM_GETW(s, zpi.dst_port_min); + STREAM_GETW(s, zpi.dst_port_max); + STREAM_GETC(s, zpi.proto); + if (!is_default_prefix(&zpi.src)) + zpi.filter_bm |= PBR_FILTER_SRC_IP; + + if 
(!is_default_prefix(&zpi.dst)) + zpi.filter_bm |= PBR_FILTER_DST_IP; + if (zpi.dst_port_min != 0 || zpi.proto == IPPROTO_ICMP) + zpi.filter_bm |= PBR_FILTER_DST_PORT; + if (zpi.src_port_min != 0 || zpi.proto == IPPROTO_ICMP) + zpi.filter_bm |= PBR_FILTER_SRC_PORT; + if (zpi.dst_port_max != 0) + zpi.filter_bm |= PBR_FILTER_DST_PORT_RANGE; + if (zpi.src_port_max != 0) + zpi.filter_bm |= PBR_FILTER_SRC_PORT_RANGE; + if (zpi.proto != 0) + zpi.filter_bm |= PBR_FILTER_PROTO; + + if (!(zpi.dst.family == AF_INET + || zpi.dst.family == AF_INET6)) { + zlog_warn( + "Unsupported PBR destination IP family: %s (%hhu)", + family2str(zpi.dst.family), zpi.dst.family); + goto stream_failure; + } + if (!(zpi.src.family == AF_INET + || zpi.src.family == AF_INET6)) { + zlog_warn( + "Unsupported PBR source IP family: %s (%hhu)", + family2str(zpi.src.family), zpi.src.family); + goto stream_failure; + } + + /* calculate backpointer */ + zpi.backpointer = + zebra_pbr_lookup_ipset_pername(ipset.ipset_name); + + if (!zpi.backpointer) { + zlog_warn("ipset name specified: %s does not exist", + ipset.ipset_name); + goto stream_failure; + } + + if (hdr->command == ZEBRA_IPSET_ENTRY_ADD) + zebra_pbr_add_ipset_entry(&zpi); + else + zebra_pbr_del_ipset_entry(&zpi); + } + +stream_failure: + return; +} + + +static inline void zebra_neigh_register(ZAPI_HANDLER_ARGS) +{ + afi_t afi; + + STREAM_GETW(msg, afi); + if (afi <= AFI_UNSPEC || afi >= AFI_MAX) { + zlog_warn( + "Invalid AFI %u while registering for neighbors notifications", + afi); + goto stream_failure; + } + vrf_bitmap_set(client->nhrp_neighinfo[afi], zvrf_id(zvrf)); +stream_failure: + return; +} + +static inline void zebra_neigh_unregister(ZAPI_HANDLER_ARGS) +{ + afi_t afi; + + STREAM_GETW(msg, afi); + if (afi <= AFI_UNSPEC || afi >= AFI_MAX) { + zlog_warn( + "Invalid AFI %u while unregistering from neighbor notifications", + afi); + goto stream_failure; + } + vrf_bitmap_unset(client->nhrp_neighinfo[afi], zvrf_id(zvrf)); +stream_failure: + 
return; +} + +static inline void zebra_gre_get(ZAPI_HANDLER_ARGS) +{ + struct stream *s; + ifindex_t idx; + struct interface *ifp; + struct zebra_if *zebra_if = NULL; + struct zebra_l2info_gre *gre_info; + struct interface *ifp_link = NULL; + vrf_id_t vrf_id_link = VRF_UNKNOWN; + vrf_id_t vrf_id = zvrf->vrf->vrf_id; + + s = msg; + STREAM_GETL(s, idx); + ifp = if_lookup_by_index(idx, vrf_id); + + if (ifp) + zebra_if = ifp->info; + + s = stream_new(ZEBRA_MAX_PACKET_SIZ); + + zclient_create_header(s, ZEBRA_GRE_UPDATE, vrf_id); + + if (ifp && IS_ZEBRA_IF_GRE(ifp) && zebra_if) { + gre_info = &zebra_if->l2info.gre; + + stream_putl(s, idx); + stream_putl(s, gre_info->ikey); + stream_putl(s, gre_info->ikey); + stream_putl(s, gre_info->ifindex_link); + + ifp_link = if_lookup_by_index_per_ns( + zebra_ns_lookup(gre_info->link_nsid), + gre_info->ifindex_link); + if (ifp_link) + vrf_id_link = ifp_link->vrf->vrf_id; + stream_putl(s, vrf_id_link); + stream_putl(s, gre_info->vtep_ip.s_addr); + stream_putl(s, gre_info->vtep_ip_remote.s_addr); + } else { + stream_putl(s, idx); + stream_putl(s, 0); + stream_putl(s, 0); + stream_putl(s, IFINDEX_INTERNAL); + stream_putl(s, VRF_UNKNOWN); + stream_putl(s, 0); + stream_putl(s, 0); + } + /* Write packet size. 
*/ + stream_putw_at(s, 0, stream_get_endp(s)); + zserv_send_message(client, s); + + return; + stream_failure: + return; +} + +static inline void zebra_configure_arp(ZAPI_HANDLER_ARGS) +{ + struct stream *s; + uint8_t fam; + ifindex_t idx; + struct interface *ifp; + + s = msg; + STREAM_GETC(s, fam); + if (fam != AF_INET && fam != AF_INET6) + return; + STREAM_GETL(s, idx); + ifp = if_lookup_by_index_per_ns(zvrf->zns, idx); + if (!ifp) + return; + dplane_neigh_table_update(ifp, fam, 1, 0, 0); +stream_failure: + return; +} + +static inline void zebra_neigh_ip_add(ZAPI_HANDLER_ARGS) +{ + struct stream *s; + struct zapi_neigh_ip api = {}; + int ret; + const struct interface *ifp; + + s = msg; + ret = zclient_neigh_ip_decode(s, &api); + if (ret < 0) + return; + ifp = if_lookup_by_index(api.index, zvrf_id(zvrf)); + if (!ifp) + return; + dplane_neigh_ip_update(DPLANE_OP_NEIGH_IP_INSTALL, ifp, &api.ip_out, + &api.ip_in, api.ndm_state, client->proto); +} + + +static inline void zebra_neigh_ip_del(ZAPI_HANDLER_ARGS) +{ + struct stream *s; + struct zapi_neigh_ip api = {}; + int ret; + struct interface *ifp; + + s = msg; + ret = zclient_neigh_ip_decode(s, &api); + if (ret < 0) + return; + ifp = if_lookup_by_index(api.index, zvrf_id(zvrf)); + if (!ifp) + return; + dplane_neigh_ip_update(DPLANE_OP_NEIGH_IP_DELETE, ifp, &api.ip_out, + &api.ip_in, api.ndm_state, client->proto); +} + + +static inline void zread_iptable(ZAPI_HANDLER_ARGS) +{ + struct zebra_pbr_iptable *zpi = + XCALLOC(MTYPE_TMP, sizeof(struct zebra_pbr_iptable)); + struct stream *s; + + s = msg; + + zpi->interface_name_list = list_new(); + zpi->sock = client->sock; + zpi->vrf_id = zvrf->vrf->vrf_id; + STREAM_GETL(s, zpi->unique); + STREAM_GETL(s, zpi->type); + STREAM_GETL(s, zpi->filter_bm); + STREAM_GETL(s, zpi->action); + STREAM_GETL(s, zpi->fwmark); + STREAM_GET(&zpi->ipset_name, s, ZEBRA_IPSET_NAME_SIZE); + STREAM_GETC(s, zpi->family); + STREAM_GETW(s, zpi->pkt_len_min); + STREAM_GETW(s, zpi->pkt_len_max); + 
	STREAM_GETW(s, zpi->tcp_flags);
	STREAM_GETW(s, zpi->tcp_mask_flags);
	STREAM_GETC(s, zpi->dscp_value);
	STREAM_GETC(s, zpi->fragment);
	STREAM_GETC(s, zpi->protocol);
	STREAM_GETW(s, zpi->flow_label);
	STREAM_GETL(s, zpi->nb_interface);
	zebra_pbr_iptable_update_interfacelist(s, zpi);

	if (hdr->command == ZEBRA_IPTABLE_ADD)
		zebra_pbr_add_iptable(zpi);
	else
		zebra_pbr_del_iptable(zpi);

stream_failure:
	/* Freed on both success and decode-failure paths. */
	zebra_pbr_iptable_free(zpi);
	zpi = NULL;
	return;
}

/*
 * Trigger an active neighbor-discovery probe (via the dataplane) for
 * the given prefix on the given interface.
 */
static inline void zread_neigh_discover(ZAPI_HANDLER_ARGS)
{
	struct stream *s;
	ifindex_t ifindex;
	struct interface *ifp;
	struct prefix p;
	struct ipaddr ip;

	s = msg;

	STREAM_GETL(s, ifindex);

	ifp = if_lookup_by_index_per_ns(zvrf->zns, ifindex);
	if (!ifp) {
		zlog_debug("Failed to lookup ifindex: %u", ifindex);
		return;
	}

	STREAM_GETC(s, p.family);
	STREAM_GETC(s, p.prefixlen);
	STREAM_GET(&p.u.prefix, s, prefix_blen(&p));

	if (p.family == AF_INET)
		SET_IPADDR_V4(&ip);
	else
		SET_IPADDR_V6(&ip);

	memcpy(&ip.ip.addr, &p.u.prefix, prefix_blen(&p));

	dplane_neigh_discover(ifp, &ip);

stream_failure:
	return;
}

/*
 * Configure a GRE interface's underlay ("source") link and MTU via the
 * dataplane.
 */
static inline void zebra_gre_source_set(ZAPI_HANDLER_ARGS)
{
	struct stream *s;
	ifindex_t idx, link_idx;
	vrf_id_t link_vrf_id;
	struct interface *ifp;
	struct interface *ifp_link;
	vrf_id_t vrf_id = zvrf->vrf->vrf_id;
	struct zebra_if *zif, *gre_zif;
	struct zebra_l2info_gre *gre_info;
	unsigned int mtu;

	s = msg;
	STREAM_GETL(s, idx);
	ifp = if_lookup_by_index(idx, vrf_id);
	STREAM_GETL(s, link_idx);
	STREAM_GETL(s, link_vrf_id);
	STREAM_GETL(s, mtu);

	ifp_link = if_lookup_by_index(link_idx, link_vrf_id);
	if (!ifp_link || !ifp) {
		zlog_warn("GRE (index %u, VRF %u) or GRE link interface (index %u, VRF %u) not found, when setting GRE params",
			  idx, vrf_id, link_idx, link_vrf_id);
		return;
	}

	if (!IS_ZEBRA_IF_GRE(ifp))
		return;

	gre_zif = (struct zebra_if *)ifp->info;
	zif = (struct zebra_if *)ifp_link->info;
	if (!zif || !gre_zif)
		return;

	/* NOTE(review): gre_info is taken from the *link* interface's
	 * zebra_if (zif), not the GRE interface's (gre_zif) — confirm this
	 * is intended, since the GRE l2info would normally live on the GRE
	 * interface itself.
	 */
	gre_info = &zif->l2info.gre;
	/* NOTE(review): this check can never fail — gre_info is the address
	 * of a struct member and is always non-NULL.
	 */
	if (!gre_info)
		return;

	if (!mtu)
		mtu = ifp->mtu;

	/* if gre link already set or mtu did not change, do not set it */
	if (gre_zif->link && gre_zif->link == ifp_link && mtu == ifp->mtu)
		return;

	dplane_gre_set(ifp, ifp_link, mtu, gre_info);

stream_failure:
	return;
}

/*
 * Send a ZEBRA_ERROR message to the client describing why a received
 * message was rejected, and bump the client's error counter.
 */
static void zsend_error_msg(struct zserv *client, enum zebra_error_types error,
			    struct zmsghdr *bad_hdr)
{

	struct stream *s = stream_new(ZEBRA_MAX_PACKET_SIZ);

	zclient_create_header(s, ZEBRA_ERROR, bad_hdr->vrf_id);

	zserv_encode_error(s, error);

	client->error_cnt++;
	zserv_send_message(client, s);
}

/* Reject a message whose VRF id does not resolve to a known VRF. */
static void zserv_error_no_vrf(ZAPI_HANDLER_ARGS)
{
	if (IS_ZEBRA_DEBUG_PACKET && IS_ZEBRA_DEBUG_RECV)
		zlog_debug("ZAPI message specifies unknown VRF: %d",
			   hdr->vrf_id);

	zsend_error_msg(client, ZEBRA_NO_VRF, hdr);
}

/* Reject a message whose command has no registered handler. */
static void zserv_error_invalid_msg_type(ZAPI_HANDLER_ARGS)
{
	zlog_info("Zebra received unknown command %d", hdr->command);

	zsend_error_msg(client, ZEBRA_INVALID_MSG_TYPE, hdr);
}

/*
 * Inbound message dispatch table, indexed by ZAPI command number.
 * Commands with no entry (NULL) are rejected via
 * zserv_error_invalid_msg_type().
 */
void (*const zserv_handlers[])(ZAPI_HANDLER_ARGS) = {
	[ZEBRA_ROUTER_ID_ADD] = zread_router_id_add,
	[ZEBRA_ROUTER_ID_DELETE] = zread_router_id_delete,
	[ZEBRA_INTERFACE_ADD] = zread_interface_add,
	[ZEBRA_INTERFACE_DELETE] = zread_interface_delete,
	[ZEBRA_INTERFACE_SET_PROTODOWN] = zread_interface_set_protodown,
	[ZEBRA_ROUTE_ADD] = zread_route_add,
	[ZEBRA_ROUTE_DELETE] = zread_route_del,
	[ZEBRA_REDISTRIBUTE_ADD] = zebra_redistribute_add,
	[ZEBRA_REDISTRIBUTE_DELETE] = zebra_redistribute_delete,
	[ZEBRA_REDISTRIBUTE_DEFAULT_ADD] = zebra_redistribute_default_add,
	[ZEBRA_REDISTRIBUTE_DEFAULT_DELETE] = zebra_redistribute_default_delete,
	[ZEBRA_NEXTHOP_LOOKUP_MRIB] = zread_nexthop_lookup_mrib,
	[ZEBRA_HELLO] = zread_hello,
	[ZEBRA_NEXTHOP_REGISTER] = zread_rnh_register,
	[ZEBRA_NEXTHOP_UNREGISTER] = zread_rnh_unregister,
	[ZEBRA_BFD_DEST_UPDATE] = zebra_ptm_bfd_dst_register,
	[ZEBRA_BFD_DEST_REGISTER] = zebra_ptm_bfd_dst_register,
	[ZEBRA_BFD_DEST_DEREGISTER] = zebra_ptm_bfd_dst_deregister,
#if HAVE_BFDD > 0
	[ZEBRA_BFD_DEST_REPLAY] = zebra_ptm_bfd_dst_replay,
#endif /* HAVE_BFDD */
	[ZEBRA_VRF_UNREGISTER] = zread_vrf_unregister,
	[ZEBRA_VRF_LABEL] = zread_vrf_label,
	[ZEBRA_BFD_CLIENT_REGISTER] = zebra_ptm_bfd_client_register,
	[ZEBRA_INTERFACE_ENABLE_RADV] = zebra_interface_radv_enable,
	[ZEBRA_INTERFACE_DISABLE_RADV] = zebra_interface_radv_disable,
	[ZEBRA_SR_POLICY_SET] = zread_sr_policy_set,
	[ZEBRA_SR_POLICY_DELETE] = zread_sr_policy_delete,
	[ZEBRA_MPLS_LABELS_ADD] = zread_mpls_labels_add,
	[ZEBRA_MPLS_LABELS_DELETE] = zread_mpls_labels_delete,
	[ZEBRA_MPLS_LABELS_REPLACE] = zread_mpls_labels_replace,
	[ZEBRA_IPMR_ROUTE_STATS] = zebra_ipmr_route_stats,
	[ZEBRA_LABEL_MANAGER_CONNECT] = zread_label_manager_request,
	[ZEBRA_LABEL_MANAGER_CONNECT_ASYNC] = zread_label_manager_request,
	[ZEBRA_GET_LABEL_CHUNK] = zread_label_manager_request,
	[ZEBRA_RELEASE_LABEL_CHUNK] = zread_label_manager_request,
	[ZEBRA_FEC_REGISTER] = zread_fec_register,
	[ZEBRA_FEC_UNREGISTER] = zread_fec_unregister,
	[ZEBRA_ADVERTISE_DEFAULT_GW] = zebra_vxlan_advertise_gw_macip,
	[ZEBRA_ADVERTISE_SVI_MACIP] = zebra_vxlan_advertise_svi_macip,
	[ZEBRA_ADVERTISE_SUBNET] = zebra_vxlan_advertise_subnet,
	[ZEBRA_ADVERTISE_ALL_VNI] = zebra_vxlan_advertise_all_vni,
	[ZEBRA_REMOTE_ES_VTEP_ADD] = zebra_evpn_proc_remote_es,
	[ZEBRA_REMOTE_ES_VTEP_DEL] = zebra_evpn_proc_remote_es,
	[ZEBRA_REMOTE_VTEP_ADD] = zebra_vxlan_remote_vtep_add_zapi,
	[ZEBRA_REMOTE_VTEP_DEL] = zebra_vxlan_remote_vtep_del_zapi,
	[ZEBRA_REMOTE_MACIP_ADD] = zebra_vxlan_remote_macip_add,
	[ZEBRA_REMOTE_MACIP_DEL] = zebra_vxlan_remote_macip_del,
	[ZEBRA_DUPLICATE_ADDR_DETECTION] = zebra_vxlan_dup_addr_detection,
	[ZEBRA_INTERFACE_SET_MASTER] = zread_interface_set_master,
	[ZEBRA_PW_ADD] = zread_pseudowire,
	[ZEBRA_PW_DELETE] = zread_pseudowire,
	[ZEBRA_PW_SET] = zread_pseudowire,
	[ZEBRA_PW_UNSET] = zread_pseudowire,
	[ZEBRA_RULE_ADD] = zread_rule,
	[ZEBRA_RULE_DELETE] = zread_rule,
	[ZEBRA_TABLE_MANAGER_CONNECT] = zread_table_manager_request,
	[ZEBRA_GET_TABLE_CHUNK] = zread_table_manager_request,
	[ZEBRA_RELEASE_TABLE_CHUNK] = zread_table_manager_request,
	[ZEBRA_IPSET_CREATE] = zread_ipset,
	[ZEBRA_IPSET_DESTROY] = zread_ipset,
	[ZEBRA_IPSET_ENTRY_ADD] = zread_ipset_entry,
	[ZEBRA_IPSET_ENTRY_DELETE] = zread_ipset_entry,
	[ZEBRA_IPTABLE_ADD] = zread_iptable,
	[ZEBRA_IPTABLE_DELETE] = zread_iptable,
	[ZEBRA_VXLAN_FLOOD_CONTROL] = zebra_vxlan_flood_control,
	[ZEBRA_VXLAN_SG_REPLAY] = zebra_vxlan_sg_replay,
	[ZEBRA_MLAG_CLIENT_REGISTER] = zebra_mlag_client_register,
	[ZEBRA_MLAG_CLIENT_UNREGISTER] = zebra_mlag_client_unregister,
	[ZEBRA_MLAG_FORWARD_MSG] = zebra_mlag_forward_client_msg,
	[ZEBRA_SRV6_MANAGER_GET_LOCATOR_CHUNK] = zread_srv6_manager_request,
	[ZEBRA_SRV6_MANAGER_RELEASE_LOCATOR_CHUNK] = zread_srv6_manager_request,
	[ZEBRA_CLIENT_CAPABILITIES] = zread_client_capabilities,
	[ZEBRA_NEIGH_DISCOVER] = zread_neigh_discover,
	[ZEBRA_NHG_ADD] = zread_nhg_add,
	[ZEBRA_NHG_DEL] = zread_nhg_del,
	[ZEBRA_ROUTE_NOTIFY_REQUEST] = zread_route_notify_request,
	[ZEBRA_EVPN_REMOTE_NH_ADD] = zebra_evpn_proc_remote_nh,
	[ZEBRA_EVPN_REMOTE_NH_DEL] = zebra_evpn_proc_remote_nh,
	[ZEBRA_NEIGH_IP_ADD] = zebra_neigh_ip_add,
	[ZEBRA_NEIGH_IP_DEL] = zebra_neigh_ip_del,
	[ZEBRA_NHRP_NEIGH_REGISTER] = zebra_neigh_register,
	[ZEBRA_NHRP_NEIGH_UNREGISTER] = zebra_neigh_unregister,
	[ZEBRA_CONFIGURE_ARP] = zebra_configure_arp,
	[ZEBRA_GRE_GET] = zebra_gre_get,
	[ZEBRA_GRE_SOURCE_SET] = zebra_gre_source_set,
};

/*
 * Process a batch of zapi messages.
+ */ +void zserv_handle_commands(struct zserv *client, struct stream_fifo *fifo) +{ + struct zmsghdr hdr; + struct zebra_vrf *zvrf; + struct stream *msg; + struct stream_fifo temp_fifo; + + stream_fifo_init(&temp_fifo); + + while (stream_fifo_head(fifo)) { + msg = stream_fifo_pop(fifo); + + if (STREAM_READABLE(msg) > ZEBRA_MAX_PACKET_SIZ) { + if (IS_ZEBRA_DEBUG_PACKET && IS_ZEBRA_DEBUG_RECV) + zlog_debug( + "ZAPI message is %zu bytes long but the maximum packet size is %u; dropping", + STREAM_READABLE(msg), + ZEBRA_MAX_PACKET_SIZ); + goto continue_loop; + } + + zapi_parse_header(msg, &hdr); + + if (IS_ZEBRA_DEBUG_PACKET && IS_ZEBRA_DEBUG_RECV + && IS_ZEBRA_DEBUG_DETAIL) + zserv_log_message(NULL, msg, &hdr); + + hdr.length -= ZEBRA_HEADER_SIZE; + + /* Before checking for a handler function, check for + * special messages that are handled in another module; + * we'll treat these as opaque. + */ + if (zebra_opaque_handles_msgid(hdr.command)) { + /* Reset message buffer */ + stream_set_getp(msg, 0); + + stream_fifo_push(&temp_fifo, msg); + + /* Continue without freeing the message */ + msg = NULL; + goto continue_loop; + } + + /* lookup vrf */ + zvrf = zebra_vrf_lookup_by_id(hdr.vrf_id); + if (!zvrf) { + zserv_error_no_vrf(client, &hdr, msg, zvrf); + goto continue_loop; + } + + if (hdr.command >= array_size(zserv_handlers) + || zserv_handlers[hdr.command] == NULL) { + zserv_error_invalid_msg_type(client, &hdr, msg, zvrf); + goto continue_loop; + } + + zserv_handlers[hdr.command](client, &hdr, msg, zvrf); + +continue_loop: + stream_free(msg); + } + + /* Dispatch any special messages from the temp fifo */ + if (stream_fifo_head(&temp_fifo) != NULL) + zebra_opaque_enqueue_batch(&temp_fifo); + + stream_fifo_deinit(&temp_fifo); +} diff --git a/zebra/zapi_msg.h b/zebra/zapi_msg.h new file mode 100644 index 0000000..4fdc863 --- /dev/null +++ b/zebra/zapi_msg.h @@ -0,0 +1,129 @@ +/* + * Zebra API message creation & consumption. 
+ * Portions: + * Copyright (C) 1997-1999 Kunihiro Ishiguro + * Copyright (C) 2015-2018 Cumulus Networks, Inc. + * et al. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License as published by the Free + * Software Foundation; either version 2 of the License, or (at your option) + * any later version. + * + * This program is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for + * more details. + * + * You should have received a copy of the GNU General Public License along + * with this program; see the file COPYING; if not, write to the Free Software + * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA + */ + +#include "lib/if.h" +#include "lib/vrf.h" +#include "lib/zclient.h" +#include "lib/pbr.h" + +#include "zebra/rib.h" +#include "zebra/zserv.h" +#include "zebra/zebra_pbr.h" +#include "zebra/zebra_errors.h" +#include "zebra/label_manager.h" +#include "zebra/zebra_srv6.h" + + +#ifdef __cplusplus +extern "C" { +#endif + +/* + * This is called to process inbound ZAPI messages. 
+ * + * client + * the client datastructure + * + * fifo + * a batch of messages + */ +extern void zserv_handle_commands(struct zserv *client, + struct stream_fifo *fifo); + +extern int zsend_vrf_add(struct zserv *zclient, struct zebra_vrf *zvrf); +extern int zsend_vrf_delete(struct zserv *zclient, struct zebra_vrf *zvrf); +extern int zsend_interface_add(struct zserv *zclient, struct interface *ifp); +extern int zsend_interface_delete(struct zserv *zclient, struct interface *ifp); +extern int zsend_interface_addresses(struct zserv *zclient, + struct interface *ifp); +extern int zsend_interface_address(int cmd, struct zserv *zclient, + struct interface *ifp, + struct connected *ifc); +extern void nbr_connected_add_ipv6(struct interface *ifp, + struct in6_addr *address); +extern void nbr_connected_delete_ipv6(struct interface *ifp, + struct in6_addr *address); +extern int zsend_interface_update(int cmd, struct zserv *client, + struct interface *ifp); +extern int zsend_redistribute_route(int cmd, struct zserv *zclient, + const struct route_node *rn, + const struct route_entry *re); + +extern int zsend_router_id_update(struct zserv *zclient, afi_t afi, + struct prefix *p, vrf_id_t vrf_id); +extern int zsend_interface_vrf_update(struct zserv *zclient, + struct interface *ifp, vrf_id_t vrf_id); +extern int zsend_interface_link_params(struct zserv *zclient, + struct interface *ifp); +extern int zsend_pw_update(struct zserv *client, struct zebra_pw *pw); +extern int zsend_route_notify_owner(const struct route_node *rn, + struct route_entry *re, + enum zapi_route_notify_owner note, + afi_t afi, safi_t safi); +extern int zsend_route_notify_owner_ctx(const struct zebra_dplane_ctx *ctx, + enum zapi_route_notify_owner note); + +extern void zsend_rule_notify_owner(const struct zebra_dplane_ctx *ctx, + enum zapi_rule_notify_owner note); + +extern void zsend_iptable_notify_owner(const struct zebra_dplane_ctx *ctx, + enum zapi_iptable_notify_owner note); +extern void 
zsend_ipset_notify_owner(const struct zebra_dplane_ctx *ctx, + enum zapi_ipset_notify_owner note); +extern void +zsend_ipset_entry_notify_owner(const struct zebra_dplane_ctx *ctx, + enum zapi_ipset_entry_notify_owner note); +extern bool zserv_nexthop_num_warn(const char *caller, const struct prefix *p, + const unsigned int nexthop_num); + +extern void zsend_capabilities_all_clients(void); +extern int zsend_assign_label_chunk_response(struct zserv *client, + vrf_id_t vrf_id, + struct label_manager_chunk *lmc); +extern int zsend_label_manager_connect_response(struct zserv *client, + vrf_id_t vrf_id, + unsigned short result); +extern int zsend_sr_policy_notify_status(uint32_t color, + struct ipaddr *endpoint, char *name, + int status); +extern void zsend_nhrp_neighbor_notify(int cmd, struct interface *ifp, + struct ipaddr *ipaddr, int ndm_state, + union sockunion *link_layer_ipv4); + +extern int zsend_client_close_notify(struct zserv *client, + struct zserv *closed_client); + +int zsend_nhg_notify(uint16_t type, uint16_t instance, uint32_t session_id, + uint32_t id, enum zapi_nhg_notify_owner note); + +extern void zapi_re_opaque_free(struct re_opaque *opaque); + +extern int zsend_zebra_srv6_locator_add(struct zserv *client, + struct srv6_locator *loc); +extern int zsend_zebra_srv6_locator_delete(struct zserv *client, + struct srv6_locator *loc); +extern int zsend_srv6_manager_get_locator_chunk_response(struct zserv *client, + vrf_id_t vrf_id, struct srv6_locator *loc); + +#ifdef __cplusplus +} +#endif diff --git a/zebra/zebra_dplane.c b/zebra/zebra_dplane.c new file mode 100644 index 0000000..73dc2ab --- /dev/null +++ b/zebra/zebra_dplane.c @@ -0,0 +1,6614 @@ +/* + * Zebra dataplane layer. + * Copyright (c) 2018 Volta Networks, Inc. 
+ * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. + * + * You should have received a copy of the GNU General Public License along + * with this program; see the file COPYING; if not, write to the Free Software + * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA + */ + +#ifdef HAVE_CONFIG_H +#include "config.h" +#endif + +#include "lib/libfrr.h" +#include "lib/debug.h" +#include "lib/frratomic.h" +#include "lib/frr_pthread.h" +#include "lib/memory.h" +#include "lib/queue.h" +#include "lib/zebra.h" +#include "zebra/netconf_netlink.h" +#include "zebra/zebra_router.h" +#include "zebra/zebra_dplane.h" +#include "zebra/zebra_vxlan_private.h" +#include "zebra/zebra_mpls.h" +#include "zebra/rt.h" +#include "zebra/debug.h" +#include "zebra/zebra_pbr.h" +#include "zebra/zebra_neigh.h" +#include "printfrr.h" + +/* Memory types */ +DEFINE_MTYPE_STATIC(ZEBRA, DP_CTX, "Zebra DPlane Ctx"); +DEFINE_MTYPE_STATIC(ZEBRA, DP_INTF, "Zebra DPlane Intf"); +DEFINE_MTYPE_STATIC(ZEBRA, DP_PROV, "Zebra DPlane Provider"); +DEFINE_MTYPE_STATIC(ZEBRA, DP_NETFILTER, "Zebra Netfilter Internal Object"); +DEFINE_MTYPE_STATIC(ZEBRA, DP_NS, "DPlane NSes"); + +#ifndef AOK +# define AOK 0 +#endif + +/* Control for collection of extra interface info with route updates; a plugin + * can enable the extra info via a dplane api. 
+ */ +static bool dplane_collect_extra_intf_info; + +/* Enable test dataplane provider */ +/*#define DPLANE_TEST_PROVIDER 1 */ + +/* Default value for max queued incoming updates */ +const uint32_t DPLANE_DEFAULT_MAX_QUEUED = 200; + +/* Default value for new work per cycle */ +const uint32_t DPLANE_DEFAULT_NEW_WORK = 100; + +/* Validation check macro for context blocks */ +/* #define DPLANE_DEBUG 1 */ + +#ifdef DPLANE_DEBUG + +# define DPLANE_CTX_VALID(p) \ + assert((p) != NULL) + +#else + +# define DPLANE_CTX_VALID(p) + +#endif /* DPLANE_DEBUG */ + +/* + * Nexthop information captured for nexthop/nexthop group updates + */ +struct dplane_nexthop_info { + uint32_t id; + uint32_t old_id; + afi_t afi; + vrf_id_t vrf_id; + int type; + + struct nexthop_group ng; + struct nh_grp nh_grp[MULTIPATH_NUM]; + uint8_t nh_grp_count; +}; + +/* + * Optional extra info about interfaces used in route updates' nexthops. + */ +struct dplane_intf_extra { + vrf_id_t vrf_id; + uint32_t ifindex; + uint32_t flags; + uint32_t status; + + TAILQ_ENTRY(dplane_intf_extra) link; +}; + +/* + * Route information captured for route updates. 
+ */ +struct dplane_route_info { + + /* Dest and (optional) source prefixes */ + struct prefix zd_dest; + struct prefix zd_src; + + afi_t zd_afi; + safi_t zd_safi; + + int zd_type; + int zd_old_type; + + route_tag_t zd_tag; + route_tag_t zd_old_tag; + uint32_t zd_metric; + uint32_t zd_old_metric; + + uint16_t zd_instance; + uint16_t zd_old_instance; + + uint8_t zd_distance; + uint8_t zd_old_distance; + + uint32_t zd_mtu; + uint32_t zd_nexthop_mtu; + + /* Nexthop hash entry info */ + struct dplane_nexthop_info nhe; + + /* Nexthops */ + uint32_t zd_nhg_id; + struct nexthop_group zd_ng; + + /* Backup nexthops (if present) */ + struct nexthop_group backup_ng; + + /* "Previous" nexthops, used only in route updates without netlink */ + struct nexthop_group zd_old_ng; + struct nexthop_group old_backup_ng; + + /* Optional list of extra interface info */ + TAILQ_HEAD(dp_intf_extra_q, dplane_intf_extra) intf_extra_q; +}; + +/* + * Pseudowire info for the dataplane + */ +struct dplane_pw_info { + int type; + int af; + int status; + uint32_t flags; + uint32_t nhg_id; + union g_addr dest; + mpls_label_t local_label; + mpls_label_t remote_label; + + /* Nexthops that are valid and installed */ + struct nexthop_group fib_nhg; + + /* Primary and backup nexthop sets, copied from the resolving route. 
*/ + struct nexthop_group primary_nhg; + struct nexthop_group backup_nhg; + + union pw_protocol_fields fields; +}; + +/* + * Bridge port info for the dataplane + */ +struct dplane_br_port_info { + uint32_t sph_filter_cnt; + struct in_addr sph_filters[ES_VTEP_MAX_CNT]; + /* DPLANE_BR_PORT_XXX - see zebra_dplane.h*/ + uint32_t flags; + uint32_t backup_nhg_id; +}; + +/* + * Interface/prefix info for the dataplane + */ +struct dplane_intf_info { + + uint32_t metric; + uint32_t flags; + + bool protodown; + bool pd_reason_val; + +#define DPLANE_INTF_CONNECTED (1 << 0) /* Connected peer, p2p */ +#define DPLANE_INTF_SECONDARY (1 << 1) +#define DPLANE_INTF_BROADCAST (1 << 2) +#define DPLANE_INTF_HAS_DEST DPLANE_INTF_CONNECTED +#define DPLANE_INTF_HAS_LABEL (1 << 4) + + /* Interface address/prefix */ + struct prefix prefix; + + /* Dest address, for p2p, or broadcast prefix */ + struct prefix dest_prefix; + + char *label; + char label_buf[32]; +}; + +/* + * EVPN MAC address info for the dataplane. 
+ */ +struct dplane_mac_info { + vlanid_t vid; + ifindex_t br_ifindex; + struct ethaddr mac; + struct in_addr vtep_ip; + bool is_sticky; + uint32_t nhg_id; + uint32_t update_flags; +}; + +/* + * Neighbor info for the dataplane + */ +struct dplane_neigh_info { + struct ipaddr ip_addr; + union { + struct ethaddr mac; + struct ipaddr ip_addr; + } link; + uint32_t flags; + uint16_t state; + uint32_t update_flags; +}; + +/* + * Neighbor Table + */ +struct dplane_neigh_table { + uint8_t family; + uint32_t app_probes; + uint32_t ucast_probes; + uint32_t mcast_probes; +}; + +/* + * Policy based routing rule info for the dataplane + */ +struct dplane_ctx_rule { + uint32_t priority; + + /* The route table pointed by this rule */ + uint32_t table; + + /* Filter criteria */ + uint32_t filter_bm; + uint32_t fwmark; + uint8_t dsfield; + struct prefix src_ip; + struct prefix dst_ip; + uint8_t ip_proto; + uint16_t src_port; + uint16_t dst_port; + + uint8_t action_pcp; + uint16_t action_vlan_id; + uint16_t action_vlan_flags; + + uint32_t action_queue_id; + + char ifname[INTERFACE_NAMSIZ + 1]; + struct ethaddr smac; + struct ethaddr dmac; + int out_ifindex; + intptr_t dp_flow_ptr; +}; + +struct dplane_rule_info { + /* + * Originating zclient sock fd, so we can know who to send + * back to. + */ + int sock; + + int unique; + int seq; + + struct dplane_ctx_rule new; + struct dplane_ctx_rule old; +}; + +struct dplane_gre_ctx { + uint32_t link_ifindex; + unsigned int mtu; + struct zebra_l2info_gre info; +}; + + +/* + * Network interface configuration info - aligned with netlink's NETCONF + * info. The flags values are public, in the dplane.h file... 
+ */ +struct dplane_netconf_info { + enum dplane_netconf_status_e mpls_val; + enum dplane_netconf_status_e mcast_val; + enum dplane_netconf_status_e linkdown_val; +}; + +/* + * Traffic control contexts for the dplane + */ +struct dplane_tc_info { + /* Rate spec (unit: Bytes/s) */ + uint64_t rate; + uint64_t ceil; + + /* TODO: custom burst */ + + /* Filter components for "tfilter" */ + uint32_t filter_bm; + struct prefix src_ip; + struct prefix dst_ip; + uint8_t ip_proto; + + /* TODO: more filter components */ +}; + +/* + * The context block used to exchange info about route updates across + * the boundary between the zebra main context (and pthread) and the + * dataplane layer (and pthread). + */ +struct zebra_dplane_ctx { + + /* Operation code */ + enum dplane_op_e zd_op; + + /* Status on return */ + enum zebra_dplane_result zd_status; + + /* Dplane provider id */ + uint32_t zd_provider; + + /* Flags - used by providers, e.g. */ + int zd_flags; + + bool zd_is_update; + + uint32_t zd_seq; + uint32_t zd_old_seq; + + /* Some updates may be generated by notifications: allow the + * plugin to notice and ignore results from its own notifications. + */ + uint32_t zd_notif_provider; + + /* TODO -- internal/sub-operation status? 
*/ + enum zebra_dplane_result zd_remote_status; + enum zebra_dplane_result zd_kernel_status; + + vrf_id_t zd_vrf_id; + uint32_t zd_table_id; + + char zd_ifname[INTERFACE_NAMSIZ]; + ifindex_t zd_ifindex; + + /* Support info for different kinds of updates */ + union { + struct dplane_route_info rinfo; + struct zebra_lsp lsp; + struct dplane_pw_info pw; + struct dplane_br_port_info br_port; + struct dplane_intf_info intf; + struct dplane_mac_info macinfo; + struct dplane_neigh_info neigh; + struct dplane_rule_info rule; + struct dplane_tc_info tc; + struct zebra_pbr_iptable iptable; + struct zebra_pbr_ipset ipset; + struct { + struct zebra_pbr_ipset_entry entry; + struct zebra_pbr_ipset_info info; + } ipset_entry; + struct dplane_neigh_table neightable; + struct dplane_gre_ctx gre; + struct dplane_netconf_info netconf; + } u; + + /* Namespace info, used especially for netlink kernel communication */ + struct zebra_dplane_info zd_ns_info; + + /* Embedded list linkage */ + TAILQ_ENTRY(zebra_dplane_ctx) zd_q_entries; +}; + +/* Flag that can be set by a pre-kernel provider as a signal that an update + * should bypass the kernel. + */ +#define DPLANE_CTX_FLAG_NO_KERNEL 0x01 + + +/* + * Registration block for one dataplane provider. 
+ */ +struct zebra_dplane_provider { + /* Name */ + char dp_name[DPLANE_PROVIDER_NAMELEN + 1]; + + /* Priority, for ordering among providers */ + uint8_t dp_priority; + + /* Id value */ + uint32_t dp_id; + + /* Mutex */ + pthread_mutex_t dp_mutex; + + /* Plugin-provided extra data */ + void *dp_data; + + /* Flags */ + int dp_flags; + + int (*dp_start)(struct zebra_dplane_provider *prov); + + int (*dp_fp)(struct zebra_dplane_provider *prov); + + int (*dp_fini)(struct zebra_dplane_provider *prov, bool early_p); + + _Atomic uint32_t dp_in_counter; + _Atomic uint32_t dp_in_queued; + _Atomic uint32_t dp_in_max; + _Atomic uint32_t dp_out_counter; + _Atomic uint32_t dp_out_queued; + _Atomic uint32_t dp_out_max; + _Atomic uint32_t dp_error_counter; + + /* Queue of contexts inbound to the provider */ + struct dplane_ctx_q dp_ctx_in_q; + + /* Queue of completed contexts outbound from the provider back + * towards the dataplane module. + */ + struct dplane_ctx_q dp_ctx_out_q; + + /* Embedded list linkage for provider objects */ + TAILQ_ENTRY(zebra_dplane_provider) dp_prov_link; +}; + +/* Declare types for list of zns info objects */ +PREDECL_DLIST(zns_info_list); + +struct dplane_zns_info { + struct zebra_dplane_info info; + + /* Request data from the OS */ + struct thread *t_request; + + /* Read event */ + struct thread *t_read; + + /* List linkage */ + struct zns_info_list_item link; +}; + +/* + * Globals + */ +static struct zebra_dplane_globals { + /* Mutex to control access to dataplane components */ + pthread_mutex_t dg_mutex; + + /* Results callback registered by zebra 'core' */ + int (*dg_results_cb)(struct dplane_ctx_q *ctxlist); + + /* Sentinel for beginning of shutdown */ + volatile bool dg_is_shutdown; + + /* Sentinel for end of shutdown */ + volatile bool dg_run; + + /* Update context queue inbound to the dataplane */ + TAILQ_HEAD(zdg_ctx_q, zebra_dplane_ctx) dg_update_ctx_q; + + /* Ordered list of providers */ + TAILQ_HEAD(zdg_prov_q, zebra_dplane_provider) 
dg_providers_q; + + /* List of info about each zns */ + struct zns_info_list_head dg_zns_list; + + /* Counter used to assign internal ids to providers */ + uint32_t dg_provider_id; + + /* Limit number of pending, unprocessed updates */ + _Atomic uint32_t dg_max_queued_updates; + + /* Control whether system route notifications should be produced. */ + bool dg_sys_route_notifs; + + /* Limit number of new updates dequeued at once, to pace an + * incoming burst. + */ + uint32_t dg_updates_per_cycle; + + _Atomic uint32_t dg_routes_in; + _Atomic uint32_t dg_routes_queued; + _Atomic uint32_t dg_routes_queued_max; + _Atomic uint32_t dg_route_errors; + _Atomic uint32_t dg_other_errors; + + _Atomic uint32_t dg_nexthops_in; + _Atomic uint32_t dg_nexthop_errors; + + _Atomic uint32_t dg_lsps_in; + _Atomic uint32_t dg_lsp_errors; + + _Atomic uint32_t dg_pws_in; + _Atomic uint32_t dg_pw_errors; + + _Atomic uint32_t dg_br_port_in; + _Atomic uint32_t dg_br_port_errors; + + _Atomic uint32_t dg_intf_addrs_in; + _Atomic uint32_t dg_intf_addr_errors; + _Atomic uint32_t dg_intf_changes; + _Atomic uint32_t dg_intf_changes_errors; + + _Atomic uint32_t dg_macs_in; + _Atomic uint32_t dg_mac_errors; + + _Atomic uint32_t dg_neighs_in; + _Atomic uint32_t dg_neigh_errors; + + _Atomic uint32_t dg_rules_in; + _Atomic uint32_t dg_rule_errors; + + _Atomic uint32_t dg_update_yields; + + _Atomic uint32_t dg_iptable_in; + _Atomic uint32_t dg_iptable_errors; + + _Atomic uint32_t dg_ipset_in; + _Atomic uint32_t dg_ipset_errors; + _Atomic uint32_t dg_ipset_entry_in; + _Atomic uint32_t dg_ipset_entry_errors; + + _Atomic uint32_t dg_neightable_in; + _Atomic uint32_t dg_neightable_errors; + + _Atomic uint32_t dg_gre_set_in; + _Atomic uint32_t dg_gre_set_errors; + + _Atomic uint32_t dg_intfs_in; + _Atomic uint32_t dg_intf_errors; + + _Atomic uint32_t dg_tcs_in; + _Atomic uint32_t dg_tcs_errors; + + /* Dataplane pthread */ + struct frr_pthread *dg_pthread; + + /* Event-delivery context 'master' for the dplane 
*/ + struct thread_master *dg_master; + + /* Event/'thread' pointer for queued updates */ + struct thread *dg_t_update; + + /* Event pointer for pending shutdown check loop */ + struct thread *dg_t_shutdown_check; + +} zdplane_info; + +/* Instantiate zns list type */ +DECLARE_DLIST(zns_info_list, struct dplane_zns_info, link); + +/* + * Lock and unlock for interactions with the zebra 'core' pthread + */ +#define DPLANE_LOCK() pthread_mutex_lock(&zdplane_info.dg_mutex) +#define DPLANE_UNLOCK() pthread_mutex_unlock(&zdplane_info.dg_mutex) + + +/* + * Lock and unlock for individual providers + */ +#define DPLANE_PROV_LOCK(p) pthread_mutex_lock(&((p)->dp_mutex)) +#define DPLANE_PROV_UNLOCK(p) pthread_mutex_unlock(&((p)->dp_mutex)) + +/* Prototypes */ +static void dplane_thread_loop(struct thread *event); +static enum zebra_dplane_result lsp_update_internal(struct zebra_lsp *lsp, + enum dplane_op_e op); +static enum zebra_dplane_result pw_update_internal(struct zebra_pw *pw, + enum dplane_op_e op); +static enum zebra_dplane_result intf_addr_update_internal( + const struct interface *ifp, const struct connected *ifc, + enum dplane_op_e op); +static enum zebra_dplane_result mac_update_common( + enum dplane_op_e op, const struct interface *ifp, + const struct interface *br_ifp, + vlanid_t vid, const struct ethaddr *mac, + struct in_addr vtep_ip, bool sticky, uint32_t nhg_id, + uint32_t update_flags); +static enum zebra_dplane_result +neigh_update_internal(enum dplane_op_e op, const struct interface *ifp, + const void *link, int link_family, + const struct ipaddr *ip, uint32_t flags, uint16_t state, + uint32_t update_flags, int protocol); + +/* + * Public APIs + */ + +/* Obtain thread_master for dataplane thread */ +struct thread_master *dplane_get_thread_master(void) +{ + return zdplane_info.dg_master; +} + +/* + * Allocate a dataplane update context + */ +struct zebra_dplane_ctx *dplane_ctx_alloc(void) +{ + struct zebra_dplane_ctx *p; + + /* TODO -- just alloc'ing memory, 
but would like to maintain + * a pool + */ + p = XCALLOC(MTYPE_DP_CTX, sizeof(struct zebra_dplane_ctx)); + + return p; +} + +/* Enable system route notifications */ +void dplane_enable_sys_route_notifs(void) +{ + zdplane_info.dg_sys_route_notifs = true; +} + +/* + * Clean up dependent/internal allocations inside a context object + */ +static void dplane_ctx_free_internal(struct zebra_dplane_ctx *ctx) +{ + struct dplane_intf_extra *if_extra, *if_tmp; + + /* + * Some internal allocations may need to be freed, depending on + * the type of info captured in the ctx. + */ + switch (ctx->zd_op) { + case DPLANE_OP_ROUTE_INSTALL: + case DPLANE_OP_ROUTE_UPDATE: + case DPLANE_OP_ROUTE_DELETE: + case DPLANE_OP_SYS_ROUTE_ADD: + case DPLANE_OP_SYS_ROUTE_DELETE: + case DPLANE_OP_ROUTE_NOTIFY: + + /* Free allocated nexthops */ + if (ctx->u.rinfo.zd_ng.nexthop) { + /* This deals with recursive nexthops too */ + nexthops_free(ctx->u.rinfo.zd_ng.nexthop); + + ctx->u.rinfo.zd_ng.nexthop = NULL; + } + + /* Free backup info also (if present) */ + if (ctx->u.rinfo.backup_ng.nexthop) { + /* This deals with recursive nexthops too */ + nexthops_free(ctx->u.rinfo.backup_ng.nexthop); + + ctx->u.rinfo.backup_ng.nexthop = NULL; + } + + if (ctx->u.rinfo.zd_old_ng.nexthop) { + /* This deals with recursive nexthops too */ + nexthops_free(ctx->u.rinfo.zd_old_ng.nexthop); + + ctx->u.rinfo.zd_old_ng.nexthop = NULL; + } + + if (ctx->u.rinfo.old_backup_ng.nexthop) { + /* This deals with recursive nexthops too */ + nexthops_free(ctx->u.rinfo.old_backup_ng.nexthop); + + ctx->u.rinfo.old_backup_ng.nexthop = NULL; + } + + /* Optional extra interface info */ + TAILQ_FOREACH_SAFE(if_extra, &ctx->u.rinfo.intf_extra_q, + link, if_tmp) { + TAILQ_REMOVE(&ctx->u.rinfo.intf_extra_q, if_extra, + link); + XFREE(MTYPE_DP_INTF, if_extra); + } + + break; + + case DPLANE_OP_NH_INSTALL: + case DPLANE_OP_NH_UPDATE: + case DPLANE_OP_NH_DELETE: { + if (ctx->u.rinfo.nhe.ng.nexthop) { + /* This deals with recursive nexthops 
too */ + nexthops_free(ctx->u.rinfo.nhe.ng.nexthop); + + ctx->u.rinfo.nhe.ng.nexthop = NULL; + } + break; + } + + case DPLANE_OP_LSP_INSTALL: + case DPLANE_OP_LSP_UPDATE: + case DPLANE_OP_LSP_DELETE: + case DPLANE_OP_LSP_NOTIFY: + { + struct zebra_nhlfe *nhlfe; + + /* Unlink and free allocated NHLFEs */ + frr_each_safe(nhlfe_list, &ctx->u.lsp.nhlfe_list, nhlfe) { + nhlfe_list_del(&ctx->u.lsp.nhlfe_list, nhlfe); + zebra_mpls_nhlfe_free(nhlfe); + } + + /* Unlink and free allocated backup NHLFEs, if present */ + frr_each_safe(nhlfe_list, + &(ctx->u.lsp.backup_nhlfe_list), nhlfe) { + nhlfe_list_del(&ctx->u.lsp.backup_nhlfe_list, + nhlfe); + zebra_mpls_nhlfe_free(nhlfe); + } + + /* Clear pointers in lsp struct, in case we're caching + * free context structs. + */ + nhlfe_list_init(&ctx->u.lsp.nhlfe_list); + ctx->u.lsp.best_nhlfe = NULL; + nhlfe_list_init(&ctx->u.lsp.backup_nhlfe_list); + + break; + } + + case DPLANE_OP_PW_INSTALL: + case DPLANE_OP_PW_UNINSTALL: + /* Free allocated nexthops */ + if (ctx->u.pw.fib_nhg.nexthop) { + /* This deals with recursive nexthops too */ + nexthops_free(ctx->u.pw.fib_nhg.nexthop); + + ctx->u.pw.fib_nhg.nexthop = NULL; + } + if (ctx->u.pw.primary_nhg.nexthop) { + nexthops_free(ctx->u.pw.primary_nhg.nexthop); + + ctx->u.pw.primary_nhg.nexthop = NULL; + } + if (ctx->u.pw.backup_nhg.nexthop) { + nexthops_free(ctx->u.pw.backup_nhg.nexthop); + + ctx->u.pw.backup_nhg.nexthop = NULL; + } + break; + + case DPLANE_OP_ADDR_INSTALL: + case DPLANE_OP_ADDR_UNINSTALL: + case DPLANE_OP_INTF_ADDR_ADD: + case DPLANE_OP_INTF_ADDR_DEL: + /* Maybe free label string, if allocated */ + if (ctx->u.intf.label != NULL && + ctx->u.intf.label != ctx->u.intf.label_buf) { + XFREE(MTYPE_DP_CTX, ctx->u.intf.label); + ctx->u.intf.label = NULL; + } + break; + + case DPLANE_OP_MAC_INSTALL: + case DPLANE_OP_MAC_DELETE: + case DPLANE_OP_NEIGH_INSTALL: + case DPLANE_OP_NEIGH_UPDATE: + case DPLANE_OP_NEIGH_DELETE: + case DPLANE_OP_VTEP_ADD: + case DPLANE_OP_VTEP_DELETE: + 
case DPLANE_OP_RULE_ADD: + case DPLANE_OP_RULE_DELETE: + case DPLANE_OP_RULE_UPDATE: + case DPLANE_OP_NEIGH_DISCOVER: + case DPLANE_OP_BR_PORT_UPDATE: + case DPLANE_OP_NEIGH_IP_INSTALL: + case DPLANE_OP_NEIGH_IP_DELETE: + case DPLANE_OP_NONE: + case DPLANE_OP_IPSET_ADD: + case DPLANE_OP_IPSET_DELETE: + case DPLANE_OP_INTF_INSTALL: + case DPLANE_OP_INTF_UPDATE: + case DPLANE_OP_INTF_DELETE: + case DPLANE_OP_TC_INSTALL: + case DPLANE_OP_TC_UPDATE: + case DPLANE_OP_TC_DELETE: + break; + + case DPLANE_OP_IPSET_ENTRY_ADD: + case DPLANE_OP_IPSET_ENTRY_DELETE: + break; + case DPLANE_OP_NEIGH_TABLE_UPDATE: + break; + case DPLANE_OP_IPTABLE_ADD: + case DPLANE_OP_IPTABLE_DELETE: + if (ctx->u.iptable.interface_name_list) + list_delete(&ctx->u.iptable.interface_name_list); + break; + case DPLANE_OP_GRE_SET: + case DPLANE_OP_INTF_NETCONFIG: + break; + } +} + +/* + * Free a dataplane results context. + */ +static void dplane_ctx_free(struct zebra_dplane_ctx **pctx) +{ + if (pctx == NULL) + return; + + DPLANE_CTX_VALID(*pctx); + + /* TODO -- just freeing memory, but would like to maintain + * a pool + */ + + /* Some internal allocations may need to be freed, depending on + * the type of info captured in the ctx. + */ + dplane_ctx_free_internal(*pctx); + + XFREE(MTYPE_DP_CTX, *pctx); +} + +/* + * Reset an allocated context object for re-use. All internal allocations are + * freed and the context is memset. 
+ */ +void dplane_ctx_reset(struct zebra_dplane_ctx *ctx) +{ + dplane_ctx_free_internal(ctx); + memset(ctx, 0, sizeof(*ctx)); +} + +/* + * Return a context block to the dplane module after processing + */ +void dplane_ctx_fini(struct zebra_dplane_ctx **pctx) +{ + /* TODO -- maintain pool; for now, just free */ + dplane_ctx_free(pctx); +} + +/* Enqueue a context block */ +void dplane_ctx_enqueue_tail(struct dplane_ctx_q *q, + const struct zebra_dplane_ctx *ctx) +{ + TAILQ_INSERT_TAIL(q, (struct zebra_dplane_ctx *)ctx, zd_q_entries); +} + +/* Append a list of context blocks to another list */ +void dplane_ctx_list_append(struct dplane_ctx_q *to_list, + struct dplane_ctx_q *from_list) +{ + if (TAILQ_FIRST(from_list)) { + TAILQ_CONCAT(to_list, from_list, zd_q_entries); + + /* And clear 'from' list */ + TAILQ_INIT(from_list); + } +} + +struct zebra_dplane_ctx *dplane_ctx_get_head(struct dplane_ctx_q *q) +{ + struct zebra_dplane_ctx *ctx = TAILQ_FIRST(q); + + return ctx; +} + +/* Dequeue a context block from the head of a list */ +struct zebra_dplane_ctx *dplane_ctx_dequeue(struct dplane_ctx_q *q) +{ + struct zebra_dplane_ctx *ctx = TAILQ_FIRST(q); + + if (ctx) + TAILQ_REMOVE(q, ctx, zd_q_entries); + + return ctx; +} + +/* + * Accessors for information from the context object + */ +enum zebra_dplane_result dplane_ctx_get_status( + const struct zebra_dplane_ctx *ctx) +{ + DPLANE_CTX_VALID(ctx); + + return ctx->zd_status; +} + +void dplane_ctx_set_status(struct zebra_dplane_ctx *ctx, + enum zebra_dplane_result status) +{ + DPLANE_CTX_VALID(ctx); + + ctx->zd_status = status; +} + +/* Retrieve last/current provider id */ +uint32_t dplane_ctx_get_provider(const struct zebra_dplane_ctx *ctx) +{ + DPLANE_CTX_VALID(ctx); + return ctx->zd_provider; +} + +/* Providers run before the kernel can control whether a kernel + * update should be done. 
+ */ +void dplane_ctx_set_skip_kernel(struct zebra_dplane_ctx *ctx) +{ + DPLANE_CTX_VALID(ctx); + + SET_FLAG(ctx->zd_flags, DPLANE_CTX_FLAG_NO_KERNEL); +} + +bool dplane_ctx_is_skip_kernel(const struct zebra_dplane_ctx *ctx) +{ + DPLANE_CTX_VALID(ctx); + + return CHECK_FLAG(ctx->zd_flags, DPLANE_CTX_FLAG_NO_KERNEL); +} + +void dplane_ctx_set_op(struct zebra_dplane_ctx *ctx, enum dplane_op_e op) +{ + DPLANE_CTX_VALID(ctx); + ctx->zd_op = op; +} + +enum dplane_op_e dplane_ctx_get_op(const struct zebra_dplane_ctx *ctx) +{ + DPLANE_CTX_VALID(ctx); + + return ctx->zd_op; +} + +const char *dplane_op2str(enum dplane_op_e op) +{ + const char *ret = "UNKNOWN"; + + switch (op) { + case DPLANE_OP_NONE: + ret = "NONE"; + break; + + /* Route update */ + case DPLANE_OP_ROUTE_INSTALL: + ret = "ROUTE_INSTALL"; + break; + case DPLANE_OP_ROUTE_UPDATE: + ret = "ROUTE_UPDATE"; + break; + case DPLANE_OP_ROUTE_DELETE: + ret = "ROUTE_DELETE"; + break; + case DPLANE_OP_ROUTE_NOTIFY: + ret = "ROUTE_NOTIFY"; + break; + + /* Nexthop update */ + case DPLANE_OP_NH_INSTALL: + ret = "NH_INSTALL"; + break; + case DPLANE_OP_NH_UPDATE: + ret = "NH_UPDATE"; + break; + case DPLANE_OP_NH_DELETE: + ret = "NH_DELETE"; + break; + + case DPLANE_OP_LSP_INSTALL: + ret = "LSP_INSTALL"; + break; + case DPLANE_OP_LSP_UPDATE: + ret = "LSP_UPDATE"; + break; + case DPLANE_OP_LSP_DELETE: + ret = "LSP_DELETE"; + break; + case DPLANE_OP_LSP_NOTIFY: + ret = "LSP_NOTIFY"; + break; + + case DPLANE_OP_PW_INSTALL: + ret = "PW_INSTALL"; + break; + case DPLANE_OP_PW_UNINSTALL: + ret = "PW_UNINSTALL"; + break; + + case DPLANE_OP_SYS_ROUTE_ADD: + ret = "SYS_ROUTE_ADD"; + break; + case DPLANE_OP_SYS_ROUTE_DELETE: + ret = "SYS_ROUTE_DEL"; + break; + + case DPLANE_OP_BR_PORT_UPDATE: + ret = "BR_PORT_UPDATE"; + break; + + case DPLANE_OP_ADDR_INSTALL: + ret = "ADDR_INSTALL"; + break; + case DPLANE_OP_ADDR_UNINSTALL: + ret = "ADDR_UNINSTALL"; + break; + + case DPLANE_OP_MAC_INSTALL: + ret = "MAC_INSTALL"; + break; + case 
DPLANE_OP_MAC_DELETE: + ret = "MAC_DELETE"; + break; + + case DPLANE_OP_NEIGH_INSTALL: + ret = "NEIGH_INSTALL"; + break; + case DPLANE_OP_NEIGH_UPDATE: + ret = "NEIGH_UPDATE"; + break; + case DPLANE_OP_NEIGH_DELETE: + ret = "NEIGH_DELETE"; + break; + case DPLANE_OP_VTEP_ADD: + ret = "VTEP_ADD"; + break; + case DPLANE_OP_VTEP_DELETE: + ret = "VTEP_DELETE"; + break; + + case DPLANE_OP_RULE_ADD: + ret = "RULE_ADD"; + break; + case DPLANE_OP_RULE_DELETE: + ret = "RULE_DELETE"; + break; + case DPLANE_OP_RULE_UPDATE: + ret = "RULE_UPDATE"; + break; + + case DPLANE_OP_NEIGH_DISCOVER: + ret = "NEIGH_DISCOVER"; + break; + + case DPLANE_OP_IPTABLE_ADD: + ret = "IPTABLE_ADD"; + break; + case DPLANE_OP_IPTABLE_DELETE: + ret = "IPTABLE_DELETE"; + break; + case DPLANE_OP_IPSET_ADD: + ret = "IPSET_ADD"; + break; + case DPLANE_OP_IPSET_DELETE: + ret = "IPSET_DELETE"; + break; + case DPLANE_OP_IPSET_ENTRY_ADD: + ret = "IPSET_ENTRY_ADD"; + break; + case DPLANE_OP_IPSET_ENTRY_DELETE: + ret = "IPSET_ENTRY_DELETE"; + break; + case DPLANE_OP_NEIGH_IP_INSTALL: + ret = "NEIGH_IP_INSTALL"; + break; + case DPLANE_OP_NEIGH_IP_DELETE: + ret = "NEIGH_IP_DELETE"; + break; + case DPLANE_OP_NEIGH_TABLE_UPDATE: + ret = "NEIGH_TABLE_UPDATE"; + break; + + case DPLANE_OP_GRE_SET: + ret = "GRE_SET"; + break; + + case DPLANE_OP_INTF_ADDR_ADD: + return "INTF_ADDR_ADD"; + + case DPLANE_OP_INTF_ADDR_DEL: + return "INTF_ADDR_DEL"; + + case DPLANE_OP_INTF_NETCONFIG: + return "INTF_NETCONFIG"; + + case DPLANE_OP_INTF_INSTALL: + ret = "INTF_INSTALL"; + break; + case DPLANE_OP_INTF_UPDATE: + ret = "INTF_UPDATE"; + break; + case DPLANE_OP_INTF_DELETE: + ret = "INTF_DELETE"; + break; + + case DPLANE_OP_TC_INSTALL: + ret = "TC_INSTALL"; + break; + case DPLANE_OP_TC_UPDATE: + ret = "TC_UPDATE"; + break; + case DPLANE_OP_TC_DELETE: + ret = "TC_DELETE"; + break; + } + + return ret; +} + +const char *dplane_res2str(enum zebra_dplane_result res) +{ + const char *ret = "<Unknown>"; + + switch (res) { + case 
ZEBRA_DPLANE_REQUEST_FAILURE: + ret = "FAILURE"; + break; + case ZEBRA_DPLANE_REQUEST_QUEUED: + ret = "QUEUED"; + break; + case ZEBRA_DPLANE_REQUEST_SUCCESS: + ret = "SUCCESS"; + break; + } + + return ret; +} + +void dplane_ctx_set_dest(struct zebra_dplane_ctx *ctx, + const struct prefix *dest) +{ + DPLANE_CTX_VALID(ctx); + + prefix_copy(&(ctx->u.rinfo.zd_dest), dest); +} + +const struct prefix *dplane_ctx_get_dest(const struct zebra_dplane_ctx *ctx) +{ + DPLANE_CTX_VALID(ctx); + + return &(ctx->u.rinfo.zd_dest); +} + +void dplane_ctx_set_src(struct zebra_dplane_ctx *ctx, const struct prefix *src) +{ + DPLANE_CTX_VALID(ctx); + + if (src) + prefix_copy(&(ctx->u.rinfo.zd_src), src); + else + memset(&(ctx->u.rinfo.zd_src), 0, sizeof(struct prefix)); +} + +/* Source prefix is a little special - return NULL for "no src prefix" */ +const struct prefix *dplane_ctx_get_src(const struct zebra_dplane_ctx *ctx) +{ + DPLANE_CTX_VALID(ctx); + + if (ctx->u.rinfo.zd_src.prefixlen == 0 && + IN6_IS_ADDR_UNSPECIFIED(&(ctx->u.rinfo.zd_src.u.prefix6))) { + return NULL; + } else { + return &(ctx->u.rinfo.zd_src); + } +} + +bool dplane_ctx_is_update(const struct zebra_dplane_ctx *ctx) +{ + DPLANE_CTX_VALID(ctx); + + return ctx->zd_is_update; +} + +uint32_t dplane_ctx_get_seq(const struct zebra_dplane_ctx *ctx) +{ + DPLANE_CTX_VALID(ctx); + + return ctx->zd_seq; +} + +uint32_t dplane_ctx_get_old_seq(const struct zebra_dplane_ctx *ctx) +{ + DPLANE_CTX_VALID(ctx); + + return ctx->zd_old_seq; +} + +void dplane_ctx_set_vrf(struct zebra_dplane_ctx *ctx, vrf_id_t vrf) +{ + DPLANE_CTX_VALID(ctx); + + ctx->zd_vrf_id = vrf; +} + +vrf_id_t dplane_ctx_get_vrf(const struct zebra_dplane_ctx *ctx) +{ + DPLANE_CTX_VALID(ctx); + + return ctx->zd_vrf_id; +} + +/* In some paths we have only a namespace id */ +void dplane_ctx_set_ns_id(struct zebra_dplane_ctx *ctx, ns_id_t nsid) +{ + DPLANE_CTX_VALID(ctx); + + ctx->zd_ns_info.ns_id = nsid; +} + +ns_id_t dplane_ctx_get_ns_id(const struct zebra_dplane_ctx 
*ctx) +{ + DPLANE_CTX_VALID(ctx); + + return ctx->zd_ns_info.ns_id; +} + +bool dplane_ctx_is_from_notif(const struct zebra_dplane_ctx *ctx) +{ + DPLANE_CTX_VALID(ctx); + + return (ctx->zd_notif_provider != 0); +} + +uint32_t dplane_ctx_get_notif_provider(const struct zebra_dplane_ctx *ctx) +{ + DPLANE_CTX_VALID(ctx); + + return ctx->zd_notif_provider; +} + +void dplane_ctx_set_notif_provider(struct zebra_dplane_ctx *ctx, + uint32_t id) +{ + DPLANE_CTX_VALID(ctx); + + ctx->zd_notif_provider = id; +} + +const char *dplane_ctx_get_ifname(const struct zebra_dplane_ctx *ctx) +{ + DPLANE_CTX_VALID(ctx); + + return ctx->zd_ifname; +} + +void dplane_ctx_set_ifname(struct zebra_dplane_ctx *ctx, const char *ifname) +{ + DPLANE_CTX_VALID(ctx); + + if (!ifname) + return; + + strlcpy(ctx->zd_ifname, ifname, sizeof(ctx->zd_ifname)); +} + +ifindex_t dplane_ctx_get_ifindex(const struct zebra_dplane_ctx *ctx) +{ + DPLANE_CTX_VALID(ctx); + + return ctx->zd_ifindex; +} + +void dplane_ctx_set_ifindex(struct zebra_dplane_ctx *ctx, ifindex_t ifindex) +{ + DPLANE_CTX_VALID(ctx); + + ctx->zd_ifindex = ifindex; +} + +void dplane_ctx_set_type(struct zebra_dplane_ctx *ctx, int type) +{ + DPLANE_CTX_VALID(ctx); + + ctx->u.rinfo.zd_type = type; +} + +int dplane_ctx_get_type(const struct zebra_dplane_ctx *ctx) +{ + DPLANE_CTX_VALID(ctx); + + return ctx->u.rinfo.zd_type; +} + +int dplane_ctx_get_old_type(const struct zebra_dplane_ctx *ctx) +{ + DPLANE_CTX_VALID(ctx); + + return ctx->u.rinfo.zd_old_type; +} + +void dplane_ctx_set_afi(struct zebra_dplane_ctx *ctx, afi_t afi) +{ + DPLANE_CTX_VALID(ctx); + + ctx->u.rinfo.zd_afi = afi; +} + +afi_t dplane_ctx_get_afi(const struct zebra_dplane_ctx *ctx) +{ + DPLANE_CTX_VALID(ctx); + + return ctx->u.rinfo.zd_afi; +} + +void dplane_ctx_set_safi(struct zebra_dplane_ctx *ctx, safi_t safi) +{ + DPLANE_CTX_VALID(ctx); + + ctx->u.rinfo.zd_safi = safi; +} + +safi_t dplane_ctx_get_safi(const struct zebra_dplane_ctx *ctx) +{ + DPLANE_CTX_VALID(ctx); + + return 
ctx->u.rinfo.zd_safi; +} + +void dplane_ctx_set_table(struct zebra_dplane_ctx *ctx, uint32_t table) +{ + DPLANE_CTX_VALID(ctx); + + ctx->zd_table_id = table; +} + +uint32_t dplane_ctx_get_table(const struct zebra_dplane_ctx *ctx) +{ + DPLANE_CTX_VALID(ctx); + + return ctx->zd_table_id; +} + +route_tag_t dplane_ctx_get_tag(const struct zebra_dplane_ctx *ctx) +{ + DPLANE_CTX_VALID(ctx); + + return ctx->u.rinfo.zd_tag; +} + +void dplane_ctx_set_tag(struct zebra_dplane_ctx *ctx, route_tag_t tag) +{ + DPLANE_CTX_VALID(ctx); + + ctx->u.rinfo.zd_tag = tag; +} + +route_tag_t dplane_ctx_get_old_tag(const struct zebra_dplane_ctx *ctx) +{ + DPLANE_CTX_VALID(ctx); + + return ctx->u.rinfo.zd_old_tag; +} + +uint16_t dplane_ctx_get_instance(const struct zebra_dplane_ctx *ctx) +{ + DPLANE_CTX_VALID(ctx); + + return ctx->u.rinfo.zd_instance; +} + +void dplane_ctx_set_instance(struct zebra_dplane_ctx *ctx, uint16_t instance) +{ + DPLANE_CTX_VALID(ctx); + + ctx->u.rinfo.zd_instance = instance; +} + +uint16_t dplane_ctx_get_old_instance(const struct zebra_dplane_ctx *ctx) +{ + DPLANE_CTX_VALID(ctx); + + return ctx->u.rinfo.zd_old_instance; +} + +uint32_t dplane_ctx_get_metric(const struct zebra_dplane_ctx *ctx) +{ + DPLANE_CTX_VALID(ctx); + + return ctx->u.rinfo.zd_metric; +} + +uint32_t dplane_ctx_get_old_metric(const struct zebra_dplane_ctx *ctx) +{ + DPLANE_CTX_VALID(ctx); + + return ctx->u.rinfo.zd_old_metric; +} + +uint32_t dplane_ctx_get_mtu(const struct zebra_dplane_ctx *ctx) +{ + DPLANE_CTX_VALID(ctx); + + return ctx->u.rinfo.zd_mtu; +} + +uint32_t dplane_ctx_get_nh_mtu(const struct zebra_dplane_ctx *ctx) +{ + DPLANE_CTX_VALID(ctx); + + return ctx->u.rinfo.zd_nexthop_mtu; +} + +uint8_t dplane_ctx_get_distance(const struct zebra_dplane_ctx *ctx) +{ + DPLANE_CTX_VALID(ctx); + + return ctx->u.rinfo.zd_distance; +} + +void dplane_ctx_set_distance(struct zebra_dplane_ctx *ctx, uint8_t distance) +{ + DPLANE_CTX_VALID(ctx); + + ctx->u.rinfo.zd_distance = distance; +} + +uint8_t 
dplane_ctx_get_old_distance(const struct zebra_dplane_ctx *ctx) +{ + DPLANE_CTX_VALID(ctx); + + return ctx->u.rinfo.zd_old_distance; +} + +uint64_t dplane_ctx_tc_get_rate(const struct zebra_dplane_ctx *ctx) +{ + DPLANE_CTX_VALID(ctx); + + return ctx->u.tc.rate; +} + +uint64_t dplane_ctx_tc_get_ceil(const struct zebra_dplane_ctx *ctx) +{ + DPLANE_CTX_VALID(ctx); + + return ctx->u.tc.ceil; +} + +uint32_t dplane_ctx_tc_get_filter_bm(const struct zebra_dplane_ctx *ctx) +{ + DPLANE_CTX_VALID(ctx); + + return ctx->u.tc.filter_bm; +} + +const struct prefix * +dplane_ctx_tc_get_src_ip(const struct zebra_dplane_ctx *ctx) +{ + DPLANE_CTX_VALID(ctx); + + return &(ctx->u.tc.src_ip); +} + +const struct prefix * +dplane_ctx_tc_get_dst_ip(const struct zebra_dplane_ctx *ctx) +{ + DPLANE_CTX_VALID(ctx); + + return &(ctx->u.tc.dst_ip); +} + +uint8_t dplane_ctx_tc_get_ip_proto(const struct zebra_dplane_ctx *ctx) +{ + DPLANE_CTX_VALID(ctx); + + return ctx->u.tc.ip_proto; +} + +/* + * Set the nexthops associated with a context: note that processing code + * may well expect that nexthops are in canonical (sorted) order, so we + * will enforce that here. + */ +void dplane_ctx_set_nexthops(struct zebra_dplane_ctx *ctx, struct nexthop *nh) +{ + DPLANE_CTX_VALID(ctx); + + if (ctx->u.rinfo.zd_ng.nexthop) { + nexthops_free(ctx->u.rinfo.zd_ng.nexthop); + ctx->u.rinfo.zd_ng.nexthop = NULL; + } + nexthop_group_copy_nh_sorted(&(ctx->u.rinfo.zd_ng), nh); +} + +/* + * Set the list of backup nexthops; their ordering is preserved (they're not + * re-sorted.) 
+ */ +void dplane_ctx_set_backup_nhg(struct zebra_dplane_ctx *ctx, + const struct nexthop_group *nhg) +{ + struct nexthop *nh, *last_nh, *nexthop; + + DPLANE_CTX_VALID(ctx); + + if (ctx->u.rinfo.backup_ng.nexthop) { + nexthops_free(ctx->u.rinfo.backup_ng.nexthop); + ctx->u.rinfo.backup_ng.nexthop = NULL; + } + + last_nh = NULL; + + /* Be careful to preserve the order of the backup list */ + for (nh = nhg->nexthop; nh; nh = nh->next) { + nexthop = nexthop_dup(nh, NULL); + + if (last_nh) + NEXTHOP_APPEND(last_nh, nexthop); + else + ctx->u.rinfo.backup_ng.nexthop = nexthop; + + last_nh = nexthop; + } +} + +uint32_t dplane_ctx_get_nhg_id(const struct zebra_dplane_ctx *ctx) +{ + DPLANE_CTX_VALID(ctx); + return ctx->u.rinfo.zd_nhg_id; +} + +const struct nexthop_group *dplane_ctx_get_ng( + const struct zebra_dplane_ctx *ctx) +{ + DPLANE_CTX_VALID(ctx); + + return &(ctx->u.rinfo.zd_ng); +} + +const struct nexthop_group * +dplane_ctx_get_backup_ng(const struct zebra_dplane_ctx *ctx) +{ + DPLANE_CTX_VALID(ctx); + + return &(ctx->u.rinfo.backup_ng); +} + +const struct nexthop_group * +dplane_ctx_get_old_ng(const struct zebra_dplane_ctx *ctx) +{ + DPLANE_CTX_VALID(ctx); + + return &(ctx->u.rinfo.zd_old_ng); +} + +const struct nexthop_group * +dplane_ctx_get_old_backup_ng(const struct zebra_dplane_ctx *ctx) +{ + DPLANE_CTX_VALID(ctx); + + return &(ctx->u.rinfo.old_backup_ng); +} + +const struct zebra_dplane_info *dplane_ctx_get_ns( + const struct zebra_dplane_ctx *ctx) +{ + DPLANE_CTX_VALID(ctx); + + return &(ctx->zd_ns_info); +} + +int dplane_ctx_get_ns_sock(const struct zebra_dplane_ctx *ctx) +{ + DPLANE_CTX_VALID(ctx); + +#ifdef HAVE_NETLINK + return ctx->zd_ns_info.sock; +#else + return -1; +#endif +} + +/* Accessors for nexthop information */ +uint32_t dplane_ctx_get_nhe_id(const struct zebra_dplane_ctx *ctx) +{ + DPLANE_CTX_VALID(ctx); + return ctx->u.rinfo.nhe.id; +} + +uint32_t dplane_ctx_get_old_nhe_id(const struct zebra_dplane_ctx *ctx) +{ + DPLANE_CTX_VALID(ctx); + 
return ctx->u.rinfo.nhe.old_id; +} + +afi_t dplane_ctx_get_nhe_afi(const struct zebra_dplane_ctx *ctx) +{ + DPLANE_CTX_VALID(ctx); + return ctx->u.rinfo.nhe.afi; +} + +vrf_id_t dplane_ctx_get_nhe_vrf_id(const struct zebra_dplane_ctx *ctx) +{ + DPLANE_CTX_VALID(ctx); + return ctx->u.rinfo.nhe.vrf_id; +} + +int dplane_ctx_get_nhe_type(const struct zebra_dplane_ctx *ctx) +{ + DPLANE_CTX_VALID(ctx); + return ctx->u.rinfo.nhe.type; +} + +const struct nexthop_group * +dplane_ctx_get_nhe_ng(const struct zebra_dplane_ctx *ctx) +{ + DPLANE_CTX_VALID(ctx); + return &(ctx->u.rinfo.nhe.ng); +} + +const struct nh_grp * +dplane_ctx_get_nhe_nh_grp(const struct zebra_dplane_ctx *ctx) +{ + DPLANE_CTX_VALID(ctx); + return ctx->u.rinfo.nhe.nh_grp; +} + +uint8_t dplane_ctx_get_nhe_nh_grp_count(const struct zebra_dplane_ctx *ctx) +{ + DPLANE_CTX_VALID(ctx); + return ctx->u.rinfo.nhe.nh_grp_count; +} + +/* Accessors for LSP information */ + +mpls_label_t dplane_ctx_get_in_label(const struct zebra_dplane_ctx *ctx) +{ + DPLANE_CTX_VALID(ctx); + + return ctx->u.lsp.ile.in_label; +} + +void dplane_ctx_set_in_label(struct zebra_dplane_ctx *ctx, mpls_label_t label) +{ + DPLANE_CTX_VALID(ctx); + + ctx->u.lsp.ile.in_label = label; +} + +uint8_t dplane_ctx_get_addr_family(const struct zebra_dplane_ctx *ctx) +{ + DPLANE_CTX_VALID(ctx); + + return ctx->u.lsp.addr_family; +} + +void dplane_ctx_set_addr_family(struct zebra_dplane_ctx *ctx, + uint8_t family) +{ + DPLANE_CTX_VALID(ctx); + + ctx->u.lsp.addr_family = family; +} + +uint32_t dplane_ctx_get_lsp_flags(const struct zebra_dplane_ctx *ctx) +{ + DPLANE_CTX_VALID(ctx); + + return ctx->u.lsp.flags; +} + +void dplane_ctx_set_lsp_flags(struct zebra_dplane_ctx *ctx, + uint32_t flags) +{ + DPLANE_CTX_VALID(ctx); + + ctx->u.lsp.flags = flags; +} + +const struct nhlfe_list_head *dplane_ctx_get_nhlfe_list( + const struct zebra_dplane_ctx *ctx) +{ + DPLANE_CTX_VALID(ctx); + return &(ctx->u.lsp.nhlfe_list); +} + +const struct nhlfe_list_head 
*dplane_ctx_get_backup_nhlfe_list( + const struct zebra_dplane_ctx *ctx) +{ + DPLANE_CTX_VALID(ctx); + return &(ctx->u.lsp.backup_nhlfe_list); +} + +struct zebra_nhlfe *dplane_ctx_add_nhlfe(struct zebra_dplane_ctx *ctx, + enum lsp_types_t lsp_type, + enum nexthop_types_t nh_type, + const union g_addr *gate, + ifindex_t ifindex, uint8_t num_labels, + mpls_label_t *out_labels) +{ + struct zebra_nhlfe *nhlfe; + + DPLANE_CTX_VALID(ctx); + + nhlfe = zebra_mpls_lsp_add_nhlfe(&(ctx->u.lsp), + lsp_type, nh_type, gate, + ifindex, num_labels, out_labels); + + return nhlfe; +} + +struct zebra_nhlfe *dplane_ctx_add_backup_nhlfe( + struct zebra_dplane_ctx *ctx, enum lsp_types_t lsp_type, + enum nexthop_types_t nh_type, const union g_addr *gate, + ifindex_t ifindex, uint8_t num_labels, mpls_label_t *out_labels) +{ + struct zebra_nhlfe *nhlfe; + + DPLANE_CTX_VALID(ctx); + + nhlfe = zebra_mpls_lsp_add_backup_nhlfe(&(ctx->u.lsp), + lsp_type, nh_type, gate, + ifindex, num_labels, + out_labels); + + return nhlfe; +} + +const struct zebra_nhlfe * +dplane_ctx_get_best_nhlfe(const struct zebra_dplane_ctx *ctx) +{ + DPLANE_CTX_VALID(ctx); + + return ctx->u.lsp.best_nhlfe; +} + +const struct zebra_nhlfe * +dplane_ctx_set_best_nhlfe(struct zebra_dplane_ctx *ctx, + struct zebra_nhlfe *nhlfe) +{ + DPLANE_CTX_VALID(ctx); + + ctx->u.lsp.best_nhlfe = nhlfe; + return ctx->u.lsp.best_nhlfe; +} + +uint32_t dplane_ctx_get_lsp_num_ecmp(const struct zebra_dplane_ctx *ctx) +{ + DPLANE_CTX_VALID(ctx); + + return ctx->u.lsp.num_ecmp; +} + +mpls_label_t dplane_ctx_get_pw_local_label(const struct zebra_dplane_ctx *ctx) +{ + DPLANE_CTX_VALID(ctx); + + return ctx->u.pw.local_label; +} + +mpls_label_t dplane_ctx_get_pw_remote_label(const struct zebra_dplane_ctx *ctx) +{ + DPLANE_CTX_VALID(ctx); + + return ctx->u.pw.remote_label; +} + +int dplane_ctx_get_pw_type(const struct zebra_dplane_ctx *ctx) +{ + DPLANE_CTX_VALID(ctx); + + return ctx->u.pw.type; +} + +int dplane_ctx_get_pw_af(const struct 
zebra_dplane_ctx *ctx) +{ + DPLANE_CTX_VALID(ctx); + + return ctx->u.pw.af; +} + +uint32_t dplane_ctx_get_pw_flags(const struct zebra_dplane_ctx *ctx) +{ + DPLANE_CTX_VALID(ctx); + + return ctx->u.pw.flags; +} + +int dplane_ctx_get_pw_status(const struct zebra_dplane_ctx *ctx) +{ + DPLANE_CTX_VALID(ctx); + + return ctx->u.pw.status; +} + +void dplane_ctx_set_pw_status(struct zebra_dplane_ctx *ctx, int status) +{ + DPLANE_CTX_VALID(ctx); + + ctx->u.pw.status = status; +} + +const union g_addr *dplane_ctx_get_pw_dest( + const struct zebra_dplane_ctx *ctx) +{ + DPLANE_CTX_VALID(ctx); + + return &(ctx->u.pw.dest); +} + +const union pw_protocol_fields *dplane_ctx_get_pw_proto( + const struct zebra_dplane_ctx *ctx) +{ + DPLANE_CTX_VALID(ctx); + + return &(ctx->u.pw.fields); +} + +const struct nexthop_group * +dplane_ctx_get_pw_nhg(const struct zebra_dplane_ctx *ctx) +{ + DPLANE_CTX_VALID(ctx); + + return &(ctx->u.pw.fib_nhg); +} + +const struct nexthop_group * +dplane_ctx_get_pw_primary_nhg(const struct zebra_dplane_ctx *ctx) +{ + DPLANE_CTX_VALID(ctx); + + return &(ctx->u.pw.primary_nhg); +} + +const struct nexthop_group * +dplane_ctx_get_pw_backup_nhg(const struct zebra_dplane_ctx *ctx) +{ + DPLANE_CTX_VALID(ctx); + + return &(ctx->u.pw.backup_nhg); +} + +/* Accessors for interface information */ +uint32_t dplane_ctx_get_intf_metric(const struct zebra_dplane_ctx *ctx) +{ + DPLANE_CTX_VALID(ctx); + + return ctx->u.intf.metric; +} + +void dplane_ctx_set_intf_metric(struct zebra_dplane_ctx *ctx, uint32_t metric) +{ + DPLANE_CTX_VALID(ctx); + + ctx->u.intf.metric = metric; +} + +uint32_t dplane_ctx_get_intf_pd_reason_val(const struct zebra_dplane_ctx *ctx) +{ + DPLANE_CTX_VALID(ctx); + + return ctx->u.intf.pd_reason_val; +} + +void dplane_ctx_set_intf_pd_reason_val(struct zebra_dplane_ctx *ctx, bool val) +{ + DPLANE_CTX_VALID(ctx); + + ctx->u.intf.pd_reason_val = val; +} + +bool dplane_ctx_intf_is_protodown(const struct zebra_dplane_ctx *ctx) +{ + DPLANE_CTX_VALID(ctx); + 
+ return ctx->u.intf.protodown; +} + +/* Is interface addr p2p? */ +bool dplane_ctx_intf_is_connected(const struct zebra_dplane_ctx *ctx) +{ + DPLANE_CTX_VALID(ctx); + + return (ctx->u.intf.flags & DPLANE_INTF_CONNECTED); +} + +bool dplane_ctx_intf_is_secondary(const struct zebra_dplane_ctx *ctx) +{ + DPLANE_CTX_VALID(ctx); + + return (ctx->u.intf.flags & DPLANE_INTF_SECONDARY); +} + +bool dplane_ctx_intf_is_broadcast(const struct zebra_dplane_ctx *ctx) +{ + DPLANE_CTX_VALID(ctx); + + return (ctx->u.intf.flags & DPLANE_INTF_BROADCAST); +} + +void dplane_ctx_intf_set_connected(struct zebra_dplane_ctx *ctx) +{ + DPLANE_CTX_VALID(ctx); + + ctx->u.intf.flags |= DPLANE_INTF_CONNECTED; +} + +void dplane_ctx_intf_set_secondary(struct zebra_dplane_ctx *ctx) +{ + DPLANE_CTX_VALID(ctx); + + ctx->u.intf.flags |= DPLANE_INTF_SECONDARY; +} + +void dplane_ctx_intf_set_broadcast(struct zebra_dplane_ctx *ctx) +{ + DPLANE_CTX_VALID(ctx); + + ctx->u.intf.flags |= DPLANE_INTF_BROADCAST; +} + +const struct prefix *dplane_ctx_get_intf_addr( + const struct zebra_dplane_ctx *ctx) +{ + DPLANE_CTX_VALID(ctx); + + return &(ctx->u.intf.prefix); +} + +void dplane_ctx_set_intf_addr(struct zebra_dplane_ctx *ctx, + const struct prefix *p) +{ + DPLANE_CTX_VALID(ctx); + + prefix_copy(&(ctx->u.intf.prefix), p); +} + +bool dplane_ctx_intf_has_dest(const struct zebra_dplane_ctx *ctx) +{ + DPLANE_CTX_VALID(ctx); + + return (ctx->u.intf.flags & DPLANE_INTF_HAS_DEST); +} + +const struct prefix *dplane_ctx_get_intf_dest( + const struct zebra_dplane_ctx *ctx) +{ + DPLANE_CTX_VALID(ctx); + + return &(ctx->u.intf.dest_prefix); +} + +void dplane_ctx_set_intf_dest(struct zebra_dplane_ctx *ctx, + const struct prefix *p) +{ + DPLANE_CTX_VALID(ctx); + + prefix_copy(&(ctx->u.intf.dest_prefix), p); +} + +bool dplane_ctx_intf_has_label(const struct zebra_dplane_ctx *ctx) +{ + DPLANE_CTX_VALID(ctx); + + return (ctx->u.intf.flags & DPLANE_INTF_HAS_LABEL); +} + +const char *dplane_ctx_get_intf_label(const struct 
zebra_dplane_ctx *ctx) +{ + DPLANE_CTX_VALID(ctx); + + return ctx->u.intf.label; +} + +void dplane_ctx_set_intf_label(struct zebra_dplane_ctx *ctx, const char *label) +{ + size_t len; + + DPLANE_CTX_VALID(ctx); + + if (ctx->u.intf.label && ctx->u.intf.label != ctx->u.intf.label_buf) + XFREE(MTYPE_DP_CTX, ctx->u.intf.label); + + ctx->u.intf.label = NULL; + + if (label) { + ctx->u.intf.flags |= DPLANE_INTF_HAS_LABEL; + + /* Use embedded buffer if it's adequate; else allocate. */ + len = strlen(label); + + if (len < sizeof(ctx->u.intf.label_buf)) { + strlcpy(ctx->u.intf.label_buf, label, + sizeof(ctx->u.intf.label_buf)); + ctx->u.intf.label = ctx->u.intf.label_buf; + } else { + ctx->u.intf.label = XSTRDUP(MTYPE_DP_CTX, label); + } + } else { + ctx->u.intf.flags &= ~DPLANE_INTF_HAS_LABEL; + } +} + +/* Accessors for MAC information */ +vlanid_t dplane_ctx_mac_get_vlan(const struct zebra_dplane_ctx *ctx) +{ + DPLANE_CTX_VALID(ctx); + return ctx->u.macinfo.vid; +} + +bool dplane_ctx_mac_is_sticky(const struct zebra_dplane_ctx *ctx) +{ + DPLANE_CTX_VALID(ctx); + return ctx->u.macinfo.is_sticky; +} + +uint32_t dplane_ctx_mac_get_nhg_id(const struct zebra_dplane_ctx *ctx) +{ + DPLANE_CTX_VALID(ctx); + return ctx->u.macinfo.nhg_id; +} + +uint32_t dplane_ctx_mac_get_update_flags(const struct zebra_dplane_ctx *ctx) +{ + DPLANE_CTX_VALID(ctx); + return ctx->u.macinfo.update_flags; +} + +const struct ethaddr *dplane_ctx_mac_get_addr( + const struct zebra_dplane_ctx *ctx) +{ + DPLANE_CTX_VALID(ctx); + return &(ctx->u.macinfo.mac); +} + +const struct in_addr *dplane_ctx_mac_get_vtep_ip( + const struct zebra_dplane_ctx *ctx) +{ + DPLANE_CTX_VALID(ctx); + return &(ctx->u.macinfo.vtep_ip); +} + +ifindex_t dplane_ctx_mac_get_br_ifindex(const struct zebra_dplane_ctx *ctx) +{ + DPLANE_CTX_VALID(ctx); + return ctx->u.macinfo.br_ifindex; +} + +/* Accessors for neighbor information */ +const struct ipaddr *dplane_ctx_neigh_get_ipaddr( + const struct zebra_dplane_ctx *ctx) +{ + 
DPLANE_CTX_VALID(ctx); + return &(ctx->u.neigh.ip_addr); +} + +const struct ipaddr * +dplane_ctx_neigh_get_link_ip(const struct zebra_dplane_ctx *ctx) +{ + DPLANE_CTX_VALID(ctx); + return &(ctx->u.neigh.link.ip_addr); +} + +const struct ethaddr *dplane_ctx_neigh_get_mac( + const struct zebra_dplane_ctx *ctx) +{ + DPLANE_CTX_VALID(ctx); + return &(ctx->u.neigh.link.mac); +} + +uint32_t dplane_ctx_neigh_get_flags(const struct zebra_dplane_ctx *ctx) +{ + DPLANE_CTX_VALID(ctx); + return ctx->u.neigh.flags; +} + +uint16_t dplane_ctx_neigh_get_state(const struct zebra_dplane_ctx *ctx) +{ + DPLANE_CTX_VALID(ctx); + return ctx->u.neigh.state; +} + +uint32_t dplane_ctx_neigh_get_update_flags(const struct zebra_dplane_ctx *ctx) +{ + DPLANE_CTX_VALID(ctx); + return ctx->u.neigh.update_flags; +} + +/* Accessor for GRE set */ +uint32_t +dplane_ctx_gre_get_link_ifindex(const struct zebra_dplane_ctx *ctx) +{ + DPLANE_CTX_VALID(ctx); + + return ctx->u.gre.link_ifindex; +} + +unsigned int +dplane_ctx_gre_get_mtu(const struct zebra_dplane_ctx *ctx) +{ + DPLANE_CTX_VALID(ctx); + + return ctx->u.gre.mtu; +} + +const struct zebra_l2info_gre * +dplane_ctx_gre_get_info(const struct zebra_dplane_ctx *ctx) +{ + DPLANE_CTX_VALID(ctx); + + return &ctx->u.gre.info; +} + +/* Accessors for PBR rule information */ +int dplane_ctx_rule_get_sock(const struct zebra_dplane_ctx *ctx) +{ + DPLANE_CTX_VALID(ctx); + + return ctx->u.rule.sock; +} + +const char *dplane_ctx_rule_get_ifname(const struct zebra_dplane_ctx *ctx) +{ + DPLANE_CTX_VALID(ctx); + + return ctx->u.rule.new.ifname; +} + +int dplane_ctx_rule_get_unique(const struct zebra_dplane_ctx *ctx) +{ + DPLANE_CTX_VALID(ctx); + + return ctx->u.rule.unique; +} + +int dplane_ctx_rule_get_seq(const struct zebra_dplane_ctx *ctx) +{ + DPLANE_CTX_VALID(ctx); + + return ctx->u.rule.seq; +} + +uint32_t dplane_ctx_rule_get_priority(const struct zebra_dplane_ctx *ctx) +{ + DPLANE_CTX_VALID(ctx); + + return ctx->u.rule.new.priority; +} + +uint32_t 
dplane_ctx_rule_get_old_priority(const struct zebra_dplane_ctx *ctx) +{ + DPLANE_CTX_VALID(ctx); + + return ctx->u.rule.old.priority; +} + +uint32_t dplane_ctx_rule_get_table(const struct zebra_dplane_ctx *ctx) +{ + DPLANE_CTX_VALID(ctx); + + return ctx->u.rule.new.table; +} + +uint32_t dplane_ctx_rule_get_old_table(const struct zebra_dplane_ctx *ctx) +{ + DPLANE_CTX_VALID(ctx); + + return ctx->u.rule.old.table; +} + +uint32_t dplane_ctx_rule_get_filter_bm(const struct zebra_dplane_ctx *ctx) +{ + DPLANE_CTX_VALID(ctx); + + return ctx->u.rule.new.filter_bm; +} + +uint32_t dplane_ctx_rule_get_old_filter_bm(const struct zebra_dplane_ctx *ctx) +{ + DPLANE_CTX_VALID(ctx); + + return ctx->u.rule.old.filter_bm; +} + +uint32_t dplane_ctx_rule_get_fwmark(const struct zebra_dplane_ctx *ctx) +{ + DPLANE_CTX_VALID(ctx); + + return ctx->u.rule.new.fwmark; +} + +uint32_t dplane_ctx_rule_get_old_fwmark(const struct zebra_dplane_ctx *ctx) +{ + DPLANE_CTX_VALID(ctx); + + return ctx->u.rule.old.fwmark; +} + +uint8_t dplane_ctx_rule_get_ipproto(const struct zebra_dplane_ctx *ctx) +{ + DPLANE_CTX_VALID(ctx); + + return ctx->u.rule.new.ip_proto; +} + +uint8_t dplane_ctx_rule_get_old_ipproto(const struct zebra_dplane_ctx *ctx) +{ + DPLANE_CTX_VALID(ctx); + + return ctx->u.rule.old.ip_proto; +} + +uint16_t dplane_ctx_rule_get_src_port(const struct zebra_dplane_ctx *ctx) +{ + DPLANE_CTX_VALID(ctx); + + return ctx->u.rule.new.src_port; +} + +uint16_t dplane_ctx_rule_get_old_src_port(const struct zebra_dplane_ctx *ctx) +{ + DPLANE_CTX_VALID(ctx); + + return ctx->u.rule.old.src_port; +} + +uint16_t dplane_ctx_rule_get_dst_port(const struct zebra_dplane_ctx *ctx) +{ + DPLANE_CTX_VALID(ctx); + + return ctx->u.rule.new.dst_port; +} + +uint16_t dplane_ctx_rule_get_old_dst_port(const struct zebra_dplane_ctx *ctx) +{ + DPLANE_CTX_VALID(ctx); + + return ctx->u.rule.old.dst_port; +} + +uint8_t dplane_ctx_rule_get_dsfield(const struct zebra_dplane_ctx *ctx) +{ + DPLANE_CTX_VALID(ctx); + + return 
ctx->u.rule.new.dsfield; +} + +uint8_t dplane_ctx_rule_get_old_dsfield(const struct zebra_dplane_ctx *ctx) +{ + DPLANE_CTX_VALID(ctx); + + return ctx->u.rule.old.dsfield; +} + +const struct prefix * +dplane_ctx_rule_get_src_ip(const struct zebra_dplane_ctx *ctx) +{ + DPLANE_CTX_VALID(ctx); + + return &(ctx->u.rule.new.src_ip); +} + +const struct prefix * +dplane_ctx_rule_get_old_src_ip(const struct zebra_dplane_ctx *ctx) +{ + DPLANE_CTX_VALID(ctx); + + return &(ctx->u.rule.old.src_ip); +} + +const struct prefix * +dplane_ctx_rule_get_dst_ip(const struct zebra_dplane_ctx *ctx) +{ + DPLANE_CTX_VALID(ctx); + + return &(ctx->u.rule.new.dst_ip); +} + +const struct prefix * +dplane_ctx_rule_get_old_dst_ip(const struct zebra_dplane_ctx *ctx) +{ + DPLANE_CTX_VALID(ctx); + + return &(ctx->u.rule.old.dst_ip); +} + +uint32_t dplane_ctx_get_br_port_flags(const struct zebra_dplane_ctx *ctx) +{ + DPLANE_CTX_VALID(ctx); + + return ctx->u.br_port.flags; +} + +uint32_t +dplane_ctx_get_br_port_sph_filter_cnt(const struct zebra_dplane_ctx *ctx) +{ + DPLANE_CTX_VALID(ctx); + + return ctx->u.br_port.sph_filter_cnt; +} + +const struct in_addr * +dplane_ctx_get_br_port_sph_filters(const struct zebra_dplane_ctx *ctx) +{ + DPLANE_CTX_VALID(ctx); + + return ctx->u.br_port.sph_filters; +} + +uint32_t +dplane_ctx_get_br_port_backup_nhg_id(const struct zebra_dplane_ctx *ctx) +{ + DPLANE_CTX_VALID(ctx); + + return ctx->u.br_port.backup_nhg_id; +} + +/* Accessors for PBR iptable information */ +void dplane_ctx_get_pbr_iptable(const struct zebra_dplane_ctx *ctx, + struct zebra_pbr_iptable *table) +{ + DPLANE_CTX_VALID(ctx); + + memcpy(table, &ctx->u.iptable, sizeof(struct zebra_pbr_iptable)); +} + +void dplane_ctx_get_pbr_ipset(const struct zebra_dplane_ctx *ctx, + struct zebra_pbr_ipset *ipset) +{ + DPLANE_CTX_VALID(ctx); + + assert(ipset); + + if (ctx->zd_op == DPLANE_OP_IPSET_ENTRY_ADD || + ctx->zd_op == DPLANE_OP_IPSET_ENTRY_DELETE) { + memset(ipset, 0, sizeof(struct zebra_pbr_ipset)); + 
ipset->type = ctx->u.ipset_entry.info.type; + ipset->family = ctx->u.ipset_entry.info.family; + memcpy(&ipset->ipset_name, &ctx->u.ipset_entry.info.ipset_name, + ZEBRA_IPSET_NAME_SIZE); + } else + memcpy(ipset, &ctx->u.ipset, sizeof(struct zebra_pbr_ipset)); +} + +void dplane_ctx_get_pbr_ipset_entry(const struct zebra_dplane_ctx *ctx, + struct zebra_pbr_ipset_entry *entry) +{ + DPLANE_CTX_VALID(ctx); + + assert(entry); + + memcpy(entry, &ctx->u.ipset_entry.entry, sizeof(struct zebra_pbr_ipset_entry)); +} + +const struct ethaddr * +dplane_ctx_rule_get_smac(const struct zebra_dplane_ctx *ctx) +{ + DPLANE_CTX_VALID(ctx); + + return &(ctx->u.rule.new.smac); +} + +const struct ethaddr * +dplane_ctx_rule_get_dmac(const struct zebra_dplane_ctx *ctx) +{ + DPLANE_CTX_VALID(ctx); + + return &(ctx->u.rule.new.dmac); +} + +int dplane_ctx_rule_get_out_ifindex(const struct zebra_dplane_ctx *ctx) +{ + DPLANE_CTX_VALID(ctx); + + return ctx->u.rule.new.out_ifindex; +} + +intptr_t dplane_ctx_rule_get_old_dp_flow_ptr(const struct zebra_dplane_ctx *ctx) +{ + DPLANE_CTX_VALID(ctx); + + return ctx->u.rule.old.dp_flow_ptr; +} + +intptr_t dplane_ctx_rule_get_dp_flow_ptr(const struct zebra_dplane_ctx *ctx) +{ + DPLANE_CTX_VALID(ctx); + + return ctx->u.rule.new.dp_flow_ptr; +} + +void dplane_ctx_rule_set_dp_flow_ptr(struct zebra_dplane_ctx *ctx, + intptr_t dp_flow_ptr) +{ + DPLANE_CTX_VALID(ctx); + + ctx->u.rule.new.dp_flow_ptr = dp_flow_ptr; +} + +/* + * End of dplane context accessors + */ + +/* Optional extra info about interfaces in nexthops - a plugin must enable + * this extra info. 
+ */ +const struct dplane_intf_extra * +dplane_ctx_get_intf_extra(const struct zebra_dplane_ctx *ctx) +{ + return TAILQ_FIRST(&ctx->u.rinfo.intf_extra_q); +} + +const struct dplane_intf_extra * +dplane_ctx_intf_extra_next(const struct zebra_dplane_ctx *ctx, + const struct dplane_intf_extra *ptr) +{ + return TAILQ_NEXT(ptr, link); +} + +vrf_id_t dplane_intf_extra_get_vrfid(const struct dplane_intf_extra *ptr) +{ + return ptr->vrf_id; +} + +uint32_t dplane_intf_extra_get_ifindex(const struct dplane_intf_extra *ptr) +{ + return ptr->ifindex; +} + +uint32_t dplane_intf_extra_get_flags(const struct dplane_intf_extra *ptr) +{ + return ptr->flags; +} + +uint32_t dplane_intf_extra_get_status(const struct dplane_intf_extra *ptr) +{ + return ptr->status; +} + +/* + * End of interface extra info accessors + */ + +uint8_t dplane_ctx_neightable_get_family(const struct zebra_dplane_ctx *ctx) +{ + DPLANE_CTX_VALID(ctx); + + return ctx->u.neightable.family; +} + +uint32_t +dplane_ctx_neightable_get_app_probes(const struct zebra_dplane_ctx *ctx) +{ + DPLANE_CTX_VALID(ctx); + + return ctx->u.neightable.app_probes; +} + +uint32_t +dplane_ctx_neightable_get_ucast_probes(const struct zebra_dplane_ctx *ctx) +{ + DPLANE_CTX_VALID(ctx); + + return ctx->u.neightable.ucast_probes; +} + +uint32_t +dplane_ctx_neightable_get_mcast_probes(const struct zebra_dplane_ctx *ctx) +{ + DPLANE_CTX_VALID(ctx); + + return ctx->u.neightable.mcast_probes; +} + +enum dplane_netconf_status_e +dplane_ctx_get_netconf_mpls(const struct zebra_dplane_ctx *ctx) +{ + DPLANE_CTX_VALID(ctx); + + return ctx->u.netconf.mpls_val; +} + +enum dplane_netconf_status_e +dplane_ctx_get_netconf_mcast(const struct zebra_dplane_ctx *ctx) +{ + DPLANE_CTX_VALID(ctx); + + return ctx->u.netconf.mcast_val; +} + +enum dplane_netconf_status_e +dplane_ctx_get_netconf_linkdown(const struct zebra_dplane_ctx *ctx) +{ + DPLANE_CTX_VALID(ctx); + + return ctx->u.netconf.linkdown_val; +} + +void dplane_ctx_set_netconf_mpls(struct 
zebra_dplane_ctx *ctx, + enum dplane_netconf_status_e val) +{ + DPLANE_CTX_VALID(ctx); + + ctx->u.netconf.mpls_val = val; +} + +void dplane_ctx_set_netconf_mcast(struct zebra_dplane_ctx *ctx, + enum dplane_netconf_status_e val) +{ + DPLANE_CTX_VALID(ctx); + + ctx->u.netconf.mcast_val = val; +} + +void dplane_ctx_set_netconf_linkdown(struct zebra_dplane_ctx *ctx, + enum dplane_netconf_status_e val) +{ + DPLANE_CTX_VALID(ctx); + + ctx->u.netconf.linkdown_val = val; +} + + +/* + * Retrieve the limit on the number of pending, unprocessed updates. + */ +uint32_t dplane_get_in_queue_limit(void) +{ + return atomic_load_explicit(&zdplane_info.dg_max_queued_updates, + memory_order_relaxed); +} + +/* + * Configure limit on the number of pending, queued updates. + */ +void dplane_set_in_queue_limit(uint32_t limit, bool set) +{ + /* Reset to default on 'unset' */ + if (!set) + limit = DPLANE_DEFAULT_MAX_QUEUED; + + atomic_store_explicit(&zdplane_info.dg_max_queued_updates, limit, + memory_order_relaxed); +} + +/* + * Retrieve the current queue depth of incoming, unprocessed updates + */ +uint32_t dplane_get_in_queue_len(void) +{ + return atomic_load_explicit(&zdplane_info.dg_routes_queued, + memory_order_seq_cst); +} + +/* + * Internal helper that copies information from a zebra ns object; this is + * called in the zebra main pthread context as part of dplane ctx init. + */ +static void ctx_info_from_zns(struct zebra_dplane_info *ns_info, + struct zebra_ns *zns) +{ + ns_info->ns_id = zns->ns_id; + +#if defined(HAVE_NETLINK) + ns_info->is_cmd = true; + ns_info->sock = zns->netlink_dplane_out.sock; + ns_info->seq = zns->netlink_dplane_out.seq; +#endif /* NETLINK */ +} + +/* + * Common dataplane context init with zebra namespace info. 
+ */ +static int dplane_ctx_ns_init(struct zebra_dplane_ctx *ctx, + struct zebra_ns *zns, + bool is_update) +{ + ctx_info_from_zns(&(ctx->zd_ns_info), zns); /* */ + + ctx->zd_is_update = is_update; + +#if defined(HAVE_NETLINK) + /* Increment message counter after copying to context struct - may need + * two messages in some 'update' cases. + */ + if (is_update) + zns->netlink_dplane_out.seq += 2; + else + zns->netlink_dplane_out.seq++; +#endif /* HAVE_NETLINK */ + + return AOK; +} + +/* + * Initialize a context block for a route update from zebra data structs. + */ +int dplane_ctx_route_init(struct zebra_dplane_ctx *ctx, enum dplane_op_e op, + struct route_node *rn, struct route_entry *re) +{ + int ret = EINVAL; + const struct route_table *table = NULL; + const struct rib_table_info *info; + const struct prefix *p, *src_p; + struct zebra_ns *zns; + struct zebra_vrf *zvrf; + struct nexthop *nexthop; + struct zebra_l3vni *zl3vni; + const struct interface *ifp; + struct dplane_intf_extra *if_extra; + + if (!ctx || !rn || !re) + goto done; + + TAILQ_INIT(&ctx->u.rinfo.intf_extra_q); + + ctx->zd_op = op; + ctx->zd_status = ZEBRA_DPLANE_REQUEST_SUCCESS; + + ctx->u.rinfo.zd_type = re->type; + ctx->u.rinfo.zd_old_type = re->type; + + /* Prefixes: dest, and optional source */ + srcdest_rnode_prefixes(rn, &p, &src_p); + + prefix_copy(&(ctx->u.rinfo.zd_dest), p); + + if (src_p) + prefix_copy(&(ctx->u.rinfo.zd_src), src_p); + else + memset(&(ctx->u.rinfo.zd_src), 0, sizeof(ctx->u.rinfo.zd_src)); + + ctx->zd_table_id = re->table; + + ctx->u.rinfo.zd_metric = re->metric; + ctx->u.rinfo.zd_old_metric = re->metric; + ctx->zd_vrf_id = re->vrf_id; + ctx->u.rinfo.zd_mtu = re->mtu; + ctx->u.rinfo.zd_nexthop_mtu = re->nexthop_mtu; + ctx->u.rinfo.zd_instance = re->instance; + ctx->u.rinfo.zd_tag = re->tag; + ctx->u.rinfo.zd_old_tag = re->tag; + ctx->u.rinfo.zd_distance = re->distance; + + table = srcdest_rnode_table(rn); + info = table->info; + + ctx->u.rinfo.zd_afi = info->afi; + 
ctx->u.rinfo.zd_safi = info->safi; + + /* Copy nexthops; recursive info is included too */ + copy_nexthops(&(ctx->u.rinfo.zd_ng.nexthop), + re->nhe->nhg.nexthop, NULL); + ctx->u.rinfo.zd_nhg_id = re->nhe->id; + + /* Copy backup nexthop info, if present */ + if (re->nhe->backup_info && re->nhe->backup_info->nhe) { + copy_nexthops(&(ctx->u.rinfo.backup_ng.nexthop), + re->nhe->backup_info->nhe->nhg.nexthop, NULL); + } + + /* + * Ensure that the dplane nexthops' flags are clear and copy + * encapsulation information. + */ + for (ALL_NEXTHOPS(ctx->u.rinfo.zd_ng, nexthop)) { + UNSET_FLAG(nexthop->flags, NEXTHOP_FLAG_FIB); + + /* Optionally capture extra interface info while we're in the + * main zebra pthread - a plugin has to ask for this info. + */ + if (dplane_collect_extra_intf_info) { + ifp = if_lookup_by_index(nexthop->ifindex, + nexthop->vrf_id); + + if (ifp) { + if_extra = XCALLOC( + MTYPE_DP_INTF, + sizeof(struct dplane_intf_extra)); + if_extra->vrf_id = nexthop->vrf_id; + if_extra->ifindex = nexthop->ifindex; + if_extra->flags = ifp->flags; + if_extra->status = ifp->status; + + TAILQ_INSERT_TAIL(&ctx->u.rinfo.intf_extra_q, + if_extra, link); + } + } + + /* Check for available evpn encapsulations. 
*/ + if (!CHECK_FLAG(nexthop->flags, NEXTHOP_FLAG_EVPN)) + continue; + + zl3vni = zl3vni_from_vrf(nexthop->vrf_id); + if (zl3vni && is_l3vni_oper_up(zl3vni)) { + nexthop->nh_encap_type = NET_VXLAN; + nexthop->nh_encap.vni = zl3vni->vni; + } + } + + /* Don't need some info when capturing a system notification */ + if (op == DPLANE_OP_SYS_ROUTE_ADD || + op == DPLANE_OP_SYS_ROUTE_DELETE) { + ret = AOK; + goto done; + } + + /* Extract ns info - can't use pointers to 'core' structs */ + zvrf = vrf_info_lookup(re->vrf_id); + zns = zvrf->zns; + dplane_ctx_ns_init(ctx, zns, (op == DPLANE_OP_ROUTE_UPDATE)); + +#ifdef HAVE_NETLINK + { + struct nhg_hash_entry *nhe = zebra_nhg_resolve(re->nhe); + + ctx->u.rinfo.nhe.id = nhe->id; + ctx->u.rinfo.nhe.old_id = 0; + /* + * Check if the nhe is installed/queued before doing anything + * with this route. + * + * If its a delete we only use the prefix anyway, so this only + * matters for INSTALL/UPDATE. + */ + if (zebra_nhg_kernel_nexthops_enabled() + && (((op == DPLANE_OP_ROUTE_INSTALL) + || (op == DPLANE_OP_ROUTE_UPDATE)) + && !CHECK_FLAG(nhe->flags, NEXTHOP_GROUP_INSTALLED) + && !CHECK_FLAG(nhe->flags, NEXTHOP_GROUP_QUEUED))) { + ret = ENOENT; + goto done; + } + + re->nhe_installed_id = nhe->id; + } +#endif /* HAVE_NETLINK */ + + /* Trying out the sequence number idea, so we can try to detect + * when a result is stale. 
+ */ + re->dplane_sequence = zebra_router_get_next_sequence(); + ctx->zd_seq = re->dplane_sequence; + + ret = AOK; + +done: + return ret; +} + +int dplane_ctx_tc_init(struct zebra_dplane_ctx *ctx, enum dplane_op_e op) +{ + int ret = EINVAL; + + struct zebra_ns *zns = NULL; + + ctx->zd_op = op; + ctx->zd_status = ZEBRA_DPLANE_REQUEST_SUCCESS; + + /* TODO: init traffic control qdisc */ + zns = zebra_ns_lookup(NS_DEFAULT); + + dplane_ctx_ns_init(ctx, zns, true); + + ret = AOK; + + return ret; +} + +/** + * dplane_ctx_nexthop_init() - Initialize a context block for a nexthop update + * + * @ctx: Dataplane context to init + * @op: Operation being performed + * @nhe: Nexthop group hash entry + * + * Return: Result status + */ +int dplane_ctx_nexthop_init(struct zebra_dplane_ctx *ctx, enum dplane_op_e op, + struct nhg_hash_entry *nhe) +{ + struct zebra_vrf *zvrf = NULL; + struct zebra_ns *zns = NULL; + int ret = EINVAL; + + if (!ctx || !nhe) + goto done; + + ctx->zd_op = op; + ctx->zd_status = ZEBRA_DPLANE_REQUEST_SUCCESS; + + /* Copy over nhe info */ + ctx->u.rinfo.nhe.id = nhe->id; + ctx->u.rinfo.nhe.afi = nhe->afi; + ctx->u.rinfo.nhe.vrf_id = nhe->vrf_id; + ctx->u.rinfo.nhe.type = nhe->type; + + nexthop_group_copy(&(ctx->u.rinfo.nhe.ng), &(nhe->nhg)); + + /* If this is a group, convert it to a grp array of ids */ + if (!zebra_nhg_depends_is_empty(nhe) + && !CHECK_FLAG(nhe->flags, NEXTHOP_GROUP_RECURSIVE)) + ctx->u.rinfo.nhe.nh_grp_count = zebra_nhg_nhe2grp( + ctx->u.rinfo.nhe.nh_grp, nhe, MULTIPATH_NUM); + + zvrf = vrf_info_lookup(nhe->vrf_id); + + /* + * Fallback to default namespace if the vrf got ripped out from under + * us. + */ + zns = zvrf ? 
zvrf->zns : zebra_ns_lookup(NS_DEFAULT); + + /* + * TODO: Might not need to mark this as an update, since + * it probably won't require two messages + */ + dplane_ctx_ns_init(ctx, zns, (op == DPLANE_OP_NH_UPDATE)); + + ret = AOK; + +done: + return ret; +} + +/** + * dplane_ctx_intf_init() - Initialize a context block for a interface update + * + * @ctx: Dataplane context to init + * @op: Operation being performed + * @ifp: Interface + * + * Return: Result status + */ +int dplane_ctx_intf_init(struct zebra_dplane_ctx *ctx, enum dplane_op_e op, + const struct interface *ifp) +{ + struct zebra_ns *zns; + struct zebra_if *zif; + int ret = EINVAL; + bool set_pdown, unset_pdown; + + if (!ctx || !ifp) + goto done; + + ctx->zd_op = op; + ctx->zd_status = ZEBRA_DPLANE_REQUEST_SUCCESS; + ctx->zd_vrf_id = ifp->vrf->vrf_id; + + strlcpy(ctx->zd_ifname, ifp->name, sizeof(ctx->zd_ifname)); + ctx->zd_ifindex = ifp->ifindex; + + zns = zebra_ns_lookup(ifp->vrf->vrf_id); + dplane_ctx_ns_init(ctx, zns, false); + + + /* Copy over ifp info */ + ctx->u.intf.metric = ifp->metric; + ctx->u.intf.flags = ifp->flags; + + /* Copy over extra zebra info, if available */ + zif = (struct zebra_if *)ifp->info; + + if (zif) { + set_pdown = !!(zif->flags & ZIF_FLAG_SET_PROTODOWN); + unset_pdown = !!(zif->flags & ZIF_FLAG_UNSET_PROTODOWN); + + if (zif->protodown_rc && + ZEBRA_IF_IS_PROTODOWN_ONLY_EXTERNAL(zif) == false) + ctx->u.intf.pd_reason_val = true; + + /* + * See if we have new protodown state to set, otherwise keep + * current state + */ + if (set_pdown) + ctx->u.intf.protodown = true; + else if (unset_pdown) + ctx->u.intf.protodown = false; + else + ctx->u.intf.protodown = !!ZEBRA_IF_IS_PROTODOWN(zif); + } + + dplane_ctx_ns_init(ctx, zns, (op == DPLANE_OP_INTF_UPDATE)); + ctx->zd_is_update = (op == DPLANE_OP_INTF_UPDATE); + + ret = AOK; + +done: + return ret; +} + +/* + * Capture information for an LSP update in a dplane context. 
+ */ +int dplane_ctx_lsp_init(struct zebra_dplane_ctx *ctx, enum dplane_op_e op, + struct zebra_lsp *lsp) +{ + int ret = AOK; + struct zebra_nhlfe *nhlfe, *new_nhlfe; + + ctx->zd_op = op; + ctx->zd_status = ZEBRA_DPLANE_REQUEST_SUCCESS; + + /* Capture namespace info */ + dplane_ctx_ns_init(ctx, zebra_ns_lookup(NS_DEFAULT), + (op == DPLANE_OP_LSP_UPDATE)); + + memset(&ctx->u.lsp, 0, sizeof(ctx->u.lsp)); + + nhlfe_list_init(&(ctx->u.lsp.nhlfe_list)); + nhlfe_list_init(&(ctx->u.lsp.backup_nhlfe_list)); + + /* This may be called to create/init a dplane context, not necessarily + * to copy an lsp object. + */ + if (lsp == NULL) { + ret = AOK; + goto done; + } + + if (IS_ZEBRA_DEBUG_DPLANE_DETAIL) + zlog_debug("init dplane ctx %s: in-label %u ecmp# %d", + dplane_op2str(op), lsp->ile.in_label, + lsp->num_ecmp); + + ctx->u.lsp.ile = lsp->ile; + ctx->u.lsp.addr_family = lsp->addr_family; + ctx->u.lsp.num_ecmp = lsp->num_ecmp; + ctx->u.lsp.flags = lsp->flags; + + /* Copy source LSP's nhlfes, and capture 'best' nhlfe */ + frr_each(nhlfe_list, &lsp->nhlfe_list, nhlfe) { + /* Not sure if this is meaningful... */ + if (nhlfe->nexthop == NULL) + continue; + + new_nhlfe = zebra_mpls_lsp_add_nh(&(ctx->u.lsp), nhlfe->type, + nhlfe->nexthop); + if (new_nhlfe == NULL || new_nhlfe->nexthop == NULL) { + ret = ENOMEM; + break; + } + + /* Need to copy flags and backup info too */ + new_nhlfe->flags = nhlfe->flags; + new_nhlfe->nexthop->flags = nhlfe->nexthop->flags; + + if (CHECK_FLAG(new_nhlfe->nexthop->flags, + NEXTHOP_FLAG_HAS_BACKUP)) { + new_nhlfe->nexthop->backup_num = + nhlfe->nexthop->backup_num; + memcpy(new_nhlfe->nexthop->backup_idx, + nhlfe->nexthop->backup_idx, + new_nhlfe->nexthop->backup_num); + } + + if (nhlfe == lsp->best_nhlfe) + ctx->u.lsp.best_nhlfe = new_nhlfe; + } + + if (ret != AOK) + goto done; + + /* Capture backup nhlfes/nexthops */ + frr_each(nhlfe_list, &lsp->backup_nhlfe_list, nhlfe) { + /* Not sure if this is meaningful... 
*/ + if (nhlfe->nexthop == NULL) + continue; + + new_nhlfe = zebra_mpls_lsp_add_backup_nh(&(ctx->u.lsp), + nhlfe->type, + nhlfe->nexthop); + if (new_nhlfe == NULL || new_nhlfe->nexthop == NULL) { + ret = ENOMEM; + break; + } + + /* Need to copy flags too */ + new_nhlfe->flags = nhlfe->flags; + new_nhlfe->nexthop->flags = nhlfe->nexthop->flags; + } + + /* On error the ctx will be cleaned-up, so we don't need to + * deal with any allocated nhlfe or nexthop structs here. + */ +done: + + return ret; +} + +/* + * Capture information for an LSP update in a dplane context. + */ +static int dplane_ctx_pw_init(struct zebra_dplane_ctx *ctx, + enum dplane_op_e op, + struct zebra_pw *pw) +{ + int ret = EINVAL; + struct prefix p; + afi_t afi; + struct route_table *table; + struct route_node *rn; + struct route_entry *re; + const struct nexthop_group *nhg; + struct nexthop *nh, *newnh, *last_nh; + + if (IS_ZEBRA_DEBUG_DPLANE_DETAIL) + zlog_debug("init dplane ctx %s: pw '%s', loc %u, rem %u", + dplane_op2str(op), pw->ifname, pw->local_label, + pw->remote_label); + + ctx->zd_op = op; + ctx->zd_status = ZEBRA_DPLANE_REQUEST_SUCCESS; + + /* Capture namespace info: no netlink support as of 12/18, + * but just in case... + */ + dplane_ctx_ns_init(ctx, zebra_ns_lookup(NS_DEFAULT), false); + + memset(&ctx->u.pw, 0, sizeof(ctx->u.pw)); + + /* This name appears to be c-string, so we use string copy. */ + strlcpy(ctx->zd_ifname, pw->ifname, sizeof(ctx->zd_ifname)); + + ctx->zd_vrf_id = pw->vrf_id; + ctx->zd_ifindex = pw->ifindex; + ctx->u.pw.type = pw->type; + ctx->u.pw.af = pw->af; + ctx->u.pw.local_label = pw->local_label; + ctx->u.pw.remote_label = pw->remote_label; + ctx->u.pw.flags = pw->flags; + + ctx->u.pw.dest = pw->nexthop; + + ctx->u.pw.fields = pw->data; + + /* Capture nexthop info for the pw destination. We need to look + * up and use zebra datastructs, but we're running in the zebra + * pthread here so that should be ok. 
+ */ + memcpy(&p.u, &pw->nexthop, sizeof(pw->nexthop)); + p.family = pw->af; + p.prefixlen = ((pw->af == AF_INET) ? IPV4_MAX_BITLEN : IPV6_MAX_BITLEN); + + afi = (pw->af == AF_INET) ? AFI_IP : AFI_IP6; + table = zebra_vrf_table(afi, SAFI_UNICAST, pw->vrf_id); + if (table == NULL) + goto done; + + rn = route_node_match(table, &p); + if (rn == NULL) + goto done; + + re = NULL; + RNODE_FOREACH_RE(rn, re) { + if (CHECK_FLAG(re->flags, ZEBRA_FLAG_SELECTED)) + break; + } + + if (re) { + /* We'll capture a 'fib' list of nexthops that meet our + * criteria: installed, and labelled. + */ + nhg = rib_get_fib_nhg(re); + last_nh = NULL; + + if (nhg && nhg->nexthop) { + for (ALL_NEXTHOPS_PTR(nhg, nh)) { + if (!CHECK_FLAG(nh->flags, NEXTHOP_FLAG_ACTIVE) + || CHECK_FLAG(nh->flags, + NEXTHOP_FLAG_RECURSIVE) + || nh->nh_label == NULL) + continue; + + newnh = nexthop_dup(nh, NULL); + + if (last_nh) + NEXTHOP_APPEND(last_nh, newnh); + else + ctx->u.pw.fib_nhg.nexthop = newnh; + last_nh = newnh; + } + } + + /* Include any installed backup nexthops also. 
*/ + nhg = rib_get_fib_backup_nhg(re); + if (nhg && nhg->nexthop) { + for (ALL_NEXTHOPS_PTR(nhg, nh)) { + if (!CHECK_FLAG(nh->flags, NEXTHOP_FLAG_ACTIVE) + || CHECK_FLAG(nh->flags, + NEXTHOP_FLAG_RECURSIVE) + || nh->nh_label == NULL) + continue; + + newnh = nexthop_dup(nh, NULL); + + if (last_nh) + NEXTHOP_APPEND(last_nh, newnh); + else + ctx->u.pw.fib_nhg.nexthop = newnh; + last_nh = newnh; + } + } + + /* Copy primary nexthops; recursive info is included too */ + assert(re->nhe != NULL); /* SA warning */ + copy_nexthops(&(ctx->u.pw.primary_nhg.nexthop), + re->nhe->nhg.nexthop, NULL); + ctx->u.pw.nhg_id = re->nhe->id; + + /* Copy backup nexthop info, if present */ + if (re->nhe->backup_info && re->nhe->backup_info->nhe) { + copy_nexthops(&(ctx->u.pw.backup_nhg.nexthop), + re->nhe->backup_info->nhe->nhg.nexthop, + NULL); + } + } + route_unlock_node(rn); + + ret = AOK; + +done: + return ret; +} + +/** + * dplane_ctx_rule_init_single() - Initialize a dataplane representation of a + * PBR rule. 
+ * + * @dplane_rule: Dataplane internal representation of a rule + * @rule: PBR rule + */ +static void dplane_ctx_rule_init_single(struct dplane_ctx_rule *dplane_rule, + struct zebra_pbr_rule *rule) +{ + struct zebra_neigh_ent *n; + + dplane_rule->priority = rule->rule.priority; + dplane_rule->table = rule->rule.action.table; + + dplane_rule->filter_bm = rule->rule.filter.filter_bm; + dplane_rule->fwmark = rule->rule.filter.fwmark; + dplane_rule->dsfield = rule->rule.filter.dsfield; + dplane_rule->ip_proto = rule->rule.filter.ip_proto; + dplane_rule->src_port = rule->rule.filter.src_port; + dplane_rule->dst_port = rule->rule.filter.dst_port; + prefix_copy(&(dplane_rule->dst_ip), &rule->rule.filter.dst_ip); + prefix_copy(&(dplane_rule->src_ip), &rule->rule.filter.src_ip); + + dplane_rule->action_pcp = rule->rule.action.pcp; + dplane_rule->action_vlan_flags = rule->rule.action.vlan_flags; + dplane_rule->action_vlan_id = rule->rule.action.vlan_id; + dplane_rule->action_queue_id = rule->rule.action.queue_id; + + strlcpy(dplane_rule->ifname, rule->ifname, INTERFACE_NAMSIZ); + dplane_rule->dp_flow_ptr = rule->action.dp_flow_ptr; + n = rule->action.neigh; + if (n && (n->flags & ZEBRA_NEIGH_ENT_ACTIVE)) { + struct interface *ifp = if_lookup_by_index_per_ns( + zebra_ns_lookup(NS_DEFAULT), n->ifindex); + if (ifp) { + dplane_rule->out_ifindex = n->ifindex; + memcpy(&dplane_rule->dmac, &n->mac, ETH_ALEN); + memcpy(&dplane_rule->smac, ifp->hw_addr, ETH_ALEN); + } else { + dplane_rule->out_ifindex = 0; + } + } +} + +/** + * dplane_ctx_rule_init() - Initialize a context block for a PBR rule update. 
+ * + * @ctx: Dataplane context to init + * @op: Operation being performed + * @new_rule: PBR rule + * + * Return: Result status + */ +static int dplane_ctx_rule_init(struct zebra_dplane_ctx *ctx, + enum dplane_op_e op, + struct zebra_pbr_rule *new_rule, + struct zebra_pbr_rule *old_rule) +{ + if (IS_ZEBRA_DEBUG_DPLANE_DETAIL) + zlog_debug( + "init dplane ctx %s: IF %s Prio %u Fwmark %u Src %pFX Dst %pFX Table %u", + dplane_op2str(op), new_rule->ifname, + new_rule->rule.priority, new_rule->rule.filter.fwmark, + &new_rule->rule.filter.src_ip, + &new_rule->rule.filter.dst_ip, + new_rule->rule.action.table); + + ctx->zd_op = op; + ctx->zd_status = ZEBRA_DPLANE_REQUEST_SUCCESS; + + dplane_ctx_ns_init(ctx, zebra_ns_lookup(NS_DEFAULT), + op == DPLANE_OP_RULE_UPDATE); + + ctx->zd_vrf_id = new_rule->vrf_id; + strlcpy(ctx->zd_ifname, new_rule->ifname, sizeof(ctx->zd_ifname)); + + ctx->u.rule.sock = new_rule->sock; + ctx->u.rule.unique = new_rule->rule.unique; + ctx->u.rule.seq = new_rule->rule.seq; + + dplane_ctx_rule_init_single(&ctx->u.rule.new, new_rule); + if (op == DPLANE_OP_RULE_UPDATE) { + dplane_ctx_rule_init_single(&ctx->u.rule.old, old_rule); + /* clear the dp_flow_ptr in the old_rule - it is about to be + * deleted + */ + old_rule->action.dp_flow_ptr = (intptr_t)NULL; + } + + return AOK; +} + +static void zebra_dplane_interface_name_list_deletion(void *data) +{ + XFREE(MTYPE_DP_NETFILTER, data); +} + +/** + * dplane_ctx_iptable_init() - Initialize a context block for a PBR iptable + * update. 
+ * + * @ctx: Dataplane context to init + * @op: Operation being performed + * @new_rule: PBR iptable + * + * Return: Result status + */ +static int dplane_ctx_iptable_init(struct zebra_dplane_ctx *ctx, + enum dplane_op_e op, + struct zebra_pbr_iptable *iptable) +{ + char *ifname; + struct listnode *node; + + if (IS_ZEBRA_DEBUG_DPLANE_DETAIL) { + zlog_debug( + "init dplane ctx %s: Unique %u Fwmark %u Family %s Action %s", + dplane_op2str(op), iptable->unique, iptable->fwmark, + family2str(iptable->family), + iptable->action == ZEBRA_IPTABLES_DROP ? "Drop" + : "Forward"); + } + + ctx->zd_op = op; + ctx->zd_status = ZEBRA_DPLANE_REQUEST_SUCCESS; + + dplane_ctx_ns_init(ctx, zebra_ns_lookup(NS_DEFAULT), false); + + ctx->zd_vrf_id = iptable->vrf_id; + memcpy(&ctx->u.iptable, iptable, sizeof(struct zebra_pbr_iptable)); + if (iptable->nb_interface > 0) { + ctx->u.iptable.interface_name_list = list_new(); + ctx->u.iptable.interface_name_list->del = + zebra_dplane_interface_name_list_deletion; + for (ALL_LIST_ELEMENTS_RO(iptable->interface_name_list, node, + ifname)) { + listnode_add(ctx->u.iptable.interface_name_list, + XSTRDUP(MTYPE_DP_NETFILTER, ifname)); + } + } + return AOK; +} + +/** + * dplane_ctx_ipset_init() - Initialize a context block for a PBR ipset update. 
+ *
+ * @ctx:	Dataplane context to init
+ * @op:		Operation being performed
+ * @ipset:	PBR ipset
+ *
+ * Return:	Result status
+ */
+static int dplane_ctx_ipset_init(struct zebra_dplane_ctx *ctx,
+				 enum dplane_op_e op,
+				 struct zebra_pbr_ipset *ipset)
+{
+	if (IS_ZEBRA_DEBUG_DPLANE_DETAIL) {
+		zlog_debug("init dplane ctx %s: %s Unique %u Family %s Type %s",
+			   dplane_op2str(op), ipset->ipset_name, ipset->unique,
+			   family2str(ipset->family),
+			   zebra_pbr_ipset_type2str(ipset->type));
+	}
+
+	ctx->zd_op = op;
+	ctx->zd_status = ZEBRA_DPLANE_REQUEST_SUCCESS;
+
+	/* Capture namespace info */
+	dplane_ctx_ns_init(ctx, zebra_ns_lookup(NS_DEFAULT), false);
+
+	ctx->zd_vrf_id = ipset->vrf_id;
+
+	/* The ipset object is copied by value into the ctx, so the caller
+	 * retains ownership of the original.
+	 */
+	memcpy(&ctx->u.ipset, ipset, sizeof(struct zebra_pbr_ipset));
+	return AOK;
+}
+
+/**
+ * dplane_ctx_ipset_entry_init() - Initialize a context block for a PBR ipset
+ * update.
+ *
+ * @ctx:		Dataplane context to init
+ * @op:			Operation being performed
+ * @ipset_entry:	PBR ipset entry
+ *
+ * Return:	Result status
+ */
+static int
+dplane_ctx_ipset_entry_init(struct zebra_dplane_ctx *ctx, enum dplane_op_e op,
+			    struct zebra_pbr_ipset_entry *ipset_entry)
+{
+	struct zebra_pbr_ipset *ipset;
+
+	/* NOTE(review): backpointer is assumed non-NULL here; it is
+	 * dereferenced without a check — confirm callers guarantee it.
+	 */
+	ipset = ipset_entry->backpointer;
+	if (IS_ZEBRA_DEBUG_DPLANE_DETAIL) {
+		zlog_debug("init dplane ctx %s: %s Unique %u filter %u",
+			   dplane_op2str(op), ipset->ipset_name,
+			   ipset_entry->unique, ipset_entry->filter_bm);
+	}
+
+	ctx->zd_op = op;
+	ctx->zd_status = ZEBRA_DPLANE_REQUEST_SUCCESS;
+
+	dplane_ctx_ns_init(ctx, zebra_ns_lookup(NS_DEFAULT), false);
+
+	ctx->zd_vrf_id = ipset->vrf_id;
+
+	/* Copy the entry by value; clear the backpointer in the copy so the
+	 * ctx does not carry a reference into zebra's hash tables, and stash
+	 * the parent ipset's identifying info alongside instead.
+	 */
+	memcpy(&ctx->u.ipset_entry.entry, ipset_entry,
+	       sizeof(struct zebra_pbr_ipset_entry));
+	ctx->u.ipset_entry.entry.backpointer = NULL;
+	ctx->u.ipset_entry.info.type = ipset->type;
+	ctx->u.ipset_entry.info.family = ipset->family;
+	memcpy(&ctx->u.ipset_entry.info.ipset_name, &ipset->ipset_name,
+	       ZEBRA_IPSET_NAME_SIZE);
+
+	return AOK;
+}
+
+
+/*
+ * Enqueue a new update,
+ * and ensure an event is active for the dataplane pthread.
+ */ +static int dplane_update_enqueue(struct zebra_dplane_ctx *ctx) +{ + int ret = EINVAL; + uint32_t high, curr; + + /* Enqueue for processing by the dataplane pthread */ + DPLANE_LOCK(); + { + TAILQ_INSERT_TAIL(&zdplane_info.dg_update_ctx_q, ctx, + zd_q_entries); + } + DPLANE_UNLOCK(); + + curr = atomic_fetch_add_explicit( + &(zdplane_info.dg_routes_queued), + 1, memory_order_seq_cst); + + curr++; /* We got the pre-incremented value */ + + /* Maybe update high-water counter also */ + high = atomic_load_explicit(&zdplane_info.dg_routes_queued_max, + memory_order_seq_cst); + while (high < curr) { + if (atomic_compare_exchange_weak_explicit( + &zdplane_info.dg_routes_queued_max, + &high, curr, + memory_order_seq_cst, + memory_order_seq_cst)) + break; + } + + /* Ensure that an event for the dataplane thread is active */ + ret = dplane_provider_work_ready(); + + return ret; +} + +/* + * Utility that prepares a route update and enqueues it for processing + */ +static enum zebra_dplane_result +dplane_route_update_internal(struct route_node *rn, + struct route_entry *re, + struct route_entry *old_re, + enum dplane_op_e op) +{ + enum zebra_dplane_result result = ZEBRA_DPLANE_REQUEST_FAILURE; + int ret = EINVAL; + struct zebra_dplane_ctx *ctx = NULL; + + /* Obtain context block */ + ctx = dplane_ctx_alloc(); + + /* Init context with info from zebra data structs */ + ret = dplane_ctx_route_init(ctx, op, rn, re); + if (ret == AOK) { + /* Capture some extra info for update case + * where there's a different 'old' route. 
+ */ + if ((op == DPLANE_OP_ROUTE_UPDATE) && + old_re && (old_re != re)) { + + old_re->dplane_sequence = + zebra_router_get_next_sequence(); + ctx->zd_old_seq = old_re->dplane_sequence; + + ctx->u.rinfo.zd_old_tag = old_re->tag; + ctx->u.rinfo.zd_old_type = old_re->type; + ctx->u.rinfo.zd_old_instance = old_re->instance; + ctx->u.rinfo.zd_old_distance = old_re->distance; + ctx->u.rinfo.zd_old_metric = old_re->metric; + ctx->u.rinfo.nhe.old_id = old_re->nhe->id; + +#ifndef HAVE_NETLINK + /* For bsd, capture previous re's nexthops too, sigh. + * We'll need these to do per-nexthop deletes. + */ + copy_nexthops(&(ctx->u.rinfo.zd_old_ng.nexthop), + old_re->nhe->nhg.nexthop, NULL); + + if (zebra_nhg_get_backup_nhg(old_re->nhe) != NULL) { + struct nexthop_group *nhg; + struct nexthop **nh; + + nhg = zebra_nhg_get_backup_nhg(old_re->nhe); + nh = &(ctx->u.rinfo.old_backup_ng.nexthop); + + if (nhg->nexthop) + copy_nexthops(nh, nhg->nexthop, NULL); + } +#endif /* !HAVE_NETLINK */ + } + + /* + * If the old and new context type, and nexthop group id + * are the same there is no need to send down a route replace + * as that we know we have sent a nexthop group replace + * or an upper level protocol has sent us the exact + * same route again. 
+ */ + if ((dplane_ctx_get_type(ctx) == dplane_ctx_get_old_type(ctx)) + && (dplane_ctx_get_nhe_id(ctx) + == dplane_ctx_get_old_nhe_id(ctx)) + && (dplane_ctx_get_nhe_id(ctx) >= ZEBRA_NHG_PROTO_LOWER)) { + struct nexthop *nexthop; + + if (IS_ZEBRA_DEBUG_DPLANE) + zlog_debug( + "%s: Ignoring Route exactly the same", + __func__); + + for (ALL_NEXTHOPS_PTR(dplane_ctx_get_ng(ctx), + nexthop)) { + if (CHECK_FLAG(nexthop->flags, + NEXTHOP_FLAG_RECURSIVE)) + continue; + + if (CHECK_FLAG(nexthop->flags, + NEXTHOP_FLAG_ACTIVE)) + SET_FLAG(nexthop->flags, + NEXTHOP_FLAG_FIB); + } + + if ((op == DPLANE_OP_ROUTE_UPDATE) && old_re && re && + (old_re != re) && + !CHECK_FLAG(re->status, ROUTE_ENTRY_INSTALLED)) + SET_FLAG(re->status, ROUTE_ENTRY_INSTALLED); + + dplane_ctx_free(&ctx); + return ZEBRA_DPLANE_REQUEST_SUCCESS; + } + + /* Enqueue context for processing */ + ret = dplane_update_enqueue(ctx); + } + + /* Update counter */ + atomic_fetch_add_explicit(&zdplane_info.dg_routes_in, 1, + memory_order_relaxed); + + if (ret == AOK) + result = ZEBRA_DPLANE_REQUEST_QUEUED; + else { + atomic_fetch_add_explicit(&zdplane_info.dg_route_errors, 1, + memory_order_relaxed); + if (ctx) + dplane_ctx_free(&ctx); + } + + return result; +} + +static enum zebra_dplane_result dplane_tc_update_internal(enum dplane_op_e op) +{ + enum zebra_dplane_result result = ZEBRA_DPLANE_REQUEST_FAILURE; + int ret; + struct zebra_dplane_ctx *ctx = NULL; + + /* Obtain context block */ + ctx = dplane_ctx_alloc(); + + if (!ctx) { + ret = ENOMEM; + goto done; + } + + /* Init context with info from zebra data structs */ + ret = dplane_ctx_tc_init(ctx, op); + + if (ret == AOK) + ret = dplane_update_enqueue(ctx); + +done: + /* Update counter */ + atomic_fetch_add_explicit(&zdplane_info.dg_tcs_in, 1, + memory_order_relaxed); + if (ret == AOK) { + result = ZEBRA_DPLANE_REQUEST_QUEUED; + } else { + atomic_fetch_add_explicit(&zdplane_info.dg_tcs_errors, 1, + memory_order_relaxed); + if (ctx) + dplane_ctx_free(&ctx); + } + + 
return result; +} + +enum zebra_dplane_result dplane_tc_update(void) +{ + return dplane_tc_update_internal(DPLANE_OP_TC_UPDATE); +} + +/** + * dplane_nexthop_update_internal() - Helper for enqueuing nexthop changes + * + * @nhe: Nexthop group hash entry where the change occured + * @op: The operation to be enqued + * + * Return: Result of the change + */ +static enum zebra_dplane_result +dplane_nexthop_update_internal(struct nhg_hash_entry *nhe, enum dplane_op_e op) +{ + enum zebra_dplane_result result = ZEBRA_DPLANE_REQUEST_FAILURE; + int ret; + struct zebra_dplane_ctx *ctx = NULL; + + /* Obtain context block */ + ctx = dplane_ctx_alloc(); + if (!ctx) { + ret = ENOMEM; + goto done; + } + + ret = dplane_ctx_nexthop_init(ctx, op, nhe); + if (ret == AOK) + ret = dplane_update_enqueue(ctx); + +done: + /* Update counter */ + atomic_fetch_add_explicit(&zdplane_info.dg_nexthops_in, 1, + memory_order_relaxed); + + if (ret == AOK) + result = ZEBRA_DPLANE_REQUEST_QUEUED; + else { + atomic_fetch_add_explicit(&zdplane_info.dg_nexthop_errors, 1, + memory_order_relaxed); + if (ctx) + dplane_ctx_free(&ctx); + } + + return result; +} + +/* + * Enqueue a route 'add' for the dataplane. + */ +enum zebra_dplane_result dplane_route_add(struct route_node *rn, + struct route_entry *re) +{ + enum zebra_dplane_result ret = ZEBRA_DPLANE_REQUEST_FAILURE; + + if (rn == NULL || re == NULL) + goto done; + + ret = dplane_route_update_internal(rn, re, NULL, + DPLANE_OP_ROUTE_INSTALL); + +done: + return ret; +} + +/* + * Enqueue a route update for the dataplane. + */ +enum zebra_dplane_result dplane_route_update(struct route_node *rn, + struct route_entry *re, + struct route_entry *old_re) +{ + enum zebra_dplane_result ret = ZEBRA_DPLANE_REQUEST_FAILURE; + + if (rn == NULL || re == NULL) + goto done; + + ret = dplane_route_update_internal(rn, re, old_re, + DPLANE_OP_ROUTE_UPDATE); +done: + return ret; +} + +/* + * Enqueue a route removal for the dataplane. 
+ */
+enum zebra_dplane_result dplane_route_delete(struct route_node *rn,
+					     struct route_entry *re)
+{
+	enum zebra_dplane_result ret = ZEBRA_DPLANE_REQUEST_FAILURE;
+
+	/* Both the route node and route entry are required; returns
+	 * FAILURE (rather than queuing anything) if either is missing.
+	 */
+	if (rn == NULL || re == NULL)
+		goto done;
+
+	ret = dplane_route_update_internal(rn, re, NULL,
+					   DPLANE_OP_ROUTE_DELETE);
+
+done:
+	return ret;
+}
+
+/*
+ * Notify the dplane when system/connected routes change.
+ * Returns QUEUED when the event was enqueued for the dplane pthread,
+ * SUCCESS when the event is being ignored (no provider subscribed).
+ */
+enum zebra_dplane_result dplane_sys_route_add(struct route_node *rn,
+					      struct route_entry *re)
+{
+	enum zebra_dplane_result ret = ZEBRA_DPLANE_REQUEST_FAILURE;
+
+	/* Ignore this event unless a provider plugin has requested it. */
+	if (!zdplane_info.dg_sys_route_notifs) {
+		ret = ZEBRA_DPLANE_REQUEST_SUCCESS;
+		goto done;
+	}
+
+	if (rn == NULL || re == NULL)
+		goto done;
+
+	ret = dplane_route_update_internal(rn, re, NULL,
+					   DPLANE_OP_SYS_ROUTE_ADD);
+
+done:
+	return ret;
+}
+
+/*
+ * Notify the dplane when system/connected routes are deleted.
+ * Same contract as dplane_sys_route_add(): SUCCESS means "ignored",
+ * QUEUED means the delete event is on its way to the providers.
+ */
+enum zebra_dplane_result dplane_sys_route_del(struct route_node *rn,
+					      struct route_entry *re)
+{
+	enum zebra_dplane_result ret = ZEBRA_DPLANE_REQUEST_FAILURE;
+
+	/* Ignore this event unless a provider plugin has requested it. */
+	if (!zdplane_info.dg_sys_route_notifs) {
+		ret = ZEBRA_DPLANE_REQUEST_SUCCESS;
+		goto done;
+	}
+
+	if (rn == NULL || re == NULL)
+		goto done;
+
+	ret = dplane_route_update_internal(rn, re, NULL,
+					   DPLANE_OP_SYS_ROUTE_DELETE);
+
+done:
+	return ret;
+}
+
+/*
+ * Update from an async notification, to bring other fibs up-to-date.
+ */ +enum zebra_dplane_result +dplane_route_notif_update(struct route_node *rn, + struct route_entry *re, + enum dplane_op_e op, + struct zebra_dplane_ctx *ctx) +{ + enum zebra_dplane_result result = ZEBRA_DPLANE_REQUEST_FAILURE; + int ret = EINVAL; + struct zebra_dplane_ctx *new_ctx = NULL; + struct nexthop *nexthop; + struct nexthop_group *nhg; + + if (rn == NULL || re == NULL) + goto done; + + new_ctx = dplane_ctx_alloc(); + if (new_ctx == NULL) + goto done; + + /* Init context with info from zebra data structs */ + dplane_ctx_route_init(new_ctx, op, rn, re); + + /* For add/update, need to adjust the nexthops so that we match + * the notification state, which may not be the route-entry/RIB + * state. + */ + if (op == DPLANE_OP_ROUTE_UPDATE || + op == DPLANE_OP_ROUTE_INSTALL) { + + nexthops_free(new_ctx->u.rinfo.zd_ng.nexthop); + new_ctx->u.rinfo.zd_ng.nexthop = NULL; + + nhg = rib_get_fib_nhg(re); + if (nhg && nhg->nexthop) + copy_nexthops(&(new_ctx->u.rinfo.zd_ng.nexthop), + nhg->nexthop, NULL); + + /* Check for installed backup nexthops also */ + nhg = rib_get_fib_backup_nhg(re); + if (nhg && nhg->nexthop) { + copy_nexthops(&(new_ctx->u.rinfo.zd_ng.nexthop), + nhg->nexthop, NULL); + } + + for (ALL_NEXTHOPS(new_ctx->u.rinfo.zd_ng, nexthop)) + UNSET_FLAG(nexthop->flags, NEXTHOP_FLAG_FIB); + + } + + /* Capture info about the source of the notification, in 'ctx' */ + dplane_ctx_set_notif_provider(new_ctx, + dplane_ctx_get_notif_provider(ctx)); + + ret = dplane_update_enqueue(new_ctx); + +done: + if (ret == AOK) + result = ZEBRA_DPLANE_REQUEST_QUEUED; + else if (new_ctx) + dplane_ctx_free(&new_ctx); + + return result; +} + +/* + * Enqueue a nexthop add for the dataplane. + */ +enum zebra_dplane_result dplane_nexthop_add(struct nhg_hash_entry *nhe) +{ + enum zebra_dplane_result ret = ZEBRA_DPLANE_REQUEST_FAILURE; + + if (nhe) + ret = dplane_nexthop_update_internal(nhe, DPLANE_OP_NH_INSTALL); + return ret; +} + +/* + * Enqueue a nexthop update for the dataplane. 
+ *
+ * Might not need this func since zebra's nexthop objects should be immutable?
+ */
+enum zebra_dplane_result dplane_nexthop_update(struct nhg_hash_entry *nhe)
+{
+	enum zebra_dplane_result ret = ZEBRA_DPLANE_REQUEST_FAILURE;
+
+	/* NULL nhe yields FAILURE without touching any counters */
+	if (nhe)
+		ret = dplane_nexthop_update_internal(nhe, DPLANE_OP_NH_UPDATE);
+	return ret;
+}
+
+/*
+ * Enqueue a nexthop removal for the dataplane.
+ */
+enum zebra_dplane_result dplane_nexthop_delete(struct nhg_hash_entry *nhe)
+{
+	enum zebra_dplane_result ret = ZEBRA_DPLANE_REQUEST_FAILURE;
+
+	/* NULL nhe yields FAILURE without touching any counters */
+	if (nhe)
+		ret = dplane_nexthop_update_internal(nhe, DPLANE_OP_NH_DELETE);
+
+	return ret;
+}
+
+/*
+ * Enqueue LSP add for the dataplane.
+ * Thin wrapper: all context setup and enqueueing is in
+ * lsp_update_internal().
+ */
+enum zebra_dplane_result dplane_lsp_add(struct zebra_lsp *lsp)
+{
+	enum zebra_dplane_result ret =
+		lsp_update_internal(lsp, DPLANE_OP_LSP_INSTALL);
+
+	return ret;
+}
+
+/*
+ * Enqueue LSP update for the dataplane.
+ */
+enum zebra_dplane_result dplane_lsp_update(struct zebra_lsp *lsp)
+{
+	enum zebra_dplane_result ret =
+		lsp_update_internal(lsp, DPLANE_OP_LSP_UPDATE);
+
+	return ret;
+}
+
+/*
+ * Enqueue LSP delete for the dataplane.
+ */ +enum zebra_dplane_result dplane_lsp_delete(struct zebra_lsp *lsp) +{ + enum zebra_dplane_result ret = + lsp_update_internal(lsp, DPLANE_OP_LSP_DELETE); + + return ret; +} + +/* Update or un-install resulting from an async notification */ +enum zebra_dplane_result +dplane_lsp_notif_update(struct zebra_lsp *lsp, enum dplane_op_e op, + struct zebra_dplane_ctx *notif_ctx) +{ + enum zebra_dplane_result result = ZEBRA_DPLANE_REQUEST_FAILURE; + int ret; + struct zebra_dplane_ctx *ctx = NULL; + struct nhlfe_list_head *head; + struct zebra_nhlfe *nhlfe, *new_nhlfe; + + /* Obtain context block */ + ctx = dplane_ctx_alloc(); + if (ctx == NULL) { + ret = ENOMEM; + goto done; + } + + /* Copy info from zebra LSP */ + ret = dplane_ctx_lsp_init(ctx, op, lsp); + if (ret != AOK) + goto done; + + /* Add any installed backup nhlfes */ + head = &(ctx->u.lsp.backup_nhlfe_list); + frr_each(nhlfe_list, head, nhlfe) { + + if (CHECK_FLAG(nhlfe->flags, NHLFE_FLAG_INSTALLED) && + CHECK_FLAG(nhlfe->nexthop->flags, NEXTHOP_FLAG_FIB)) { + new_nhlfe = zebra_mpls_lsp_add_nh(&(ctx->u.lsp), + nhlfe->type, + nhlfe->nexthop); + + /* Need to copy flags too */ + new_nhlfe->flags = nhlfe->flags; + new_nhlfe->nexthop->flags = nhlfe->nexthop->flags; + } + } + + /* Capture info about the source of the notification */ + dplane_ctx_set_notif_provider( + ctx, + dplane_ctx_get_notif_provider(notif_ctx)); + + ret = dplane_update_enqueue(ctx); + +done: + /* Update counter */ + atomic_fetch_add_explicit(&zdplane_info.dg_lsps_in, 1, + memory_order_relaxed); + + if (ret == AOK) + result = ZEBRA_DPLANE_REQUEST_QUEUED; + else { + atomic_fetch_add_explicit(&zdplane_info.dg_lsp_errors, 1, + memory_order_relaxed); + if (ctx) + dplane_ctx_free(&ctx); + } + return result; +} + +/* + * Enqueue pseudowire install for the dataplane. + */ +enum zebra_dplane_result dplane_pw_install(struct zebra_pw *pw) +{ + return pw_update_internal(pw, DPLANE_OP_PW_INSTALL); +} + +/* + * Enqueue pseudowire un-install for the dataplane. 
+ */
+enum zebra_dplane_result dplane_pw_uninstall(struct zebra_pw *pw)
+{
+	return pw_update_internal(pw, DPLANE_OP_PW_UNINSTALL);
+}
+
+/*
+ * Common internal LSP update utility: allocates a ctx, copies the LSP
+ * into it, and enqueues it for the dplane pthread. Returns QUEUED on
+ * success; on any failure the ctx is freed here and FAILURE returned.
+ */
+static enum zebra_dplane_result lsp_update_internal(struct zebra_lsp *lsp,
+						    enum dplane_op_e op)
+{
+	enum zebra_dplane_result result = ZEBRA_DPLANE_REQUEST_FAILURE;
+	int ret = EINVAL;
+	struct zebra_dplane_ctx *ctx = NULL;
+
+	/* Obtain context block */
+	/* NOTE(review): ctx is used unchecked here, unlike some callers
+	 * elsewhere in this file; presumably dplane_ctx_alloc() cannot
+	 * return NULL (aborts on OOM) — TODO confirm.
+	 */
+	ctx = dplane_ctx_alloc();
+
+	ret = dplane_ctx_lsp_init(ctx, op, lsp);
+	if (ret != AOK)
+		goto done;
+
+	ret = dplane_update_enqueue(ctx);
+
+done:
+	/* Update counter */
+	atomic_fetch_add_explicit(&zdplane_info.dg_lsps_in, 1,
+				  memory_order_relaxed);
+
+	if (ret == AOK)
+		result = ZEBRA_DPLANE_REQUEST_QUEUED;
+	else {
+		atomic_fetch_add_explicit(&zdplane_info.dg_lsp_errors, 1,
+					  memory_order_relaxed);
+		/* On failure, ownership of ctx stays here: free it */
+		dplane_ctx_free(&ctx);
+	}
+
+	return result;
+}
+
+/*
+ * Internal, common handler for pseudowire updates.
+ * Same shape as lsp_update_internal(): alloc ctx, init from the pw,
+ * enqueue; frees the ctx itself on any failure path.
+ */
+static enum zebra_dplane_result pw_update_internal(struct zebra_pw *pw,
+						   enum dplane_op_e op)
+{
+	enum zebra_dplane_result result = ZEBRA_DPLANE_REQUEST_FAILURE;
+	int ret;
+	struct zebra_dplane_ctx *ctx = NULL;
+
+	ctx = dplane_ctx_alloc();
+
+	ret = dplane_ctx_pw_init(ctx, op, pw);
+	if (ret != AOK)
+		goto done;
+
+	ret = dplane_update_enqueue(ctx);
+
+done:
+	/* Update counter */
+	atomic_fetch_add_explicit(&zdplane_info.dg_pws_in, 1,
+				  memory_order_relaxed);
+
+	if (ret == AOK)
+		result = ZEBRA_DPLANE_REQUEST_QUEUED;
+	else {
+		atomic_fetch_add_explicit(&zdplane_info.dg_pw_errors, 1,
+					  memory_order_relaxed);
+		dplane_ctx_free(&ctx);
+	}
+
+	return result;
+}
+
+/*
+ * Enqueue access br_port update.
+ */ +enum zebra_dplane_result +dplane_br_port_update(const struct interface *ifp, bool non_df, + uint32_t sph_filter_cnt, + const struct in_addr *sph_filters, uint32_t backup_nhg_id) +{ + enum zebra_dplane_result result = ZEBRA_DPLANE_REQUEST_FAILURE; + uint32_t flags = 0; + int ret; + struct zebra_dplane_ctx *ctx = NULL; + struct zebra_ns *zns; + enum dplane_op_e op = DPLANE_OP_BR_PORT_UPDATE; + + if (non_df) + flags |= DPLANE_BR_PORT_NON_DF; + + if (IS_ZEBRA_DEBUG_DPLANE_DETAIL || IS_ZEBRA_DEBUG_EVPN_MH_ES) { + uint32_t i; + char vtep_str[ES_VTEP_LIST_STR_SZ]; + + vtep_str[0] = '\0'; + for (i = 0; i < sph_filter_cnt; ++i) { + snprintfrr(vtep_str + strlen(vtep_str), + sizeof(vtep_str) - strlen(vtep_str), "%pI4 ", + &sph_filters[i]); + } + zlog_debug( + "init br_port ctx %s: ifp %s, flags 0x%x backup_nhg 0x%x sph %s", + dplane_op2str(op), ifp->name, flags, backup_nhg_id, + vtep_str); + } + + ctx = dplane_ctx_alloc(); + + ctx->zd_op = op; + ctx->zd_status = ZEBRA_DPLANE_REQUEST_SUCCESS; + ctx->zd_vrf_id = ifp->vrf->vrf_id; + + zns = zebra_ns_lookup(ifp->vrf->vrf_id); + dplane_ctx_ns_init(ctx, zns, false); + + ctx->zd_ifindex = ifp->ifindex; + strlcpy(ctx->zd_ifname, ifp->name, sizeof(ctx->zd_ifname)); + + /* Init the br-port-specific data area */ + memset(&ctx->u.br_port, 0, sizeof(ctx->u.br_port)); + + ctx->u.br_port.flags = flags; + ctx->u.br_port.backup_nhg_id = backup_nhg_id; + ctx->u.br_port.sph_filter_cnt = sph_filter_cnt; + memcpy(ctx->u.br_port.sph_filters, sph_filters, + sizeof(ctx->u.br_port.sph_filters[0]) * sph_filter_cnt); + + /* Enqueue for processing on the dplane pthread */ + ret = dplane_update_enqueue(ctx); + + /* Increment counter */ + atomic_fetch_add_explicit(&zdplane_info.dg_br_port_in, 1, + memory_order_relaxed); + + if (ret == AOK) { + result = ZEBRA_DPLANE_REQUEST_QUEUED; + } else { + /* Error counter */ + atomic_fetch_add_explicit(&zdplane_info.dg_br_port_errors, 1, + memory_order_relaxed); + dplane_ctx_free(&ctx); + } + + return result; 
+}
+
+/*
+ * Enqueue a netconf-style MPLS enable/disable change for an interface.
+ * 'set' true enables MPLS input handling, false disables it.
+ * Returns QUEUED on success; frees the ctx and returns FAILURE on
+ * enqueue error.
+ */
+enum zebra_dplane_result
+dplane_intf_mpls_modify_state(const struct interface *ifp, bool set)
+{
+	enum zebra_dplane_result result = ZEBRA_DPLANE_REQUEST_FAILURE;
+	struct zebra_dplane_ctx *ctx;
+	struct zebra_ns *zns;
+	int ret = EINVAL;
+
+	ctx = dplane_ctx_alloc();
+	ctx->zd_op = DPLANE_OP_INTF_NETCONFIG;
+	ctx->zd_status = ZEBRA_DPLANE_REQUEST_SUCCESS;
+	ctx->zd_vrf_id = ifp->vrf->vrf_id;
+	strlcpy(ctx->zd_ifname, ifp->name, sizeof(ctx->zd_ifname));
+
+	/* Capture namespace info for the interface's vrf */
+	zns = zebra_ns_lookup(ifp->vrf->vrf_id);
+	dplane_ctx_ns_init(ctx, zns, false);
+
+	ctx->zd_ifindex = ifp->ifindex;
+	if (set)
+		dplane_ctx_set_netconf_mpls(ctx, DPLANE_NETCONF_STATUS_ENABLED);
+	else
+		dplane_ctx_set_netconf_mpls(ctx,
+					    DPLANE_NETCONF_STATUS_DISABLED);
+	/* Increment counter */
+	atomic_fetch_add_explicit(&zdplane_info.dg_intf_changes, 1,
+				  memory_order_relaxed);
+
+	ret = dplane_update_enqueue(ctx);
+
+	if (ret == AOK)
+		result = ZEBRA_DPLANE_REQUEST_QUEUED;
+	else {
+		/* Error counter */
+		atomic_fetch_add_explicit(&zdplane_info.dg_intf_changes_errors,
+					  1, memory_order_relaxed);
+		dplane_ctx_free(&ctx);
+	}
+
+	return result;
+}
+
+/*
+ * Enqueue interface address add for the dataplane.
+ */
+enum zebra_dplane_result dplane_intf_addr_set(const struct interface *ifp,
+					      const struct connected *ifc)
+{
+#if !defined(HAVE_NETLINK) && defined(HAVE_STRUCT_IFALIASREQ)
+	/* Extra checks for this OS path. */
+
+	/* Don't configure PtP addresses on broadcast ifs or reverse */
+	/* The '!'s normalize both sides to 0/1 so this compares the two
+	 * booleans for inequality: reject when exactly one of
+	 * "interface is point-to-point" / "address has a peer" holds.
+	 */
+	if (!(ifp->flags & IFF_POINTOPOINT) != !CONNECTED_PEER(ifc)) {
+		if (IS_ZEBRA_DEBUG_KERNEL || IS_ZEBRA_DEBUG_DPLANE)
+			zlog_debug("Failed to set intf addr: mismatch p2p and connected");
+
+		return ZEBRA_DPLANE_REQUEST_FAILURE;
+	}
+#endif
+
+	return intf_addr_update_internal(ifp, ifc, DPLANE_OP_ADDR_INSTALL);
+}
+
+/*
+ * Enqueue interface address remove/uninstall for the dataplane.
+ */ +enum zebra_dplane_result dplane_intf_addr_unset(const struct interface *ifp, + const struct connected *ifc) +{ + return intf_addr_update_internal(ifp, ifc, DPLANE_OP_ADDR_UNINSTALL); +} + +static enum zebra_dplane_result intf_addr_update_internal( + const struct interface *ifp, const struct connected *ifc, + enum dplane_op_e op) +{ + enum zebra_dplane_result result = ZEBRA_DPLANE_REQUEST_FAILURE; + int ret = EINVAL; + struct zebra_dplane_ctx *ctx = NULL; + struct zebra_ns *zns; + + if (IS_ZEBRA_DEBUG_DPLANE_DETAIL) + zlog_debug("init intf ctx %s: idx %d, addr %u:%pFX", + dplane_op2str(op), ifp->ifindex, ifp->vrf->vrf_id, + ifc->address); + + ctx = dplane_ctx_alloc(); + + ctx->zd_op = op; + ctx->zd_status = ZEBRA_DPLANE_REQUEST_SUCCESS; + ctx->zd_vrf_id = ifp->vrf->vrf_id; + + zns = zebra_ns_lookup(ifp->vrf->vrf_id); + dplane_ctx_ns_init(ctx, zns, false); + + /* Init the interface-addr-specific area */ + memset(&ctx->u.intf, 0, sizeof(ctx->u.intf)); + + strlcpy(ctx->zd_ifname, ifp->name, sizeof(ctx->zd_ifname)); + ctx->zd_ifindex = ifp->ifindex; + ctx->u.intf.prefix = *(ifc->address); + + if (if_is_broadcast(ifp)) + ctx->u.intf.flags |= DPLANE_INTF_BROADCAST; + + if (CONNECTED_PEER(ifc)) { + ctx->u.intf.dest_prefix = *(ifc->destination); + ctx->u.intf.flags |= + (DPLANE_INTF_CONNECTED | DPLANE_INTF_HAS_DEST); + } + + if (CHECK_FLAG(ifc->flags, ZEBRA_IFA_SECONDARY)) + ctx->u.intf.flags |= DPLANE_INTF_SECONDARY; + + if (ifc->label) { + size_t len; + + ctx->u.intf.flags |= DPLANE_INTF_HAS_LABEL; + + /* Use embedded buffer if it's adequate; else allocate. 
*/ + len = strlen(ifc->label); + + if (len < sizeof(ctx->u.intf.label_buf)) { + strlcpy(ctx->u.intf.label_buf, ifc->label, + sizeof(ctx->u.intf.label_buf)); + ctx->u.intf.label = ctx->u.intf.label_buf; + } else { + ctx->u.intf.label = XSTRDUP(MTYPE_DP_CTX, ifc->label); + } + } + + ret = dplane_update_enqueue(ctx); + + /* Increment counter */ + atomic_fetch_add_explicit(&zdplane_info.dg_intf_addrs_in, 1, + memory_order_relaxed); + + if (ret == AOK) + result = ZEBRA_DPLANE_REQUEST_QUEUED; + else { + /* Error counter */ + atomic_fetch_add_explicit(&zdplane_info.dg_intf_addr_errors, + 1, memory_order_relaxed); + dplane_ctx_free(&ctx); + } + + return result; +} + +/** + * dplane_intf_update_internal() - Helper for enqueuing interface changes + * + * @ifp: Interface where the change occured + * @op: The operation to be enqued + * + * Return: Result of the change + */ +static enum zebra_dplane_result +dplane_intf_update_internal(const struct interface *ifp, enum dplane_op_e op) +{ + enum zebra_dplane_result result = ZEBRA_DPLANE_REQUEST_FAILURE; + int ret; + struct zebra_dplane_ctx *ctx = NULL; + + /* Obtain context block */ + ctx = dplane_ctx_alloc(); + if (!ctx) { + ret = ENOMEM; + goto done; + } + + ret = dplane_ctx_intf_init(ctx, op, ifp); + if (ret == AOK) + ret = dplane_update_enqueue(ctx); + +done: + /* Update counter */ + atomic_fetch_add_explicit(&zdplane_info.dg_intfs_in, 1, + memory_order_relaxed); + + if (ret == AOK) + result = ZEBRA_DPLANE_REQUEST_QUEUED; + else { + atomic_fetch_add_explicit(&zdplane_info.dg_intf_errors, 1, + memory_order_relaxed); + if (ctx) + dplane_ctx_free(&ctx); + } + + return result; +} + +/* + * Enqueue a interface add for the dataplane. + */ +enum zebra_dplane_result dplane_intf_add(const struct interface *ifp) +{ + enum zebra_dplane_result ret = ZEBRA_DPLANE_REQUEST_FAILURE; + + if (ifp) + ret = dplane_intf_update_internal(ifp, DPLANE_OP_INTF_INSTALL); + return ret; +} + +/* + * Enqueue a interface update for the dataplane. 
+ */ +enum zebra_dplane_result dplane_intf_update(const struct interface *ifp) +{ + enum zebra_dplane_result ret = ZEBRA_DPLANE_REQUEST_FAILURE; + + if (ifp) + ret = dplane_intf_update_internal(ifp, DPLANE_OP_INTF_UPDATE); + return ret; +} + +/* + * Enqueue a interface delete for the dataplane. + */ +enum zebra_dplane_result dplane_intf_delete(const struct interface *ifp) +{ + enum zebra_dplane_result ret = ZEBRA_DPLANE_REQUEST_FAILURE; + + if (ifp) + ret = dplane_intf_update_internal(ifp, DPLANE_OP_INTF_DELETE); + return ret; +} + +/* + * Enqueue vxlan/evpn mac add (or update). + */ +enum zebra_dplane_result dplane_rem_mac_add(const struct interface *ifp, + const struct interface *bridge_ifp, + vlanid_t vid, + const struct ethaddr *mac, + struct in_addr vtep_ip, + bool sticky, + uint32_t nhg_id, + bool was_static) +{ + enum zebra_dplane_result result; + uint32_t update_flags = 0; + + update_flags |= DPLANE_MAC_REMOTE; + if (was_static) + update_flags |= DPLANE_MAC_WAS_STATIC; + + /* Use common helper api */ + result = mac_update_common(DPLANE_OP_MAC_INSTALL, ifp, bridge_ifp, + vid, mac, vtep_ip, sticky, nhg_id, update_flags); + return result; +} + +/* + * Enqueue vxlan/evpn mac delete. 
+ */ +enum zebra_dplane_result dplane_rem_mac_del(const struct interface *ifp, + const struct interface *bridge_ifp, + vlanid_t vid, + const struct ethaddr *mac, + struct in_addr vtep_ip) +{ + enum zebra_dplane_result result; + uint32_t update_flags = 0; + + update_flags |= DPLANE_MAC_REMOTE; + + /* Use common helper api */ + result = mac_update_common(DPLANE_OP_MAC_DELETE, ifp, bridge_ifp, + vid, mac, vtep_ip, false, 0, update_flags); + return result; +} + +/* + * API to configure link local with either MAC address or IP information + */ +enum zebra_dplane_result dplane_neigh_ip_update(enum dplane_op_e op, + const struct interface *ifp, + struct ipaddr *link_ip, + struct ipaddr *ip, + uint32_t ndm_state, int protocol) +{ + enum zebra_dplane_result result = ZEBRA_DPLANE_REQUEST_FAILURE; + uint16_t state = 0; + uint32_t update_flags; + + if (IS_ZEBRA_DEBUG_DPLANE_DETAIL) + zlog_debug("%s: init link ctx %s: ifp %s, link_ip %pIA ip %pIA", + __func__, dplane_op2str(op), ifp->name, link_ip, ip); + + if (ndm_state == ZEBRA_NEIGH_STATE_REACHABLE) + state = DPLANE_NUD_REACHABLE; + else if (ndm_state == ZEBRA_NEIGH_STATE_FAILED) + state = DPLANE_NUD_FAILED; + + update_flags = DPLANE_NEIGH_NO_EXTENSION; + + result = neigh_update_internal(op, ifp, (const void *)link_ip, + ipaddr_family(link_ip), ip, 0, state, + update_flags, protocol); + + return result; +} + +/* + * Enqueue local mac add (or update). 
+ */ +enum zebra_dplane_result dplane_local_mac_add(const struct interface *ifp, + const struct interface *bridge_ifp, + vlanid_t vid, + const struct ethaddr *mac, + bool sticky, + uint32_t set_static, + uint32_t set_inactive) +{ + enum zebra_dplane_result result; + uint32_t update_flags = 0; + struct in_addr vtep_ip; + + if (set_static) + update_flags |= DPLANE_MAC_SET_STATIC; + + if (set_inactive) + update_flags |= DPLANE_MAC_SET_INACTIVE; + + vtep_ip.s_addr = 0; + + /* Use common helper api */ + result = mac_update_common(DPLANE_OP_MAC_INSTALL, ifp, bridge_ifp, + vid, mac, vtep_ip, sticky, 0, + update_flags); + return result; +} + +/* + * Enqueue local mac del + */ +enum zebra_dplane_result +dplane_local_mac_del(const struct interface *ifp, + const struct interface *bridge_ifp, vlanid_t vid, + const struct ethaddr *mac) +{ + enum zebra_dplane_result result; + struct in_addr vtep_ip; + + vtep_ip.s_addr = 0; + + /* Use common helper api */ + result = mac_update_common(DPLANE_OP_MAC_DELETE, ifp, bridge_ifp, vid, + mac, vtep_ip, false, 0, 0); + return result; +} +/* + * Public api to init an empty context - either newly-allocated or + * reset/cleared - for a MAC update. 
+ */ +void dplane_mac_init(struct zebra_dplane_ctx *ctx, + const struct interface *ifp, + const struct interface *br_ifp, + vlanid_t vid, + const struct ethaddr *mac, + struct in_addr vtep_ip, + bool sticky, + uint32_t nhg_id, + uint32_t update_flags) +{ + struct zebra_ns *zns; + + ctx->zd_status = ZEBRA_DPLANE_REQUEST_SUCCESS; + ctx->zd_vrf_id = ifp->vrf->vrf_id; + + zns = zebra_ns_lookup(ifp->vrf->vrf_id); + dplane_ctx_ns_init(ctx, zns, false); + + strlcpy(ctx->zd_ifname, ifp->name, sizeof(ctx->zd_ifname)); + ctx->zd_ifindex = ifp->ifindex; + + /* Init the mac-specific data area */ + memset(&ctx->u.macinfo, 0, sizeof(ctx->u.macinfo)); + + ctx->u.macinfo.br_ifindex = br_ifp->ifindex; + ctx->u.macinfo.vtep_ip = vtep_ip; + ctx->u.macinfo.mac = *mac; + ctx->u.macinfo.vid = vid; + ctx->u.macinfo.is_sticky = sticky; + ctx->u.macinfo.nhg_id = nhg_id; + ctx->u.macinfo.update_flags = update_flags; +} + +/* + * Common helper api for MAC address/vxlan updates + */ +static enum zebra_dplane_result +mac_update_common(enum dplane_op_e op, + const struct interface *ifp, + const struct interface *br_ifp, + vlanid_t vid, + const struct ethaddr *mac, + struct in_addr vtep_ip, + bool sticky, + uint32_t nhg_id, + uint32_t update_flags) +{ + enum zebra_dplane_result result = ZEBRA_DPLANE_REQUEST_FAILURE; + int ret; + struct zebra_dplane_ctx *ctx = NULL; + + if (IS_ZEBRA_DEBUG_DPLANE_DETAIL) + zlog_debug("init mac ctx %s: mac %pEA, ifp %s, vtep %pI4", + dplane_op2str(op), mac, ifp->name, &vtep_ip); + + ctx = dplane_ctx_alloc(); + ctx->zd_op = op; + + /* Common init for the ctx */ + dplane_mac_init(ctx, ifp, br_ifp, vid, mac, vtep_ip, sticky, + nhg_id, update_flags); + + /* Enqueue for processing on the dplane pthread */ + ret = dplane_update_enqueue(ctx); + + /* Increment counter */ + atomic_fetch_add_explicit(&zdplane_info.dg_macs_in, 1, + memory_order_relaxed); + + if (ret == AOK) + result = ZEBRA_DPLANE_REQUEST_QUEUED; + else { + /* Error counter */ + 
atomic_fetch_add_explicit(&zdplane_info.dg_mac_errors, 1, + memory_order_relaxed); + dplane_ctx_free(&ctx); + } + + return result; +} + +/* + * Enqueue evpn neighbor add for the dataplane. + */ +enum zebra_dplane_result dplane_rem_neigh_add(const struct interface *ifp, + const struct ipaddr *ip, + const struct ethaddr *mac, + uint32_t flags, bool was_static) +{ + enum zebra_dplane_result result = ZEBRA_DPLANE_REQUEST_FAILURE; + uint32_t update_flags = 0; + + update_flags |= DPLANE_NEIGH_REMOTE; + + if (was_static) + update_flags |= DPLANE_NEIGH_WAS_STATIC; + + result = neigh_update_internal( + DPLANE_OP_NEIGH_INSTALL, ifp, (const void *)mac, AF_ETHERNET, + ip, flags, DPLANE_NUD_NOARP, update_flags, 0); + + return result; +} + +/* + * Enqueue local neighbor add for the dataplane. + */ +enum zebra_dplane_result dplane_local_neigh_add(const struct interface *ifp, + const struct ipaddr *ip, + const struct ethaddr *mac, + bool set_router, bool set_static, + bool set_inactive) +{ + enum zebra_dplane_result result = ZEBRA_DPLANE_REQUEST_FAILURE; + uint32_t update_flags = 0; + uint32_t ntf = 0; + uint16_t state; + + if (set_static) + update_flags |= DPLANE_NEIGH_SET_STATIC; + + if (set_inactive) { + update_flags |= DPLANE_NEIGH_SET_INACTIVE; + state = DPLANE_NUD_STALE; + } else { + state = DPLANE_NUD_REACHABLE; + } + + if (set_router) + ntf |= DPLANE_NTF_ROUTER; + + result = neigh_update_internal(DPLANE_OP_NEIGH_INSTALL, ifp, + (const void *)mac, AF_ETHERNET, ip, ntf, + state, update_flags, 0); + + return result; +} + +/* + * Enqueue evpn neighbor delete for the dataplane. + */ +enum zebra_dplane_result dplane_rem_neigh_delete(const struct interface *ifp, + const struct ipaddr *ip) +{ + enum zebra_dplane_result result; + uint32_t update_flags = 0; + + update_flags |= DPLANE_NEIGH_REMOTE; + + result = neigh_update_internal(DPLANE_OP_NEIGH_DELETE, ifp, NULL, + AF_ETHERNET, ip, 0, 0, update_flags, 0); + + return result; +} + +/* + * Enqueue evpn VTEP add for the dataplane. 
+ */ +enum zebra_dplane_result dplane_vtep_add(const struct interface *ifp, + const struct in_addr *ip, + vni_t vni) +{ + enum zebra_dplane_result result; + struct ethaddr mac = { {0, 0, 0, 0, 0, 0} }; + struct ipaddr addr; + + if (IS_ZEBRA_DEBUG_VXLAN) + zlog_debug("Install %pI4 into flood list for VNI %u intf %s(%u)", + ip, vni, ifp->name, ifp->ifindex); + + SET_IPADDR_V4(&addr); + addr.ipaddr_v4 = *ip; + + result = neigh_update_internal(DPLANE_OP_VTEP_ADD, ifp, &mac, + AF_ETHERNET, &addr, 0, 0, 0, 0); + + return result; +} + +/* + * Enqueue evpn VTEP add for the dataplane. + */ +enum zebra_dplane_result dplane_vtep_delete(const struct interface *ifp, + const struct in_addr *ip, + vni_t vni) +{ + enum zebra_dplane_result result; + struct ethaddr mac = { {0, 0, 0, 0, 0, 0} }; + struct ipaddr addr; + + if (IS_ZEBRA_DEBUG_VXLAN) + zlog_debug( + "Uninstall %pI4 from flood list for VNI %u intf %s(%u)", + ip, vni, ifp->name, ifp->ifindex); + + SET_IPADDR_V4(&addr); + addr.ipaddr_v4 = *ip; + + result = neigh_update_internal(DPLANE_OP_VTEP_DELETE, ifp, + (const void *)&mac, AF_ETHERNET, &addr, + 0, 0, 0, 0); + + return result; +} + +enum zebra_dplane_result dplane_neigh_discover(const struct interface *ifp, + const struct ipaddr *ip) +{ + enum zebra_dplane_result result; + + result = neigh_update_internal(DPLANE_OP_NEIGH_DISCOVER, ifp, NULL, + AF_ETHERNET, ip, DPLANE_NTF_USE, + DPLANE_NUD_INCOMPLETE, 0, 0); + + return result; +} + +enum zebra_dplane_result dplane_neigh_table_update(const struct interface *ifp, + const uint8_t family, + const uint32_t app_probes, + const uint32_t ucast_probes, + const uint32_t mcast_probes) +{ + enum zebra_dplane_result result = ZEBRA_DPLANE_REQUEST_FAILURE; + int ret; + struct zebra_dplane_ctx *ctx = NULL; + struct zebra_ns *zns; + enum dplane_op_e op = DPLANE_OP_NEIGH_TABLE_UPDATE; + + if (IS_ZEBRA_DEBUG_DPLANE_DETAIL) { + zlog_debug("set neigh ctx %s: ifp %s, family %s", + dplane_op2str(op), ifp->name, family2str(family)); + } + + ctx 
= dplane_ctx_alloc(); + + ctx->zd_op = op; + ctx->zd_status = ZEBRA_DPLANE_REQUEST_SUCCESS; + ctx->zd_vrf_id = ifp->vrf->vrf_id; + + zns = zebra_ns_lookup(ifp->vrf->vrf_id); + dplane_ctx_ns_init(ctx, zns, false); + + strlcpy(ctx->zd_ifname, ifp->name, sizeof(ctx->zd_ifname)); + ctx->zd_ifindex = ifp->ifindex; + + /* Init the neighbor-specific data area */ + memset(&ctx->u.neightable, 0, sizeof(ctx->u.neightable)); + + ctx->u.neightable.family = family; + ctx->u.neightable.app_probes = app_probes; + ctx->u.neightable.ucast_probes = ucast_probes; + ctx->u.neightable.mcast_probes = mcast_probes; + + /* Enqueue for processing on the dplane pthread */ + ret = dplane_update_enqueue(ctx); + + /* Increment counter */ + atomic_fetch_add_explicit(&zdplane_info.dg_neightable_in, 1, + memory_order_relaxed); + + if (ret == AOK) + result = ZEBRA_DPLANE_REQUEST_QUEUED; + else { + /* Error counter */ + atomic_fetch_add_explicit(&zdplane_info.dg_neightable_errors, 1, + memory_order_relaxed); + dplane_ctx_free(&ctx); + } + + return result; +} + +/* + * Common helper api for neighbor updates + */ +static enum zebra_dplane_result +neigh_update_internal(enum dplane_op_e op, const struct interface *ifp, + const void *link, const int link_family, + const struct ipaddr *ip, uint32_t flags, uint16_t state, + uint32_t update_flags, int protocol) +{ + enum zebra_dplane_result result = ZEBRA_DPLANE_REQUEST_FAILURE; + int ret; + struct zebra_dplane_ctx *ctx = NULL; + struct zebra_ns *zns; + const struct ethaddr *mac = NULL; + const struct ipaddr *link_ip = NULL; + + if (link_family == AF_ETHERNET) + mac = (const struct ethaddr *)link; + else + link_ip = (const struct ipaddr *)link; + + if (IS_ZEBRA_DEBUG_DPLANE_DETAIL) { + char buf1[PREFIX_STRLEN]; + + buf1[0] = '\0'; + if (link_family == AF_ETHERNET) + prefix_mac2str(mac, buf1, sizeof(buf1)); + else + ipaddr2str(link_ip, buf1, sizeof(buf1)); + zlog_debug("init neigh ctx %s: ifp %s, %s %s, ip %pIA", + dplane_op2str(op), ifp->name, + 
link_family == AF_ETHERNET ? "mac " : "link ", + buf1, ip); + } + + ctx = dplane_ctx_alloc(); + + ctx->zd_op = op; + ctx->zd_status = ZEBRA_DPLANE_REQUEST_SUCCESS; + ctx->zd_vrf_id = ifp->vrf->vrf_id; + dplane_ctx_set_type(ctx, protocol); + + zns = zebra_ns_lookup(ifp->vrf->vrf_id); + dplane_ctx_ns_init(ctx, zns, false); + + strlcpy(ctx->zd_ifname, ifp->name, sizeof(ctx->zd_ifname)); + ctx->zd_ifindex = ifp->ifindex; + + /* Init the neighbor-specific data area */ + memset(&ctx->u.neigh, 0, sizeof(ctx->u.neigh)); + + ctx->u.neigh.ip_addr = *ip; + if (mac) + ctx->u.neigh.link.mac = *mac; + else if (link_ip) + ctx->u.neigh.link.ip_addr = *link_ip; + + ctx->u.neigh.flags = flags; + ctx->u.neigh.state = state; + ctx->u.neigh.update_flags = update_flags; + + /* Enqueue for processing on the dplane pthread */ + ret = dplane_update_enqueue(ctx); + + /* Increment counter */ + atomic_fetch_add_explicit(&zdplane_info.dg_neighs_in, 1, + memory_order_relaxed); + + if (ret == AOK) + result = ZEBRA_DPLANE_REQUEST_QUEUED; + else { + /* Error counter */ + atomic_fetch_add_explicit(&zdplane_info.dg_neigh_errors, 1, + memory_order_relaxed); + dplane_ctx_free(&ctx); + } + + return result; +} + +/* + * Common helper api for PBR rule updates + */ +static enum zebra_dplane_result +rule_update_internal(enum dplane_op_e op, struct zebra_pbr_rule *new_rule, + struct zebra_pbr_rule *old_rule) +{ + enum zebra_dplane_result result = ZEBRA_DPLANE_REQUEST_FAILURE; + struct zebra_dplane_ctx *ctx; + int ret; + + ctx = dplane_ctx_alloc(); + + ret = dplane_ctx_rule_init(ctx, op, new_rule, old_rule); + if (ret != AOK) + goto done; + + ret = dplane_update_enqueue(ctx); + +done: + atomic_fetch_add_explicit(&zdplane_info.dg_rules_in, 1, + memory_order_relaxed); + + if (ret == AOK) + result = ZEBRA_DPLANE_REQUEST_QUEUED; + else { + atomic_fetch_add_explicit(&zdplane_info.dg_rule_errors, 1, + memory_order_relaxed); + dplane_ctx_free(&ctx); + } + + return result; +} + +enum zebra_dplane_result 
dplane_pbr_rule_add(struct zebra_pbr_rule *rule) +{ + return rule_update_internal(DPLANE_OP_RULE_ADD, rule, NULL); +} + +enum zebra_dplane_result dplane_pbr_rule_delete(struct zebra_pbr_rule *rule) +{ + return rule_update_internal(DPLANE_OP_RULE_DELETE, rule, NULL); +} + +enum zebra_dplane_result dplane_pbr_rule_update(struct zebra_pbr_rule *old_rule, + struct zebra_pbr_rule *new_rule) +{ + return rule_update_internal(DPLANE_OP_RULE_UPDATE, new_rule, old_rule); +} +/* + * Common helper api for iptable updates + */ +static enum zebra_dplane_result +iptable_update_internal(enum dplane_op_e op, struct zebra_pbr_iptable *iptable) +{ + enum zebra_dplane_result result = ZEBRA_DPLANE_REQUEST_FAILURE; + struct zebra_dplane_ctx *ctx; + int ret; + + if ((op == DPLANE_OP_IPTABLE_ADD && + CHECK_FLAG(iptable->internal_flags, IPTABLE_INSTALL_QUEUED)) || + (op == DPLANE_OP_IPTABLE_DELETE && + CHECK_FLAG(iptable->internal_flags, IPTABLE_UNINSTALL_QUEUED))) { + if (IS_ZEBRA_DEBUG_DPLANE_DETAIL) + zlog_debug( + "update dplane ctx %s: iptable %s already in progress", + dplane_op2str(op), iptable->ipset_name); + return result; + } + + ctx = dplane_ctx_alloc(); + + ret = dplane_ctx_iptable_init(ctx, op, iptable); + if (ret != AOK) + goto done; + + ret = dplane_update_enqueue(ctx); + +done: + atomic_fetch_add_explicit(&zdplane_info.dg_iptable_in, 1, + memory_order_relaxed); + + if (ret == AOK) { + result = ZEBRA_DPLANE_REQUEST_QUEUED; + if (op == DPLANE_OP_IPTABLE_ADD) + SET_FLAG(iptable->internal_flags, + IPTABLE_INSTALL_QUEUED); + else + SET_FLAG(iptable->internal_flags, + IPTABLE_UNINSTALL_QUEUED); + } else { + atomic_fetch_add_explicit(&zdplane_info.dg_iptable_errors, 1, + memory_order_relaxed); + dplane_ctx_free(&ctx); + } + return result; +} + +enum zebra_dplane_result +dplane_pbr_iptable_add(struct zebra_pbr_iptable *iptable) +{ + return iptable_update_internal(DPLANE_OP_IPTABLE_ADD, iptable); +} + +enum zebra_dplane_result +dplane_pbr_iptable_delete(struct zebra_pbr_iptable 
*iptable) +{ + return iptable_update_internal(DPLANE_OP_IPTABLE_DELETE, iptable); +} + +/* + * Common helper api for ipset updates + */ +static enum zebra_dplane_result +ipset_update_internal(enum dplane_op_e op, struct zebra_pbr_ipset *ipset) +{ + enum zebra_dplane_result result = ZEBRA_DPLANE_REQUEST_FAILURE; + struct zebra_dplane_ctx *ctx; + int ret; + + ctx = dplane_ctx_alloc(); + + ret = dplane_ctx_ipset_init(ctx, op, ipset); + if (ret != AOK) + goto done; + + ret = dplane_update_enqueue(ctx); + +done: + atomic_fetch_add_explicit(&zdplane_info.dg_ipset_in, 1, + memory_order_relaxed); + + if (ret == AOK) + result = ZEBRA_DPLANE_REQUEST_QUEUED; + else { + atomic_fetch_add_explicit(&zdplane_info.dg_ipset_errors, 1, + memory_order_relaxed); + dplane_ctx_free(&ctx); + } + + return result; +} + +enum zebra_dplane_result dplane_pbr_ipset_add(struct zebra_pbr_ipset *ipset) +{ + return ipset_update_internal(DPLANE_OP_IPSET_ADD, ipset); +} + +enum zebra_dplane_result dplane_pbr_ipset_delete(struct zebra_pbr_ipset *ipset) +{ + return ipset_update_internal(DPLANE_OP_IPSET_DELETE, ipset); +} + +/* + * Common helper api for ipset updates + */ +static enum zebra_dplane_result +ipset_entry_update_internal(enum dplane_op_e op, + struct zebra_pbr_ipset_entry *ipset_entry) +{ + enum zebra_dplane_result result = ZEBRA_DPLANE_REQUEST_FAILURE; + struct zebra_dplane_ctx *ctx; + int ret; + + ctx = dplane_ctx_alloc(); + + ret = dplane_ctx_ipset_entry_init(ctx, op, ipset_entry); + if (ret != AOK) + goto done; + + ret = dplane_update_enqueue(ctx); + +done: + atomic_fetch_add_explicit(&zdplane_info.dg_ipset_entry_in, 1, + memory_order_relaxed); + + if (ret == AOK) + result = ZEBRA_DPLANE_REQUEST_QUEUED; + else { + atomic_fetch_add_explicit(&zdplane_info.dg_ipset_entry_errors, + 1, memory_order_relaxed); + dplane_ctx_free(&ctx); + } + + return result; +} + +enum zebra_dplane_result +dplane_pbr_ipset_entry_add(struct zebra_pbr_ipset_entry *ipset) +{ + return 
ipset_entry_update_internal(DPLANE_OP_IPSET_ENTRY_ADD, ipset); +} + +enum zebra_dplane_result +dplane_pbr_ipset_entry_delete(struct zebra_pbr_ipset_entry *ipset) +{ + return ipset_entry_update_internal(DPLANE_OP_IPSET_ENTRY_DELETE, ipset); +} + +/* + * Common helper api for GRE set + */ +enum zebra_dplane_result +dplane_gre_set(struct interface *ifp, struct interface *ifp_link, + unsigned int mtu, const struct zebra_l2info_gre *gre_info) +{ + enum zebra_dplane_result result = ZEBRA_DPLANE_REQUEST_FAILURE; + struct zebra_dplane_ctx *ctx; + enum dplane_op_e op = DPLANE_OP_GRE_SET; + int ret; + struct zebra_ns *zns; + + ctx = dplane_ctx_alloc(); + + if (!ifp) + return result; + + if (IS_ZEBRA_DEBUG_DPLANE_DETAIL) { + zlog_debug("init dplane ctx %s: if %s link %s%s", + dplane_op2str(op), ifp->name, + ifp_link ? "set" : "unset", ifp_link ? + ifp_link->name : ""); + } + + ctx->zd_op = op; + ctx->zd_status = ZEBRA_DPLANE_REQUEST_SUCCESS; + zns = zebra_ns_lookup(ifp->vrf->vrf_id); + if (!zns) + return result; + dplane_ctx_ns_init(ctx, zns, false); + + dplane_ctx_set_ifname(ctx, ifp->name); + ctx->zd_vrf_id = ifp->vrf->vrf_id; + ctx->zd_ifindex = ifp->ifindex; + if (ifp_link) + ctx->u.gre.link_ifindex = ifp_link->ifindex; + else + ctx->u.gre.link_ifindex = 0; + if (gre_info) + memcpy(&ctx->u.gre.info, gre_info, sizeof(ctx->u.gre.info)); + ctx->u.gre.mtu = mtu; + + ctx->zd_status = ZEBRA_DPLANE_REQUEST_SUCCESS; + + /* Enqueue context for processing */ + ret = dplane_update_enqueue(ctx); + + /* Update counter */ + atomic_fetch_add_explicit(&zdplane_info.dg_gre_set_in, 1, + memory_order_relaxed); + + if (ret == AOK) + result = ZEBRA_DPLANE_REQUEST_QUEUED; + else { + atomic_fetch_add_explicit( + &zdplane_info.dg_gre_set_errors, 1, + memory_order_relaxed); + if (ctx) + dplane_ctx_free(&ctx); + result = ZEBRA_DPLANE_REQUEST_FAILURE; + } + return result; +} + +/* + * Handler for 'show dplane' + */ +int dplane_show_helper(struct vty *vty, bool detailed) +{ + uint64_t queued, 
queue_max, limit, errs, incoming, yields, + other_errs; + + /* Using atomics because counters are being changed in different + * pthread contexts. + */ + incoming = atomic_load_explicit(&zdplane_info.dg_routes_in, + memory_order_relaxed); + limit = atomic_load_explicit(&zdplane_info.dg_max_queued_updates, + memory_order_relaxed); + queued = atomic_load_explicit(&zdplane_info.dg_routes_queued, + memory_order_relaxed); + queue_max = atomic_load_explicit(&zdplane_info.dg_routes_queued_max, + memory_order_relaxed); + errs = atomic_load_explicit(&zdplane_info.dg_route_errors, + memory_order_relaxed); + yields = atomic_load_explicit(&zdplane_info.dg_update_yields, + memory_order_relaxed); + other_errs = atomic_load_explicit(&zdplane_info.dg_other_errors, + memory_order_relaxed); + + vty_out(vty, "Zebra dataplane:\nRoute updates: %"PRIu64"\n", + incoming); + vty_out(vty, "Route update errors: %"PRIu64"\n", errs); + vty_out(vty, "Other errors : %"PRIu64"\n", other_errs); + vty_out(vty, "Route update queue limit: %"PRIu64"\n", limit); + vty_out(vty, "Route update queue depth: %"PRIu64"\n", queued); + vty_out(vty, "Route update queue max: %"PRIu64"\n", queue_max); + vty_out(vty, "Dplane update yields: %"PRIu64"\n", yields); + + incoming = atomic_load_explicit(&zdplane_info.dg_lsps_in, + memory_order_relaxed); + errs = atomic_load_explicit(&zdplane_info.dg_lsp_errors, + memory_order_relaxed); + vty_out(vty, "LSP updates: %"PRIu64"\n", incoming); + vty_out(vty, "LSP update errors: %"PRIu64"\n", errs); + + incoming = atomic_load_explicit(&zdplane_info.dg_pws_in, + memory_order_relaxed); + errs = atomic_load_explicit(&zdplane_info.dg_pw_errors, + memory_order_relaxed); + vty_out(vty, "PW updates: %"PRIu64"\n", incoming); + vty_out(vty, "PW update errors: %"PRIu64"\n", errs); + + incoming = atomic_load_explicit(&zdplane_info.dg_intf_addrs_in, + memory_order_relaxed); + errs = atomic_load_explicit(&zdplane_info.dg_intf_addr_errors, + memory_order_relaxed); + vty_out(vty, "Intf 
addr updates: %"PRIu64"\n", incoming); + vty_out(vty, "Intf addr errors: %"PRIu64"\n", errs); + + incoming = atomic_load_explicit(&zdplane_info.dg_intf_changes, + memory_order_relaxed); + errs = atomic_load_explicit(&zdplane_info.dg_intf_changes_errors, + memory_order_relaxed); + vty_out(vty, "Intf change updates: %" PRIu64 "\n", incoming); + vty_out(vty, "Intf change errors: %" PRIu64 "\n", errs); + + incoming = atomic_load_explicit(&zdplane_info.dg_macs_in, + memory_order_relaxed); + errs = atomic_load_explicit(&zdplane_info.dg_mac_errors, + memory_order_relaxed); + vty_out(vty, "EVPN MAC updates: %"PRIu64"\n", incoming); + vty_out(vty, "EVPN MAC errors: %"PRIu64"\n", errs); + + incoming = atomic_load_explicit(&zdplane_info.dg_neighs_in, + memory_order_relaxed); + errs = atomic_load_explicit(&zdplane_info.dg_neigh_errors, + memory_order_relaxed); + vty_out(vty, "EVPN neigh updates: %"PRIu64"\n", incoming); + vty_out(vty, "EVPN neigh errors: %"PRIu64"\n", errs); + + incoming = atomic_load_explicit(&zdplane_info.dg_rules_in, + memory_order_relaxed); + errs = atomic_load_explicit(&zdplane_info.dg_rule_errors, + memory_order_relaxed); + vty_out(vty, "Rule updates: %" PRIu64 "\n", incoming); + vty_out(vty, "Rule errors: %" PRIu64 "\n", errs); + + incoming = atomic_load_explicit(&zdplane_info.dg_br_port_in, + memory_order_relaxed); + errs = atomic_load_explicit(&zdplane_info.dg_br_port_errors, + memory_order_relaxed); + vty_out(vty, "Bridge port updates: %" PRIu64 "\n", incoming); + vty_out(vty, "Bridge port errors: %" PRIu64 "\n", errs); + + incoming = atomic_load_explicit(&zdplane_info.dg_iptable_in, + memory_order_relaxed); + errs = atomic_load_explicit(&zdplane_info.dg_iptable_errors, + memory_order_relaxed); + vty_out(vty, "IPtable updates: %" PRIu64 "\n", incoming); + vty_out(vty, "IPtable errors: %" PRIu64 "\n", errs); + incoming = atomic_load_explicit(&zdplane_info.dg_ipset_in, + memory_order_relaxed); + errs = 
atomic_load_explicit(&zdplane_info.dg_ipset_errors, + memory_order_relaxed); + vty_out(vty, "IPset updates: %" PRIu64 "\n", incoming); + vty_out(vty, "IPset errors: %" PRIu64 "\n", errs); + incoming = atomic_load_explicit(&zdplane_info.dg_ipset_entry_in, + memory_order_relaxed); + errs = atomic_load_explicit(&zdplane_info.dg_ipset_entry_errors, + memory_order_relaxed); + vty_out(vty, "IPset entry updates: %" PRIu64 "\n", incoming); + vty_out(vty, "IPset entry errors: %" PRIu64 "\n", errs); + + incoming = atomic_load_explicit(&zdplane_info.dg_neightable_in, + memory_order_relaxed); + errs = atomic_load_explicit(&zdplane_info.dg_neightable_errors, + memory_order_relaxed); + vty_out(vty, "Neighbor Table updates: %"PRIu64"\n", incoming); + vty_out(vty, "Neighbor Table errors: %"PRIu64"\n", errs); + + incoming = atomic_load_explicit(&zdplane_info.dg_gre_set_in, + memory_order_relaxed); + errs = atomic_load_explicit(&zdplane_info.dg_gre_set_errors, + memory_order_relaxed); + vty_out(vty, "GRE set updates: %"PRIu64"\n", incoming); + vty_out(vty, "GRE set errors: %"PRIu64"\n", errs); + return CMD_SUCCESS; +} + +/* + * Handler for 'show dplane providers' + */ +int dplane_show_provs_helper(struct vty *vty, bool detailed) +{ + struct zebra_dplane_provider *prov; + uint64_t in, in_q, in_max, out, out_q, out_max; + + vty_out(vty, "Zebra dataplane providers:\n"); + + DPLANE_LOCK(); + prov = TAILQ_FIRST(&zdplane_info.dg_providers_q); + DPLANE_UNLOCK(); + + /* Show counters, useful info from each registered provider */ + while (prov) { + + in = atomic_load_explicit(&prov->dp_in_counter, + memory_order_relaxed); + in_q = atomic_load_explicit(&prov->dp_in_queued, + memory_order_relaxed); + in_max = atomic_load_explicit(&prov->dp_in_max, + memory_order_relaxed); + out = atomic_load_explicit(&prov->dp_out_counter, + memory_order_relaxed); + out_q = atomic_load_explicit(&prov->dp_out_queued, + memory_order_relaxed); + out_max = atomic_load_explicit(&prov->dp_out_max, + 
memory_order_relaxed); + + vty_out(vty, "%s (%u): in: %"PRIu64", q: %"PRIu64", q_max: %"PRIu64", out: %"PRIu64", q: %"PRIu64", q_max: %"PRIu64"\n", + prov->dp_name, prov->dp_id, in, in_q, in_max, + out, out_q, out_max); + + DPLANE_LOCK(); + prov = TAILQ_NEXT(prov, dp_prov_link); + DPLANE_UNLOCK(); + } + + return CMD_SUCCESS; +} + +/* + * Helper for 'show run' etc. + */ +int dplane_config_write_helper(struct vty *vty) +{ + if (zdplane_info.dg_max_queued_updates != DPLANE_DEFAULT_MAX_QUEUED) + vty_out(vty, "zebra dplane limit %u\n", + zdplane_info.dg_max_queued_updates); + + return 0; +} + +/* + * Provider registration + */ +int dplane_provider_register(const char *name, + enum dplane_provider_prio prio, + int flags, + int (*start_fp)(struct zebra_dplane_provider *), + int (*fp)(struct zebra_dplane_provider *), + int (*fini_fp)(struct zebra_dplane_provider *, + bool early), + void *data, + struct zebra_dplane_provider **prov_p) +{ + int ret = 0; + struct zebra_dplane_provider *p = NULL, *last; + + /* Validate */ + if (fp == NULL) { + ret = EINVAL; + goto done; + } + + if (prio <= DPLANE_PRIO_NONE || + prio > DPLANE_PRIO_LAST) { + ret = EINVAL; + goto done; + } + + /* Allocate and init new provider struct */ + p = XCALLOC(MTYPE_DP_PROV, sizeof(struct zebra_dplane_provider)); + + pthread_mutex_init(&(p->dp_mutex), NULL); + TAILQ_INIT(&(p->dp_ctx_in_q)); + TAILQ_INIT(&(p->dp_ctx_out_q)); + + p->dp_flags = flags; + p->dp_priority = prio; + p->dp_fp = fp; + p->dp_start = start_fp; + p->dp_fini = fini_fp; + p->dp_data = data; + + /* Lock - the dplane pthread may be running */ + DPLANE_LOCK(); + + p->dp_id = ++zdplane_info.dg_provider_id; + + if (name) + strlcpy(p->dp_name, name, DPLANE_PROVIDER_NAMELEN); + else + snprintf(p->dp_name, DPLANE_PROVIDER_NAMELEN, + "provider-%u", p->dp_id); + + /* Insert into list ordered by priority */ + TAILQ_FOREACH(last, &zdplane_info.dg_providers_q, dp_prov_link) { + if (last->dp_priority > p->dp_priority) + break; + } + + if (last) + 
TAILQ_INSERT_BEFORE(last, p, dp_prov_link); + else + TAILQ_INSERT_TAIL(&zdplane_info.dg_providers_q, p, + dp_prov_link); + + /* And unlock */ + DPLANE_UNLOCK(); + + if (IS_ZEBRA_DEBUG_DPLANE) + zlog_debug("dplane: registered new provider '%s' (%u), prio %d", + p->dp_name, p->dp_id, p->dp_priority); + +done: + if (prov_p) + *prov_p = p; + + return ret; +} + +/* Accessors for provider attributes */ +const char *dplane_provider_get_name(const struct zebra_dplane_provider *prov) +{ + return prov->dp_name; +} + +uint32_t dplane_provider_get_id(const struct zebra_dplane_provider *prov) +{ + return prov->dp_id; +} + +void *dplane_provider_get_data(const struct zebra_dplane_provider *prov) +{ + return prov->dp_data; +} + +int dplane_provider_get_work_limit(const struct zebra_dplane_provider *prov) +{ + return zdplane_info.dg_updates_per_cycle; +} + +/* Lock/unlock a provider's mutex - iff the provider was registered with + * the THREADED flag. + */ +void dplane_provider_lock(struct zebra_dplane_provider *prov) +{ + if (dplane_provider_is_threaded(prov)) + DPLANE_PROV_LOCK(prov); +} + +void dplane_provider_unlock(struct zebra_dplane_provider *prov) +{ + if (dplane_provider_is_threaded(prov)) + DPLANE_PROV_UNLOCK(prov); +} + +/* + * Dequeue and maintain associated counter + */ +struct zebra_dplane_ctx *dplane_provider_dequeue_in_ctx( + struct zebra_dplane_provider *prov) +{ + struct zebra_dplane_ctx *ctx = NULL; + + dplane_provider_lock(prov); + + ctx = TAILQ_FIRST(&(prov->dp_ctx_in_q)); + if (ctx) { + TAILQ_REMOVE(&(prov->dp_ctx_in_q), ctx, zd_q_entries); + + atomic_fetch_sub_explicit(&prov->dp_in_queued, 1, + memory_order_relaxed); + } + + dplane_provider_unlock(prov); + + return ctx; +} + +/* + * Dequeue work to a list, return count + */ +int dplane_provider_dequeue_in_list(struct zebra_dplane_provider *prov, + struct dplane_ctx_q *listp) +{ + int limit, ret; + struct zebra_dplane_ctx *ctx; + + limit = zdplane_info.dg_updates_per_cycle; + + dplane_provider_lock(prov); + + 
for (ret = 0; ret < limit; ret++) { + ctx = TAILQ_FIRST(&(prov->dp_ctx_in_q)); + if (ctx) { + TAILQ_REMOVE(&(prov->dp_ctx_in_q), ctx, zd_q_entries); + + TAILQ_INSERT_TAIL(listp, ctx, zd_q_entries); + } else { + break; + } + } + + if (ret > 0) + atomic_fetch_sub_explicit(&prov->dp_in_queued, ret, + memory_order_relaxed); + + dplane_provider_unlock(prov); + + return ret; +} + +uint32_t dplane_provider_out_ctx_queue_len(struct zebra_dplane_provider *prov) +{ + return atomic_load_explicit(&(prov->dp_out_counter), + memory_order_relaxed); +} + +/* + * Enqueue and maintain associated counter + */ +void dplane_provider_enqueue_out_ctx(struct zebra_dplane_provider *prov, + struct zebra_dplane_ctx *ctx) +{ + uint64_t curr, high; + + dplane_provider_lock(prov); + + TAILQ_INSERT_TAIL(&(prov->dp_ctx_out_q), ctx, + zd_q_entries); + + /* Maintain out-queue counters */ + atomic_fetch_add_explicit(&(prov->dp_out_queued), 1, + memory_order_relaxed); + curr = atomic_load_explicit(&prov->dp_out_queued, + memory_order_relaxed); + high = atomic_load_explicit(&prov->dp_out_max, + memory_order_relaxed); + if (curr > high) + atomic_store_explicit(&prov->dp_out_max, curr, + memory_order_relaxed); + + dplane_provider_unlock(prov); + + atomic_fetch_add_explicit(&(prov->dp_out_counter), 1, + memory_order_relaxed); +} + +/* + * Accessor for provider object + */ +bool dplane_provider_is_threaded(const struct zebra_dplane_provider *prov) +{ + return (prov->dp_flags & DPLANE_PROV_FLAG_THREADED); +} + +#ifdef HAVE_NETLINK +/* + * Callback when an OS (netlink) incoming event read is ready. This runs + * in the dplane pthread. 
+ */ +static void dplane_incoming_read(struct thread *event) +{ + struct dplane_zns_info *zi = THREAD_ARG(event); + + kernel_dplane_read(&zi->info); + + /* Re-start read task */ + thread_add_read(zdplane_info.dg_master, dplane_incoming_read, zi, + zi->info.sock, &zi->t_read); +} + +/* + * Callback in the dataplane pthread that requests info from the OS and + * initiates netlink reads. + */ +static void dplane_incoming_request(struct thread *event) +{ + struct dplane_zns_info *zi = THREAD_ARG(event); + + /* Start read task */ + thread_add_read(zdplane_info.dg_master, dplane_incoming_read, zi, + zi->info.sock, &zi->t_read); + + /* Send requests */ + netlink_request_netconf(zi->info.sock); +} + +/* + * Initiate requests for existing info from the OS. This is called by the + * main pthread, but we want all activity on the dplane netlink socket to + * take place on the dplane pthread, so we schedule an event to accomplish + * that. + */ +static void dplane_kernel_info_request(struct dplane_zns_info *zi) +{ + /* If we happen to encounter an enabled zns before the dplane + * pthread is running, we'll initiate this later on. + */ + if (zdplane_info.dg_master) + thread_add_event(zdplane_info.dg_master, + dplane_incoming_request, zi, 0, + &zi->t_request); +} + +#endif /* HAVE_NETLINK */ + +/* + * Notify dplane when namespaces are enabled and disabled. The dplane + * needs to start and stop reading incoming events from the zns. In the + * common case where vrfs are _not_ namespaces, there will only be one + * of these. + * + * This is called in the main pthread. + */ +void zebra_dplane_ns_enable(struct zebra_ns *zns, bool enabled) +{ + struct dplane_zns_info *zi; + + if (IS_ZEBRA_DEBUG_DPLANE) + zlog_debug("%s: %s for nsid %u", __func__, + (enabled ? 
"ENABLED" : "DISABLED"), zns->ns_id); + + /* Search for an existing zns info entry */ + frr_each (zns_info_list, &zdplane_info.dg_zns_list, zi) { + if (zi->info.ns_id == zns->ns_id) + break; + } + + if (enabled) { + /* Create a new entry if necessary; start reading. */ + if (zi == NULL) { + zi = XCALLOC(MTYPE_DP_NS, sizeof(*zi)); + + zi->info.ns_id = zns->ns_id; + + zns_info_list_add_tail(&zdplane_info.dg_zns_list, zi); + + if (IS_ZEBRA_DEBUG_DPLANE) + zlog_debug("%s: nsid %u, new zi %p", __func__, + zns->ns_id, zi); + } + + /* Make sure we're up-to-date with the zns object */ +#if defined(HAVE_NETLINK) + zi->info.is_cmd = false; + zi->info.sock = zns->netlink_dplane_in.sock; + + /* Initiate requests for existing info from the OS, and + * begin reading from the netlink socket. + */ + dplane_kernel_info_request(zi); +#endif + } else if (zi) { + if (IS_ZEBRA_DEBUG_DPLANE) + zlog_debug("%s: nsid %u, deleting zi %p", __func__, + zns->ns_id, zi); + + /* Stop reading, free memory */ + zns_info_list_del(&zdplane_info.dg_zns_list, zi); + + /* Stop any outstanding tasks */ + if (zdplane_info.dg_master) { + thread_cancel_async(zdplane_info.dg_master, + &zi->t_request, NULL); + + thread_cancel_async(zdplane_info.dg_master, &zi->t_read, + NULL); + } + + XFREE(MTYPE_DP_NS, zi); + } +} + +/* + * Provider api to signal that work/events are available + * for the dataplane pthread. + */ +int dplane_provider_work_ready(void) +{ + /* Note that during zebra startup, we may be offered work before + * the dataplane pthread (and thread-master) are ready. We want to + * enqueue the work, but the event-scheduling machinery may not be + * available. + */ + if (zdplane_info.dg_run) { + thread_add_event(zdplane_info.dg_master, + dplane_thread_loop, NULL, 0, + &zdplane_info.dg_t_update); + } + + return AOK; +} + +/* + * Enqueue a context directly to zebra main. 
+ */ +void dplane_provider_enqueue_to_zebra(struct zebra_dplane_ctx *ctx) +{ + struct dplane_ctx_q temp_list; + + /* Zebra's api takes a list, so we need to use a temporary list */ + TAILQ_INIT(&temp_list); + + TAILQ_INSERT_TAIL(&temp_list, ctx, zd_q_entries); + (zdplane_info.dg_results_cb)(&temp_list); +} + +/* + * Kernel dataplane provider + */ + +static void kernel_dplane_log_detail(struct zebra_dplane_ctx *ctx) +{ + char buf[PREFIX_STRLEN]; + + switch (dplane_ctx_get_op(ctx)) { + + case DPLANE_OP_ROUTE_INSTALL: + case DPLANE_OP_ROUTE_UPDATE: + case DPLANE_OP_ROUTE_DELETE: + zlog_debug("%u:%pFX Dplane route update ctx %p op %s", + dplane_ctx_get_vrf(ctx), dplane_ctx_get_dest(ctx), + ctx, dplane_op2str(dplane_ctx_get_op(ctx))); + break; + + case DPLANE_OP_NH_INSTALL: + case DPLANE_OP_NH_UPDATE: + case DPLANE_OP_NH_DELETE: + zlog_debug("ID (%u) Dplane nexthop update ctx %p op %s", + dplane_ctx_get_nhe_id(ctx), ctx, + dplane_op2str(dplane_ctx_get_op(ctx))); + break; + + case DPLANE_OP_LSP_INSTALL: + case DPLANE_OP_LSP_UPDATE: + case DPLANE_OP_LSP_DELETE: + break; + + case DPLANE_OP_PW_INSTALL: + case DPLANE_OP_PW_UNINSTALL: + zlog_debug("Dplane pw %s: op %s af %d loc: %u rem: %u", + dplane_ctx_get_ifname(ctx), + dplane_op2str(ctx->zd_op), dplane_ctx_get_pw_af(ctx), + dplane_ctx_get_pw_local_label(ctx), + dplane_ctx_get_pw_remote_label(ctx)); + break; + + case DPLANE_OP_ADDR_INSTALL: + case DPLANE_OP_ADDR_UNINSTALL: + zlog_debug("Dplane intf %s, idx %u, addr %pFX", + dplane_op2str(dplane_ctx_get_op(ctx)), + dplane_ctx_get_ifindex(ctx), + dplane_ctx_get_intf_addr(ctx)); + break; + + case DPLANE_OP_MAC_INSTALL: + case DPLANE_OP_MAC_DELETE: + prefix_mac2str(dplane_ctx_mac_get_addr(ctx), buf, + sizeof(buf)); + + zlog_debug("Dplane %s, mac %s, ifindex %u", + dplane_op2str(dplane_ctx_get_op(ctx)), + buf, dplane_ctx_get_ifindex(ctx)); + break; + + case DPLANE_OP_NEIGH_INSTALL: + case DPLANE_OP_NEIGH_UPDATE: + case DPLANE_OP_NEIGH_DELETE: + case DPLANE_OP_VTEP_ADD: + case 
DPLANE_OP_VTEP_DELETE: + case DPLANE_OP_NEIGH_DISCOVER: + case DPLANE_OP_NEIGH_IP_INSTALL: + case DPLANE_OP_NEIGH_IP_DELETE: + ipaddr2str(dplane_ctx_neigh_get_ipaddr(ctx), buf, + sizeof(buf)); + + zlog_debug("Dplane %s, ip %s, ifindex %u", + dplane_op2str(dplane_ctx_get_op(ctx)), + buf, dplane_ctx_get_ifindex(ctx)); + break; + + case DPLANE_OP_RULE_ADD: + case DPLANE_OP_RULE_DELETE: + case DPLANE_OP_RULE_UPDATE: + zlog_debug("Dplane rule update op %s, if %s(%u), ctx %p", + dplane_op2str(dplane_ctx_get_op(ctx)), + dplane_ctx_get_ifname(ctx), + dplane_ctx_get_ifindex(ctx), ctx); + break; + + case DPLANE_OP_SYS_ROUTE_ADD: + case DPLANE_OP_SYS_ROUTE_DELETE: + case DPLANE_OP_ROUTE_NOTIFY: + case DPLANE_OP_LSP_NOTIFY: + case DPLANE_OP_BR_PORT_UPDATE: + + case DPLANE_OP_NONE: + break; + + case DPLANE_OP_IPTABLE_ADD: + case DPLANE_OP_IPTABLE_DELETE: { + struct zebra_pbr_iptable ipt; + + dplane_ctx_get_pbr_iptable(ctx, &ipt); + zlog_debug("Dplane iptable update op %s, unique(%u), ctx %p", + dplane_op2str(dplane_ctx_get_op(ctx)), ipt.unique, + ctx); + } break; + case DPLANE_OP_IPSET_ADD: + case DPLANE_OP_IPSET_DELETE: { + struct zebra_pbr_ipset ipset; + + dplane_ctx_get_pbr_ipset(ctx, &ipset); + zlog_debug("Dplane ipset update op %s, unique(%u), ctx %p", + dplane_op2str(dplane_ctx_get_op(ctx)), ipset.unique, + ctx); + } break; + case DPLANE_OP_IPSET_ENTRY_ADD: + case DPLANE_OP_IPSET_ENTRY_DELETE: { + struct zebra_pbr_ipset_entry ipent; + + dplane_ctx_get_pbr_ipset_entry(ctx, &ipent); + zlog_debug( + "Dplane ipset entry update op %s, unique(%u), ctx %p", + dplane_op2str(dplane_ctx_get_op(ctx)), ipent.unique, + ctx); + } break; + case DPLANE_OP_NEIGH_TABLE_UPDATE: + zlog_debug("Dplane neigh table op %s, ifp %s, family %s", + dplane_op2str(dplane_ctx_get_op(ctx)), + dplane_ctx_get_ifname(ctx), + family2str(dplane_ctx_neightable_get_family(ctx))); + break; + case DPLANE_OP_GRE_SET: + zlog_debug("Dplane gre set op %s, ifp %s, link %u", + dplane_op2str(dplane_ctx_get_op(ctx)), + 
dplane_ctx_get_ifname(ctx), + ctx->u.gre.link_ifindex); + break; + + case DPLANE_OP_INTF_ADDR_ADD: + case DPLANE_OP_INTF_ADDR_DEL: + zlog_debug("Dplane incoming op %s, intf %s, addr %pFX", + dplane_op2str(dplane_ctx_get_op(ctx)), + dplane_ctx_get_ifname(ctx), + dplane_ctx_get_intf_addr(ctx)); + break; + + case DPLANE_OP_INTF_NETCONFIG: + zlog_debug("%s: ifindex %d, mpls %d, mcast %d", + dplane_op2str(dplane_ctx_get_op(ctx)), + dplane_ctx_get_ifindex(ctx), + dplane_ctx_get_netconf_mpls(ctx), + dplane_ctx_get_netconf_mcast(ctx)); + break; + + case DPLANE_OP_INTF_INSTALL: + case DPLANE_OP_INTF_UPDATE: + case DPLANE_OP_INTF_DELETE: + zlog_debug("Dplane intf %s, idx %u, protodown %d", + dplane_op2str(dplane_ctx_get_op(ctx)), + dplane_ctx_get_ifindex(ctx), + dplane_ctx_intf_is_protodown(ctx)); + break; + + /* TODO: more detailed log */ + case DPLANE_OP_TC_INSTALL: + case DPLANE_OP_TC_UPDATE: + case DPLANE_OP_TC_DELETE: + zlog_debug("Dplane tc ifidx %u", dplane_ctx_get_ifindex(ctx)); + break; + } +} + +static void kernel_dplane_handle_result(struct zebra_dplane_ctx *ctx) +{ + enum zebra_dplane_result res = dplane_ctx_get_status(ctx); + + switch (dplane_ctx_get_op(ctx)) { + + case DPLANE_OP_ROUTE_INSTALL: + case DPLANE_OP_ROUTE_UPDATE: + case DPLANE_OP_ROUTE_DELETE: + if (res != ZEBRA_DPLANE_REQUEST_SUCCESS) + atomic_fetch_add_explicit(&zdplane_info.dg_route_errors, + 1, memory_order_relaxed); + + if ((dplane_ctx_get_op(ctx) != DPLANE_OP_ROUTE_DELETE) + && (res == ZEBRA_DPLANE_REQUEST_SUCCESS)) { + struct nexthop *nexthop; + + /* Update installed nexthops to signal which have been + * installed. 
+ */ + for (ALL_NEXTHOPS_PTR(dplane_ctx_get_ng(ctx), + nexthop)) { + if (CHECK_FLAG(nexthop->flags, + NEXTHOP_FLAG_RECURSIVE)) + continue; + + if (CHECK_FLAG(nexthop->flags, + NEXTHOP_FLAG_ACTIVE)) { + SET_FLAG(nexthop->flags, + NEXTHOP_FLAG_FIB); + } + } + } + break; + + case DPLANE_OP_NH_INSTALL: + case DPLANE_OP_NH_UPDATE: + case DPLANE_OP_NH_DELETE: + if (res != ZEBRA_DPLANE_REQUEST_SUCCESS) + atomic_fetch_add_explicit( + &zdplane_info.dg_nexthop_errors, 1, + memory_order_relaxed); + break; + + case DPLANE_OP_LSP_INSTALL: + case DPLANE_OP_LSP_UPDATE: + case DPLANE_OP_LSP_DELETE: + if (res != ZEBRA_DPLANE_REQUEST_SUCCESS) + atomic_fetch_add_explicit(&zdplane_info.dg_lsp_errors, + 1, memory_order_relaxed); + break; + + case DPLANE_OP_PW_INSTALL: + case DPLANE_OP_PW_UNINSTALL: + if (res != ZEBRA_DPLANE_REQUEST_SUCCESS) + atomic_fetch_add_explicit(&zdplane_info.dg_pw_errors, 1, + memory_order_relaxed); + break; + + case DPLANE_OP_ADDR_INSTALL: + case DPLANE_OP_ADDR_UNINSTALL: + if (res != ZEBRA_DPLANE_REQUEST_SUCCESS) + atomic_fetch_add_explicit( + &zdplane_info.dg_intf_addr_errors, 1, + memory_order_relaxed); + break; + + case DPLANE_OP_MAC_INSTALL: + case DPLANE_OP_MAC_DELETE: + if (res != ZEBRA_DPLANE_REQUEST_SUCCESS) + atomic_fetch_add_explicit(&zdplane_info.dg_mac_errors, + 1, memory_order_relaxed); + break; + + case DPLANE_OP_NEIGH_INSTALL: + case DPLANE_OP_NEIGH_UPDATE: + case DPLANE_OP_NEIGH_DELETE: + case DPLANE_OP_VTEP_ADD: + case DPLANE_OP_VTEP_DELETE: + case DPLANE_OP_NEIGH_DISCOVER: + case DPLANE_OP_NEIGH_IP_INSTALL: + case DPLANE_OP_NEIGH_IP_DELETE: + if (res != ZEBRA_DPLANE_REQUEST_SUCCESS) + atomic_fetch_add_explicit(&zdplane_info.dg_neigh_errors, + 1, memory_order_relaxed); + break; + + case DPLANE_OP_RULE_ADD: + case DPLANE_OP_RULE_DELETE: + case DPLANE_OP_RULE_UPDATE: + if (res != ZEBRA_DPLANE_REQUEST_SUCCESS) + atomic_fetch_add_explicit(&zdplane_info.dg_rule_errors, + 1, memory_order_relaxed); + break; + + case DPLANE_OP_IPTABLE_ADD: + case 
DPLANE_OP_IPTABLE_DELETE: + if (res != ZEBRA_DPLANE_REQUEST_SUCCESS) + atomic_fetch_add_explicit( + &zdplane_info.dg_iptable_errors, 1, + memory_order_relaxed); + break; + + case DPLANE_OP_IPSET_ADD: + case DPLANE_OP_IPSET_DELETE: + if (res != ZEBRA_DPLANE_REQUEST_SUCCESS) + atomic_fetch_add_explicit(&zdplane_info.dg_ipset_errors, + 1, memory_order_relaxed); + break; + + case DPLANE_OP_IPSET_ENTRY_ADD: + case DPLANE_OP_IPSET_ENTRY_DELETE: + if (res != ZEBRA_DPLANE_REQUEST_SUCCESS) + atomic_fetch_add_explicit( + &zdplane_info.dg_ipset_entry_errors, 1, + memory_order_relaxed); + break; + + case DPLANE_OP_NEIGH_TABLE_UPDATE: + if (res != ZEBRA_DPLANE_REQUEST_SUCCESS) + atomic_fetch_add_explicit( + &zdplane_info.dg_neightable_errors, 1, + memory_order_relaxed); + break; + + case DPLANE_OP_GRE_SET: + if (res != ZEBRA_DPLANE_REQUEST_SUCCESS) + atomic_fetch_add_explicit( + &zdplane_info.dg_gre_set_errors, 1, + memory_order_relaxed); + break; + + case DPLANE_OP_INTF_INSTALL: + case DPLANE_OP_INTF_UPDATE: + case DPLANE_OP_INTF_DELETE: + if (res != ZEBRA_DPLANE_REQUEST_SUCCESS) + atomic_fetch_add_explicit(&zdplane_info.dg_intf_errors, + 1, memory_order_relaxed); + break; + + case DPLANE_OP_TC_INSTALL: + case DPLANE_OP_TC_UPDATE: + case DPLANE_OP_TC_DELETE: + if (res != ZEBRA_DPLANE_REQUEST_SUCCESS) + atomic_fetch_add_explicit(&zdplane_info.dg_tcs_errors, + 1, memory_order_relaxed); + break; + + /* Ignore 'notifications' - no-op */ + case DPLANE_OP_SYS_ROUTE_ADD: + case DPLANE_OP_SYS_ROUTE_DELETE: + case DPLANE_OP_ROUTE_NOTIFY: + case DPLANE_OP_LSP_NOTIFY: + case DPLANE_OP_BR_PORT_UPDATE: + break; + + /* TODO -- error counters for incoming events? 
*/ + case DPLANE_OP_INTF_ADDR_ADD: + case DPLANE_OP_INTF_ADDR_DEL: + case DPLANE_OP_INTF_NETCONFIG: + break; + + case DPLANE_OP_NONE: + if (res != ZEBRA_DPLANE_REQUEST_SUCCESS) + atomic_fetch_add_explicit(&zdplane_info.dg_other_errors, + 1, memory_order_relaxed); + break; + } +} + +static void kernel_dplane_process_iptable(struct zebra_dplane_provider *prov, + struct zebra_dplane_ctx *ctx) +{ + zebra_pbr_process_iptable(ctx); + dplane_provider_enqueue_out_ctx(prov, ctx); +} + +static void kernel_dplane_process_ipset(struct zebra_dplane_provider *prov, + struct zebra_dplane_ctx *ctx) +{ + zebra_pbr_process_ipset(ctx); + dplane_provider_enqueue_out_ctx(prov, ctx); +} + +static void +kernel_dplane_process_ipset_entry(struct zebra_dplane_provider *prov, + struct zebra_dplane_ctx *ctx) +{ + zebra_pbr_process_ipset_entry(ctx); + dplane_provider_enqueue_out_ctx(prov, ctx); +} + +/* + * Kernel provider callback + */ +static int kernel_dplane_process_func(struct zebra_dplane_provider *prov) +{ + struct zebra_dplane_ctx *ctx, *tctx; + struct dplane_ctx_q work_list; + int counter, limit; + + TAILQ_INIT(&work_list); + + limit = dplane_provider_get_work_limit(prov); + + if (IS_ZEBRA_DEBUG_DPLANE_DETAIL) + zlog_debug("dplane provider '%s': processing", + dplane_provider_get_name(prov)); + + for (counter = 0; counter < limit; counter++) { + ctx = dplane_provider_dequeue_in_ctx(prov); + if (ctx == NULL) + break; + if (IS_ZEBRA_DEBUG_DPLANE_DETAIL) + kernel_dplane_log_detail(ctx); + + if ((dplane_ctx_get_op(ctx) == DPLANE_OP_IPTABLE_ADD + || dplane_ctx_get_op(ctx) == DPLANE_OP_IPTABLE_DELETE)) + kernel_dplane_process_iptable(prov, ctx); + else if ((dplane_ctx_get_op(ctx) == DPLANE_OP_IPSET_ADD + || dplane_ctx_get_op(ctx) == DPLANE_OP_IPSET_DELETE)) + kernel_dplane_process_ipset(prov, ctx); + else if ((dplane_ctx_get_op(ctx) == DPLANE_OP_IPSET_ENTRY_ADD + || dplane_ctx_get_op(ctx) + == DPLANE_OP_IPSET_ENTRY_DELETE)) + kernel_dplane_process_ipset_entry(prov, ctx); + else + 
TAILQ_INSERT_TAIL(&work_list, ctx, zd_q_entries); + } + + kernel_update_multi(&work_list); + + TAILQ_FOREACH_SAFE (ctx, &work_list, zd_q_entries, tctx) { + kernel_dplane_handle_result(ctx); + + TAILQ_REMOVE(&work_list, ctx, zd_q_entries); + dplane_provider_enqueue_out_ctx(prov, ctx); + } + + /* Ensure that we'll run the work loop again if there's still + * more work to do. + */ + if (counter >= limit) { + if (IS_ZEBRA_DEBUG_DPLANE_DETAIL) + zlog_debug("dplane provider '%s' reached max updates %d", + dplane_provider_get_name(prov), counter); + + atomic_fetch_add_explicit(&zdplane_info.dg_update_yields, + 1, memory_order_relaxed); + + dplane_provider_work_ready(); + } + + return 0; +} + +#ifdef DPLANE_TEST_PROVIDER + +/* + * Test dataplane provider plugin + */ + +/* + * Test provider process callback + */ +static int test_dplane_process_func(struct zebra_dplane_provider *prov) +{ + struct zebra_dplane_ctx *ctx; + int counter, limit; + + /* Just moving from 'in' queue to 'out' queue */ + + if (IS_ZEBRA_DEBUG_DPLANE_DETAIL) + zlog_debug("dplane provider '%s': processing", + dplane_provider_get_name(prov)); + + limit = dplane_provider_get_work_limit(prov); + + for (counter = 0; counter < limit; counter++) { + ctx = dplane_provider_dequeue_in_ctx(prov); + if (ctx == NULL) + break; + + if (IS_ZEBRA_DEBUG_DPLANE_DETAIL) + zlog_debug("dplane provider '%s': op %s", + dplane_provider_get_name(prov), + dplane_op2str(dplane_ctx_get_op(ctx))); + + dplane_ctx_set_status(ctx, ZEBRA_DPLANE_REQUEST_SUCCESS); + + dplane_provider_enqueue_out_ctx(prov, ctx); + } + + if (IS_ZEBRA_DEBUG_DPLANE_DETAIL) + zlog_debug("dplane provider '%s': processed %d", + dplane_provider_get_name(prov), counter); + + /* Ensure that we'll run the work loop again if there's still + * more work to do. 
+ */ + if (counter >= limit) + dplane_provider_work_ready(); + + return 0; +} + +/* + * Test provider shutdown/fini callback + */ +static int test_dplane_shutdown_func(struct zebra_dplane_provider *prov, + bool early) +{ + if (IS_ZEBRA_DEBUG_DPLANE) + zlog_debug("dplane provider '%s': %sshutdown", + dplane_provider_get_name(prov), + early ? "early " : ""); + + return 0; +} +#endif /* DPLANE_TEST_PROVIDER */ + +/* + * Register default kernel provider + */ +static void dplane_provider_init(void) +{ + int ret; + + ret = dplane_provider_register("Kernel", + DPLANE_PRIO_KERNEL, + DPLANE_PROV_FLAGS_DEFAULT, NULL, + kernel_dplane_process_func, + NULL, + NULL, NULL); + + if (ret != AOK) + zlog_err("Unable to register kernel dplane provider: %d", + ret); + +#ifdef DPLANE_TEST_PROVIDER + /* Optional test provider ... */ + ret = dplane_provider_register("Test", + DPLANE_PRIO_PRE_KERNEL, + DPLANE_PROV_FLAGS_DEFAULT, NULL, + test_dplane_process_func, + test_dplane_shutdown_func, + NULL /* data */, NULL); + + if (ret != AOK) + zlog_err("Unable to register test dplane provider: %d", + ret); +#endif /* DPLANE_TEST_PROVIDER */ +} + +/* + * Allow zebra code to walk the queue of pending contexts, evaluate each one + * using a callback function. If the function returns 'true', the context + * will be dequeued and freed without being processed. + */ +int dplane_clean_ctx_queue(bool (*context_cb)(struct zebra_dplane_ctx *ctx, + void *arg), void *val) +{ + struct zebra_dplane_ctx *ctx, *temp; + struct dplane_ctx_q work_list; + + TAILQ_INIT(&work_list); + + if (context_cb == NULL) + goto done; + + /* Walk the pending context queue under the dplane lock. 
*/ + DPLANE_LOCK(); + + TAILQ_FOREACH_SAFE(ctx, &zdplane_info.dg_update_ctx_q, zd_q_entries, + temp) { + if (context_cb(ctx, val)) { + TAILQ_REMOVE(&zdplane_info.dg_update_ctx_q, ctx, + zd_q_entries); + TAILQ_INSERT_TAIL(&work_list, ctx, zd_q_entries); + } + } + + DPLANE_UNLOCK(); + + /* Now free any contexts selected by the caller, without holding + * the lock. + */ + TAILQ_FOREACH_SAFE(ctx, &work_list, zd_q_entries, temp) { + TAILQ_REMOVE(&work_list, ctx, zd_q_entries); + dplane_ctx_fini(&ctx); + } + +done: + + return 0; +} + +/* Indicates zebra shutdown/exit is in progress. Some operations may be + * simplified or skipped during shutdown processing. + */ +bool dplane_is_in_shutdown(void) +{ + return zdplane_info.dg_is_shutdown; +} + +/* + * Enable collection of extra info about interfaces in route updates. + */ +void dplane_enable_intf_extra_info(void) +{ + dplane_collect_extra_intf_info = true; +} + +/* + * Early or pre-shutdown, de-init notification api. This runs pretty + * early during zebra shutdown, as a signal to stop new work and prepare + * for updates generated by shutdown/cleanup activity, as zebra tries to + * remove everything it's responsible for. + * NB: This runs in the main zebra pthread context. + */ +void zebra_dplane_pre_finish(void) +{ + struct zebra_dplane_provider *prov; + + if (IS_ZEBRA_DEBUG_DPLANE) + zlog_debug("Zebra dataplane pre-finish called"); + + zdplane_info.dg_is_shutdown = true; + + /* Notify provider(s) of pending shutdown. */ + TAILQ_FOREACH(prov, &zdplane_info.dg_providers_q, dp_prov_link) { + if (prov->dp_fini == NULL) + continue; + + prov->dp_fini(prov, true /* early */); + } +} + +/* + * Utility to determine whether work remains enqueued within the dplane; + * used during system shutdown processing. 
+ */ +static bool dplane_work_pending(void) +{ + bool ret = false; + struct zebra_dplane_ctx *ctx; + struct zebra_dplane_provider *prov; + + /* TODO -- just checking incoming/pending work for now, must check + * providers + */ + DPLANE_LOCK(); + { + ctx = TAILQ_FIRST(&zdplane_info.dg_update_ctx_q); + prov = TAILQ_FIRST(&zdplane_info.dg_providers_q); + } + DPLANE_UNLOCK(); + + if (ctx != NULL) { + ret = true; + goto done; + } + + while (prov) { + + dplane_provider_lock(prov); + + ctx = TAILQ_FIRST(&(prov->dp_ctx_in_q)); + if (ctx == NULL) + ctx = TAILQ_FIRST(&(prov->dp_ctx_out_q)); + + dplane_provider_unlock(prov); + + if (ctx != NULL) + break; + + DPLANE_LOCK(); + prov = TAILQ_NEXT(prov, dp_prov_link); + DPLANE_UNLOCK(); + } + + if (ctx != NULL) + ret = true; + +done: + return ret; +} + +/* + * Shutdown-time intermediate callback, used to determine when all pending + * in-flight updates are done. If there's still work to do, reschedules itself. + * If all work is done, schedules an event to the main zebra thread for + * final zebra shutdown. + * This runs in the dplane pthread context. + */ +static void dplane_check_shutdown_status(struct thread *event) +{ + struct dplane_zns_info *zi; + + if (IS_ZEBRA_DEBUG_DPLANE) + zlog_debug("Zebra dataplane shutdown status check called"); + + /* Remove any zns info entries as we stop the dplane pthread. */ + frr_each_safe (zns_info_list, &zdplane_info.dg_zns_list, zi) { + zns_info_list_del(&zdplane_info.dg_zns_list, zi); + + if (zdplane_info.dg_master) { + THREAD_OFF(zi->t_read); + THREAD_OFF(zi->t_request); + } + + XFREE(MTYPE_DP_NS, zi); + } + + if (dplane_work_pending()) { + /* Reschedule dplane check on a short timer */ + thread_add_timer_msec(zdplane_info.dg_master, + dplane_check_shutdown_status, + NULL, 100, + &zdplane_info.dg_t_shutdown_check); + + /* TODO - give up and stop waiting after a short time? */ + + } else { + /* We appear to be done - schedule a final callback event + * for the zebra main pthread. 
+ */ + thread_add_event(zrouter.master, zebra_finalize, NULL, 0, NULL); + } +} + +/* + * Shutdown, de-init api. This runs pretty late during shutdown, + * after zebra has tried to free/remove/uninstall all routes during shutdown. + * At this point, dplane work may still remain to be done, so we can't just + * blindly terminate. If there's still work to do, we'll periodically check + * and when done, we'll enqueue a task to the zebra main thread for final + * termination processing. + * + * NB: This runs in the main zebra thread context. + */ +void zebra_dplane_finish(void) +{ + if (IS_ZEBRA_DEBUG_DPLANE) + zlog_debug("Zebra dataplane fini called"); + + thread_add_event(zdplane_info.dg_master, + dplane_check_shutdown_status, NULL, 0, + &zdplane_info.dg_t_shutdown_check); +} + +/* + * Main dataplane pthread event loop. The thread takes new incoming work + * and offers it to the first provider. It then iterates through the + * providers, taking complete work from each one and offering it + * to the next in order. At each step, a limited number of updates are + * processed during a cycle in order to provide some fairness. + * + * This loop through the providers is only run once, so that the dataplane + * pthread can look for other pending work - such as i/o work on behalf of + * providers. + */ +static void dplane_thread_loop(struct thread *event) +{ + struct dplane_ctx_q work_list; + struct dplane_ctx_q error_list; + struct zebra_dplane_provider *prov; + struct zebra_dplane_ctx *ctx, *tctx; + int limit, counter, error_counter; + uint64_t curr, high; + bool reschedule = false; + + /* Capture work limit per cycle */ + limit = zdplane_info.dg_updates_per_cycle; + + /* Init temporary lists used to move contexts among providers */ + TAILQ_INIT(&work_list); + TAILQ_INIT(&error_list); + error_counter = 0; + + /* Check for zebra shutdown */ + if (!zdplane_info.dg_run) + return; + + /* Dequeue some incoming work from zebra (if any) onto the temporary + * working list. 
+ */ + DPLANE_LOCK(); + + /* Locate initial registered provider */ + prov = TAILQ_FIRST(&zdplane_info.dg_providers_q); + + /* Move new work from incoming list to temp list */ + for (counter = 0; counter < limit; counter++) { + ctx = TAILQ_FIRST(&zdplane_info.dg_update_ctx_q); + if (ctx) { + TAILQ_REMOVE(&zdplane_info.dg_update_ctx_q, ctx, + zd_q_entries); + + ctx->zd_provider = prov->dp_id; + + TAILQ_INSERT_TAIL(&work_list, ctx, zd_q_entries); + } else { + break; + } + } + + DPLANE_UNLOCK(); + + atomic_fetch_sub_explicit(&zdplane_info.dg_routes_queued, counter, + memory_order_relaxed); + + if (IS_ZEBRA_DEBUG_DPLANE_DETAIL) + zlog_debug("dplane: incoming new work counter: %d", counter); + + /* Iterate through the registered providers, offering new incoming + * work. If the provider has outgoing work in its queue, take that + * work for the next provider + */ + while (prov) { + + /* At each iteration, the temporary work list has 'counter' + * items. + */ + if (IS_ZEBRA_DEBUG_DPLANE_DETAIL) + zlog_debug("dplane enqueues %d new work to provider '%s'", + counter, dplane_provider_get_name(prov)); + + /* Capture current provider id in each context; check for + * error status. + */ + TAILQ_FOREACH_SAFE(ctx, &work_list, zd_q_entries, tctx) { + if (dplane_ctx_get_status(ctx) == + ZEBRA_DPLANE_REQUEST_SUCCESS) { + ctx->zd_provider = prov->dp_id; + } else { + /* + * TODO -- improve error-handling: recirc + * errors backwards so that providers can + * 'undo' their work (if they want to) + */ + + /* Move to error list; will be returned + * zebra main. 
+ */ + TAILQ_REMOVE(&work_list, ctx, zd_q_entries); + TAILQ_INSERT_TAIL(&error_list, + ctx, zd_q_entries); + error_counter++; + } + } + + /* Enqueue new work to the provider */ + dplane_provider_lock(prov); + + if (TAILQ_FIRST(&work_list)) + TAILQ_CONCAT(&(prov->dp_ctx_in_q), &work_list, + zd_q_entries); + + atomic_fetch_add_explicit(&prov->dp_in_counter, counter, + memory_order_relaxed); + atomic_fetch_add_explicit(&prov->dp_in_queued, counter, + memory_order_relaxed); + curr = atomic_load_explicit(&prov->dp_in_queued, + memory_order_relaxed); + high = atomic_load_explicit(&prov->dp_in_max, + memory_order_relaxed); + if (curr > high) + atomic_store_explicit(&prov->dp_in_max, curr, + memory_order_relaxed); + + dplane_provider_unlock(prov); + + /* Reset the temp list (though the 'concat' may have done this + * already), and the counter + */ + TAILQ_INIT(&work_list); + counter = 0; + + /* Call into the provider code. Note that this is + * unconditional: we offer to do work even if we don't enqueue + * any _new_ work. + */ + (*prov->dp_fp)(prov); + + /* Check for zebra shutdown */ + if (!zdplane_info.dg_run) + break; + + /* Dequeue completed work from the provider */ + dplane_provider_lock(prov); + + while (counter < limit) { + ctx = TAILQ_FIRST(&(prov->dp_ctx_out_q)); + if (ctx) { + TAILQ_REMOVE(&(prov->dp_ctx_out_q), ctx, + zd_q_entries); + + TAILQ_INSERT_TAIL(&work_list, + ctx, zd_q_entries); + counter++; + } else + break; + } + + dplane_provider_unlock(prov); + + if (counter >= limit) + reschedule = true; + + if (IS_ZEBRA_DEBUG_DPLANE_DETAIL) + zlog_debug("dplane dequeues %d completed work from provider %s", + counter, dplane_provider_get_name(prov)); + + /* Locate next provider */ + DPLANE_LOCK(); + prov = TAILQ_NEXT(prov, dp_prov_link); + DPLANE_UNLOCK(); + } + + /* + * We hit the work limit while processing at least one provider's + * output queue - ensure we come back and finish it. 
+ */ + if (reschedule) + dplane_provider_work_ready(); + + /* After all providers have been serviced, enqueue any completed + * work and any errors back to zebra so it can process the results. + */ + if (IS_ZEBRA_DEBUG_DPLANE_DETAIL) + zlog_debug("dplane has %d completed, %d errors, for zebra main", + counter, error_counter); + + /* + * Hand lists through the api to zebra main, + * to reduce the number of lock/unlock cycles + */ + + /* Call through to zebra main */ + (zdplane_info.dg_results_cb)(&error_list); + + TAILQ_INIT(&error_list); + + /* Call through to zebra main */ + (zdplane_info.dg_results_cb)(&work_list); + + TAILQ_INIT(&work_list); +} + +/* + * Final phase of shutdown, after all work enqueued to dplane has been + * processed. This is called from the zebra main pthread context. + */ +void zebra_dplane_shutdown(void) +{ + struct zebra_dplane_provider *dp; + + if (IS_ZEBRA_DEBUG_DPLANE) + zlog_debug("Zebra dataplane shutdown called"); + + /* Stop dplane thread, if it's running */ + + zdplane_info.dg_run = false; + + frr_pthread_stop(zdplane_info.dg_pthread, NULL); + + /* Destroy pthread */ + frr_pthread_destroy(zdplane_info.dg_pthread); + zdplane_info.dg_pthread = NULL; + zdplane_info.dg_master = NULL; + + /* Notify provider(s) of final shutdown. + * Note that this call is in the main pthread, so providers must + * be prepared for that. 
+ */ + TAILQ_FOREACH(dp, &zdplane_info.dg_providers_q, dp_prov_link) { + if (dp->dp_fini == NULL) + continue; + + dp->dp_fini(dp, false); + } + + /* TODO -- Clean-up provider objects */ + + /* TODO -- Clean queue(s), free memory */ +} + +/* + * Initialize the dataplane module during startup, internal/private version + */ +static void zebra_dplane_init_internal(void) +{ + memset(&zdplane_info, 0, sizeof(zdplane_info)); + + pthread_mutex_init(&zdplane_info.dg_mutex, NULL); + + TAILQ_INIT(&zdplane_info.dg_update_ctx_q); + TAILQ_INIT(&zdplane_info.dg_providers_q); + zns_info_list_init(&zdplane_info.dg_zns_list); + + zdplane_info.dg_updates_per_cycle = DPLANE_DEFAULT_NEW_WORK; + + zdplane_info.dg_max_queued_updates = DPLANE_DEFAULT_MAX_QUEUED; + + /* Register default kernel 'provider' during init */ + dplane_provider_init(); +} + +/* + * Start the dataplane pthread. This step needs to be run later than the + * 'init' step, in case zebra has fork-ed. + */ +void zebra_dplane_start(void) +{ + struct dplane_zns_info *zi; + struct zebra_dplane_provider *prov; + struct frr_pthread_attr pattr = { + .start = frr_pthread_attr_default.start, + .stop = frr_pthread_attr_default.stop + }; + + /* Start dataplane pthread */ + + zdplane_info.dg_pthread = frr_pthread_new(&pattr, "Zebra dplane thread", + "zebra_dplane"); + + zdplane_info.dg_master = zdplane_info.dg_pthread->master; + + zdplane_info.dg_run = true; + + /* Enqueue an initial event for the dataplane pthread */ + thread_add_event(zdplane_info.dg_master, dplane_thread_loop, NULL, 0, + &zdplane_info.dg_t_update); + + /* Enqueue requests and reads if necessary */ + frr_each (zns_info_list, &zdplane_info.dg_zns_list, zi) { +#if defined(HAVE_NETLINK) + thread_add_read(zdplane_info.dg_master, dplane_incoming_read, + zi, zi->info.sock, &zi->t_read); + dplane_kernel_info_request(zi); +#endif + } + + /* Call start callbacks for registered providers */ + + DPLANE_LOCK(); + prov = TAILQ_FIRST(&zdplane_info.dg_providers_q); + 
DPLANE_UNLOCK(); + + while (prov) { + + if (prov->dp_start) + (prov->dp_start)(prov); + + /* Locate next provider */ + DPLANE_LOCK(); + prov = TAILQ_NEXT(prov, dp_prov_link); + DPLANE_UNLOCK(); + } + + frr_pthread_run(zdplane_info.dg_pthread, NULL); +} + +/* + * Initialize the dataplane module at startup; called by zebra rib_init() + */ +void zebra_dplane_init(int (*results_fp)(struct dplane_ctx_q *)) +{ + zebra_dplane_init_internal(); + zdplane_info.dg_results_cb = results_fp; +} diff --git a/zebra/zebra_dplane.h b/zebra/zebra_dplane.h new file mode 100644 index 0000000..8b239a9 --- /dev/null +++ b/zebra/zebra_dplane.h @@ -0,0 +1,1035 @@ +/* + * Zebra dataplane layer api interfaces. + * Copyright (c) 2018 Volta Networks, Inc. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. 
+ * + * You should have received a copy of the GNU General Public License along + * with this program; see the file COPYING; if not, write to the Free Software + * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA + */ + +#ifndef _ZEBRA_DPLANE_H +#define _ZEBRA_DPLANE_H 1 + +#include "lib/zebra.h" +#include "lib/prefix.h" +#include "lib/nexthop.h" +#include "lib/nexthop_group.h" +#include "lib/queue.h" +#include "lib/vlan.h" +#include "zebra/zebra_ns.h" +#include "zebra/rib.h" +#include "zebra/zserv.h" +#include "zebra/zebra_mpls.h" +#include "zebra/zebra_nhg.h" + +#ifdef __cplusplus +extern "C" { +#endif + +/* Key netlink info from zebra ns */ +struct zebra_dplane_info { + ns_id_t ns_id; + +#if defined(HAVE_NETLINK) + int sock; + int seq; + bool is_cmd; +#endif +}; + +/* Utility to fill in zns info from main zns struct */ +static inline void +zebra_dplane_info_from_zns(struct zebra_dplane_info *zns_info, + const struct zebra_ns *zns, bool is_cmd) +{ + zns_info->ns_id = zns->ns_id; + +#if defined(HAVE_NETLINK) + zns_info->is_cmd = is_cmd; + if (is_cmd) { + zns_info->sock = zns->netlink_cmd.sock; + zns_info->seq = zns->netlink_cmd.seq; + } else { + zns_info->sock = zns->netlink.sock; + zns_info->seq = zns->netlink.seq; + } +#endif /* NETLINK */ +} + +/* + * Notify dplane when namespaces are enabled and disabled. The dplane + * needs to start and stop reading incoming events from the ns. + */ +void zebra_dplane_ns_enable(struct zebra_ns *zns, bool enabled); + +/* + * Result codes used when returning status back to the main zebra context. + */ + +/* + * Philosophy Note: + * + * Flags being SET/UNSET do not belong in the South Bound + * Interface. This Setting belongs at the calling level + * because we can and will have multiple different interfaces + * and we will have potentially multiple different + * modules/filters to call. As such Setting/Unsetting + * success failure should be handled by the caller. 
+ */ +enum zebra_dplane_status { + ZEBRA_DPLANE_STATUS_NONE = 0, + ZEBRA_DPLANE_INSTALL_SUCCESS, + ZEBRA_DPLANE_INSTALL_FAILURE, + ZEBRA_DPLANE_DELETE_SUCCESS, + ZEBRA_DPLANE_DELETE_FAILURE, + +}; + +enum zebra_dplane_result { + ZEBRA_DPLANE_REQUEST_QUEUED, + ZEBRA_DPLANE_REQUEST_SUCCESS, + ZEBRA_DPLANE_REQUEST_FAILURE, +}; + +/* + * API between the zebra dataplane system and the main zebra processing + * context. + */ + +/* + * Operations that the dataplane can process. + */ +enum dplane_op_e { + DPLANE_OP_NONE = 0, + + /* Route update */ + DPLANE_OP_ROUTE_INSTALL, + DPLANE_OP_ROUTE_UPDATE, + DPLANE_OP_ROUTE_DELETE, + DPLANE_OP_ROUTE_NOTIFY, + + /* Nexthop update */ + DPLANE_OP_NH_INSTALL, + DPLANE_OP_NH_UPDATE, + DPLANE_OP_NH_DELETE, + + /* LSP update */ + DPLANE_OP_LSP_INSTALL, + DPLANE_OP_LSP_UPDATE, + DPLANE_OP_LSP_DELETE, + DPLANE_OP_LSP_NOTIFY, + + /* Pseudowire update */ + DPLANE_OP_PW_INSTALL, + DPLANE_OP_PW_UNINSTALL, + + /* System route notification */ + DPLANE_OP_SYS_ROUTE_ADD, + DPLANE_OP_SYS_ROUTE_DELETE, + + /* Interface address update */ + DPLANE_OP_ADDR_INSTALL, + DPLANE_OP_ADDR_UNINSTALL, + + /* MAC address update */ + DPLANE_OP_MAC_INSTALL, + DPLANE_OP_MAC_DELETE, + + /* EVPN neighbor updates */ + DPLANE_OP_NEIGH_INSTALL, + DPLANE_OP_NEIGH_UPDATE, + DPLANE_OP_NEIGH_DELETE, + + /* EVPN VTEP updates */ + DPLANE_OP_VTEP_ADD, + DPLANE_OP_VTEP_DELETE, + + /* Policy based routing rule update */ + DPLANE_OP_RULE_ADD, + DPLANE_OP_RULE_DELETE, + DPLANE_OP_RULE_UPDATE, + + /* Link layer address discovery */ + DPLANE_OP_NEIGH_DISCOVER, + + /* bridge port update */ + DPLANE_OP_BR_PORT_UPDATE, + + /* Policy based routing iptable update */ + DPLANE_OP_IPTABLE_ADD, + DPLANE_OP_IPTABLE_DELETE, + + /* Policy based routing ipset update */ + DPLANE_OP_IPSET_ADD, + DPLANE_OP_IPSET_DELETE, + DPLANE_OP_IPSET_ENTRY_ADD, + DPLANE_OP_IPSET_ENTRY_DELETE, + + /* LINK LAYER IP address update */ + DPLANE_OP_NEIGH_IP_INSTALL, + DPLANE_OP_NEIGH_IP_DELETE, + + 
DPLANE_OP_NEIGH_TABLE_UPDATE, + DPLANE_OP_GRE_SET, + + /* Incoming interface address events */ + DPLANE_OP_INTF_ADDR_ADD, + DPLANE_OP_INTF_ADDR_DEL, + + /* Incoming interface config events */ + DPLANE_OP_INTF_NETCONFIG, + + /* Interface update */ + DPLANE_OP_INTF_INSTALL, + DPLANE_OP_INTF_UPDATE, + DPLANE_OP_INTF_DELETE, + + /* Traffic control */ + DPLANE_OP_TC_INSTALL, + DPLANE_OP_TC_UPDATE, + DPLANE_OP_TC_DELETE, +}; + +/* + * The vxlan/evpn neighbor management code needs some values to use + * when programming neighbor changes. Offer some platform-neutral values + * here for use within the dplane apis and plugins. + */ + +/* Neighbor cache flags */ +#define DPLANE_NTF_EXT_LEARNED 0x01 +#define DPLANE_NTF_ROUTER 0x02 +#define DPLANE_NTF_USE 0x04 + +/* Neighbor cache states */ +#define DPLANE_NUD_REACHABLE 0x01 +#define DPLANE_NUD_STALE 0x02 +#define DPLANE_NUD_NOARP 0x04 +#define DPLANE_NUD_PROBE 0x08 +#define DPLANE_NUD_INCOMPLETE 0x10 +#define DPLANE_NUD_PERMANENT 0x20 +#define DPLANE_NUD_FAILED 0x40 + +/* MAC update flags - dplane_mac_info.update_flags */ +#define DPLANE_MAC_REMOTE (1 << 0) +#define DPLANE_MAC_WAS_STATIC (1 << 1) +#define DPLANE_MAC_SET_STATIC (1 << 2) +#define DPLANE_MAC_SET_INACTIVE (1 << 3) + +/* Neigh update flags - dplane_neigh_info.update_flags */ +#define DPLANE_NEIGH_REMOTE (1 << 0) +#define DPLANE_NEIGH_WAS_STATIC (1 << 1) +#define DPLANE_NEIGH_SET_STATIC (1 << 2) +#define DPLANE_NEIGH_SET_INACTIVE (1 << 3) +#define DPLANE_NEIGH_NO_EXTENSION (1 << 4) + +#define DPLANE_BR_PORT_NON_DF (1 << 0) + +/* Definitions for the dplane 'netconf' apis, corresponding to the netlink + * NETCONF api. + * Sadly, netlink sends incremental updates, so its messages may contain + * just a single changed attribute, and not necessarily + * a complete snapshot of the attributes. 
+ */ +enum dplane_netconf_status_e { + DPLANE_NETCONF_STATUS_UNKNOWN = 0, + DPLANE_NETCONF_STATUS_ENABLED, + DPLANE_NETCONF_STATUS_DISABLED +}; + +/* Some special ifindex values that may be part of the dplane netconf api. */ +#define DPLANE_NETCONF_IFINDEX_ALL -1 +#define DPLANE_NETCONF_IFINDEX_DEFAULT -2 + +/* Enable system route notifications */ +void dplane_enable_sys_route_notifs(void); + +/* + * The dataplane context struct is used to exchange info between the main zebra + * context and the dataplane module(s). If these are two independent pthreads, + * they cannot share existing global data structures safely. + */ + +/* Define a tailq list type for context blocks. The list is exposed/public, + * but the internal linkage in the context struct is private, so there + * are accessor apis that support enqueue and dequeue. + */ +TAILQ_HEAD(dplane_ctx_q, zebra_dplane_ctx); + +/* Declare a type for (optional) extended interface info objects. */ +TAILQ_HEAD(dplane_intf_extra_q, dplane_intf_extra); + +/* Allocate a context object */ +struct zebra_dplane_ctx *dplane_ctx_alloc(void); + +/* + * Reset an allocated context object for re-use. All internal allocations are + * freed. + */ +void dplane_ctx_reset(struct zebra_dplane_ctx *ctx); + +/* + * Allow zebra code to walk the queue of pending contexts, evaluate each one + * using a callback function. The caller can supply an optional void* arg also. + * If the function returns 'true', the context will be dequeued and freed + * without being processed. + */ +int dplane_clean_ctx_queue(bool (*context_cb)(struct zebra_dplane_ctx *ctx, + void *arg), void *val); + +/* Return a dataplane results context block after use; the caller's pointer will + * be cleared. + */ +void dplane_ctx_fini(struct zebra_dplane_ctx **pctx); + +/* Enqueue a context block to caller's tailq. This exists so that the + * context struct can remain opaque. 
+ */ +void dplane_ctx_enqueue_tail(struct dplane_ctx_q *q, + const struct zebra_dplane_ctx *ctx); + +/* Append a list of context blocks to another list - again, just keeping + * the context struct opaque. + */ +void dplane_ctx_list_append(struct dplane_ctx_q *to_list, + struct dplane_ctx_q *from_list); + +/* Dequeue a context block from the head of caller's tailq */ +struct zebra_dplane_ctx *dplane_ctx_dequeue(struct dplane_ctx_q *q); +struct zebra_dplane_ctx *dplane_ctx_get_head(struct dplane_ctx_q *q); + +/* + * Accessors for information from the context object + */ +enum zebra_dplane_result dplane_ctx_get_status( + const struct zebra_dplane_ctx *ctx); +void dplane_ctx_set_status(struct zebra_dplane_ctx *ctx, + enum zebra_dplane_result status); +const char *dplane_res2str(enum zebra_dplane_result res); + +enum dplane_op_e dplane_ctx_get_op(const struct zebra_dplane_ctx *ctx); +void dplane_ctx_set_op(struct zebra_dplane_ctx *ctx, enum dplane_op_e op); +const char *dplane_op2str(enum dplane_op_e op); + +const struct prefix *dplane_ctx_get_dest(const struct zebra_dplane_ctx *ctx); +void dplane_ctx_set_dest(struct zebra_dplane_ctx *ctx, + const struct prefix *dest); +const char *dplane_ctx_get_ifname(const struct zebra_dplane_ctx *ctx); +void dplane_ctx_set_ifname(struct zebra_dplane_ctx *ctx, const char *ifname); +ifindex_t dplane_ctx_get_ifindex(const struct zebra_dplane_ctx *ctx); +void dplane_ctx_set_ifindex(struct zebra_dplane_ctx *ctx, ifindex_t ifindex); + +/* Retrieve last/current provider id */ +uint32_t dplane_ctx_get_provider(const struct zebra_dplane_ctx *ctx); + +/* Providers running before the kernel can control whether a kernel + * update should be done. 
+ */ +void dplane_ctx_set_skip_kernel(struct zebra_dplane_ctx *ctx); +bool dplane_ctx_is_skip_kernel(const struct zebra_dplane_ctx *ctx); + +/* Source prefix is a little special - use convention to return NULL + * to mean "no src prefix" + */ +const struct prefix *dplane_ctx_get_src(const struct zebra_dplane_ctx *ctx); +void dplane_ctx_set_src(struct zebra_dplane_ctx *ctx, const struct prefix *src); + +bool dplane_ctx_is_update(const struct zebra_dplane_ctx *ctx); +uint32_t dplane_ctx_get_seq(const struct zebra_dplane_ctx *ctx); +uint32_t dplane_ctx_get_old_seq(const struct zebra_dplane_ctx *ctx); +void dplane_ctx_set_vrf(struct zebra_dplane_ctx *ctx, vrf_id_t vrf); +vrf_id_t dplane_ctx_get_vrf(const struct zebra_dplane_ctx *ctx); + +/* In some paths we have only a namespace id */ +void dplane_ctx_set_ns_id(struct zebra_dplane_ctx *ctx, ns_id_t nsid); +ns_id_t dplane_ctx_get_ns_id(const struct zebra_dplane_ctx *ctx); + +bool dplane_ctx_is_from_notif(const struct zebra_dplane_ctx *ctx); +void dplane_ctx_set_notif_provider(struct zebra_dplane_ctx *ctx, + uint32_t id); +uint32_t dplane_ctx_get_notif_provider(const struct zebra_dplane_ctx *ctx); + +/* Accessors for route update information */ +void dplane_ctx_set_type(struct zebra_dplane_ctx *ctx, int type); +int dplane_ctx_get_type(const struct zebra_dplane_ctx *ctx); +int dplane_ctx_get_old_type(const struct zebra_dplane_ctx *ctx); +void dplane_ctx_set_afi(struct zebra_dplane_ctx *ctx, afi_t afi); +afi_t dplane_ctx_get_afi(const struct zebra_dplane_ctx *ctx); +void dplane_ctx_set_safi(struct zebra_dplane_ctx *ctx, safi_t safi); +safi_t dplane_ctx_get_safi(const struct zebra_dplane_ctx *ctx); +void dplane_ctx_set_table(struct zebra_dplane_ctx *ctx, uint32_t table); +uint32_t dplane_ctx_get_table(const struct zebra_dplane_ctx *ctx); +route_tag_t dplane_ctx_get_tag(const struct zebra_dplane_ctx *ctx); +void dplane_ctx_set_tag(struct zebra_dplane_ctx *ctx, route_tag_t tag); +route_tag_t dplane_ctx_get_old_tag(const 
struct zebra_dplane_ctx *ctx); +uint16_t dplane_ctx_get_instance(const struct zebra_dplane_ctx *ctx); +void dplane_ctx_set_instance(struct zebra_dplane_ctx *ctx, uint16_t instance); +uint16_t dplane_ctx_get_old_instance(const struct zebra_dplane_ctx *ctx); +uint32_t dplane_ctx_get_metric(const struct zebra_dplane_ctx *ctx); +uint32_t dplane_ctx_get_old_metric(const struct zebra_dplane_ctx *ctx); +uint32_t dplane_ctx_get_mtu(const struct zebra_dplane_ctx *ctx); +uint32_t dplane_ctx_get_nh_mtu(const struct zebra_dplane_ctx *ctx); +uint8_t dplane_ctx_get_distance(const struct zebra_dplane_ctx *ctx); +void dplane_ctx_set_distance(struct zebra_dplane_ctx *ctx, uint8_t distance); +uint8_t dplane_ctx_get_old_distance(const struct zebra_dplane_ctx *ctx); + +/* Accessors for traffic control context */ +uint64_t dplane_ctx_tc_get_rate(const struct zebra_dplane_ctx *ctx); +uint64_t dplane_ctx_tc_get_ceil(const struct zebra_dplane_ctx *ctx); +uint32_t dplane_ctx_tc_get_filter_bm(const struct zebra_dplane_ctx *ctx); +const struct prefix * +dplane_ctx_tc_get_src_ip(const struct zebra_dplane_ctx *ctx); +const struct prefix * +dplane_ctx_tc_get_dst_ip(const struct zebra_dplane_ctx *ctx); +uint8_t dplane_ctx_tc_get_ip_proto(const struct zebra_dplane_ctx *ctx); + +void dplane_ctx_set_nexthops(struct zebra_dplane_ctx *ctx, struct nexthop *nh); +void dplane_ctx_set_backup_nhg(struct zebra_dplane_ctx *ctx, + const struct nexthop_group *nhg); + +uint32_t dplane_ctx_get_nhg_id(const struct zebra_dplane_ctx *ctx); +const struct nexthop_group *dplane_ctx_get_ng( + const struct zebra_dplane_ctx *ctx); +const struct nexthop_group *dplane_ctx_get_old_ng( + const struct zebra_dplane_ctx *ctx); + +/* Optional extra info about interfaces in nexthops - a plugin must enable + * this extra info. 
+ */ +const struct dplane_intf_extra * +dplane_ctx_get_intf_extra(const struct zebra_dplane_ctx *ctx); + +const struct dplane_intf_extra * +dplane_ctx_intf_extra_next(const struct zebra_dplane_ctx *ctx, + const struct dplane_intf_extra *ptr); + +vrf_id_t dplane_intf_extra_get_vrfid(const struct dplane_intf_extra *ptr); +uint32_t dplane_intf_extra_get_ifindex(const struct dplane_intf_extra *ptr); +uint32_t dplane_intf_extra_get_flags(const struct dplane_intf_extra *ptr); +uint32_t dplane_intf_extra_get_status(const struct dplane_intf_extra *ptr); + +/* Backup nexthop information (list of nexthops) if present. */ +const struct nexthop_group * +dplane_ctx_get_backup_ng(const struct zebra_dplane_ctx *ctx); +const struct nexthop_group * +dplane_ctx_get_old_backup_ng(const struct zebra_dplane_ctx *ctx); + +/* Accessors for nexthop information */ +uint32_t dplane_ctx_get_nhe_id(const struct zebra_dplane_ctx *ctx); +uint32_t dplane_ctx_get_old_nhe_id(const struct zebra_dplane_ctx *ctx); +afi_t dplane_ctx_get_nhe_afi(const struct zebra_dplane_ctx *ctx); +vrf_id_t dplane_ctx_get_nhe_vrf_id(const struct zebra_dplane_ctx *ctx); +int dplane_ctx_get_nhe_type(const struct zebra_dplane_ctx *ctx); +const struct nexthop_group * +dplane_ctx_get_nhe_ng(const struct zebra_dplane_ctx *ctx); +const struct nh_grp * +dplane_ctx_get_nhe_nh_grp(const struct zebra_dplane_ctx *ctx); +uint8_t dplane_ctx_get_nhe_nh_grp_count(const struct zebra_dplane_ctx *ctx); + +/* Accessors for LSP information */ + +/* Init the internal LSP data struct - necessary before adding to it. + * If 'lsp' is non-NULL, info will be copied from it to the internal + * context data area. 
+ */ +int dplane_ctx_lsp_init(struct zebra_dplane_ctx *ctx, enum dplane_op_e op, + struct zebra_lsp *lsp); + +mpls_label_t dplane_ctx_get_in_label(const struct zebra_dplane_ctx *ctx); +void dplane_ctx_set_in_label(struct zebra_dplane_ctx *ctx, + mpls_label_t label); +uint8_t dplane_ctx_get_addr_family(const struct zebra_dplane_ctx *ctx); +void dplane_ctx_set_addr_family(struct zebra_dplane_ctx *ctx, + uint8_t family); +uint32_t dplane_ctx_get_lsp_flags(const struct zebra_dplane_ctx *ctx); +void dplane_ctx_set_lsp_flags(struct zebra_dplane_ctx *ctx, + uint32_t flags); +const struct nhlfe_list_head *dplane_ctx_get_nhlfe_list( + const struct zebra_dplane_ctx *ctx); +const struct nhlfe_list_head *dplane_ctx_get_backup_nhlfe_list( + const struct zebra_dplane_ctx *ctx); + +struct zebra_nhlfe *dplane_ctx_add_nhlfe(struct zebra_dplane_ctx *ctx, + enum lsp_types_t lsp_type, + enum nexthop_types_t nh_type, + const union g_addr *gate, + ifindex_t ifindex, uint8_t num_labels, + mpls_label_t *out_labels); + +struct zebra_nhlfe *dplane_ctx_add_backup_nhlfe( + struct zebra_dplane_ctx *ctx, enum lsp_types_t lsp_type, + enum nexthop_types_t nh_type, const union g_addr *gate, + ifindex_t ifindex, uint8_t num_labels, mpls_label_t *out_labels); + +const struct zebra_nhlfe * +dplane_ctx_get_best_nhlfe(const struct zebra_dplane_ctx *ctx); +const struct zebra_nhlfe * +dplane_ctx_set_best_nhlfe(struct zebra_dplane_ctx *ctx, + struct zebra_nhlfe *nhlfe); +uint32_t dplane_ctx_get_lsp_num_ecmp(const struct zebra_dplane_ctx *ctx); + +/* Accessors for pseudowire information */ +mpls_label_t dplane_ctx_get_pw_local_label(const struct zebra_dplane_ctx *ctx); +mpls_label_t dplane_ctx_get_pw_remote_label(const struct zebra_dplane_ctx *ctx); +int dplane_ctx_get_pw_type(const struct zebra_dplane_ctx *ctx); +int dplane_ctx_get_pw_af(const struct zebra_dplane_ctx *ctx); +uint32_t dplane_ctx_get_pw_flags(const struct zebra_dplane_ctx *ctx); +int dplane_ctx_get_pw_status(const struct zebra_dplane_ctx 
*ctx); +void dplane_ctx_set_pw_status(struct zebra_dplane_ctx *ctx, int status); +const union g_addr *dplane_ctx_get_pw_dest( + const struct zebra_dplane_ctx *ctx); +const union pw_protocol_fields *dplane_ctx_get_pw_proto( + const struct zebra_dplane_ctx *ctx); +const struct nexthop_group *dplane_ctx_get_pw_nhg( + const struct zebra_dplane_ctx *ctx); +const struct nexthop_group * +dplane_ctx_get_pw_primary_nhg(const struct zebra_dplane_ctx *ctx); +const struct nexthop_group * +dplane_ctx_get_pw_backup_nhg(const struct zebra_dplane_ctx *ctx); + +/* Accessors for interface information */ +uint32_t dplane_ctx_get_intf_metric(const struct zebra_dplane_ctx *ctx); +void dplane_ctx_set_intf_metric(struct zebra_dplane_ctx *ctx, uint32_t metric); +uint32_t dplane_ctx_get_intf_pd_reason_val(const struct zebra_dplane_ctx *ctx); +void dplane_ctx_set_intf_pd_reason_val(struct zebra_dplane_ctx *ctx, bool val); +bool dplane_ctx_intf_is_protodown(const struct zebra_dplane_ctx *ctx); +/* Is interface addr p2p? 
*/ +bool dplane_ctx_intf_is_connected(const struct zebra_dplane_ctx *ctx); +void dplane_ctx_intf_set_connected(struct zebra_dplane_ctx *ctx); +bool dplane_ctx_intf_is_secondary(const struct zebra_dplane_ctx *ctx); +void dplane_ctx_intf_set_secondary(struct zebra_dplane_ctx *ctx); +bool dplane_ctx_intf_is_broadcast(const struct zebra_dplane_ctx *ctx); +void dplane_ctx_intf_set_broadcast(struct zebra_dplane_ctx *ctx); +const struct prefix *dplane_ctx_get_intf_addr( + const struct zebra_dplane_ctx *ctx); +void dplane_ctx_set_intf_addr(struct zebra_dplane_ctx *ctx, + const struct prefix *p); +bool dplane_ctx_intf_has_dest(const struct zebra_dplane_ctx *ctx); +const struct prefix *dplane_ctx_get_intf_dest( + const struct zebra_dplane_ctx *ctx); +void dplane_ctx_set_intf_dest(struct zebra_dplane_ctx *ctx, + const struct prefix *p); +bool dplane_ctx_intf_has_label(const struct zebra_dplane_ctx *ctx); +const char *dplane_ctx_get_intf_label(const struct zebra_dplane_ctx *ctx); +void dplane_ctx_set_intf_label(struct zebra_dplane_ctx *ctx, const char *label); + +/* Accessors for MAC information */ +vlanid_t dplane_ctx_mac_get_vlan(const struct zebra_dplane_ctx *ctx); +bool dplane_ctx_mac_is_sticky(const struct zebra_dplane_ctx *ctx); +uint32_t dplane_ctx_mac_get_update_flags(const struct zebra_dplane_ctx *ctx); +uint32_t dplane_ctx_mac_get_nhg_id(const struct zebra_dplane_ctx *ctx); +const struct ethaddr *dplane_ctx_mac_get_addr( + const struct zebra_dplane_ctx *ctx); +const struct in_addr *dplane_ctx_mac_get_vtep_ip( + const struct zebra_dplane_ctx *ctx); +ifindex_t dplane_ctx_mac_get_br_ifindex(const struct zebra_dplane_ctx *ctx); + +/* Accessors for neighbor information */ +const struct ipaddr *dplane_ctx_neigh_get_ipaddr( + const struct zebra_dplane_ctx *ctx); +const struct ethaddr *dplane_ctx_neigh_get_mac( + const struct zebra_dplane_ctx *ctx); +const struct ipaddr * +dplane_ctx_neigh_get_link_ip(const struct zebra_dplane_ctx *ctx); +uint32_t 
dplane_ctx_neigh_get_flags(const struct zebra_dplane_ctx *ctx); +uint16_t dplane_ctx_neigh_get_state(const struct zebra_dplane_ctx *ctx); +uint32_t dplane_ctx_neigh_get_update_flags(const struct zebra_dplane_ctx *ctx); + +/* Accessors for policy based routing rule information */ +int dplane_ctx_rule_get_sock(const struct zebra_dplane_ctx *ctx); +int dplane_ctx_rule_get_unique(const struct zebra_dplane_ctx *ctx); +int dplane_ctx_rule_get_seq(const struct zebra_dplane_ctx *ctx); +const char *dplane_ctx_rule_get_ifname(const struct zebra_dplane_ctx *ctx); +uint32_t dplane_ctx_rule_get_priority(const struct zebra_dplane_ctx *ctx); +uint32_t dplane_ctx_rule_get_old_priority(const struct zebra_dplane_ctx *ctx); +uint32_t dplane_ctx_rule_get_table(const struct zebra_dplane_ctx *ctx); +uint32_t dplane_ctx_rule_get_old_table(const struct zebra_dplane_ctx *ctx); +uint32_t dplane_ctx_rule_get_filter_bm(const struct zebra_dplane_ctx *ctx); +uint32_t dplane_ctx_rule_get_old_filter_bm(const struct zebra_dplane_ctx *ctx); +uint32_t dplane_ctx_rule_get_fwmark(const struct zebra_dplane_ctx *ctx); +uint32_t dplane_ctx_rule_get_old_fwmark(const struct zebra_dplane_ctx *ctx); +uint8_t dplane_ctx_rule_get_dsfield(const struct zebra_dplane_ctx *ctx); +uint8_t dplane_ctx_rule_get_old_dsfield(const struct zebra_dplane_ctx *ctx); +uint8_t dplane_ctx_rule_get_ipproto(const struct zebra_dplane_ctx *ctx); +uint8_t dplane_ctx_rule_get_old_ipproto(const struct zebra_dplane_ctx *ctx); +uint16_t dplane_ctx_rule_get_src_port(const struct zebra_dplane_ctx *ctx); +uint16_t dplane_ctx_rule_get_old_src_port(const struct zebra_dplane_ctx *ctx); +uint16_t dplane_ctx_rule_get_dst_port(const struct zebra_dplane_ctx *ctx); +uint16_t dplane_ctx_rule_get_old_dst_port(const struct zebra_dplane_ctx *ctx); +const struct prefix * +dplane_ctx_rule_get_src_ip(const struct zebra_dplane_ctx *ctx); +const struct prefix * +dplane_ctx_rule_get_old_src_ip(const struct zebra_dplane_ctx *ctx); +const struct prefix * 
+dplane_ctx_rule_get_dst_ip(const struct zebra_dplane_ctx *ctx); +const struct prefix * +dplane_ctx_rule_get_old_dst_ip(const struct zebra_dplane_ctx *ctx); +const struct ethaddr * +dplane_ctx_rule_get_smac(const struct zebra_dplane_ctx *ctx); +const struct ethaddr * +dplane_ctx_rule_get_dmac(const struct zebra_dplane_ctx *ctx); +int dplane_ctx_rule_get_out_ifindex(const struct zebra_dplane_ctx *ctx); +intptr_t dplane_ctx_rule_get_dp_flow_ptr(const struct zebra_dplane_ctx *ctx); +intptr_t +dplane_ctx_rule_get_old_dp_flow_ptr(const struct zebra_dplane_ctx *ctx); +void dplane_ctx_rule_set_dp_flow_ptr(struct zebra_dplane_ctx *ctx, + intptr_t dp_flow_ptr); +/* Accessors for policy based routing iptable information */ +struct zebra_pbr_iptable; +void dplane_ctx_get_pbr_iptable(const struct zebra_dplane_ctx *ctx, + struct zebra_pbr_iptable *table); +struct zebra_pbr_ipset; +void dplane_ctx_get_pbr_ipset(const struct zebra_dplane_ctx *ctx, + struct zebra_pbr_ipset *ipset); +struct zebra_pbr_ipset_entry; +void dplane_ctx_get_pbr_ipset_entry(const struct zebra_dplane_ctx *ctx, + struct zebra_pbr_ipset_entry *entry); +/* Accessors for bridge port information */ +uint32_t dplane_ctx_get_br_port_flags(const struct zebra_dplane_ctx *ctx); +uint32_t +dplane_ctx_get_br_port_sph_filter_cnt(const struct zebra_dplane_ctx *ctx); +const struct in_addr * +dplane_ctx_get_br_port_sph_filters(const struct zebra_dplane_ctx *ctx); +uint32_t +dplane_ctx_get_br_port_backup_nhg_id(const struct zebra_dplane_ctx *ctx); + +/* Accessors for neighbor table information */ +uint8_t dplane_ctx_neightable_get_family(const struct zebra_dplane_ctx *ctx); +uint32_t +dplane_ctx_neightable_get_app_probes(const struct zebra_dplane_ctx *ctx); +uint32_t +dplane_ctx_neightable_get_mcast_probes(const struct zebra_dplane_ctx *ctx); +uint32_t +dplane_ctx_neightable_get_ucast_probes(const struct zebra_dplane_ctx *ctx); + +/* Accessor for GRE set */ +uint32_t +dplane_ctx_gre_get_link_ifindex(const struct 
zebra_dplane_ctx *ctx); +unsigned int +dplane_ctx_gre_get_mtu(const struct zebra_dplane_ctx *ctx); +const struct zebra_l2info_gre * +dplane_ctx_gre_get_info(const struct zebra_dplane_ctx *ctx); + +/* Interface netconf info */ +enum dplane_netconf_status_e +dplane_ctx_get_netconf_mpls(const struct zebra_dplane_ctx *ctx); +enum dplane_netconf_status_e +dplane_ctx_get_netconf_mcast(const struct zebra_dplane_ctx *ctx); +enum dplane_netconf_status_e +dplane_ctx_get_netconf_linkdown(const struct zebra_dplane_ctx *ctx); + +void dplane_ctx_set_netconf_mpls(struct zebra_dplane_ctx *ctx, + enum dplane_netconf_status_e val); +void dplane_ctx_set_netconf_mcast(struct zebra_dplane_ctx *ctx, + enum dplane_netconf_status_e val); +void dplane_ctx_set_netconf_linkdown(struct zebra_dplane_ctx *ctx, + enum dplane_netconf_status_e val); + +/* Namespace fd info - esp. for netlink communication */ +const struct zebra_dplane_info *dplane_ctx_get_ns( + const struct zebra_dplane_ctx *ctx); +int dplane_ctx_get_ns_sock(const struct zebra_dplane_ctx *ctx); + +/* Indicates zebra shutdown/exit is in progress. Some operations may be + * simplified or skipped during shutdown processing. + */ +bool dplane_is_in_shutdown(void); + +/* + * Enqueue route change operations for the dataplane. 
+ */ +enum zebra_dplane_result dplane_route_add(struct route_node *rn, + struct route_entry *re); + +enum zebra_dplane_result dplane_route_update(struct route_node *rn, + struct route_entry *re, + struct route_entry *old_re); + +enum zebra_dplane_result dplane_route_delete(struct route_node *rn, + struct route_entry *re); + +/* Notify the dplane when system/connected routes change */ +enum zebra_dplane_result dplane_sys_route_add(struct route_node *rn, + struct route_entry *re); +enum zebra_dplane_result dplane_sys_route_del(struct route_node *rn, + struct route_entry *re); + +/* Update from an async notification, to bring other fibs up-to-date */ +enum zebra_dplane_result dplane_route_notif_update( + struct route_node *rn, + struct route_entry *re, + enum dplane_op_e op, + struct zebra_dplane_ctx *ctx); + +/* + * Enqueue bridge port changes for the dataplane. + */ +enum zebra_dplane_result dplane_br_port_update( + const struct interface *ifp, bool non_df, uint32_t sph_filter_cnt, + const struct in_addr *sph_filters, uint32_t backup_nhg_id); + +/* Forward ref of nhg_hash_entry */ +struct nhg_hash_entry; +/* + * Enqueue a nexthop change operation for the dataplane. + */ +enum zebra_dplane_result dplane_nexthop_add(struct nhg_hash_entry *nhe); +enum zebra_dplane_result dplane_nexthop_update(struct nhg_hash_entry *nhe); +enum zebra_dplane_result dplane_nexthop_delete(struct nhg_hash_entry *nhe); + +/* + * Enqueue LSP change operations for the dataplane. + */ +enum zebra_dplane_result dplane_lsp_add(struct zebra_lsp *lsp); +enum zebra_dplane_result dplane_lsp_update(struct zebra_lsp *lsp); +enum zebra_dplane_result dplane_lsp_delete(struct zebra_lsp *lsp); + +/* Update or un-install resulting from an async notification */ +enum zebra_dplane_result dplane_lsp_notif_update(struct zebra_lsp *lsp, + enum dplane_op_e op, + struct zebra_dplane_ctx *ctx); + +/* + * Enqueue pseudowire operations for the dataplane. 
+ */
+enum zebra_dplane_result dplane_pw_install(struct zebra_pw *pw);
+enum zebra_dplane_result dplane_pw_uninstall(struct zebra_pw *pw);
+
+enum zebra_dplane_result
+dplane_intf_mpls_modify_state(const struct interface *ifp, const bool set);
+/*
+ * Enqueue interface address changes for the dataplane.
+ */
+enum zebra_dplane_result dplane_intf_addr_set(const struct interface *ifp,
+					      const struct connected *ifc);
+enum zebra_dplane_result dplane_intf_addr_unset(const struct interface *ifp,
+						const struct connected *ifc);
+
+/*
+ * Enqueue interface link changes for the dataplane.
+ */
+enum zebra_dplane_result dplane_intf_add(const struct interface *ifp);
+enum zebra_dplane_result dplane_intf_update(const struct interface *ifp);
+enum zebra_dplane_result dplane_intf_delete(const struct interface *ifp);
+
+/*
+ * Enqueue traffic control changes for the dataplane.
+ */
+enum zebra_dplane_result dplane_tc_add(void);
+enum zebra_dplane_result dplane_tc_update(void);
+enum zebra_dplane_result dplane_tc_delete(void);
+
+/*
+ * Link layer operations for the dataplane.
+ */
+enum zebra_dplane_result dplane_neigh_ip_update(enum dplane_op_e op,
+						const struct interface *ifp,
+						struct ipaddr *link_ip,
+						struct ipaddr *ip,
+						uint32_t ndm_state,
+						int protocol);
+
+/*
+ * Enqueue evpn mac operations for the dataplane.
+ */ +enum zebra_dplane_result dplane_rem_mac_add(const struct interface *ifp, + const struct interface *bridge_ifp, + vlanid_t vid, + const struct ethaddr *mac, + struct in_addr vtep_ip, + bool sticky, + uint32_t nhg_id, + bool was_static); + +enum zebra_dplane_result dplane_local_mac_add(const struct interface *ifp, + const struct interface *bridge_ifp, + vlanid_t vid, + const struct ethaddr *mac, + bool sticky, + uint32_t set_static, + uint32_t set_inactive); + +enum zebra_dplane_result +dplane_local_mac_del(const struct interface *ifp, + const struct interface *bridge_ifp, vlanid_t vid, + const struct ethaddr *mac); + +enum zebra_dplane_result dplane_rem_mac_del(const struct interface *ifp, + const struct interface *bridge_ifp, + vlanid_t vid, + const struct ethaddr *mac, + struct in_addr vtep_ip); + +/* Helper api to init an empty or new context for a MAC update */ +void dplane_mac_init(struct zebra_dplane_ctx *ctx, + const struct interface *ifp, + const struct interface *br_ifp, + vlanid_t vid, + const struct ethaddr *mac, + struct in_addr vtep_ip, + bool sticky, + uint32_t nhg_id, uint32_t update_flags); + +/* + * Enqueue evpn neighbor updates for the dataplane. + */ +enum zebra_dplane_result dplane_rem_neigh_add(const struct interface *ifp, + const struct ipaddr *ip, + const struct ethaddr *mac, + uint32_t flags, bool was_static); +enum zebra_dplane_result dplane_local_neigh_add(const struct interface *ifp, + const struct ipaddr *ip, + const struct ethaddr *mac, + bool set_router, bool set_static, + bool set_inactive); +enum zebra_dplane_result dplane_rem_neigh_delete(const struct interface *ifp, + const struct ipaddr *ip); + +/* + * Enqueue evpn VTEP updates for the dataplane. 
+ */ +enum zebra_dplane_result dplane_vtep_add(const struct interface *ifp, + const struct in_addr *ip, + vni_t vni); +enum zebra_dplane_result dplane_vtep_delete(const struct interface *ifp, + const struct in_addr *ip, + vni_t vni); + +/* + * Enqueue a neighbour discovery request for the dataplane. + */ +enum zebra_dplane_result dplane_neigh_discover(const struct interface *ifp, + const struct ipaddr *ip); + +/* + * Enqueue a neighbor table parameter set + */ +enum zebra_dplane_result dplane_neigh_table_update(const struct interface *ifp, + const uint8_t family, + const uint32_t app_probes, + const uint32_t ucast_probes, + const uint32_t mcast_probes); + +/* + * Enqueue a GRE set + */ +enum zebra_dplane_result +dplane_gre_set(struct interface *ifp, struct interface *ifp_link, + unsigned int mtu, const struct zebra_l2info_gre *gre_info); + +/* Forward ref of zebra_pbr_rule */ +struct zebra_pbr_rule; + +/* + * Enqueue policy based routing rule for the dataplane. + * It is possible that the user-defined sequence number and the one in the + * forwarding plane may not coincide, hence the API requires a separate + * rule priority - maps to preference/FRA_PRIORITY on Linux. 
+ */ +enum zebra_dplane_result dplane_pbr_rule_add(struct zebra_pbr_rule *rule); +enum zebra_dplane_result dplane_pbr_rule_delete(struct zebra_pbr_rule *rule); +enum zebra_dplane_result +dplane_pbr_rule_update(struct zebra_pbr_rule *old_rule, + struct zebra_pbr_rule *new_rule); +/* iptable */ +enum zebra_dplane_result +dplane_pbr_iptable_add(struct zebra_pbr_iptable *iptable); +enum zebra_dplane_result +dplane_pbr_iptable_delete(struct zebra_pbr_iptable *iptable); + +/* ipset */ +struct zebra_pbr_ipset; +enum zebra_dplane_result dplane_pbr_ipset_add(struct zebra_pbr_ipset *ipset); +enum zebra_dplane_result dplane_pbr_ipset_delete(struct zebra_pbr_ipset *ipset); + +/* ipset entry */ +struct zebra_pbr_ipset_entry; +enum zebra_dplane_result +dplane_pbr_ipset_entry_add(struct zebra_pbr_ipset_entry *ipset); +enum zebra_dplane_result +dplane_pbr_ipset_entry_delete(struct zebra_pbr_ipset_entry *ipset); + +/* Encode route information into data plane context. */ +int dplane_ctx_route_init(struct zebra_dplane_ctx *ctx, enum dplane_op_e op, + struct route_node *rn, struct route_entry *re); + +/* Encode next hop information into data plane context. */ +int dplane_ctx_nexthop_init(struct zebra_dplane_ctx *ctx, enum dplane_op_e op, + struct nhg_hash_entry *nhe); + +/* Encode interface information into data plane context. */ +int dplane_ctx_intf_init(struct zebra_dplane_ctx *ctx, enum dplane_op_e op, + const struct interface *ifp); + +/* Encode traffic control information into data plane context. */ +int dplane_ctx_tc_init(struct zebra_dplane_ctx *ctx, enum dplane_op_e op); + +/* Retrieve the limit on the number of pending, unprocessed updates. */ +uint32_t dplane_get_in_queue_limit(void); + +/* Configure limit on the number of pending, queued updates. If 'unset', reset + * to default value. 
+ */ +void dplane_set_in_queue_limit(uint32_t limit, bool set); + +/* Retrieve the current queue depth of incoming, unprocessed updates */ +uint32_t dplane_get_in_queue_len(void); + +/* + * Vty/cli apis + */ +int dplane_show_helper(struct vty *vty, bool detailed); +int dplane_show_provs_helper(struct vty *vty, bool detailed); +int dplane_config_write_helper(struct vty *vty); + +/* + * Dataplane providers: modules that process or consume dataplane events. + */ + +struct zebra_dplane_provider; + +/* Support string name for a dataplane provider */ +#define DPLANE_PROVIDER_NAMELEN 64 + +/* Priority or ordering values for providers. The idea is that there may be + * some pre-processing, followed by an external or remote dataplane, + * followed by the kernel, followed by some post-processing step (such as + * the fpm output stream.) + */ +enum dplane_provider_prio { + DPLANE_PRIO_NONE = 0, + DPLANE_PRIO_PREPROCESS, + DPLANE_PRIO_PRE_KERNEL, + DPLANE_PRIO_KERNEL, + DPLANE_PRIO_POSTPROCESS, + DPLANE_PRIO_LAST +}; + +/* Flags values used during provider registration. */ +#define DPLANE_PROV_FLAGS_DEFAULT 0x0 + +/* Provider will be spawning its own worker thread */ +#define DPLANE_PROV_FLAG_THREADED 0x1 + +/* Provider registration: ordering or priority value, callbacks, and optional + * opaque data value. If 'prov_p', return the newly-allocated provider object + * on success. + */ + +/* Providers offer an entry-point for incoming work, called in the context of + * the dataplane pthread. The dataplane pthread enqueues any new work to the + * provider's 'inbound' queue, then calls the callback. The dataplane + * then checks the provider's outbound queue for completed work. + */ + +/* + * Providers can offer a 'start' callback; if present, the dataplane will + * call it when it is starting - when its pthread and event-scheduling + * thread_master are available. + */ + +/* Providers can offer an entry-point for shutdown and cleanup. 
This is called + * with 'early' during shutdown, to indicate that the dataplane subsystem + * is allowing work to move through the providers and finish. + * When called without 'early', the provider should release + * all resources (if it has any allocated). + */ +int dplane_provider_register(const char *name, + enum dplane_provider_prio prio, + int flags, + int (*start_fp)(struct zebra_dplane_provider *), + int (*fp)(struct zebra_dplane_provider *), + int (*fini_fp)(struct zebra_dplane_provider *, + bool early), + void *data, + struct zebra_dplane_provider **prov_p); + +/* Accessors for provider attributes */ +const char *dplane_provider_get_name(const struct zebra_dplane_provider *prov); +uint32_t dplane_provider_get_id(const struct zebra_dplane_provider *prov); +void *dplane_provider_get_data(const struct zebra_dplane_provider *prov); +bool dplane_provider_is_threaded(const struct zebra_dplane_provider *prov); + +/* Lock/unlock a provider's mutex - iff the provider was registered with + * the THREADED flag. + */ +void dplane_provider_lock(struct zebra_dplane_provider *prov); +void dplane_provider_unlock(struct zebra_dplane_provider *prov); + +/* Obtain thread_master for dataplane thread */ +struct thread_master *dplane_get_thread_master(void); + +/* Providers should (generally) limit number of updates per work cycle */ +int dplane_provider_get_work_limit(const struct zebra_dplane_provider *prov); + +/* Provider api to signal that work/events are available + * for the dataplane pthread. 
+ */ +int dplane_provider_work_ready(void); + +/* Dequeue, maintain associated counter and locking */ +struct zebra_dplane_ctx *dplane_provider_dequeue_in_ctx( + struct zebra_dplane_provider *prov); + +/* Dequeue work to a list, maintain counter and locking, return count */ +int dplane_provider_dequeue_in_list(struct zebra_dplane_provider *prov, + struct dplane_ctx_q *listp); + +/* Current completed work queue length */ +uint32_t dplane_provider_out_ctx_queue_len(struct zebra_dplane_provider *prov); + +/* Enqueue completed work, maintain associated counter and locking */ +void dplane_provider_enqueue_out_ctx(struct zebra_dplane_provider *prov, + struct zebra_dplane_ctx *ctx); + +/* Enqueue a context directly to zebra main. */ +void dplane_provider_enqueue_to_zebra(struct zebra_dplane_ctx *ctx); + +/* Enable collection of extra info about interfaces in route updates; + * this allows a provider/plugin to see some extra info in route update + * context objects. + */ +void dplane_enable_intf_extra_info(void); + +/* + * Initialize the dataplane modules at zebra startup. This is currently called + * by the rib module. Zebra registers a results callback with the dataplane. + * The callback is called in the dataplane pthread context, + * so the expectation is that the contexts are queued for the zebra + * main pthread. + */ +void zebra_dplane_init(int (*) (struct dplane_ctx_q *)); + +/* + * Start the dataplane pthread. This step needs to be run later than the + * 'init' step, in case zebra has fork-ed. + */ +void zebra_dplane_start(void); + +/* Finalize/cleanup apis, one called early as shutdown is starting, + * one called late at the end of zebra shutdown, and then one called + * from the zebra main pthread to stop the dplane pthread and + * free all resources. + * + * Zebra expects to try to clean up all vrfs and all routes during + * shutdown, so the dplane must be available until very late. 
+ */ +void zebra_dplane_pre_finish(void); +void zebra_dplane_finish(void); +void zebra_dplane_shutdown(void); + +#ifdef __cplusplus +} +#endif + +#endif /* _ZEBRA_DPLANE_H */ diff --git a/zebra/zebra_errors.c b/zebra/zebra_errors.c new file mode 100644 index 0000000..7549a3d --- /dev/null +++ b/zebra/zebra_errors.c @@ -0,0 +1,813 @@ +/* + * Zebra-specific error messages. + * Copyright (C) 2018 Cumulus Networks, Inc. + * Quentin Young + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License as published by the Free + * Software Foundation; either version 2 of the License, or (at your option) + * any later version. + * + * This program is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for + * more details. + * + * You should have received a copy of the GNU General Public License along + * with this program; see the file COPYING; if not, write to the Free Software + * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA + */ + +#include <zebra.h> + +#include "lib/ferr.h" +#include "zebra_errors.h" + +/* clang-format off */ +static struct log_ref ferr_zebra_err[] = { + { + .code = EC_ZEBRA_LM_RESPONSE, + .title = "Error reading response from label manager", + .description = "Zebra could not read the ZAPI header from the label manager", + .suggestion = "Wait for the error to resolve on its own. 
If it does not resolve, restart Zebra.", + }, + { + .code = EC_ZEBRA_LM_NO_SUCH_CLIENT, + .title = "Label manager could not find ZAPI client", + .description = "Zebra was unable to find a ZAPI client matching the given protocol and instance number.", + .suggestion = "Ensure clients which use the label manager are properly configured and running.", + }, + { + .code = EC_ZEBRA_LM_RELAY_FAILED, + .title = "Zebra could not relay label manager response", + .description = "Zebra found the client and instance to relay the label manager response or request to, but was not able to do so, possibly because the connection was closed.", + .suggestion = "Ensure clients which use the label manager are properly configured and running.", + }, + { + .code = EC_ZEBRA_LM_BAD_INSTANCE, + .title = "Mismatch between ZAPI instance and encoded message instance", + .description = "While relaying a request to the external label manager, Zebra noticed that the instance number encoded in the message did not match the client instance number.", + .suggestion = "Notify a developer.", + }, + { + .code = EC_ZEBRA_LM_EXHAUSTED_LABELS, + .title = "Zebra label manager used all available labels", + .description = "Zebra is unable to assign additional label chunks because it has exhausted its assigned label range.", + .suggestion = "Make the label range bigger and restart Zebra.", + }, + { + .code = EC_ZEBRA_LM_DAEMON_MISMATCH, + .title = "Daemon mismatch when releasing label chunks", + .description = "Zebra noticed a mismatch between a label chunk and a protocol daemon number or instance when releasing unused label chunks.", + .suggestion = "Ignore this error.", + }, + { + .code = EC_ZEBRA_LM_UNRELEASED_CHUNK, + .title = "Zebra did not free any label chunks", + .description = "Zebra's chunk cleanup procedure ran, but no label chunks were released.", + .suggestion = "Ignore this error.", + }, + { + .code = EC_ZEBRA_DP_INVALID_RC, + .title = "Dataplane returned invalid status code", + .description = "The 
underlying dataplane responded to a Zebra message or other interaction with an unrecognized, unknown or invalid status code.", + .suggestion = "Notify a developer.", + }, + { + .code = EC_ZEBRA_WQ_NONEXISTENT, + .title = "A necessary work queue does not exist.", + .description = "A necessary work queue does not exist.", + .suggestion = "Notify a developer.", + }, + { + .code = EC_ZEBRA_FEC_ADD_FAILED, + .title = "Failed to add FEC for MPLS client", + .description = "A client requested a label binding for a new FEC, but Zebra was unable to add the FEC to its internal table.", + .suggestion = "Notify a developer.", + }, + { + .code = EC_ZEBRA_FEC_LABEL_INDEX_LABEL_CONFLICT, + .title = "Refused to add FEC for MPLS client with both label index and label specified", + .description = "A client requested a label binding for a new FEC specifying a label index and a label at the same time.", + .suggestion = "Notify a developer.", + }, + { + .code = EC_ZEBRA_FEC_RM_FAILED, + .title = "Failed to remove FEC for MPLS client", + .description = "Zebra was unable to find and remove a FEC in its internal table.", + .suggestion = "Notify a developer.", + }, + { + .code = EC_ZEBRA_IRDP_LEN_MISMATCH, + .title = "IRDP message length mismatch", + .description = "The length encoded in the IP TLV does not match the length of the packet received.", + .suggestion = "Notify a developer.", + }, + { + .code = EC_ZEBRA_RNH_UNKNOWN_FAMILY, + .title = "Attempted to perform nexthop update for unknown address family", + .description = "Zebra attempted to perform a nexthop update for unknown address family", + .suggestion = "Notify a developer.", + }, + { + .code = EC_ZEBRA_DP_INSTALL_FAIL, + .title = "Dataplane installation failure", + .description = "Installation of routes to underlying dataplane failed.", + .suggestion = "Check all configuration parameters for correctness.", + }, + { + .code = EC_ZEBRA_DP_DELETE_FAIL, + .title = "Dataplane deletion failure", + .description = "Deletion of routes 
from underlying dataplane failed.", + .suggestion = "Check all configuration parameters for correctness.", + }, + { + .code = EC_ZEBRA_TABLE_LOOKUP_FAILED, + .title = "Zebra table lookup failed", + .description = "Zebra attempted to look up a table for a particular address family and subsequent address family, but didn't find anything.", + .suggestion = "If you entered a command to trigger this error, make sure you entered the arguments correctly. Check your config file for any potential errors. If these look correct, seek help.", + }, + { + .code = EC_ZEBRA_NETLINK_NOT_AVAILABLE, + .title = "Netlink backend not available", + .description = "FRR was not compiled with support for Netlink. Any operations that require Netlink will fail.", + .suggestion = "Recompile FRR with Netlink, or install a package that supports this feature.", + }, + { + .code = EC_ZEBRA_PROTOBUF_NOT_AVAILABLE, + .title = "Protocol Buffers backend not available", + .description = "FRR was not compiled with support for Protocol Buffers. 
Any operations that require Protobuf will fail.", + .suggestion = "Recompile FRR with Protobuf support, or install a package that supports this feature.", + }, + { + .code = EC_ZEBRA_TM_EXHAUSTED_IDS, + .title = "Table manager used all available IDs", + .description = "Zebra's table manager used up all IDs available to it and can't assign any more.", + .suggestion = "Reconfigure Zebra with a larger range of table IDs.", + }, + { + .code = EC_ZEBRA_TM_DAEMON_MISMATCH, + .title = "Daemon mismatch when releasing table chunks", + .description = "Zebra noticed a mismatch between a table ID chunk and a protocol daemon number instance when releasing unused table chunks.", + .suggestion = "Ignore this error.", + }, + { + .code = EC_ZEBRA_TM_UNRELEASED_CHUNK, + .title = "Zebra did not free any table chunks", + .description = "Zebra's table chunk cleanup procedure ran, but no table chunks were released.", + .suggestion = "Ignore this error.", + }, + { + .code = EC_ZEBRA_UNKNOWN_FAMILY, + .title = "Address family specifier unrecognized", + .description = "Zebra attempted to process information from somewhere that included an address family specifier, but did not recognize the provided specifier.", + .suggestion = "Ensure that your configuration is correct. 
If it is, notify a developer.", + }, + { + .code = EC_ZEBRA_TM_WRONG_PROTO, + .title = "Incorrect protocol for table manager client", + .description = "Zebra's table manager only accepts connections from daemons managing dynamic routing protocols, but received a connection attempt from a daemon that does not meet this criterion.", + .suggestion = "Notify a developer.", + }, + { + .code = EC_ZEBRA_PROTO_OR_INSTANCE_MISMATCH, + .title = "Mismatch between message and client protocol and/or instance", + .description = "Zebra detected a mismatch between a client's protocol and/or instance numbers versus those stored in a message transiting its socket.", + .suggestion = "Notify a developer.", + }, + { + .code = EC_ZEBRA_LM_CANNOT_ASSIGN_CHUNK, + .title = "Label manager unable to assign label chunk", + .description = "Zebra's label manager was unable to assign a label chunk to client.", + .suggestion = "Ensure that Zebra has a sufficient label range available and that there is not a range collision.", + }, + { + .code = EC_ZEBRA_LM_ALIENS, + .title = "Label request from unidentified client", + .description = "Zebra's label manager received a label request from an unidentified client.", + .suggestion = "Notify a developer.", + }, + { + .code = EC_ZEBRA_TM_CANNOT_ASSIGN_CHUNK, + .title = "Table manager unable to assign table chunk", + .description = "Zebra's table manager was unable to assign a table chunk to a client.", + .suggestion = "Ensure that Zebra has sufficient table ID range available and that there is not a range collision.", + }, + { + .code = EC_ZEBRA_TM_ALIENS, + .title = "Table request from unidentified client", + .description = "Zebra's table manager received a table request from an unidentified client.", + .suggestion = "Notify a developer.", + }, + { + .code = EC_ZEBRA_RECVBUF, + .title = "Cannot set receive buffer size", + .description = "Socket receive buffer size could not be set in the kernel", + .suggestion = "Ignore this error.", + }, + { + .code = 
EC_ZEBRA_UNKNOWN_NLMSG,
+		.title = "Unknown Netlink message type",
+		.description = "Zebra received a Netlink message with an unrecognized type field.",
+		.suggestion = "Verify that you are running the latest version of FRR to ensure kernel compatibility. If the problem persists, notify a developer.",
+	},
+	{
+		.code = EC_ZEBRA_RECVMSG_OVERRUN,
+		.title = "Receive buffer overrun",
+		.description = "The kernel's buffer for a socket has been overrun, rendering the socket invalid.",
+		.suggestion = "Zebra will restart itself. Notify a developer if this issue shows up frequently.",
+	},
+	{
+		.code = EC_ZEBRA_NETLINK_LENGTH_ERROR,
+		.title = "Netlink message length mismatch",
+		.description = "Zebra received a Netlink message with incorrect length fields.",
+		.suggestion = "Notify a developer.",
+	},
+	{
+		.code = EC_ZEBRA_UNEXPECTED_MESSAGE,
+		.title = "Received unexpected response from kernel",
+		.description = "Received unexpected response from the kernel via Netlink.",
+		.suggestion = "Notify a developer.",
+	},
+	{
+		.code = EC_ZEBRA_NETLINK_BAD_SEQUENCE,
+		.title = "Bad sequence number in Netlink message",
+		.description = "Zebra received a Netlink message with a bad sequence number.",
+		.suggestion = "Notify a developer.",
+	},
+	{
+		.code = EC_ZEBRA_BAD_MULTIPATH_NUM,
+		.title = "Multipath number was out of valid range",
+		.description = "Multipath number specified to Zebra must be in the appropriate range",
+		.suggestion = "Provide a multipath number that is within its accepted range",
+	},
+	{
+		.code = EC_ZEBRA_PREFIX_PARSE_ERROR,
+		.title = "String could not be parsed as IP prefix",
+		.description = "There was an attempt to parse a string as an IPv4 or IPv6 prefix, but the string could not be parsed and this operation failed.",
+		.suggestion
= "Notify a developer.", + }, + { + .code = EC_ZEBRA_MAC_ADD_FAILED, + .title = "Failed to add MAC address to interface", + .description = "Zebra attempted to assign a MAC address to a vxlan interface but failed", + .suggestion = "Notify a developer.", + }, + { + .code = EC_ZEBRA_VNI_DEL_FAILED, + .title = "Failed to delete VNI", + .description = "Zebra attempted to delete a VNI entry and failed", + .suggestion = "Notify a developer.", + }, + { + .code = EC_ZEBRA_VTEP_ADD_FAILED, + .title = "Adding remote VTEP failed", + .description = "Zebra attempted to add a remote VTEP and failed.", + .suggestion = "Notify a developer.", + }, + { + .code = EC_ZEBRA_VNI_ADD_FAILED, + .title = "Adding VNI failed", + .description = "Zebra attempted to add a VNI hash to an interface and failed", + .suggestion = "Notify a developer.", + }, + { + .code = EC_ZEBRA_NS_NOTIFY_READ, + .title = "Zebra failed to read namespace inotify information", + .description = "Zebra received an event from inotify, but failed to read what it was.", + .suggestion = "Notify a developer.", + }, + { + .code = EC_ZEBRA_NHG_TABLE_INSERT_FAILED, + .title = + "Nexthop Group Hash Table Insert Failure", + .description = + "Zebra failed in inserting a Nexthop Group into its hash tables.", + .suggestion = + "Check to see if the entry already exists or if the netlink message was parsed incorrectly." + }, + { + .code = EC_ZEBRA_NHG_SYNC, + .title = + "Zebra's Nexthop Groups are out of sync", + .description = + "Zebra's nexthop group tables are out of sync with the nexthop groups in the fib.", + .suggestion = + "Check the current status of the kernels nexthop groups and compare it to Zebra's." + }, + { + .code = EC_ZEBRA_NHG_FIB_UPDATE, + .title = + "Zebra failed updating the fib with Nexthop Group", + .description = + "Zebra was not able to successfully install a new nexthop group into the fib", + .suggestion = + "Check to see if the nexthop group on the route you tried to install is valid." 
+ }, + { + .code = EC_ZEBRA_NS_NO_DEFAULT, + .title = "Zebra NameSpace failed to find Default", + .description = "Zebra NameSpace subsystem failed to find a Default namespace during initialization.", + .suggestion = "Open an Issue with all relevant log files and restart FRR", + }, + /* Warnings */ + { + .code = EC_ZEBRAING_LM_PROTO_MISMATCH, + .title = + "Zebra label manager received malformed label request", + .description = + "Zebra's label manager received a label request from a client whose protocol type does not match the protocol field received in the message.", + .suggestion = + "This is a bug. Please report it.", + }, + { + .code = EC_ZEBRA_LSP_INSTALL_FAILURE, + .title = + "Zebra failed to install LSP into the kernel", + .description = + "Zebra made an attempt to install a label switched path, but the kernel indicated that the installation was not successful.", + .suggestion = + "Wait for Zebra to reattempt installation.", + }, + { + .code = EC_ZEBRA_LSP_DELETE_FAILURE, + .title = + "Zebra failed to remove LSP from the kernel", + .description = + "Zebra made an attempt to remove a label switched path, but the kernel indicated that the deletion was not successful.", + .suggestion = + "Wait for Zebra to reattempt deletion.", + }, + { + .code = EC_ZEBRA_MPLS_SUPPORT_DISABLED, + .title = + "Zebra will not run with MPLS support", + .description = + "Zebra noticed that the running kernel does not support MPLS, so it disabled MPLS support.", + .suggestion = + "If you want MPLS support, upgrade the kernel to a version that provides MPLS support.", + }, + { + .code = EC_ZEBRA_SYSCTL_FAILED, + .title = "A call to sysctl() failed", + .description = + "sysctl() returned a nonzero exit code, indicating an error.", + .suggestion = + "The log message should contain further details on the specific error that occurred; investigate the reported error.", + }, + { + .code = EC_ZEBRA_NS_VRF_CREATION_FAILED, + .title = + "Zebra failed to create namespace VRF", + .description = 
+ "Zebra failed to create namespace VRF", + .suggestion = "", + }, + { + .code = EC_ZEBRA_NS_DELETION_FAILED_NO_VRF, + .title = + "Zebra attempted to delete nonexistent namespace", + .description = + "Zebra attempted to delete a particular namespace, but no VRF associated with that namespace could be found to delete.", + .suggestion = "Please report this bug.", + }, + { + .code = EC_ZEBRA_IFLIST_FAILED, + .title = + "Zebra interface listing failed", + .description = + "Zebra encountered an error attempting to query sysctl for a list of interfaces on the system.", + .suggestion = + "Check that Zebra is running with the appropriate permissions. If it is, please report this as a bug.", + }, + { + .code = EC_ZEBRA_IRDP_BAD_CHECKSUM, + .title = + "Zebra received ICMP packet with invalid checksum", + .description = + "Zebra received an ICMP packet with a bad checksum and has silently ignored it.", + .suggestion = + "If the problem continues to occur, investigate the source of the bad ICMP packets.", + }, + { + .code = EC_ZEBRA_IRDP_BAD_TYPE_CODE, + .title = + "Zebra received ICMP packet with bad type code", + .description = + "Zebra received an ICMP packet with a bad code for the message type and has silently ignored it.", + .suggestion = + "If the problem continues to occur, investigate the source of the bad ICMP packets.", + }, + { + .code = EC_ZEBRA_IRDP_BAD_RX_FLAGS, + .title = + "Zebra received IRDP packet while operating in wrong mode", + .description = + "Zebra received a multicast IRDP packet while operating in unicast mode, or vice versa.", + .suggestion = + "If you wish to receive the messages, change your IRDP settings accordingly.", + }, + { + .code = EC_ZEBRA_RNH_NO_TABLE, + .title = + "Zebra could not find table for next hop", + .description = + "Zebra attempted to add a next hop but could not find the appropriate table to install it in.", + .suggestion = "Please report this bug.", + }, + { + .code = EC_ZEBRA_FPM_FORMAT_UNKNOWN, + .title = + "Unknown 
message format for Zebra's FPM module", + .description = + "Zebra's FPM module takes an argument which specifies the message format to use, but the format was either not provided or was not a valid format. The FPM interface will be disabled.", + .suggestion = + "Provide or correct the module argument to provide a valid format. See documentation for further information.", + }, + { + .code = EC_ZEBRA_CLIENT_IO_ERROR, + .title = + "Zebra client connection failed", + .description = + "A Zebra client encountered an I/O error and is shutting down. This can occur under normal circumstances, such as when FRR is restarting or shutting down; it can also happen if the daemon crashed. Usually this warning can be ignored.", + .suggestion = + "Ignore this warning, it is mostly informational.", + }, + { + .code = EC_ZEBRA_CLIENT_WRITE_FAILED, + .title = + "Zebra failed to send message to client", + .description = + "Zebra attempted to send a message to one of its clients, but the write operation failed. 
The connection will be closed.", + .suggestion = + "Ignore this warning, it is mostly informational.", + }, + { + .code = EC_ZEBRA_NETLINK_INVALID_AF, + .title = + "Zebra received Netlink message with invalid family", + .description = + "Zebra received a Netlink message with an invalid address family.", + .suggestion = + "Inspect the logged address family and submit it with a bug report.", + }, + { + .code = EC_ZEBRA_REMOVE_ADDR_UNKNOWN_SUBNET, + .title = + "Zebra tried to remove address from unknown subnet", + .description = + "Zebra attempted to remove an address from an unknown subnet.", + .suggestion = + "This is a bug, please report it.", + }, + { + .code = EC_ZEBRA_REMOVE_UNREGISTERED_ADDR, + .title = + "Zebra tried to remove unregistered address", + .description = + "Zebra attempted to remove an address from a subnet it was not registered on.", + .suggestion = + "This is a bug, please report it.", + }, + { + .code = EC_ZEBRA_PTM_NOT_READY, + .title = + "Interface is up but PTM check has not completed", + .description = + "Zebra noticed that an interface came up and attempted to perform its usual setup procedures, but the PTM check failed and the operation was aborted.", + .suggestion = + "If the problem persists, ensure that the interface is actually up and that PTM is functioning properly.", + }, + { + .code = EC_ZEBRA_UNSUPPORTED_V4_SRCDEST, + .title = + "Kernel rejected sourcedest route", + .description = + "Zebra attempted to install a sourcedest route into the kernel, but the kernel did not acknowledge its installation. 
The route is unsupported.", + .suggestion = + "Check configuration values for correctness", + }, + { + .code = EC_ZEBRA_UNKNOWN_INTERFACE, + .title = + "Zebra encountered an unknown interface specifier", + .description = + "Zebra was asked to look up an interface with a given name or index, but could not find the interface corresponding to the given name or index.", + .suggestion = + "Check configuration values for correctness.", + }, + { + .code = EC_ZEBRA_VRF_NOT_FOUND, + .title = + "Zebra could not find the specified VRF", + .description = + "Zebra tried to look up a VRF, either by name or ID, and could not find it. This could be due to internal inconsistency (a bug) or a configuration error.", + .suggestion = + "Check configuration values for correctness. If values are correct, please file a bug report.", + }, + { + .code = EC_ZEBRA_MORE_NH_THAN_MULTIPATH, + .title = + "More nexthops were provided than the configured multipath limit", + .description = + "A route with multiple nexthops was given, but the number of nexthops exceeded the configured multipath limit.", + .suggestion = + "Reduce the number of nexthops, or increase the multipath limit.", + }, + { + .code = EC_ZEBRA_NEXTHOP_CREATION_FAILED, + .title = + "Zebra failed to create one or more nexthops", + .description = + "While attempting to create nexthops for a route installation operation, Zebra found that it was unable to create one or more of the given nexthops.", + .suggestion = + "Check configuration values for correctness. 
If they are correct, report this as a bug.", + }, + { + .code = EC_ZEBRA_RX_ROUTE_NO_NEXTHOPS, + .title = + "Zebra received an installation request for a route without nexthops", + .description = + "Zebra received a message from a client requesting a route installation, but the route is invalid since it doesn't have any nexthop address or interface.", + .suggestion = + "This is a bug; please report it.", + }, + { + .code = EC_ZEBRA_RX_SRCDEST_WRONG_AFI, + .title = + "Zebra received sourcedest route install without IPv6 address family", + .description = + "Zebra received a message from a client requesting a sourcedest route installation, but the address family was not set to IPv6. Only IPv6 is supported for sourcedest routing.", + .suggestion = + "This is a bug; please report it.", + }, + { + .code = EC_ZEBRA_PSEUDOWIRE_EXISTS, + .title = + "Zebra received an installation / creation request for a pseudowire that already exists", + .description = + "Zebra received an installation or creation request for a pseudowire that already exists, so the installation / creation has been skipped.", + .suggestion = + "This message is informational.", + }, + { + .code = EC_ZEBRA_PSEUDOWIRE_NONEXISTENT, + .title = + "Zebra received an uninstallation / deletion request for a pseudowire that already exists", + .description = + "Zebra received an uninstallation / deletion request for a pseudowire that doesn't exist, so the uninstallation / deletion has been skipped.", + .suggestion = + "This message is informational.", + }, + { + .code = EC_ZEBRA_PSEUDOWIRE_UNINSTALL_NOT_FOUND, + .title = + "Zebra received uninstall request for a pseudowire that doesn't exist", + .description = + "Zebra received an uninstall request for a pseudowire that doesn't exist, so the uninstallation has been skipped.", + .suggestion = + "This message is informational.", + }, + { + .code = EC_ZEBRA_NO_IFACE_ADDR, + .title = "No address on interface", + .description = + "Zebra attempted to retrieve a connected 
address for an interface, but the interface had no connected addresses.", + .suggestion = + "This warning is situational; it is usually informative but can indicate a misconfiguration.", + }, + { + .code = EC_ZEBRA_IFACE_ADDR_ADD_FAILED, + .title = + "Zebra failed to add address to interface", + .description = + "Zebra attempted to add an address to an interface but was unsuccessful.", + .suggestion = + "Check configuration values for correctness.", + }, + { + .code = EC_ZEBRA_IRDP_CANNOT_ACTIVATE_IFACE, + .title = + "Zebra could not enable IRDP on interface", + .description = + "Zebra attempted to enable IRDP on an interface, but could not create the IRDP socket. The system may be out of socket resources, or privilege elevation may have failed.", + .suggestion = + "Verify that Zebra has the appropriate privileges and that the system has sufficient socket resources.", + }, + { + .code = EC_ZEBRA_IRDP_IFACE_DOWN, + .title = + "Zebra attempted to enable IRDP on an interface, but the interface was down", + .description = "Zebra attempted to enable IRDP on an interface, but the interface was down.", + .suggestion = + "Bring up the interface that IRDP is desired on.", + }, + { + .code = EC_ZEBRA_IRDP_IFACE_MCAST_DISABLED, + .title = + "Zebra cannot enable IRDP on interface because multicast is disabled", + .description = + "Zebra attempted to enable IRDP on an interface, but multicast functionality was not enabled on the interface.", + .suggestion = + "Enable multicast on the interface.", + }, + { + .code = EC_ZEBRA_NETLINK_EXTENDED_WARNING, + .title = + "Zebra received warning message from Netlink", + .description = + "Zebra received a warning message from Netlink", + .suggestion = + "This message is informational. 
See the Netlink error message for details.", + }, + { + .code = EC_ZEBRA_NAMESPACE_DIR_INACCESSIBLE, + .title = + "Zebra could not access /var/run/netns", + .description = + "Zebra tried to verify that the run directory for Linux network namespaces existed, but this test failed.", + .suggestion = + "Ensure that Zebra has the proper privileges to access this directory.", + }, + { + .code = EC_ZEBRA_CONNECTED_AFI_UNKNOWN, + .title = + "Zebra received unknown address family on interface", + .description = + "Zebra received a notification of a connected prefix on an interface but did not recognize the address family as IPv4 or IPv6", + .suggestion = + "This message is informational.", + }, + { + .code = EC_ZEBRA_IFACE_SAME_LOCAL_AS_PEER, + .title = + "Zebra route has same destination address as local interface", + .description = + "Zebra noticed that a route on an interface has the same destination address as an address on the interface itself, which may cause issues with routing protocols.", + .suggestion = + "Investigate the source of the route to determine why the destination and interface addresses are the same.", + }, + { + .code = EC_ZEBRA_BCAST_ADDR_MISMATCH, + .title = + "Zebra broadcast address sanity check failed", + .description = + "Zebra computed the broadcast address for a connected prefix based on the netmask and found that it did not match the broadcast address it received for the prefix on that interface", + .suggestion = + "Investigate the source of the broadcast address to determine why it does not match the computed address.", + }, + { + .code = EC_ZEBRA_REDISTRIBUTE_UNKNOWN_AF, + .title = + "Zebra encountered unknown address family during redistribution", + .description = + "During a redistribution operation Zebra encountered an unknown address family.", + .suggestion = + "This warning can be ignored; the redistribution operation will skip the unknown address family.", + }, + { + .code = EC_ZEBRA_ADVERTISING_UNUSABLE_ADDR, + .title = + "Zebra 
advertising unusable interface address", + .description = + "Zebra is advertising an address on an interface that is not yet fully installed on the interface.", + .suggestion = + "This message is informational. The address should show up on the interface shortly after advertisement.", + }, + { + .code = EC_ZEBRA_RA_PARAM_MISMATCH, + .title = + "Zebra received router advertisement with parameter mismatch", + .description = + "Zebra received a router advertisement, but one of the non-critical parameters (AdvCurHopLimit, AdvManagedFlag, AdvOtherConfigFlag, AdvReachableTime or AdvRetransTimer) does not match Zebra's local settings.", + .suggestion = + "This message is informational; the router advertisement will be processed as normal. If issues arise due to the parameter mismatch, check Zebra's router advertisement configuration.", + }, + { + .code = EC_ZEBRA_RTM_VERSION_MISMATCH, + .title = + "Zebra received kernel message with unknown version", + .description = + "Zebra received a message from the kernel with a message version that does not match Zebra's internal version. 
Depending on version compatibility, this may cause issues sending and receiving messages to the kernel.", + .suggestion = + "If issues arise, check if there is a version of FRR available for your kernel version.", + }, + { + .code = EC_ZEBRA_RTM_NO_GATEWAY, + .title = + "Zebra could not determine proper gateway for kernel route", + .description = + "Zebra attempted to install a route into the kernel, but noticed it had no gateway and no interface with a gateway could be located.", + .suggestion = + "Check configuration values for correctness.", + }, + { + .code = EC_ZEBRA_MAX_LABELS_PUSH, + .title = + "Zebra exceeded maximum LSP labels for a single rtmsg", + .description = + "Zebra attempted to push more than one label into the kernel; the maximum on OpenBSD is 1 label.", + .suggestion = + "This message is informational.", + }, + { + .code = EC_ZEBRA_STICKY_MAC_ALREADY_LEARNT, + .title = + "EVPN MAC already learnt as remote sticky MAC", + .description = + "Zebra tried to handle a local MAC addition but noticed that it had already learnt the MAC from a remote peer.", + .suggestion = + "Check configuration values for correctness.", + }, + { + .code = EC_ZEBRA_UNSUPPORTED_V6_SRCDEST, + .title = + "Kernel does not support IPv6 sourcedest routes", + .description = + "Zebra attempted to install a sourcedest route into the kernel, but IPv6 sourcedest routes are not supported on the current kernel.", + .suggestion = + "Do not use v6 sourcedest routes, or upgrade your kernel.", + }, + { + .code = EC_ZEBRA_DUP_MAC_DETECTED, + .title = + "EVPN MAC is detected duplicate", + .description = + "Zebra has hit duplicate address detection threshold which means host MAC is moving.", + .suggestion = + "Check network topology to detect duplicate host MAC for correctness.", + }, + { + .code = EC_ZEBRA_DUP_IP_INHERIT_DETECTED, + .title = + "EVPN IP is detected duplicate by MAC", + .description = + "Zebra has hit duplicate address detection threshold which means MAC-IP pair is moving.", + 
.suggestion = + "Check network topology to detect duplicate host MAC for correctness.", + }, + { + .code = EC_ZEBRA_DUP_IP_DETECTED, + .title = + "EVPN IP is detected duplicate", + .description = + "Zebra has hit duplicate address detection threshold which means host IP is moving.", + .suggestion = + "Check network topology to detect duplicate host IP for correctness.", + }, + { + .code = EC_ZEBRA_BAD_NHG_MESSAGE, + .title = + "Bad Nexthop Group Message", + .description = + "Zebra received Nexthop Group message from the kernel that it cannot process.", + .suggestion = + "Check the kernel's link states and routing table to see how it matches ours." + }, + { + .code = EC_ZEBRA_DUPLICATE_NHG_MESSAGE, + .title = + "Duplicate Nexthop Group Message", + .description = + "Zebra received Nexthop Group message from the kernel that it is identical to one it/we already have but with a different ID.", + .suggestion = + "See if the nexthop you are trying to add is already present in the fib." + }, + { + .code = EC_ZEBRA_VRF_MISCONFIGURED, + .title = "Duplicate VRF table id detected", + .description = "Zebra has detected a situation where there are two vrf devices with the exact same tableid. 
This is considered a complete misconfiguration of VRF devices and breaks a fundamental assumption in FRR about how VRFs work", + .suggestion = "Use different table ids for the VRFs in question" + }, + { + .code = EC_ZEBRA_SRV6M_UNRELEASED_LOCATOR_CHUNK, + .title = "Zebra did not free any srv6 locator chunks", + .description = "Zebra's srv6-locator chunk cleanup procedure ran, but no srv6 locator chunks were released.", + .suggestion = "Ignore this error.", + }, + { + .code = EC_ZEBRA_INTF_UPDATE_FAILURE, + .title = + "Zebra failed to update interface in the kernel", + .description = + "Zebra made an attempt to update an interface in the kernel, but it was not successful.", + .suggestion = + "Wait for Zebra to reattempt update.", + }, + { + .code = END_FERR, + } +}; +/* clang-format on */ + + +void zebra_error_init(void) +{ + log_ref_add(ferr_zebra_err); +} diff --git a/zebra/zebra_errors.h b/zebra/zebra_errors.h new file mode 100644 index 0000000..5164de0 --- /dev/null +++ b/zebra/zebra_errors.h @@ -0,0 +1,148 @@ +/* + * Zebra-specific error messages. + * Copyright (C) 2018 Cumulus Networks, Inc. + * Quentin Young + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License as published by the Free + * Software Foundation; either version 2 of the License, or (at your option) + * any later version. + * + * This program is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for + * more details. 
+ * + * You should have received a copy of the GNU General Public License along + * with this program; see the file COPYING; if not, write to the Free Software + * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA + */ + +#ifndef __EC_ZEBRAORS_H__ +#define __EC_ZEBRAORS_H__ + +#include "lib/ferr.h" + +#ifdef __cplusplus +extern "C" { +#endif + +enum zebra_log_refs { + EC_ZEBRA_LM_RESPONSE = ZEBRA_FERR_START, + EC_ZEBRA_LM_NO_SUCH_CLIENT, + EC_ZEBRA_LM_RELAY_FAILED, + EC_ZEBRA_LM_NO_SOCKET, + EC_ZEBRA_LM_BAD_INSTANCE, + EC_ZEBRA_LM_RELAY_REQUEST_FAILED, + EC_ZEBRA_LM_CLIENT_CONNECTION_FAILED, + EC_ZEBRA_LM_EXHAUSTED_LABELS, + EC_ZEBRA_LM_DAEMON_MISMATCH, + EC_ZEBRA_LM_UNRELEASED_CHUNK, + EC_ZEBRA_DP_INVALID_RC, + EC_ZEBRA_WQ_NONEXISTENT, + EC_ZEBRA_FEC_ADD_FAILED, + EC_ZEBRA_FEC_LABEL_INDEX_LABEL_CONFLICT, + EC_ZEBRA_FEC_RM_FAILED, + EC_ZEBRA_IRDP_LEN_MISMATCH, + EC_ZEBRA_RNH_UNKNOWN_FAMILY, + EC_ZEBRA_DP_INSTALL_FAIL, + EC_ZEBRA_DP_DELETE_FAIL, + EC_ZEBRA_TABLE_LOOKUP_FAILED, + EC_ZEBRA_NETLINK_NOT_AVAILABLE, + EC_ZEBRA_PROTOBUF_NOT_AVAILABLE, + EC_ZEBRA_TM_EXHAUSTED_IDS, + EC_ZEBRA_TM_DAEMON_MISMATCH, + EC_ZEBRA_TM_UNRELEASED_CHUNK, + EC_ZEBRA_UNKNOWN_FAMILY, + EC_ZEBRA_TM_WRONG_PROTO, + EC_ZEBRA_PROTO_OR_INSTANCE_MISMATCH, + EC_ZEBRA_LM_CANNOT_ASSIGN_CHUNK, + EC_ZEBRA_LM_ALIENS, + EC_ZEBRA_TM_CANNOT_ASSIGN_CHUNK, + EC_ZEBRA_TM_ALIENS, + EC_ZEBRA_RECVBUF, + EC_ZEBRA_UNKNOWN_NLMSG, + EC_ZEBRA_RECVMSG_OVERRUN, + EC_ZEBRA_NETLINK_LENGTH_ERROR, + EC_ZEBRA_UNEXPECTED_MESSAGE, + EC_ZEBRA_NETLINK_BAD_SEQUENCE, + EC_ZEBRA_BAD_MULTIPATH_NUM, + EC_ZEBRA_PREFIX_PARSE_ERROR, + EC_ZEBRA_MAC_ADD_FAILED, + EC_ZEBRA_VNI_DEL_FAILED, + EC_ZEBRA_VTEP_ADD_FAILED, + EC_ZEBRA_VNI_ADD_FAILED, + EC_ZEBRA_NHG_TABLE_INSERT_FAILED, + EC_ZEBRA_NHG_SYNC, + EC_ZEBRA_NHG_FIB_UPDATE, + EC_ZEBRA_NS_NO_DEFAULT, + EC_ZEBRA_PBR_RULE_UPDATE, + /* warnings */ + EC_ZEBRA_NS_NOTIFY_READ, + EC_ZEBRAING_LM_PROTO_MISMATCH, + EC_ZEBRA_LSP_INSTALL_FAILURE, + 
EC_ZEBRA_LSP_DELETE_FAILURE, + EC_ZEBRA_MPLS_SUPPORT_DISABLED, + EC_ZEBRA_SYSCTL_FAILED, + EC_ZEBRA_CONVERT_TO_DEBUG, + EC_ZEBRA_NS_VRF_CREATION_FAILED, + EC_ZEBRA_NS_DELETION_FAILED_NO_VRF, + EC_ZEBRA_IRDP_BAD_CHECKSUM, + EC_ZEBRA_IRDP_BAD_TYPE_CODE, + EC_ZEBRA_IRDP_BAD_RX_FLAGS, + EC_ZEBRA_RNH_NO_TABLE, + EC_ZEBRA_IFLIST_FAILED, + EC_ZEBRA_FPM_FORMAT_UNKNOWN, + EC_ZEBRA_CLIENT_IO_ERROR, + EC_ZEBRA_CLIENT_WRITE_FAILED, + EC_ZEBRA_NETLINK_INVALID_AF, + EC_ZEBRA_REMOVE_ADDR_UNKNOWN_SUBNET, + EC_ZEBRA_REMOVE_UNREGISTERED_ADDR, + EC_ZEBRA_PTM_NOT_READY, + EC_ZEBRA_UNSUPPORTED_V4_SRCDEST, + EC_ZEBRA_UNKNOWN_INTERFACE, + EC_ZEBRA_VRF_NOT_FOUND, + EC_ZEBRA_MORE_NH_THAN_MULTIPATH, + EC_ZEBRA_NEXTHOP_CREATION_FAILED, + EC_ZEBRA_RX_ROUTE_NO_NEXTHOPS, + EC_ZEBRA_RX_SRCDEST_WRONG_AFI, + EC_ZEBRA_PSEUDOWIRE_EXISTS, + EC_ZEBRA_PSEUDOWIRE_UNINSTALL_NOT_FOUND, + EC_ZEBRA_PSEUDOWIRE_NONEXISTENT, + EC_ZEBRA_NO_IFACE_ADDR, + EC_ZEBRA_IFACE_ADDR_ADD_FAILED, + EC_ZEBRA_IRDP_CANNOT_ACTIVATE_IFACE, + EC_ZEBRA_IRDP_IFACE_DOWN, + EC_ZEBRA_IRDP_IFACE_MCAST_DISABLED, + EC_ZEBRA_NETLINK_EXTENDED_WARNING, + EC_ZEBRA_NAMESPACE_DIR_INACCESSIBLE, + EC_ZEBRA_CONNECTED_AFI_UNKNOWN, + EC_ZEBRA_IFACE_SAME_LOCAL_AS_PEER, + EC_ZEBRA_BCAST_ADDR_MISMATCH, + EC_ZEBRA_REDISTRIBUTE_UNKNOWN_AF, + EC_ZEBRA_ADVERTISING_UNUSABLE_ADDR, + EC_ZEBRA_RA_PARAM_MISMATCH, + EC_ZEBRA_RTM_VERSION_MISMATCH, + EC_ZEBRA_RTM_NO_GATEWAY, + EC_ZEBRA_MAX_LABELS_PUSH, + EC_ZEBRA_STICKY_MAC_ALREADY_LEARNT, + EC_ZEBRA_UNSUPPORTED_V6_SRCDEST, + EC_ZEBRA_DUP_MAC_DETECTED, + EC_ZEBRA_DUP_IP_INHERIT_DETECTED, + EC_ZEBRA_DUP_IP_DETECTED, + EC_ZEBRA_BAD_NHG_MESSAGE, + EC_ZEBRA_DUPLICATE_NHG_MESSAGE, + EC_ZEBRA_VRF_MISCONFIGURED, + EC_ZEBRA_ES_CREATE, + EC_ZEBRA_GRE_SET_UPDATE, + EC_ZEBRA_SRV6M_UNRELEASED_LOCATOR_CHUNK, + EC_ZEBRA_INTF_UPDATE_FAILURE, +}; + +void zebra_error_init(void); + +#ifdef __cplusplus +} +#endif + +#endif /* __EC_ZEBRAORS_H__ */ diff --git a/zebra/zebra_evpn.c b/zebra/zebra_evpn.c new file mode 100644 index 
0000000..168f0b2 --- /dev/null +++ b/zebra/zebra_evpn.c @@ -0,0 +1,1576 @@ +/* + * Zebra EVPN for VxLAN code + * Copyright (C) 2016, 2017 Cumulus Networks, Inc. + * + * This file is part of FRR. + * + * FRR is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License as published by the + * Free Software Foundation; either version 2, or (at your option) any + * later version. + * + * FRR is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with FRR; see the file COPYING. If not, write to the Free + * Software Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA + * 02111-1307, USA. + */ +#include <zebra.h> + +#include "hash.h" +#include "if.h" +#include "jhash.h" +#include "linklist.h" +#include "log.h" +#include "memory.h" +#include "prefix.h" +#include "stream.h" +#include "table.h" +#include "vlan.h" +#include "vxlan.h" +#ifdef GNU_LINUX +#include <linux/neighbour.h> +#endif + +#include "zebra/zebra_router.h" +#include "zebra/debug.h" +#include "zebra/interface.h" +#include "zebra/rib.h" +#include "zebra/rt.h" +#include "zebra/rt_netlink.h" +#include "zebra/zebra_errors.h" +#include "zebra/zebra_l2.h" +#include "zebra/zebra_ns.h" +#include "zebra/zebra_vrf.h" +#include "zebra/zebra_vxlan.h" +#include "zebra/zebra_evpn.h" +#include "zebra/zebra_evpn_mac.h" +#include "zebra/zebra_evpn_neigh.h" +#include "zebra/zebra_vxlan_private.h" +#include "zebra/zebra_evpn_mh.h" +#include "zebra/zebra_evpn_vxlan.h" +#include "zebra/zebra_router.h" + +DEFINE_MTYPE_STATIC(ZEBRA, ZEVPN, "VNI hash"); +DEFINE_MTYPE_STATIC(ZEBRA, ZEVPN_VTEP, "VNI remote VTEP"); + +/* PMSI strings. 
*/ +#define VXLAN_FLOOD_STR_NO_INFO "-" +#define VXLAN_FLOOD_STR_DEFAULT VXLAN_FLOOD_STR_NO_INFO +static const struct message zvtep_flood_str[] = { + {VXLAN_FLOOD_DISABLED, VXLAN_FLOOD_STR_NO_INFO}, + {VXLAN_FLOOD_PIM_SM, "PIM-SM"}, + {VXLAN_FLOOD_HEAD_END_REPL, "HER"}, + {0} +}; + +int advertise_gw_macip_enabled(struct zebra_evpn *zevpn) +{ + struct zebra_vrf *zvrf; + + zvrf = zebra_vrf_get_evpn(); + if (zvrf->advertise_gw_macip) + return 1; + + if (zevpn && zevpn->advertise_gw_macip) + return 1; + + return 0; +} + +int advertise_svi_macip_enabled(struct zebra_evpn *zevpn) +{ + struct zebra_vrf *zvrf; + + zvrf = zebra_vrf_get_evpn(); + if (zvrf->advertise_svi_macip) + return 1; + + if (zevpn && zevpn->advertise_svi_macip) + return 1; + + return 0; +} + +/* + * Print a specific EVPN entry. + */ +void zebra_evpn_print(struct zebra_evpn *zevpn, void **ctxt) +{ + struct vty *vty; + struct zebra_vtep *zvtep; + uint32_t num_macs; + uint32_t num_neigh; + json_object *json = NULL; + json_object *json_vtep_list = NULL; + json_object *json_ip_str = NULL; + char buf[PREFIX_STRLEN]; + + vty = ctxt[0]; + json = ctxt[1]; + + if (json == NULL) { + vty_out(vty, "VNI: %u\n", zevpn->vni); + vty_out(vty, " Type: %s\n", "L2"); + vty_out(vty, " Tenant VRF: %s\n", vrf_id_to_name(zevpn->vrf_id)); + } else { + json_object_int_add(json, "vni", zevpn->vni); + json_object_string_add(json, "type", "L2"); + json_object_string_add(json, "vrf", + vrf_id_to_name(zevpn->vrf_id)); + } + + if (!zevpn->vxlan_if) { // unexpected + if (json == NULL) + vty_out(vty, " VxLAN interface: unknown\n"); + return; + } + num_macs = num_valid_macs(zevpn); + num_neigh = hashcount(zevpn->neigh_table); + if (json == NULL) { + vty_out(vty, " VxLAN interface: %s\n", zevpn->vxlan_if->name); + vty_out(vty, " VxLAN ifIndex: %u\n", zevpn->vxlan_if->ifindex); + vty_out(vty, " SVI interface: %s\n", + (zevpn->svi_if ? zevpn->svi_if->name : "")); + vty_out(vty, " SVI ifIndex: %u\n", + (zevpn->svi_if ? 
zevpn->svi_if->ifindex : 0)); + vty_out(vty, " Local VTEP IP: %pI4\n", + &zevpn->local_vtep_ip); + vty_out(vty, " Mcast group: %pI4\n", + &zevpn->mcast_grp); + } else { + json_object_string_add(json, "vxlanInterface", + zevpn->vxlan_if->name); + json_object_int_add(json, "ifindex", zevpn->vxlan_if->ifindex); + if (zevpn->svi_if) { + json_object_string_add(json, "sviInterface", + zevpn->svi_if->name); + json_object_int_add(json, "sviIfindex", + zevpn->svi_if->ifindex); + } + json_object_string_addf(json, "vtepIp", "%pI4", + &zevpn->local_vtep_ip); + json_object_string_addf(json, "mcastGroup", "%pI4", + &zevpn->mcast_grp); + json_object_string_add(json, "advertiseGatewayMacip", + zevpn->advertise_gw_macip ? "Yes" : "No"); + json_object_string_add(json, "advertiseSviMacip", + zevpn->advertise_svi_macip ? "Yes" + : "No"); + json_object_int_add(json, "numMacs", num_macs); + json_object_int_add(json, "numArpNd", num_neigh); + } + if (!zevpn->vteps) { + if (json == NULL) + vty_out(vty, " No remote VTEPs known for this VNI\n"); + } else { + if (json == NULL) + vty_out(vty, " Remote VTEPs for this VNI:\n"); + else + json_vtep_list = json_object_new_array(); + for (zvtep = zevpn->vteps; zvtep; zvtep = zvtep->next) { + const char *flood_str = lookup_msg(zvtep_flood_str, + zvtep->flood_control, + VXLAN_FLOOD_STR_DEFAULT); + + if (json == NULL) { + vty_out(vty, " %pI4 flood: %s\n", + &zvtep->vtep_ip, + flood_str); + } else { + json_ip_str = json_object_new_string( + inet_ntop(AF_INET, + &zvtep->vtep_ip, buf, + sizeof(buf))); + json_object_array_add(json_vtep_list, + json_ip_str); + } + } + if (json) + json_object_object_add(json, "numRemoteVteps", + json_vtep_list); + } + if (json == NULL) { + vty_out(vty, + " Number of MACs (local and remote) known for this VNI: %u\n", + num_macs); + vty_out(vty, + " Number of ARPs (IPv4 and IPv6, local and remote) " + "known for this VNI: %u\n", + num_neigh); + vty_out(vty, " Advertise-gw-macip: %s\n", + zevpn->advertise_gw_macip ? 
"Yes" : "No"); + vty_out(vty, " Advertise-svi-macip: %s\n", + zevpn->advertise_svi_macip ? "Yes" : "No"); + } +} + +/* + * Print an EVPN hash entry - called for display of all VNIs. + */ +void zebra_evpn_print_hash(struct hash_bucket *bucket, void *ctxt[]) +{ + struct vty *vty; + struct zebra_evpn *zevpn; + struct zebra_vtep *zvtep; + uint32_t num_vteps = 0; + uint32_t num_macs = 0; + uint32_t num_neigh = 0; + json_object *json = NULL; + json_object *json_evpn = NULL; + json_object *json_ip_str = NULL; + json_object *json_vtep_list = NULL; + char buf[PREFIX_STRLEN]; + + vty = ctxt[0]; + json = ctxt[1]; + + zevpn = (struct zebra_evpn *)bucket->data; + + zvtep = zevpn->vteps; + while (zvtep) { + num_vteps++; + zvtep = zvtep->next; + } + + num_macs = num_valid_macs(zevpn); + num_neigh = hashcount(zevpn->neigh_table); + if (json == NULL) + vty_out(vty, "%-10u %-4s %-21s %-8u %-8u %-15u %-37s\n", + zevpn->vni, "L2", + zevpn->vxlan_if ? zevpn->vxlan_if->name : "unknown", + num_macs, num_neigh, num_vteps, + vrf_id_to_name(zevpn->vrf_id)); + else { + char vni_str[VNI_STR_LEN]; + snprintf(vni_str, VNI_STR_LEN, "%u", zevpn->vni); + json_evpn = json_object_new_object(); + json_object_int_add(json_evpn, "vni", zevpn->vni); + json_object_string_add(json_evpn, "type", "L2"); + json_object_string_add(json_evpn, "vxlanIf", + zevpn->vxlan_if ? 
zevpn->vxlan_if->name + : "unknown"); + json_object_int_add(json_evpn, "numMacs", num_macs); + json_object_int_add(json_evpn, "numArpNd", num_neigh); + json_object_int_add(json_evpn, "numRemoteVteps", num_vteps); + json_object_string_add(json_evpn, "tenantVrf", + vrf_id_to_name(zevpn->vrf_id)); + if (num_vteps) { + json_vtep_list = json_object_new_array(); + for (zvtep = zevpn->vteps; zvtep; zvtep = zvtep->next) { + json_ip_str = json_object_new_string( + inet_ntop(AF_INET, &zvtep->vtep_ip, buf, + sizeof(buf))); + json_object_array_add(json_vtep_list, + json_ip_str); + } + json_object_object_add(json_evpn, "remoteVteps", + json_vtep_list); + } + json_object_object_add(json, vni_str, json_evpn); + } +} + +/* + * Print an EVPN hash entry in detail - called for display of all EVPNs. + */ +void zebra_evpn_print_hash_detail(struct hash_bucket *bucket, void *data) +{ + struct vty *vty; + struct zebra_evpn *zevpn; + json_object *json_array = NULL; + bool use_json = false; + struct zebra_evpn_show *zes = data; + + vty = zes->vty; + json_array = zes->json; + use_json = zes->use_json; + + zevpn = (struct zebra_evpn *)bucket->data; + + zebra_vxlan_print_vni(vty, zes->zvrf, zevpn->vni, use_json, json_array); + + if (!use_json) + vty_out(vty, "\n"); +} + +int zebra_evpn_del_macip_for_intf(struct interface *ifp, + struct zebra_evpn *zevpn) +{ + struct listnode *cnode = NULL, *cnnode = NULL; + struct connected *c = NULL; + struct ethaddr macaddr; + + memcpy(&macaddr.octet, ifp->hw_addr, ETH_ALEN); + + for (ALL_LIST_ELEMENTS(ifp->connected, cnode, cnnode, c)) { + struct ipaddr ip; + + memset(&ip, 0, sizeof(struct ipaddr)); + if (!CHECK_FLAG(c->conf, ZEBRA_IFC_REAL)) + continue; + + if (c->address->family == AF_INET) { + ip.ipa_type = IPADDR_V4; + memcpy(&(ip.ipaddr_v4), &(c->address->u.prefix4), + sizeof(struct in_addr)); + } else if (c->address->family == AF_INET6) { + ip.ipa_type = IPADDR_V6; + memcpy(&(ip.ipaddr_v6), &(c->address->u.prefix6), + sizeof(struct in6_addr)); + } 
else { + continue; + } + + zebra_evpn_gw_macip_del(ifp, zevpn, &ip); + } + + return 0; +} + +int zebra_evpn_add_macip_for_intf(struct interface *ifp, + struct zebra_evpn *zevpn) +{ + struct listnode *cnode = NULL, *cnnode = NULL; + struct connected *c = NULL; + struct ethaddr macaddr; + + memcpy(&macaddr.octet, ifp->hw_addr, ETH_ALEN); + + for (ALL_LIST_ELEMENTS(ifp->connected, cnode, cnnode, c)) { + struct ipaddr ip; + + if (!CHECK_FLAG(c->conf, ZEBRA_IFC_REAL)) + continue; + + memset(&ip, 0, sizeof(struct ipaddr)); + if (c->address->family == AF_INET) { + ip.ipa_type = IPADDR_V4; + memcpy(&(ip.ipaddr_v4), &(c->address->u.prefix4), + sizeof(struct in_addr)); + } else if (c->address->family == AF_INET6) { + ip.ipa_type = IPADDR_V6; + memcpy(&(ip.ipaddr_v6), &(c->address->u.prefix6), + sizeof(struct in6_addr)); + } else { + continue; + } + + zebra_evpn_gw_macip_add(ifp, zevpn, &macaddr, &ip); + } + return 0; +} + +static int ip_prefix_send_to_client(vrf_id_t vrf_id, struct prefix *p, + uint16_t cmd) +{ + struct zserv *client = NULL; + struct stream *s = NULL; + + client = zserv_find_client(ZEBRA_ROUTE_BGP, 0); + /* BGP may not be running. */ + if (!client) + return 0; + + s = stream_new(ZEBRA_MAX_PACKET_SIZ); + + zclient_create_header(s, cmd, vrf_id); + stream_put(s, p, sizeof(struct prefix)); + + /* Write packet size. */ + stream_putw_at(s, 0, stream_get_endp(s)); + + if (IS_ZEBRA_DEBUG_VXLAN) + zlog_debug("Send ip prefix %pFX %s on vrf %s", p, + (cmd == ZEBRA_IP_PREFIX_ROUTE_ADD) ? 
"ADD" : "DEL", + vrf_id_to_name(vrf_id)); + + if (cmd == ZEBRA_IP_PREFIX_ROUTE_ADD) + client->prefixadd_cnt++; + else + client->prefixdel_cnt++; + + return zserv_send_message(client, s); +} + +int zebra_evpn_advertise_subnet(struct zebra_evpn *zevpn, struct interface *ifp, + int advertise) +{ + struct listnode *cnode = NULL, *cnnode = NULL; + struct connected *c = NULL; + struct ethaddr macaddr; + + memcpy(&macaddr.octet, ifp->hw_addr, ETH_ALEN); + + for (ALL_LIST_ELEMENTS(ifp->connected, cnode, cnnode, c)) { + struct prefix p; + + memcpy(&p, c->address, sizeof(struct prefix)); + + /* skip link local address */ + if (IN6_IS_ADDR_LINKLOCAL(&p.u.prefix6)) + continue; + + apply_mask(&p); + if (advertise) + ip_prefix_send_to_client(ifp->vrf->vrf_id, &p, + ZEBRA_IP_PREFIX_ROUTE_ADD); + else + ip_prefix_send_to_client(ifp->vrf->vrf_id, &p, + ZEBRA_IP_PREFIX_ROUTE_DEL); + } + return 0; +} + +/* + * zebra_evpn_gw_macip_add_to_client + */ +int zebra_evpn_gw_macip_add(struct interface *ifp, struct zebra_evpn *zevpn, + struct ethaddr *macaddr, struct ipaddr *ip) +{ + struct zebra_mac *mac = NULL; + struct zebra_if *zif = NULL; + struct zebra_l2info_vxlan *vxl = NULL; + + zif = zevpn->vxlan_if->info; + if (!zif) + return -1; + + vxl = &zif->l2info.vxl; + + zebra_evpn_mac_gw_macip_add(ifp, zevpn, ip, &mac, macaddr, + vxl->access_vlan, true); + + return zebra_evpn_neigh_gw_macip_add(ifp, zevpn, ip, mac); +} + +/* + * zebra_evpn_gw_macip_del_from_client + */ +int zebra_evpn_gw_macip_del(struct interface *ifp, struct zebra_evpn *zevpn, + struct ipaddr *ip) +{ + struct zebra_neigh *n = NULL; + struct zebra_mac *mac = NULL; + + /* If the neigh entry is not present nothing to do*/ + n = zebra_evpn_neigh_lookup(zevpn, ip); + if (!n) + return 0; + + /* mac entry should be present */ + mac = zebra_evpn_mac_lookup(zevpn, &n->emac); + if (!mac) { + if (IS_ZEBRA_DEBUG_VXLAN) + zlog_debug("MAC %pEA doesn't exist for neigh %pIA on VNI %u", + &n->emac, ip, zevpn->vni); + return -1; + } + + /* 
If the entry is not local nothing to do*/ + if (!CHECK_FLAG(n->flags, ZEBRA_NEIGH_LOCAL)) + return -1; + + /* only need to delete the entry from bgp if we sent it before */ + if (IS_ZEBRA_DEBUG_VXLAN) + zlog_debug( + "%u:SVI %s(%u) VNI %u, sending GW MAC %pEA IP %pIA del to BGP", + ifp->vrf->vrf_id, ifp->name, ifp->ifindex, zevpn->vni, + &n->emac, ip); + + /* Remove neighbor from BGP. */ + zebra_evpn_neigh_send_del_to_client(zevpn->vni, &n->ip, &n->emac, + n->flags, ZEBRA_NEIGH_ACTIVE, + false /*force*/); + + /* Delete this neighbor entry. */ + zebra_evpn_neigh_del(zevpn, n); + + /* see if the mac needs to be deleted as well*/ + if (mac) + zebra_evpn_deref_ip2mac(zevpn, mac); + + return 0; +} + +void zebra_evpn_gw_macip_del_for_evpn_hash(struct hash_bucket *bucket, + void *ctxt) +{ + struct zebra_evpn *zevpn = NULL; + struct zebra_if *zif = NULL; + struct zebra_l2info_vxlan zl2_info; + struct interface *vlan_if = NULL; + struct interface *vrr_if = NULL; + struct interface *ifp; + + /* Add primary SVI MAC*/ + zevpn = (struct zebra_evpn *)bucket->data; + + /* Global (Zvrf) advertise-default-gw is disabled, + * but zevpn advertise-default-gw is enabled + */ + if (zevpn->advertise_gw_macip) { + if (IS_ZEBRA_DEBUG_VXLAN) + zlog_debug("VNI: %u GW-MACIP enabled, retain gw-macip", + zevpn->vni); + return; + } + + ifp = zevpn->vxlan_if; + if (!ifp) + return; + zif = ifp->info; + + /* If down or not mapped to a bridge, we're done. 
*/ + if (!if_is_operative(ifp) || !zif->brslave_info.br_if) + return; + + zl2_info = zif->l2info.vxl; + + vlan_if = + zvni_map_to_svi(zl2_info.access_vlan, zif->brslave_info.br_if); + if (!vlan_if) + return; + + /* Del primary MAC-IP */ + zebra_evpn_del_macip_for_intf(vlan_if, zevpn); + + /* Del VRR MAC-IP - if any*/ + vrr_if = zebra_get_vrr_intf_for_svi(vlan_if); + if (vrr_if) + zebra_evpn_del_macip_for_intf(vrr_if, zevpn); + + return; +} + +void zebra_evpn_gw_macip_add_for_evpn_hash(struct hash_bucket *bucket, + void *ctxt) +{ + struct zebra_evpn *zevpn = NULL; + struct zebra_if *zif = NULL; + struct zebra_l2info_vxlan zl2_info; + struct interface *vlan_if = NULL; + struct interface *vrr_if = NULL; + struct interface *ifp = NULL; + + zevpn = (struct zebra_evpn *)bucket->data; + + ifp = zevpn->vxlan_if; + if (!ifp) + return; + zif = ifp->info; + + /* If down or not mapped to a bridge, we're done. */ + if (!if_is_operative(ifp) || !zif->brslave_info.br_if) + return; + zl2_info = zif->l2info.vxl; + + vlan_if = + zvni_map_to_svi(zl2_info.access_vlan, zif->brslave_info.br_if); + if (!vlan_if) + return; + + /* Add primary SVI MAC-IP */ + if (advertise_svi_macip_enabled(zevpn) + || advertise_gw_macip_enabled(zevpn)) + zebra_evpn_add_macip_for_intf(vlan_if, zevpn); + + if (advertise_gw_macip_enabled(zevpn)) { + /* Add VRR MAC-IP - if any*/ + vrr_if = zebra_get_vrr_intf_for_svi(vlan_if); + if (vrr_if) + zebra_evpn_add_macip_for_intf(vrr_if, zevpn); + } + + return; +} + +void zebra_evpn_svi_macip_del_for_evpn_hash(struct hash_bucket *bucket, + void *ctxt) +{ + struct zebra_evpn *zevpn = NULL; + struct zebra_if *zif = NULL; + struct zebra_l2info_vxlan zl2_info; + struct interface *vlan_if = NULL; + struct interface *ifp; + + /* Add primary SVI MAC*/ + zevpn = (struct zebra_evpn *)bucket->data; + if (!zevpn) + return; + + /* Global(vrf) advertise-svi-ip disabled, but zevpn advertise-svi-ip + * enabled + */ + if (zevpn->advertise_svi_macip) { + if (IS_ZEBRA_DEBUG_VXLAN) + 
zlog_debug("VNI: %u SVI-MACIP enabled, retain svi-macip", + zevpn->vni); + return; + } + + ifp = zevpn->vxlan_if; + if (!ifp) + return; + zif = ifp->info; + + /* If down or not mapped to a bridge, we're done. */ + if (!if_is_operative(ifp) || !zif->brslave_info.br_if) + return; + + zl2_info = zif->l2info.vxl; + + vlan_if = + zvni_map_to_svi(zl2_info.access_vlan, zif->brslave_info.br_if); + if (!vlan_if) + return; + + /* Del primary MAC-IP */ + zebra_evpn_del_macip_for_intf(vlan_if, zevpn); + + return; +} + +static int zebra_evpn_map_vlan_ns(struct ns *ns, + void *_in_param, + void **_p_zevpn) +{ + struct zebra_ns *zns = ns->info; + struct route_node *rn; + struct interface *br_if; + struct zebra_evpn **p_zevpn = (struct zebra_evpn **)_p_zevpn; + struct zebra_evpn *zevpn; + struct interface *tmp_if = NULL; + struct zebra_if *zif; + struct zebra_l2info_vxlan *vxl = NULL; + struct zebra_from_svi_param *in_param = + (struct zebra_from_svi_param *)_in_param; + + assert(p_zevpn && in_param); + + br_if = in_param->br_if; + zif = in_param->zif; + assert(zif); + assert(br_if); + + /* See if this interface (or interface plus VLAN Id) maps to a VxLAN */ + /* TODO: Optimize with a hash. */ + for (rn = route_top(zns->if_table); rn; rn = route_next(rn)) { + tmp_if = (struct interface *)rn->info; + if (!tmp_if) + continue; + zif = tmp_if->info; + if (!zif || zif->zif_type != ZEBRA_IF_VXLAN) + continue; + if (!if_is_operative(tmp_if)) + continue; + vxl = &zif->l2info.vxl; + + if (zif->brslave_info.br_if != br_if) + continue; + + if (!in_param->bridge_vlan_aware + || vxl->access_vlan == in_param->vid) { + zevpn = zebra_evpn_lookup(vxl->vni); + *p_zevpn = zevpn; + return NS_WALK_STOP; + } + } + + return NS_WALK_CONTINUE; +} + +/* + * Map port or (port, VLAN) to an EVPN. This is invoked upon getting MAC + * notifications, to see if they are of interest. 
+ */ +struct zebra_evpn *zebra_evpn_map_vlan(struct interface *ifp, + struct interface *br_if, vlanid_t vid) +{ + struct zebra_if *zif; + struct zebra_l2info_bridge *br; + struct zebra_evpn **p_zevpn; + struct zebra_evpn *zevpn = NULL; + struct zebra_from_svi_param in_param; + + /* Determine if bridge is VLAN-aware or not */ + zif = br_if->info; + assert(zif); + br = &zif->l2info.br; + in_param.bridge_vlan_aware = br->vlan_aware; + in_param.vid = vid; + in_param.br_if = br_if; + in_param.zif = zif; + p_zevpn = &zevpn; + + ns_walk_func(zebra_evpn_map_vlan_ns, + (void *)&in_param, + (void **)p_zevpn); + return zevpn; +} + +static int zebra_evpn_from_svi_ns(struct ns *ns, + void *_in_param, + void **_p_zevpn) +{ + struct zebra_ns *zns = ns->info; + struct route_node *rn; + struct interface *br_if; + struct zebra_evpn **p_zevpn = (struct zebra_evpn **)_p_zevpn; + struct zebra_evpn *zevpn; + struct interface *tmp_if = NULL; + struct zebra_if *zif; + struct zebra_l2info_vxlan *vxl = NULL; + struct zebra_from_svi_param *in_param = + (struct zebra_from_svi_param *)_in_param; + int found = 0; + + if (!in_param) + return NS_WALK_STOP; + br_if = in_param->br_if; + zif = in_param->zif; + assert(zif); + + /* TODO: Optimize with a hash. */ + for (rn = route_top(zns->if_table); rn; rn = route_next(rn)) { + tmp_if = (struct interface *)rn->info; + if (!tmp_if) + continue; + zif = tmp_if->info; + if (!zif || zif->zif_type != ZEBRA_IF_VXLAN) + continue; + if (!if_is_operative(tmp_if)) + continue; + vxl = &zif->l2info.vxl; + + if (zif->brslave_info.br_if != br_if) + continue; + + if (!in_param->bridge_vlan_aware + || vxl->access_vlan == in_param->vid) { + found = 1; + break; + } + } + + if (!found) + return NS_WALK_CONTINUE; + + zevpn = zebra_evpn_lookup(vxl->vni); + if (p_zevpn) + *p_zevpn = zevpn; + return NS_WALK_STOP; +} + +/* + * Map SVI and associated bridge to an EVPN. This is invoked upon getting + * neighbor notifications, to see if they are of interest. 
+ */ +struct zebra_evpn *zebra_evpn_from_svi(struct interface *ifp, + struct interface *br_if) +{ + struct zebra_l2info_bridge *br; + struct zebra_evpn *zevpn = NULL; + struct zebra_evpn **p_zevpn; + struct zebra_if *zif; + struct zebra_from_svi_param in_param; + + if (!br_if) + return NULL; + + /* Make sure the linked interface is a bridge. */ + if (!IS_ZEBRA_IF_BRIDGE(br_if)) + return NULL; + + /* Determine if bridge is VLAN-aware or not */ + zif = br_if->info; + assert(zif); + br = &zif->l2info.br; + in_param.bridge_vlan_aware = br->vlan_aware; + in_param.vid = 0; + + if (in_param.bridge_vlan_aware) { + struct zebra_l2info_vlan *vl; + + if (!IS_ZEBRA_IF_VLAN(ifp)) + return NULL; + + zif = ifp->info; + assert(zif); + vl = &zif->l2info.vl; + in_param.vid = vl->vid; + } + + in_param.br_if = br_if; + in_param.zif = zif; + p_zevpn = &zevpn; + /* See if this interface (or interface plus VLAN Id) maps to a VxLAN */ + ns_walk_func(zebra_evpn_from_svi_ns, (void *)&in_param, + (void **)p_zevpn); + return zevpn; +} + +static int zvni_map_to_macvlan_ns(struct ns *ns, + void *_in_param, + void **_p_ifp) +{ + struct zebra_ns *zns = ns->info; + struct zebra_from_svi_param *in_param = + (struct zebra_from_svi_param *)_in_param; + struct interface **p_ifp = (struct interface **)_p_ifp; + struct route_node *rn; + struct interface *tmp_if = NULL; + struct zebra_if *zif; + + assert(in_param && p_ifp); + + /* Identify corresponding VLAN interface. */ + for (rn = route_top(zns->if_table); rn; rn = route_next(rn)) { + tmp_if = (struct interface *)rn->info; + /* Check oper status of the SVI. */ + if (!tmp_if || !if_is_operative(tmp_if)) + continue; + zif = tmp_if->info; + + if (!zif || zif->zif_type != ZEBRA_IF_MACVLAN) + continue; + + if (zif->link == in_param->svi_if) { + *p_ifp = tmp_if; + return NS_WALK_STOP; + } + } + + return NS_WALK_CONTINUE; +} + +/* Map to MAC-VLAN interface corresponding to specified SVI interface. 
+ */ +struct interface *zebra_evpn_map_to_macvlan(struct interface *br_if, + struct interface *svi_if) +{ + struct interface *tmp_if = NULL; + struct zebra_if *zif; + struct interface **p_ifp; + struct zebra_from_svi_param in_param; + + /* Defensive check, caller expected to invoke only with valid bridge. */ + if (!br_if) + return NULL; + + if (!svi_if) { + zlog_debug("svi_if is not passed."); + return NULL; + } + + /* Determine if bridge is VLAN-aware or not */ + zif = br_if->info; + assert(zif); + + in_param.vid = 0; + in_param.br_if = br_if; + in_param.zif = NULL; + in_param.svi_if = svi_if; + p_ifp = &tmp_if; + + /* Identify corresponding VLAN interface. */ + ns_walk_func(zvni_map_to_macvlan_ns, + (void *)&in_param, + (void **)p_ifp); + return tmp_if; +} + +/* + * Install MAC hash entry - called upon access VLAN change. + */ +void zebra_evpn_install_mac_hash(struct hash_bucket *bucket, void *ctxt) +{ + struct zebra_mac *mac; + struct mac_walk_ctx *wctx = ctxt; + + mac = (struct zebra_mac *)bucket->data; + + if (CHECK_FLAG(mac->flags, ZEBRA_MAC_REMOTE)) + zebra_evpn_rem_mac_install(wctx->zevpn, mac, false); +} + +/* + * Read and populate local MACs and neighbors corresponding to this EVPN. 
 */
void zebra_evpn_read_mac_neigh(struct zebra_evpn *zevpn, struct interface *ifp)
{
	struct zebra_ns *zns;
	struct zebra_vrf *zvrf;
	struct zebra_if *zif;
	struct interface *vlan_if;
	struct zebra_l2info_vxlan *vxl;
	struct interface *vrr_if;

	zif = ifp->info;
	vxl = &zif->l2info.vxl;
	zvrf = zebra_vrf_lookup_by_id(zevpn->vrf_id);
	if (!zvrf || !zvrf->zns)
		return;
	zns = zvrf->zns;

	if (IS_ZEBRA_DEBUG_VXLAN)
		zlog_debug(
			"Reading MAC FDB and Neighbors for intf %s(%u) VNI %u master %u",
			ifp->name, ifp->ifindex, zevpn->vni,
			zif->brslave_info.bridge_ifindex);

	/* Pull FDB entries from the kernel first, then neighbors for the
	 * matching SVI (MACs must exist before MAC-IP bindings).
	 */
	macfdb_read_for_bridge(zns, ifp, zif->brslave_info.br_if);
	vlan_if = zvni_map_to_svi(vxl->access_vlan, zif->brslave_info.br_if);
	if (vlan_if) {
		/* Add SVI MAC */
		zebra_evpn_acc_bd_svi_mac_add(vlan_if);

		/* Add SVI MAC-IP */
		if (advertise_svi_macip_enabled(zevpn)
		    || advertise_gw_macip_enabled(zevpn))
			zebra_evpn_add_macip_for_intf(vlan_if, zevpn);

		/* Add VRR MAC-IP - if any*/
		if (advertise_gw_macip_enabled(zevpn)) {
			vrr_if = zebra_get_vrr_intf_for_svi(vlan_if);
			if (vrr_if)
				zebra_evpn_add_macip_for_intf(vrr_if, zevpn);
		}

		neigh_read_for_vlan(zns, vlan_if);
	}
}

/*
 * Hash function for EVPN.
 */
unsigned int zebra_evpn_hash_keymake(const void *p)
{
	const struct zebra_evpn *zevpn = p;

	/* VNI is the sole hash key. */
	return (jhash_1word(zevpn->vni, 0));
}

/*
 * Compare 2 evpn hash entries.
 */
bool zebra_evpn_hash_cmp(const void *p1, const void *p2)
{
	const struct zebra_evpn *zevpn1 = p1;
	const struct zebra_evpn *zevpn2 = p2;

	return (zevpn1->vni == zevpn2->vni);
}

/* qsort-style three-way comparison on VNI, for sorted EVPN lists. */
int zebra_evpn_list_cmp(void *p1, void *p2)
{
	const struct zebra_evpn *zevpn1 = p1;
	const struct zebra_evpn *zevpn2 = p2;

	if (zevpn1->vni == zevpn2->vni)
		return 0;
	return (zevpn1->vni < zevpn2->vni) ? -1 : 1;
}

/*
 * Callback to allocate VNI hash entry.
 */
void *zebra_evpn_alloc(void *p)
{
	const struct zebra_evpn *tmp_vni = p;
	struct zebra_evpn *zevpn;

	zevpn = XCALLOC(MTYPE_ZEVPN, sizeof(struct zebra_evpn));
	zevpn->vni = tmp_vni->vni;
	return ((void *)zevpn);
}

/*
 * Look up EVPN hash entry.
 */
struct zebra_evpn *zebra_evpn_lookup(vni_t vni)
{
	struct zebra_vrf *zvrf;
	struct zebra_evpn tmp_vni;
	struct zebra_evpn *zevpn = NULL;

	zvrf = zebra_vrf_get_evpn();
	memset(&tmp_vni, 0, sizeof(tmp_vni));
	tmp_vni.vni = vni;
	zevpn = hash_lookup(zvrf->evpn_table, &tmp_vni);

	return zevpn;
}

/*
 * Add EVPN hash entry.
 */
struct zebra_evpn *zebra_evpn_add(vni_t vni)
{
	char buffer[80];
	struct zebra_vrf *zvrf;
	struct zebra_evpn tmp_zevpn;
	struct zebra_evpn *zevpn = NULL;

	zvrf = zebra_vrf_get_evpn();
	memset(&tmp_zevpn, 0, sizeof(tmp_zevpn));
	tmp_zevpn.vni = vni;
	/* hash_get() with the alloc callback returns the existing entry
	 * or creates a new one - this add is idempotent.
	 */
	zevpn = hash_get(zvrf->evpn_table, &tmp_zevpn, zebra_evpn_alloc);

	zebra_evpn_es_evi_init(zevpn);

	snprintf(buffer, sizeof(buffer), "Zebra EVPN MAC Table vni: %u", vni);
	/* Create hash table for MAC */
	zevpn->mac_table = zebra_mac_db_create(buffer);

	snprintf(buffer, sizeof(buffer), "Zebra EVPN Neighbor Table vni: %u",
		 vni);
	/* Create hash table for neighbors */
	zevpn->neigh_table = zebra_neigh_db_create(buffer);

	return zevpn;
}

/*
 * Delete EVPN hash entry.
 * NOTE(review): the MAC/neigh hash tables are freed without walking the
 * entries - presumably callers have already emptied them (see
 * zebra_evpn_cleanup_all); confirm before calling from new paths.
 */
int zebra_evpn_del(struct zebra_evpn *zevpn)
{
	struct zebra_vrf *zvrf;
	struct zebra_evpn *tmp_zevpn;

	zvrf = zebra_vrf_get_evpn();

	zevpn->svi_if = NULL;

	/* Free the neighbor hash table. */
	hash_free(zevpn->neigh_table);
	zevpn->neigh_table = NULL;

	/* Free the MAC hash table. */
	hash_free(zevpn->mac_table);
	zevpn->mac_table = NULL;

	/* Remove references to the zevpn in the MH databases */
	if (zevpn->vxlan_if)
		zebra_evpn_vxl_evpn_set(zevpn->vxlan_if->info, zevpn, false);
	zebra_evpn_es_evi_cleanup(zevpn);

	/* Free the EVPN hash entry and allocated memory. */
	tmp_zevpn = hash_release(zvrf->evpn_table, zevpn);
	XFREE(MTYPE_ZEVPN, tmp_zevpn);

	return 0;
}

/*
 * Inform BGP about local EVPN addition.
 * Wire format (after zapi header): vni (u32), local VTEP IP, tenant
 * vrf_id, mcast group, SVI ifindex - the field order here must match
 * the BGP-side decoder.
 */
int zebra_evpn_send_add_to_client(struct zebra_evpn *zevpn)
{
	struct zserv *client;
	struct stream *s;
	ifindex_t svi_index;
	int rc;

	client = zserv_find_client(ZEBRA_ROUTE_BGP, 0);
	/* BGP may not be running. */
	if (!client)
		return 0;

	svi_index = zevpn->svi_if ? zevpn->svi_if->ifindex : 0;

	s = stream_new(ZEBRA_MAX_PACKET_SIZ);

	zclient_create_header(s, ZEBRA_VNI_ADD, zebra_vrf_get_evpn_id());
	stream_putl(s, zevpn->vni);
	stream_put_in_addr(s, &zevpn->local_vtep_ip);
	stream_put(s, &zevpn->vrf_id, sizeof(vrf_id_t)); /* tenant vrf */
	stream_put_in_addr(s, &zevpn->mcast_grp);
	stream_put(s, &svi_index, sizeof(ifindex_t));

	/* Write packet size. */
	stream_putw_at(s, 0, stream_get_endp(s));

	if (IS_ZEBRA_DEBUG_VXLAN)
		zlog_debug(
			"Send EVPN_ADD %u %pI4 tenant vrf %s(%u) SVI index %u to %s",
			zevpn->vni, &zevpn->local_vtep_ip,
			vrf_id_to_name(zevpn->vrf_id), zevpn->vrf_id,
			(zevpn->svi_if ? zevpn->svi_if->ifindex : 0),
			zebra_route_string(client->proto));

	client->vniadd_cnt++;
	rc = zserv_send_message(client, s);

	if (!(zevpn->flags & ZEVPN_READY_FOR_BGP)) {
		zevpn->flags |= ZEVPN_READY_FOR_BGP;
		/* once the EVPN is sent the ES-EVIs can also be replayed
		 * to BGP
		 */
		zebra_evpn_update_all_es(zevpn);
	}
	return rc;
}

/*
 * Inform BGP about local EVPN deletion.
 */
int zebra_evpn_send_del_to_client(struct zebra_evpn *zevpn)
{
	struct zserv *client;
	struct stream *s;

	client = zserv_find_client(ZEBRA_ROUTE_BGP, 0);
	/* BGP may not be running.
*/ + if (!client) + return 0; + + if (zevpn->flags & ZEVPN_READY_FOR_BGP) { + zevpn->flags &= ~ZEVPN_READY_FOR_BGP; + /* the ES-EVIs must be removed from BGP before the EVPN is */ + zebra_evpn_update_all_es(zevpn); + } + + s = stream_new(ZEBRA_MAX_PACKET_SIZ); + stream_reset(s); + + zclient_create_header(s, ZEBRA_VNI_DEL, zebra_vrf_get_evpn_id()); + stream_putl(s, zevpn->vni); + + /* Write packet size. */ + stream_putw_at(s, 0, stream_get_endp(s)); + + if (IS_ZEBRA_DEBUG_VXLAN) + zlog_debug("Send EVPN_DEL %u to %s", zevpn->vni, + zebra_route_string(client->proto)); + + client->vnidel_cnt++; + return zserv_send_message(client, s); +} + +/* + * See if remote VTEP matches with prefix. + */ +static int zebra_evpn_vtep_match(struct in_addr *vtep_ip, + struct zebra_vtep *zvtep) +{ + return (IPV4_ADDR_SAME(vtep_ip, &zvtep->vtep_ip)); +} + +/* + * Locate remote VTEP in EVPN hash table. + */ +struct zebra_vtep *zebra_evpn_vtep_find(struct zebra_evpn *zevpn, + struct in_addr *vtep_ip) +{ + struct zebra_vtep *zvtep; + + if (!zevpn) + return NULL; + + for (zvtep = zevpn->vteps; zvtep; zvtep = zvtep->next) { + if (zebra_evpn_vtep_match(vtep_ip, zvtep)) + break; + } + + return zvtep; +} + +/* + * Add remote VTEP to EVPN hash table. + */ +struct zebra_vtep *zebra_evpn_vtep_add(struct zebra_evpn *zevpn, + struct in_addr *vtep_ip, + int flood_control) + +{ + struct zebra_vtep *zvtep; + + zvtep = XCALLOC(MTYPE_ZEVPN_VTEP, sizeof(struct zebra_vtep)); + + zvtep->vtep_ip = *vtep_ip; + zvtep->flood_control = flood_control; + + if (zevpn->vteps) + zevpn->vteps->prev = zvtep; + zvtep->next = zevpn->vteps; + zevpn->vteps = zvtep; + + return zvtep; +} + +/* + * Remove remote VTEP from EVPN hash table. 
+ */ +int zebra_evpn_vtep_del(struct zebra_evpn *zevpn, struct zebra_vtep *zvtep) +{ + if (zvtep->next) + zvtep->next->prev = zvtep->prev; + if (zvtep->prev) + zvtep->prev->next = zvtep->next; + else + zevpn->vteps = zvtep->next; + + zvtep->prev = zvtep->next = NULL; + XFREE(MTYPE_ZEVPN_VTEP, zvtep); + + return 0; +} + +/* + * Delete all remote VTEPs for this EVPN (upon VNI delete). Also + * uninstall from kernel if asked to. + */ +int zebra_evpn_vtep_del_all(struct zebra_evpn *zevpn, int uninstall) +{ + struct zebra_vtep *zvtep, *zvtep_next; + + if (!zevpn) + return -1; + + for (zvtep = zevpn->vteps; zvtep; zvtep = zvtep_next) { + zvtep_next = zvtep->next; + if (uninstall) + zebra_evpn_vtep_uninstall(zevpn, &zvtep->vtep_ip); + zebra_evpn_vtep_del(zevpn, zvtep); + } + + return 0; +} + +/* + * Install remote VTEP into the kernel if the remote VTEP has asked + * for head-end-replication. + */ +int zebra_evpn_vtep_install(struct zebra_evpn *zevpn, struct zebra_vtep *zvtep) +{ + if (is_vxlan_flooding_head_end() && + (zvtep->flood_control == VXLAN_FLOOD_HEAD_END_REPL)) { + if (ZEBRA_DPLANE_REQUEST_FAILURE == + dplane_vtep_add(zevpn->vxlan_if, + &zvtep->vtep_ip, zevpn->vni)) + return -1; + } + + return 0; +} + +/* + * Uninstall remote VTEP from the kernel. + */ +int zebra_evpn_vtep_uninstall(struct zebra_evpn *zevpn, struct in_addr *vtep_ip) +{ + if (!zevpn->vxlan_if) { + zlog_debug("VNI %u hash %p couldn't be uninstalled - no intf", + zevpn->vni, zevpn); + return -1; + } + + if (ZEBRA_DPLANE_REQUEST_FAILURE == + dplane_vtep_delete(zevpn->vxlan_if, vtep_ip, zevpn->vni)) + return -1; + + return 0; +} + +/* + * Install or uninstall flood entries in the kernel corresponding to + * remote VTEPs. This is invoked upon change to BUM handling. 
+ */ +void zebra_evpn_handle_flooding_remote_vteps(struct hash_bucket *bucket, + void *zvrf) +{ + struct zebra_evpn *zevpn; + struct zebra_vtep *zvtep; + + zevpn = (struct zebra_evpn *)bucket->data; + if (!zevpn) + return; + + for (zvtep = zevpn->vteps; zvtep; zvtep = zvtep->next) { + if (is_vxlan_flooding_head_end()) + zebra_evpn_vtep_install(zevpn, zvtep); + else + zebra_evpn_vtep_uninstall(zevpn, &zvtep->vtep_ip); + } +} + +/* + * Cleanup EVPN/VTEP and update kernel + */ +void zebra_evpn_cleanup_all(struct hash_bucket *bucket, void *arg) +{ + struct zebra_evpn *zevpn = NULL; + + zevpn = (struct zebra_evpn *)bucket->data; + + /* Free up all neighbors and MACs, if any. */ + zebra_evpn_neigh_del_all(zevpn, 1, 0, DEL_ALL_NEIGH); + zebra_evpn_mac_del_all(zevpn, 1, 0, DEL_ALL_MAC); + + /* Free up all remote VTEPs, if any. */ + zebra_evpn_vtep_del_all(zevpn, 1); + + /* Delete the hash entry. */ + zebra_evpn_del(zevpn); +} + +static void zebra_evpn_process_sync_macip_add(struct zebra_evpn *zevpn, + const struct ethaddr *macaddr, + uint16_t ipa_len, + const struct ipaddr *ipaddr, + uint8_t flags, uint32_t seq, + const esi_t *esi) +{ + struct sync_mac_ip_ctx ctx; + char ipbuf[INET6_ADDRSTRLEN]; + bool sticky; + bool remote_gw; + struct zebra_neigh *n = NULL; + + sticky = !!CHECK_FLAG(flags, ZEBRA_MACIP_TYPE_STICKY); + remote_gw = !!CHECK_FLAG(flags, ZEBRA_MACIP_TYPE_GW); + /* if sticky or remote-gw ignore updates from the peer */ + if (sticky || remote_gw) { + if (IS_ZEBRA_DEBUG_VXLAN || IS_ZEBRA_DEBUG_EVPN_MH_NEIGH + || IS_ZEBRA_DEBUG_EVPN_MH_MAC) + zlog_debug( + "Ignore sync-macip vni %u mac %pEA%s%s%s%s", + zevpn->vni, + macaddr, + ipa_len ? " IP " : "", + ipa_len ? ipaddr2str(ipaddr, ipbuf, + sizeof(ipbuf)) + : "", + sticky ? " sticky" : "", + remote_gw ? 
" remote_gw" : ""); + return; + } + + if (ipa_len) { + n = zebra_evpn_neigh_lookup(zevpn, ipaddr); + if (n + && !zebra_evpn_neigh_is_bgp_seq_ok(zevpn, n, macaddr, seq, + true)) + return; + } + + memset(&ctx, 0, sizeof(ctx)); + ctx.mac = zebra_evpn_proc_sync_mac_update( + zevpn, macaddr, ipa_len, ipaddr, flags, seq, esi, &ctx); + if (ctx.ignore_macip || !ctx.mac || !ipa_len) + return; + + zebra_evpn_proc_sync_neigh_update(zevpn, n, ipa_len, ipaddr, flags, seq, + esi, &ctx); +} + +/************************** remote mac-ip handling **************************/ +/* Process a remote MACIP add from BGP. */ +void zebra_evpn_rem_macip_add(vni_t vni, const struct ethaddr *macaddr, + uint16_t ipa_len, const struct ipaddr *ipaddr, + uint8_t flags, uint32_t seq, + struct in_addr vtep_ip, const esi_t *esi) +{ + struct zebra_evpn *zevpn; + struct zebra_vtep *zvtep; + struct zebra_mac *mac = NULL; + struct interface *ifp = NULL; + struct zebra_if *zif = NULL; + struct zebra_vrf *zvrf; + + /* Locate EVPN hash entry - expected to exist. 
 */
	zevpn = zebra_evpn_lookup(vni);
	if (!zevpn) {
		if (IS_ZEBRA_DEBUG_VXLAN)
			zlog_debug("Unknown VNI %u upon remote MACIP ADD", vni);
		return;
	}

	/* The VxLAN interface must exist, be up and be bridged for the
	 * entry to be programmable.
	 */
	ifp = zevpn->vxlan_if;
	if (ifp)
		zif = ifp->info;
	if (!ifp || !if_is_operative(ifp) || !zif || !zif->brslave_info.br_if) {
		if (IS_ZEBRA_DEBUG_VXLAN)
			zlog_debug(
				"Ignoring remote MACIP ADD VNI %u, invalid interface state or info",
				vni);
		return;
	}

	/* Type-2 routes from another PE can be interpreted as remote or
	 * SYNC based on the destination ES -
	 * SYNC - if ES is local
	 * REMOTE - if ES is not local
	 */
	if (flags & ZEBRA_MACIP_TYPE_SYNC_PATH) {
		struct zebra_evpn_es *es;

		es = zebra_evpn_es_find(esi);
		if (es && (es->flags & ZEBRA_EVPNES_READY_FOR_BGP)) {
			zebra_evpn_process_sync_macip_add(zevpn, macaddr,
							  ipa_len, ipaddr,
							  flags, seq, esi);
		} else {
			if (IS_ZEBRA_DEBUG_EVPN_MH_ES) {
				char esi_str[ESI_STR_LEN];

				esi_to_str(esi, esi_str, sizeof(esi_str));
				zlog_debug(
					"Ignore sync-macip add; ES %s is not ready",
					esi_str);
			}
		}

		return;
	}

	/* The remote VTEP specified should normally exist, but it is
	 * possible that when peering comes up, peer may advertise MACIP
	 * routes before advertising type-3 routes.
	 */
	if (vtep_ip.s_addr) {
		zvtep = zebra_evpn_vtep_find(zevpn, &vtep_ip);
		if (!zvtep) {
			/* Implicitly create the VTEP with flooding disabled;
			 * a later type-3 route will set the real flood mode.
			 */
			zvtep = zebra_evpn_vtep_add(zevpn, &vtep_ip,
						    VXLAN_FLOOD_DISABLED);
			if (!zvtep) {
				flog_err(
					EC_ZEBRA_VTEP_ADD_FAILED,
					"Failed to add remote VTEP, VNI %u zevpn %p upon remote MACIP ADD",
					vni, zevpn);
				return;
			}

			zebra_evpn_vtep_install(zevpn, zvtep);
		}
	}

	/* Update the MAC first; the neighbor (MAC-IP) hangs off it. */
	zvrf = zebra_vrf_get_evpn();
	if (zebra_evpn_mac_remote_macip_add(zevpn, zvrf, macaddr, ipa_len,
					    ipaddr, &mac, vtep_ip, flags, seq,
					    esi)
	    != 0)
		return;

	zebra_evpn_neigh_remote_macip_add(zevpn, zvrf, ipaddr, mac, vtep_ip,
					  flags, seq);
}

/* Process a remote MACIP delete from BGP.
*/ +void zebra_evpn_rem_macip_del(vni_t vni, const struct ethaddr *macaddr, + uint16_t ipa_len, const struct ipaddr *ipaddr, + struct in_addr vtep_ip) +{ + struct zebra_evpn *zevpn; + struct zebra_mac *mac = NULL; + struct zebra_neigh *n = NULL; + struct interface *ifp = NULL; + struct zebra_if *zif = NULL; + struct zebra_ns *zns; + struct zebra_l2info_vxlan *vxl; + struct zebra_vrf *zvrf; + char buf1[INET6_ADDRSTRLEN]; + + /* Locate EVPN hash entry - expected to exist. */ + zevpn = zebra_evpn_lookup(vni); + if (!zevpn) { + if (IS_ZEBRA_DEBUG_VXLAN) + zlog_debug("Unknown VNI %u upon remote MACIP DEL", vni); + return; + } + + ifp = zevpn->vxlan_if; + if (ifp) + zif = ifp->info; + if (!ifp || !if_is_operative(ifp) || !zif || !zif->brslave_info.br_if) { + if (IS_ZEBRA_DEBUG_VXLAN) + zlog_debug( + "Ignoring remote MACIP DEL VNI %u, invalid interface state or info", + vni); + return; + } + zns = zebra_ns_lookup(NS_DEFAULT); + vxl = &zif->l2info.vxl; + + mac = zebra_evpn_mac_lookup(zevpn, macaddr); + if (ipa_len) + n = zebra_evpn_neigh_lookup(zevpn, ipaddr); + + if (n && !mac) { + zlog_warn( + "Failed to locate MAC %pEA for neigh %pIA VNI %u upon remote MACIP DEL", + macaddr, ipaddr, vni); + return; + } + + /* If the remote mac or neighbor doesn't exist there is nothing + * more to do. Otherwise, uninstall the entry and then remove it. + */ + if (!mac && !n) + return; + + zvrf = zevpn->vxlan_if->vrf->info; + + /* Ignore the delete if this mac is a gateway mac-ip */ + if (CHECK_FLAG(mac->flags, ZEBRA_MAC_LOCAL) + && CHECK_FLAG(mac->flags, ZEBRA_MAC_DEF_GW)) { + zlog_warn( + "Ignore remote MACIP DEL VNI %u MAC %pEA%s%s as MAC is already configured as gateway MAC", + vni, macaddr, + ipa_len ? " IP " : "", + ipa_len ? ipaddr2str(ipaddr, buf1, sizeof(buf1)) : ""); + return; + } + + /* Uninstall remote neighbor or MAC. 
*/ + if (n) + zebra_evpn_neigh_remote_uninstall(zevpn, zvrf, n, mac, ipaddr); + else { + /* DAD: when MAC is freeze state as remote learn event, + * remote mac-ip delete event is received will result in freeze + * entry removal, first fetch kernel for the same entry present + * as LOCAL and reachable, avoid deleting this entry instead + * use kerenel local entry to update during unfreeze time. + */ + if (zvrf->dad_freeze + && CHECK_FLAG(mac->flags, ZEBRA_MAC_DUPLICATE) + && CHECK_FLAG(mac->flags, ZEBRA_MAC_REMOTE)) { + if (IS_ZEBRA_DEBUG_VXLAN) + zlog_debug( + "%s: MAC %pEA (flags 0x%x) is remote and duplicate, read kernel for local entry", + __func__, macaddr, mac->flags); + macfdb_read_specific_mac(zns, zif->brslave_info.br_if, + macaddr, vxl->access_vlan); + } + + if (CHECK_FLAG(mac->flags, ZEBRA_MAC_LOCAL)) { + if (!ipa_len) + zebra_evpn_sync_mac_del(mac); + } else if (CHECK_FLAG(mac->flags, ZEBRA_NEIGH_REMOTE)) { + zebra_evpn_rem_mac_del(zevpn, mac); + } + } +} + +/************************** EVPN BGP config management ************************/ +void zebra_evpn_cfg_cleanup(struct hash_bucket *bucket, void *ctxt) +{ + struct zebra_evpn *zevpn = NULL; + + zevpn = (struct zebra_evpn *)bucket->data; + zevpn->advertise_gw_macip = 0; + zevpn->advertise_svi_macip = 0; + zevpn->advertise_subnet = 0; + + zebra_evpn_neigh_del_all(zevpn, 1, 0, + DEL_REMOTE_NEIGH | DEL_REMOTE_NEIGH_FROM_VTEP); + zebra_evpn_mac_del_all(zevpn, 1, 0, + DEL_REMOTE_MAC | DEL_REMOTE_MAC_FROM_VTEP); + zebra_evpn_vtep_del_all(zevpn, 1); +} diff --git a/zebra/zebra_evpn.h b/zebra/zebra_evpn.h new file mode 100644 index 0000000..2c84d23 --- /dev/null +++ b/zebra/zebra_evpn.h @@ -0,0 +1,221 @@ +/* + * Zebra EVPN Data structures and definitions + * These are "internal" to this function. + * Copyright (C) 2016, 2017 Cumulus Networks, Inc. + * Copyright (C) 2020 Volta Networks. + * + * This file is part of FRR. 
 *
 * FRR is free software; you can redistribute it and/or modify it
 * under the terms of the GNU General Public License as published by the
 * Free Software Foundation; either version 2, or (at your option) any
 * later version.
 *
 * FRR is distributed in the hope that it will be useful, but
 * WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
 * General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with FRR; see the file COPYING. If not, write to the Free
 * Software Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA
 * 02111-1307, USA.
 */

#ifndef _ZEBRA_EVPN_H
#define _ZEBRA_EVPN_H

#include <zebra.h>

#include "if.h"
#include "linklist.h"
#include "bitfield.h"

#include "zebra/zebra_l2.h"
#include "zebra/interface.h"

#ifdef __cplusplus
extern "C" {
#endif

RB_HEAD(zebra_es_evi_rb_head, zebra_evpn_es_evi);
RB_PROTOTYPE(zebra_es_evi_rb_head, zebra_evpn_es_evi, rb_node,
	     zebra_es_evi_rb_cmp);

/* Private Structure to pass callback data for hash iterator */
struct zebra_evpn_show {
	struct vty *vty;
	json_object *json;
	struct zebra_vrf *zvrf;
	bool use_json;
};

/*
 * VTEP info
 *
 * Right now, this just has each remote VTEP's IP address.
 */
struct zebra_vtep {
	/* Remote IP. */
	/* NOTE: Can only be IPv4 right now. */
	struct in_addr vtep_ip;
	/* Flood mode (one of enum vxlan_flood_control) based on the PMSI
	 * tunnel type advertised by the remote VTEP
	 */
	int flood_control;

	/* Links. */
	struct zebra_vtep *next;
	struct zebra_vtep *prev;
};

/*
 * VNI hash table
 *
 * Contains information pertaining to a VNI:
 * - the list of remote VTEPs (with this VNI)
 */
struct zebra_evpn {
	/* VNI - key */
	vni_t vni;

	/* EVPN state flags */
	uint32_t flags;
#define ZEVPN_READY_FOR_BGP (1 << 0) /* ready to be sent to BGP */

	/* Flag for advertising gw macip */
	uint8_t advertise_gw_macip;

	/* Flag for advertising svi macip */
	uint8_t advertise_svi_macip;

	/* Flag for advertising the subnet route */
	uint8_t advertise_subnet;

	/* Corresponding VxLAN interface. */
	struct interface *vxlan_if;

	/* Corresponding SVI interface. */
	struct interface *svi_if;

	/* List of remote VTEPs */
	struct zebra_vtep *vteps;

	/* Local IP */
	struct in_addr local_vtep_ip;

	/* PIM-SM MDT group for BUM flooding */
	struct in_addr mcast_grp;

	/* tenant VRF, if any */
	vrf_id_t vrf_id;

	/* List of local or remote MAC */
	struct hash *mac_table;

	/* List of local or remote neighbors (MAC+IP) */
	struct hash *neigh_table;

	/* RB tree of ES-EVIs */
	struct zebra_es_evi_rb_head es_evi_rb_tree;

	/* List of local ESs */
	struct list *local_es_evi_list;
};

/* for parsing evpn and vni contexts */
struct zebra_from_svi_param {
	struct interface *br_if;
	struct interface *svi_if;
	struct zebra_if *zif;
	uint8_t bridge_vlan_aware;
	vlanid_t vid;
};

struct interface *zvni_map_to_svi(vlanid_t vid, struct interface *br_if);

/* Resolve the SVI interface for an EVPN via its VxLAN interface's
 * access VLAN and bridge master; NULL if the VxLAN interface is
 * missing, down or not enslaved to a bridge.
 */
static inline struct interface *zevpn_map_to_svi(struct zebra_evpn *zevpn)
{
	struct interface *ifp;
	struct zebra_if *zif = NULL;
	struct zebra_l2info_vxlan zl2_info;

	ifp = zevpn->vxlan_if;
	if (!ifp)
		return NULL;
	zif = ifp->info;
	if (!zif)
		return NULL;

	/* If down or not mapped to a bridge, we're done. */
	if (!if_is_operative(ifp) || !zif->brslave_info.br_if)
		return NULL;
	zl2_info = zif->l2info.vxl;
	return zvni_map_to_svi(zl2_info.access_vlan, zif->brslave_info.br_if);
}

int advertise_gw_macip_enabled(struct zebra_evpn *zevpn);
int advertise_svi_macip_enabled(struct zebra_evpn *zevpn);
void zebra_evpn_print(struct zebra_evpn *zevpn, void **ctxt);
void zebra_evpn_print_hash(struct hash_bucket *bucket, void *ctxt[]);
void zebra_evpn_print_hash_detail(struct hash_bucket *bucket, void *data);
int zebra_evpn_add_macip_for_intf(struct interface *ifp,
				  struct zebra_evpn *zevpn);
int zebra_evpn_del_macip_for_intf(struct interface *ifp,
				  struct zebra_evpn *zevpn);
int zebra_evpn_advertise_subnet(struct zebra_evpn *zevpn, struct interface *ifp,
				int advertise);
int zebra_evpn_gw_macip_add(struct interface *ifp, struct zebra_evpn *zevpn,
			    struct ethaddr *macaddr, struct ipaddr *ip);
int zebra_evpn_gw_macip_del(struct interface *ifp, struct zebra_evpn *zevpn,
			    struct ipaddr *ip);
void zebra_evpn_gw_macip_del_for_evpn_hash(struct hash_bucket *bucket,
					   void *ctxt);
void zebra_evpn_gw_macip_add_for_evpn_hash(struct hash_bucket *bucket,
					   void *ctxt);
void zebra_evpn_svi_macip_del_for_evpn_hash(struct hash_bucket *bucket,
					    void *ctxt);
struct zebra_evpn *zebra_evpn_map_vlan(struct interface *ifp,
				       struct interface *br_if, vlanid_t vid);
struct zebra_evpn *zebra_evpn_from_svi(struct interface *ifp,
				       struct interface *br_if);
struct interface *zebra_evpn_map_to_macvlan(struct interface *br_if,
					    struct interface *svi_if);
void zebra_evpn_install_mac_hash(struct hash_bucket *bucket, void *ctxt);
void zebra_evpn_read_mac_neigh(struct zebra_evpn *zevpn, struct interface *ifp);
unsigned int zebra_evpn_hash_keymake(const void *p);
bool zebra_evpn_hash_cmp(const void *p1, const void *p2);
int zebra_evpn_list_cmp(void *p1, void *p2);
void *zebra_evpn_alloc(void *p);
struct zebra_evpn *zebra_evpn_lookup(vni_t vni);
struct zebra_evpn
*zebra_evpn_add(vni_t vni); +int zebra_evpn_del(struct zebra_evpn *zevpn); +int zebra_evpn_send_add_to_client(struct zebra_evpn *zevpn); +int zebra_evpn_send_del_to_client(struct zebra_evpn *zevpn); +struct zebra_vtep *zebra_evpn_vtep_find(struct zebra_evpn *zevpn, + struct in_addr *vtep_ip); +struct zebra_vtep *zebra_evpn_vtep_add(struct zebra_evpn *zevpn, + struct in_addr *vtep_ip, + int flood_control); +int zebra_evpn_vtep_del(struct zebra_evpn *zevpn, struct zebra_vtep *zvtep); +int zebra_evpn_vtep_del_all(struct zebra_evpn *zevpn, int uninstall); +int zebra_evpn_vtep_install(struct zebra_evpn *zevpn, struct zebra_vtep *zvtep); +int zebra_evpn_vtep_uninstall(struct zebra_evpn *zevpn, + struct in_addr *vtep_ip); +void zebra_evpn_handle_flooding_remote_vteps(struct hash_bucket *bucket, + void *zvrf); +void zebra_evpn_cleanup_all(struct hash_bucket *bucket, void *arg); +void zebra_evpn_rem_macip_add(vni_t vni, const struct ethaddr *macaddr, + uint16_t ipa_len, const struct ipaddr *ipaddr, + uint8_t flags, uint32_t seq, + struct in_addr vtep_ip, const esi_t *esi); +void zebra_evpn_rem_macip_del(vni_t vni, const struct ethaddr *macaddr, + uint16_t ipa_len, const struct ipaddr *ipaddr, + struct in_addr vtep_ip); +void zebra_evpn_cfg_cleanup(struct hash_bucket *bucket, void *ctxt); + +#ifdef __cplusplus +} +#endif + +#endif /*_ZEBRA_EVPN_H */ diff --git a/zebra/zebra_evpn_mac.c b/zebra/zebra_evpn_mac.c new file mode 100644 index 0000000..a3d2179 --- /dev/null +++ b/zebra/zebra_evpn_mac.c @@ -0,0 +1,2564 @@ +/* + * Zebra EVPN for VxLAN code + * Copyright (C) 2016, 2017 Cumulus Networks, Inc. + * + * This file is part of FRR. + * + * FRR is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License as published by the + * Free Software Foundation; either version 2, or (at your option) any + * later version. 
+ * + * FRR is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with FRR; see the file COPYING. If not, write to the Free + * Software Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA + * 02111-1307, USA. + */ + +#include <zebra.h> + +#include "hash.h" +#include "interface.h" +#include "jhash.h" +#include "memory.h" +#include "prefix.h" +#include "vlan.h" +#include "json.h" +#include "printfrr.h" + +#include "zebra/zserv.h" +#include "zebra/debug.h" +#include "zebra/zebra_router.h" +#include "zebra/zebra_errors.h" +#include "zebra/zebra_vrf.h" +#include "zebra/zebra_evpn.h" +#include "zebra/zebra_evpn_mh.h" +#include "zebra/zebra_evpn_mac.h" +#include "zebra/zebra_evpn_neigh.h" + +DEFINE_MTYPE_STATIC(ZEBRA, MAC, "EVPN MAC"); + +/* + * Return number of valid MACs in an EVPN's MAC hash table - all + * remote MACs and non-internal (auto) local MACs count. 
+ */ +uint32_t num_valid_macs(struct zebra_evpn *zevpn) +{ + unsigned int i; + uint32_t num_macs = 0; + struct hash *hash; + struct hash_bucket *hb; + struct zebra_mac *mac; + + hash = zevpn->mac_table; + if (!hash) + return num_macs; + for (i = 0; i < hash->size; i++) { + for (hb = hash->index[i]; hb; hb = hb->next) { + mac = (struct zebra_mac *)hb->data; + if (CHECK_FLAG(mac->flags, ZEBRA_MAC_REMOTE) + || CHECK_FLAG(mac->flags, ZEBRA_MAC_LOCAL) + || !CHECK_FLAG(mac->flags, ZEBRA_MAC_AUTO)) + num_macs++; + } + } + + return num_macs; +} + +uint32_t num_dup_detected_macs(struct zebra_evpn *zevpn) +{ + unsigned int i; + uint32_t num_macs = 0; + struct hash *hash; + struct hash_bucket *hb; + struct zebra_mac *mac; + + hash = zevpn->mac_table; + if (!hash) + return num_macs; + for (i = 0; i < hash->size; i++) { + for (hb = hash->index[i]; hb; hb = hb->next) { + mac = (struct zebra_mac *)hb->data; + if (CHECK_FLAG(mac->flags, ZEBRA_MAC_DUPLICATE)) + num_macs++; + } + } + + return num_macs; +} + +/* Setup mac_list against the access port. 
This is done when a mac uses
 * the ifp as destination for the first time
 */
static void zebra_evpn_mac_ifp_new(struct zebra_if *zif)
{
	if (IS_ZEBRA_DEBUG_EVPN_MH_MAC)
		zlog_debug("MAC list created for ifp %s (%u)", zif->ifp->name,
			   zif->ifp->ifindex);

	zif->mac_list = list_new();
	/* list nodes are embedded in the MAC entries (ifp_listnode), so
	 * the list must not allocate/free node memory itself.
	 */
	listset_app_node_mem(zif->mac_list);
}

/* Unlink local mac from a destination access port */
static void zebra_evpn_mac_ifp_unlink(struct zebra_mac *zmac)
{
	struct zebra_if *zif;
	struct interface *ifp = zmac->ifp;

	if (!ifp)
		return;

	if (IS_ZEBRA_DEBUG_EVPN_MH_MAC)
		zlog_debug("VNI %d MAC %pEA unlinked from ifp %s (%u)",
			   zmac->zevpn->vni,
			   &zmac->macaddr,
			   ifp->name, ifp->ifindex);

	zif = ifp->info;
	list_delete_node(zif->mac_list, &zmac->ifp_listnode);
	zmac->ifp = NULL;
}

/* Free up the mac_list if any as a part of the interface del/cleanup */
void zebra_evpn_mac_ifp_del(struct interface *ifp)
{
	struct zebra_if *zif = ifp->info;
	struct listnode *node;
	struct zebra_mac *zmac;

	if (zif->mac_list) {
		if (IS_ZEBRA_DEBUG_EVPN_MH_MAC)
			zlog_debug("MAC list deleted for ifp %s (%u)",
				   zif->ifp->name, zif->ifp->ifindex);

		/* NOTE(review): the unlink below removes the current node
		 * while ALL_LIST_ELEMENTS_RO is iterating; this relies on
		 * list_delete_node() leaving the node's 'next' pointer
		 * readable - confirm against lib/linklist.c semantics.
		 */
		for (ALL_LIST_ELEMENTS_RO(zif->mac_list, node, zmac)) {
			zebra_evpn_mac_ifp_unlink(zmac);
		}
		list_delete(&zif->mac_list);
	}
}

/* Link local mac to destination access port. This is done only if the
 * local mac is associated with a zero ESI i.e. single attach or lacp-bypass
 * bridge port member
 */
static void zebra_evpn_mac_ifp_link(struct zebra_mac *zmac,
				    struct interface *ifp)
{
	struct zebra_if *zif;

	if (!CHECK_FLAG(zmac->flags, ZEBRA_MAC_LOCAL))
		return;

	/* already linked to the destination */
	if (zmac->ifp == ifp)
		return;

	/* unlink the mac from any old destination */
	if (zmac->ifp)
		zebra_evpn_mac_ifp_unlink(zmac);

	if (!ifp)
		return;

	zif = ifp->info;
	/* the interface mac_list is created on first mac link attempt */
	if (!zif->mac_list)
		zebra_evpn_mac_ifp_new(zif);

	if (IS_ZEBRA_DEBUG_EVPN_MH_MAC)
		zlog_debug("VNI %d MAC %pEA linked to ifp %s (%u)",
			   zmac->zevpn->vni,
			   &zmac->macaddr,
			   ifp->name, ifp->ifindex);

	zmac->ifp = ifp;
	listnode_init(&zmac->ifp_listnode, zmac);
	listnode_add(zif->mac_list, &zmac->ifp_listnode);
}

/* If the mac is a local mac clear links to destination access port */
void zebra_evpn_mac_clear_fwd_info(struct zebra_mac *zmac)
{
	zebra_evpn_mac_ifp_unlink(zmac);
	memset(&zmac->fwd_info, 0, sizeof(zmac->fwd_info));
}

/*
 * Install remote MAC into the forwarding plane.
+ */ +int zebra_evpn_rem_mac_install(struct zebra_evpn *zevpn, struct zebra_mac *mac, + bool was_static) +{ + const struct zebra_if *zif, *br_zif; + const struct zebra_l2info_vxlan *vxl; + bool sticky; + enum zebra_dplane_result res; + const struct interface *br_ifp; + vlanid_t vid; + uint32_t nhg_id; + struct in_addr vtep_ip; + + zif = zevpn->vxlan_if->info; + if (!zif) + return -1; + + br_ifp = zif->brslave_info.br_if; + if (br_ifp == NULL) + return -1; + + vxl = &zif->l2info.vxl; + + sticky = !!CHECK_FLAG(mac->flags, + (ZEBRA_MAC_STICKY | ZEBRA_MAC_REMOTE_DEF_GW)); + + /* If nexthop group for the FDB entry is inactive (not programmed in + * the dataplane) the MAC entry cannot be installed + */ + if (mac->es) { + if (!(mac->es->flags & ZEBRA_EVPNES_NHG_ACTIVE)) + return -1; + nhg_id = mac->es->nhg_id; + vtep_ip.s_addr = 0; + } else { + nhg_id = 0; + vtep_ip = mac->fwd_info.r_vtep_ip; + } + + br_zif = (const struct zebra_if *)(br_ifp->info); + + if (IS_ZEBRA_IF_BRIDGE_VLAN_AWARE(br_zif)) + vid = vxl->access_vlan; + else + vid = 0; + + res = dplane_rem_mac_add(zevpn->vxlan_if, br_ifp, vid, &mac->macaddr, + vtep_ip, sticky, nhg_id, was_static); + if (res != ZEBRA_DPLANE_REQUEST_FAILURE) + return 0; + else + return -1; +} + +/* + * Uninstall remote MAC from the forwarding plane. 
 */
int zebra_evpn_rem_mac_uninstall(struct zebra_evpn *zevpn,
				 struct zebra_mac *mac, bool force)
{
	const struct zebra_if *zif, *br_zif;
	const struct zebra_l2info_vxlan *vxl;
	struct in_addr vtep_ip;
	const struct interface *ifp, *br_ifp;
	vlanid_t vid;
	enum zebra_dplane_result res;

	/* If the MAC was not installed there is no need to uninstall it */
	if (!force && mac->es && !(mac->es->flags & ZEBRA_EVPNES_NHG_ACTIVE))
		return -1;

	if (!zevpn->vxlan_if) {
		if (IS_ZEBRA_DEBUG_VXLAN)
			zlog_debug(
				"VNI %u hash %p couldn't be uninstalled - no intf",
				zevpn->vni, zevpn);
		return -1;
	}

	zif = zevpn->vxlan_if->info;
	if (!zif)
		return -1;

	br_ifp = zif->brslave_info.br_if;
	if (br_ifp == NULL)
		return -1;

	vxl = &zif->l2info.vxl;

	br_zif = (const struct zebra_if *)br_ifp->info;

	/* VID only matters on a VLAN-aware bridge. */
	if (IS_ZEBRA_IF_BRIDGE_VLAN_AWARE(br_zif))
		vid = vxl->access_vlan;
	else
		vid = 0;

	ifp = zevpn->vxlan_if;
	vtep_ip = mac->fwd_info.r_vtep_ip;

	res = dplane_rem_mac_del(ifp, br_ifp, vid, &mac->macaddr, vtep_ip);
	if (res != ZEBRA_DPLANE_REQUEST_FAILURE)
		return 0;
	else
		return -1;
}

/*
 * Decrement neighbor refcount of MAC; uninstall and free it if
 * appropriate.
 */
void zebra_evpn_deref_ip2mac(struct zebra_evpn *zevpn, struct zebra_mac *mac)
{
	/* Only auto (internal) MACs are garbage-collected here. */
	if (!CHECK_FLAG(mac->flags, ZEBRA_MAC_AUTO))
		return;

	/* If all remote neighbors referencing a remote MAC go away,
	 * we need to uninstall the MAC.
	 */
	if (CHECK_FLAG(mac->flags, ZEBRA_MAC_REMOTE)
	    && remote_neigh_count(mac) == 0) {
		zebra_evpn_rem_mac_uninstall(zevpn, mac, false /*force*/);
		zebra_evpn_es_mac_deref_entry(mac);
		UNSET_FLAG(mac->flags, ZEBRA_MAC_REMOTE);
	}

	/* If no references, delete the MAC. */
	if (!zebra_evpn_mac_in_use(mac))
		zebra_evpn_mac_del(zevpn, mac);
}

/* Resolve the access interface and VLAN a local MAC points at: via the
 * Ethernet Segment when the MAC is ES-attached, otherwise via the
 * recorded local forwarding info.
 */
static void zebra_evpn_mac_get_access_info(struct zebra_mac *mac,
					   struct interface **p_ifp,
					   vlanid_t *vid)
{
	/* if the mac is associated with an ES we must get the access
	 * info from the ES
	 */
	if (mac->es) {
		struct zebra_if *zif;

		/* get the access port from the es */
		*p_ifp = mac->es->zif ? mac->es->zif->ifp : NULL;
		/* get the vlan from the EVPN */
		if (mac->zevpn->vxlan_if) {
			zif = mac->zevpn->vxlan_if->info;
			*vid = zif->l2info.vxl.access_vlan;
		} else {
			*vid = 0;
		}
	} else {
		struct zebra_ns *zns;

		*vid = mac->fwd_info.local.vid;
		zns = zebra_ns_lookup(mac->fwd_info.local.ns_id);
		*p_ifp = if_lookup_by_index_per_ns(zns,
						   mac->fwd_info.local.ifindex);
	}
}

#define MAC_BUF_SIZE 256
/* Render the MAC's flag bits as a human-readable string into 'buf'
 * (debug logging helper); returns 'buf'.
 */
static char *zebra_evpn_zebra_mac_flag_dump(struct zebra_mac *mac, char *buf,
					    size_t len)
{
	if (mac->flags == 0) {
		snprintfrr(buf, len, "None ");
		return buf;
	}

	snprintfrr(
		buf, len, "%s%s%s%s%s%s%s%s%s%s%s%s",
		CHECK_FLAG(mac->flags, ZEBRA_MAC_LOCAL) ? "LOC " : "",
		CHECK_FLAG(mac->flags, ZEBRA_MAC_REMOTE) ? "REM " : "",
		CHECK_FLAG(mac->flags, ZEBRA_MAC_AUTO) ? "AUTO " : "",
		CHECK_FLAG(mac->flags, ZEBRA_MAC_STICKY) ? "STICKY " : "",
		CHECK_FLAG(mac->flags, ZEBRA_MAC_REMOTE_RMAC) ? "REM Router "
							      : "",
		CHECK_FLAG(mac->flags, ZEBRA_MAC_DEF_GW) ? "Default GW " : "",
		CHECK_FLAG(mac->flags, ZEBRA_MAC_REMOTE_DEF_GW) ? "REM DEF GW "
								: "",
		CHECK_FLAG(mac->flags, ZEBRA_MAC_DUPLICATE) ? "DUP " : "",
		CHECK_FLAG(mac->flags, ZEBRA_MAC_FPM_SENT) ? "FPM " : "",
		CHECK_FLAG(mac->flags, ZEBRA_MAC_ES_PEER_ACTIVE)
			? "PEER Active "
			: "",
		CHECK_FLAG(mac->flags, ZEBRA_MAC_ES_PEER_PROXY) ? "PROXY " : "",
		CHECK_FLAG(mac->flags, ZEBRA_MAC_LOCAL_INACTIVE)
			?
"LOC Inactive " + : ""); + return buf; +} + +static void zebra_evpn_dad_mac_auto_recovery_exp(struct thread *t) +{ + struct zebra_vrf *zvrf = NULL; + struct zebra_mac *mac = NULL; + struct zebra_evpn *zevpn = NULL; + struct listnode *node = NULL; + struct zebra_neigh *nbr = NULL; + + mac = THREAD_ARG(t); + + /* since this is asynchronous we need sanity checks*/ + zvrf = vrf_info_lookup(mac->zevpn->vrf_id); + if (!zvrf) + return; + + zevpn = zebra_evpn_lookup(mac->zevpn->vni); + if (!zevpn) + return; + + mac = zebra_evpn_mac_lookup(zevpn, &mac->macaddr); + if (!mac) + return; + + if (IS_ZEBRA_DEBUG_VXLAN) { + char mac_buf[MAC_BUF_SIZE]; + + zlog_debug( + "%s: duplicate addr mac %pEA flags %slearn count %u host count %u auto recovery expired", + __func__, &mac->macaddr, + zebra_evpn_zebra_mac_flag_dump(mac, mac_buf, + sizeof(mac_buf)), + mac->dad_count, listcount(mac->neigh_list)); + } + + /* Remove all IPs as duplicate associcated with this MAC */ + for (ALL_LIST_ELEMENTS_RO(mac->neigh_list, node, nbr)) { + if (CHECK_FLAG(nbr->flags, ZEBRA_NEIGH_DUPLICATE)) { + if (CHECK_FLAG(nbr->flags, ZEBRA_NEIGH_LOCAL)) + ZEBRA_NEIGH_SET_INACTIVE(nbr); + else if (CHECK_FLAG(nbr->flags, ZEBRA_NEIGH_REMOTE)) + zebra_evpn_rem_neigh_install( + zevpn, nbr, false /*was_static*/); + } + + UNSET_FLAG(nbr->flags, ZEBRA_NEIGH_DUPLICATE); + nbr->dad_count = 0; + nbr->detect_start_time.tv_sec = 0; + nbr->dad_dup_detect_time = 0; + } + + UNSET_FLAG(mac->flags, ZEBRA_MAC_DUPLICATE); + mac->dad_count = 0; + mac->detect_start_time.tv_sec = 0; + mac->detect_start_time.tv_usec = 0; + mac->dad_dup_detect_time = 0; + mac->dad_mac_auto_recovery_timer = NULL; + + if (CHECK_FLAG(mac->flags, ZEBRA_MAC_LOCAL)) { + /* Inform to BGP */ + if (zebra_evpn_mac_send_add_to_client(zevpn->vni, &mac->macaddr, + mac->flags, mac->loc_seq, + mac->es)) + return; + + /* Process all neighbors associated with this MAC. 
*/ + zebra_evpn_process_neigh_on_local_mac_change(zevpn, mac, 0, + 0 /*es_change*/); + + } else if (CHECK_FLAG(mac->flags, ZEBRA_MAC_REMOTE)) { + zebra_evpn_process_neigh_on_remote_mac_add(zevpn, mac); + + /* Install the entry. */ + zebra_evpn_rem_mac_install(zevpn, mac, false /* was_static */); + } +} + +static void zebra_evpn_dup_addr_detect_for_mac(struct zebra_vrf *zvrf, + struct zebra_mac *mac, + struct in_addr vtep_ip, + bool do_dad, bool *is_dup_detect, + bool is_local) +{ + struct zebra_neigh *nbr; + struct listnode *node = NULL; + struct timeval elapsed = {0, 0}; + bool reset_params = false; + + if (!(zebra_evpn_do_dup_addr_detect(zvrf) && do_dad)) + return; + + /* MAC is detected as duplicate, + * Local MAC event -> hold on advertising to BGP. + * Remote MAC event -> hold on installing it. + */ + if (CHECK_FLAG(mac->flags, ZEBRA_MAC_DUPLICATE)) { + if (IS_ZEBRA_DEBUG_VXLAN) { + char mac_buf[MAC_BUF_SIZE]; + + zlog_debug( + "%s: duplicate addr MAC %pEA flags %sskip update to client, learn count %u recover time %u", + __func__, &mac->macaddr, + zebra_evpn_zebra_mac_flag_dump(mac, mac_buf, + sizeof(mac_buf)), + mac->dad_count, zvrf->dad_freeze_time); + } + /* For duplicate MAC do not update + * client but update neigh due to + * this MAC update. + */ + if (zvrf->dad_freeze) + *is_dup_detect = true; + + return; + } + + /* Check if detection time (M-secs) expired. + * Reset learn count and detection start time. + */ + monotime_since(&mac->detect_start_time, &elapsed); + reset_params = (elapsed.tv_sec > zvrf->dad_time); + if (is_local && !reset_params) { + /* RFC-7432: A PE/VTEP that detects a MAC mobility + * event via LOCAL learning starts an M-second timer. + * + * NOTE: This is the START of the probe with count is + * 0 during LOCAL learn event. 
+ * (mac->dad_count == 0 || elapsed.tv_sec >= zvrf->dad_time) + */ + reset_params = !mac->dad_count; + } + + if (reset_params) { + if (IS_ZEBRA_DEBUG_VXLAN) { + char mac_buf[MAC_BUF_SIZE]; + + zlog_debug( + "%s: duplicate addr MAC %pEA flags %sdetection time passed, reset learn count %u", + __func__, &mac->macaddr, + zebra_evpn_zebra_mac_flag_dump(mac, mac_buf, + sizeof(mac_buf)), + mac->dad_count); + } + + mac->dad_count = 0; + /* Start dup. addr detection (DAD) start time, + * ONLY during LOCAL learn. + */ + if (is_local) + monotime(&mac->detect_start_time); + + } else if (!is_local) { + /* For REMOTE MAC, increment detection count + * ONLY while in probe window, once window passed, + * next local learn event should trigger DAD. + */ + mac->dad_count++; + } + + /* For LOCAL MAC learn event, once count is reset above via either + * initial/start detection time or passed the probe time, the count + * needs to be incremented. + */ + if (is_local) + mac->dad_count++; + + if (mac->dad_count >= zvrf->dad_max_moves) { + flog_warn(EC_ZEBRA_DUP_MAC_DETECTED, + "VNI %u: MAC %pEA detected as duplicate during %s VTEP %pI4", + mac->zevpn->vni, &mac->macaddr, + is_local ? "local update, last" : + "remote update, from", &vtep_ip); + + SET_FLAG(mac->flags, ZEBRA_MAC_DUPLICATE); + + /* Capture Duplicate detection time */ + mac->dad_dup_detect_time = monotime(NULL); + + /* Mark all IPs/Neighs as duplicate + * associcated with this MAC + */ + for (ALL_LIST_ELEMENTS_RO(mac->neigh_list, node, nbr)) { + + /* Ony Mark IPs which are Local */ + if (!CHECK_FLAG(nbr->flags, ZEBRA_NEIGH_LOCAL)) + continue; + + SET_FLAG(nbr->flags, ZEBRA_NEIGH_DUPLICATE); + + nbr->dad_dup_detect_time = monotime(NULL); + + flog_warn(EC_ZEBRA_DUP_IP_INHERIT_DETECTED, + "VNI %u: MAC %pEA IP %pIA detected as duplicate during %s update, inherit duplicate from MAC", + mac->zevpn->vni, &mac->macaddr, &nbr->ip, + is_local ? 
"local" : "remote"); + } + + /* Start auto recovery timer for this MAC */ + THREAD_OFF(mac->dad_mac_auto_recovery_timer); + if (zvrf->dad_freeze && zvrf->dad_freeze_time) { + if (IS_ZEBRA_DEBUG_VXLAN) { + char mac_buf[MAC_BUF_SIZE]; + + zlog_debug( + "%s: duplicate addr MAC %pEA flags %sauto recovery time %u start", + __func__, &mac->macaddr, + zebra_evpn_zebra_mac_flag_dump( + mac, mac_buf, sizeof(mac_buf)), + zvrf->dad_freeze_time); + } + + thread_add_timer(zrouter.master, + zebra_evpn_dad_mac_auto_recovery_exp, + mac, zvrf->dad_freeze_time, + &mac->dad_mac_auto_recovery_timer); + } + + /* In case of local update, do not inform to client (BGPd), + * upd_neigh for neigh sequence change. + */ + if (zvrf->dad_freeze) + *is_dup_detect = true; + } +} + +/* + * Print a specific MAC entry. + */ +void zebra_evpn_print_mac(struct zebra_mac *mac, void *ctxt, json_object *json) +{ + struct vty *vty; + struct zebra_neigh *n = NULL; + struct listnode *node = NULL; + char buf1[ETHER_ADDR_STRLEN]; + char buf2[INET6_ADDRSTRLEN]; + struct zebra_vrf *zvrf; + struct timeval detect_start_time = {0, 0}; + char timebuf[MONOTIME_STRLEN]; + char thread_buf[THREAD_TIMER_STRLEN]; + time_t uptime; + char up_str[MONOTIME_STRLEN]; + + zvrf = zebra_vrf_get_evpn(); + vty = (struct vty *)ctxt; + prefix_mac2str(&mac->macaddr, buf1, sizeof(buf1)); + + uptime = monotime(NULL); + uptime -= mac->uptime; + + frrtime_to_interval(uptime, up_str, sizeof(up_str)); + + if (json) { + json_object *json_mac = json_object_new_object(); + + if (CHECK_FLAG(mac->flags, ZEBRA_MAC_LOCAL)) { + struct interface *ifp; + vlanid_t vid; + + zebra_evpn_mac_get_access_info(mac, &ifp, &vid); + json_object_string_add(json_mac, "type", "local"); + if (ifp) { + json_object_string_add(json_mac, "intf", + ifp->name); + json_object_int_add(json_mac, "ifindex", + ifp->ifindex); + } + if (vid) + json_object_int_add(json_mac, "vlan", vid); + } else if (CHECK_FLAG(mac->flags, ZEBRA_MAC_REMOTE)) { + json_object_string_add(json_mac, 
"type", "remote"); + json_object_string_addf(json_mac, "remoteVtep", "%pI4", + &mac->fwd_info.r_vtep_ip); + } else if (CHECK_FLAG(mac->flags, ZEBRA_MAC_AUTO)) + json_object_string_add(json_mac, "type", "auto"); + + if (CHECK_FLAG(mac->flags, ZEBRA_MAC_STICKY)) + json_object_boolean_true_add(json_mac, "stickyMac"); + + if (CHECK_FLAG(mac->flags, ZEBRA_MAC_SVI)) + json_object_boolean_true_add(json_mac, "sviMac"); + + if (CHECK_FLAG(mac->flags, ZEBRA_MAC_DEF_GW)) + json_object_boolean_true_add(json_mac, + "defaultGateway"); + + if (CHECK_FLAG(mac->flags, ZEBRA_MAC_REMOTE_DEF_GW)) + json_object_boolean_true_add(json_mac, + "remoteGatewayMac"); + + json_object_string_add(json_mac, "uptime", up_str); + json_object_int_add(json_mac, "localSequence", mac->loc_seq); + json_object_int_add(json_mac, "remoteSequence", mac->rem_seq); + + json_object_int_add(json_mac, "detectionCount", mac->dad_count); + if (CHECK_FLAG(mac->flags, ZEBRA_MAC_DUPLICATE)) + json_object_boolean_true_add(json_mac, "isDuplicate"); + else + json_object_boolean_false_add(json_mac, "isDuplicate"); + + json_object_int_add(json_mac, "syncNeighCount", + mac->sync_neigh_cnt); + if (CHECK_FLAG(mac->flags, ZEBRA_MAC_LOCAL_INACTIVE)) + json_object_boolean_true_add(json_mac, "localInactive"); + if (CHECK_FLAG(mac->flags, ZEBRA_MAC_ES_PEER_PROXY)) + json_object_boolean_true_add(json_mac, "peerProxy"); + if (CHECK_FLAG(mac->flags, ZEBRA_MAC_ES_PEER_ACTIVE)) + json_object_boolean_true_add(json_mac, "peerActive"); + if (mac->hold_timer) + json_object_string_add( + json_mac, "peerActiveHold", + thread_timer_to_hhmmss(thread_buf, + sizeof(thread_buf), + mac->hold_timer)); + if (mac->es) + json_object_string_add(json_mac, "esi", + mac->es->esi_str); + /* print all the associated neigh */ + if (!listcount(mac->neigh_list)) + json_object_string_add(json_mac, "neighbors", "none"); + else { + json_object *json_active_nbrs = json_object_new_array(); + json_object *json_inactive_nbrs = + json_object_new_array(); + 
json_object *json_nbrs = json_object_new_object(); + + for (ALL_LIST_ELEMENTS_RO(mac->neigh_list, node, n)) { + if (IS_ZEBRA_NEIGH_ACTIVE(n)) + json_object_array_add( + json_active_nbrs, + json_object_new_string( + ipaddr2str( + &n->ip, buf2, + sizeof(buf2)))); + else + json_object_array_add( + json_inactive_nbrs, + json_object_new_string( + ipaddr2str( + &n->ip, buf2, + sizeof(buf2)))); + } + + json_object_object_add(json_nbrs, "active", + json_active_nbrs); + json_object_object_add(json_nbrs, "inactive", + json_inactive_nbrs); + json_object_object_add(json_mac, "neighbors", + json_nbrs); + } + + json_object_object_add(json, buf1, json_mac); + } else { + vty_out(vty, "MAC: %s\n", buf1); + + if (CHECK_FLAG(mac->flags, ZEBRA_MAC_LOCAL)) { + struct interface *ifp; + vlanid_t vid; + + zebra_evpn_mac_get_access_info(mac, &ifp, &vid); + + if (mac->es) + vty_out(vty, " ESI: %s\n", mac->es->esi_str); + + if (ifp) + vty_out(vty, " Intf: %s(%u)", ifp->name, + ifp->ifindex); + else + vty_out(vty, " Intf: -"); + vty_out(vty, " VLAN: %u", vid); + } else if (CHECK_FLAG(mac->flags, ZEBRA_MAC_REMOTE)) { + if (mac->es) + vty_out(vty, " Remote ES: %s", + mac->es->esi_str); + else + vty_out(vty, " Remote VTEP: %pI4", + &mac->fwd_info.r_vtep_ip); + } else if (CHECK_FLAG(mac->flags, ZEBRA_MAC_AUTO)) { + vty_out(vty, " Auto Mac "); + } + + if (CHECK_FLAG(mac->flags, ZEBRA_MAC_STICKY)) + vty_out(vty, " Sticky Mac "); + + if (CHECK_FLAG(mac->flags, ZEBRA_MAC_SVI)) + vty_out(vty, " SVI-Mac "); + + if (CHECK_FLAG(mac->flags, ZEBRA_MAC_DEF_GW)) + vty_out(vty, " Default-gateway Mac "); + + if (CHECK_FLAG(mac->flags, ZEBRA_MAC_REMOTE_DEF_GW)) + vty_out(vty, " Remote-gateway Mac "); + + vty_out(vty, "\n"); + vty_out(vty, " Sync-info: neigh#: %u", mac->sync_neigh_cnt); + if (CHECK_FLAG(mac->flags, ZEBRA_MAC_LOCAL_INACTIVE)) + vty_out(vty, " local-inactive"); + if (CHECK_FLAG(mac->flags, ZEBRA_MAC_ES_PEER_PROXY)) + vty_out(vty, " peer-proxy"); + if (CHECK_FLAG(mac->flags, 
ZEBRA_MAC_ES_PEER_ACTIVE)) + vty_out(vty, " peer-active"); + if (mac->hold_timer) + vty_out(vty, " (ht: %s)", + thread_timer_to_hhmmss(thread_buf, + sizeof(thread_buf), + mac->hold_timer)); + vty_out(vty, "\n"); + vty_out(vty, " Local Seq: %u Remote Seq: %u\n", mac->loc_seq, + mac->rem_seq); + vty_out(vty, " Uptime: %s\n", up_str); + + if (CHECK_FLAG(mac->flags, ZEBRA_MAC_DUPLICATE)) { + vty_out(vty, " Duplicate, detected at %s", + time_to_string(mac->dad_dup_detect_time, + timebuf)); + } else if (mac->dad_count) { + monotime_since(&mac->detect_start_time, + &detect_start_time); + if (detect_start_time.tv_sec <= zvrf->dad_time) { + time_to_string(mac->detect_start_time.tv_sec, + timebuf); + vty_out(vty, + " Duplicate detection started at %s, detection count %u\n", + timebuf, mac->dad_count); + } + } + + /* print all the associated neigh */ + vty_out(vty, " Neighbors:\n"); + if (!listcount(mac->neigh_list)) + vty_out(vty, " No Neighbors\n"); + else { + for (ALL_LIST_ELEMENTS_RO(mac->neigh_list, node, n)) { + vty_out(vty, " %s %s\n", + ipaddr2str(&n->ip, buf2, sizeof(buf2)), + (IS_ZEBRA_NEIGH_ACTIVE(n) + ? "Active" + : "Inactive")); + } + } + + vty_out(vty, "\n"); + } +} + +static char *zebra_evpn_print_mac_flags(struct zebra_mac *mac, char *flags_buf, + size_t flags_buf_sz) +{ + snprintf(flags_buf, flags_buf_sz, "%s%s%s%s", + mac->sync_neigh_cnt ? "N" : "", + (mac->flags & ZEBRA_MAC_ES_PEER_ACTIVE) ? "P" : "", + (mac->flags & ZEBRA_MAC_ES_PEER_PROXY) ? "X" : "", + (mac->flags & ZEBRA_MAC_LOCAL_INACTIVE) ? "I" : ""); + + return flags_buf; +} + +/* + * Print MAC hash entry - called for display of all MACs. 
+ */ +void zebra_evpn_print_mac_hash(struct hash_bucket *bucket, void *ctxt) +{ + struct vty *vty; + json_object *json_mac_hdr = NULL, *json_mac = NULL; + struct zebra_mac *mac; + char buf1[ETHER_ADDR_STRLEN]; + char addr_buf[PREFIX_STRLEN]; + struct mac_walk_ctx *wctx = ctxt; + char flags_buf[6]; + + vty = wctx->vty; + json_mac_hdr = wctx->json; + mac = (struct zebra_mac *)bucket->data; + + prefix_mac2str(&mac->macaddr, buf1, sizeof(buf1)); + + if (json_mac_hdr) + json_mac = json_object_new_object(); + + if (CHECK_FLAG(mac->flags, ZEBRA_MAC_LOCAL)) { + struct interface *ifp; + vlanid_t vid; + + if (wctx->flags & SHOW_REMOTE_MAC_FROM_VTEP) + return; + + zebra_evpn_mac_get_access_info(mac, &ifp, &vid); + if (json_mac_hdr == NULL) { + vty_out(vty, "%-17s %-6s %-5s %-30s", buf1, "local", + zebra_evpn_print_mac_flags(mac, flags_buf, + sizeof(flags_buf)), + ifp ? ifp->name : "-"); + } else { + json_object_string_add(json_mac, "type", "local"); + if (ifp) + json_object_string_add(json_mac, "intf", + ifp->name); + } + if (vid) { + if (json_mac_hdr == NULL) + vty_out(vty, " %-5u", vid); + else + json_object_int_add(json_mac, "vlan", vid); + } else /* No vid? 
fill out the space */ + if (json_mac_hdr == NULL) + vty_out(vty, " %-5s", ""); + if (json_mac_hdr == NULL) { + vty_out(vty, " %u/%u", mac->loc_seq, mac->rem_seq); + vty_out(vty, "\n"); + } else { + json_object_int_add(json_mac, "localSequence", + mac->loc_seq); + json_object_int_add(json_mac, "remoteSequence", + mac->rem_seq); + json_object_int_add(json_mac, "detectionCount", + mac->dad_count); + if (CHECK_FLAG(mac->flags, ZEBRA_MAC_DUPLICATE)) + json_object_boolean_true_add(json_mac, + "isDuplicate"); + else + json_object_boolean_false_add(json_mac, + "isDuplicate"); + json_object_object_add(json_mac_hdr, buf1, json_mac); + } + + wctx->count++; + + } else if (CHECK_FLAG(mac->flags, ZEBRA_MAC_REMOTE)) { + + if ((wctx->flags & SHOW_REMOTE_MAC_FROM_VTEP) + && !IPV4_ADDR_SAME(&mac->fwd_info.r_vtep_ip, + &wctx->r_vtep_ip)) + return; + + if (json_mac_hdr == NULL) { + if ((wctx->flags & SHOW_REMOTE_MAC_FROM_VTEP) + && (wctx->count == 0)) { + vty_out(vty, "\nVNI %u\n\n", wctx->zevpn->vni); + vty_out(vty, "%-17s %-6s %-5s%-30s %-5s %s\n", + "MAC", "Type", "Flags", + "Intf/Remote ES/VTEP", "VLAN", + "Seq #'s"); + } + if (mac->es == NULL) + inet_ntop(AF_INET, &mac->fwd_info.r_vtep_ip, + addr_buf, sizeof(addr_buf)); + + vty_out(vty, "%-17s %-6s %-5s %-30s %-5s %u/%u\n", buf1, + "remote", + zebra_evpn_print_mac_flags(mac, flags_buf, + sizeof(flags_buf)), + mac->es ? 
mac->es->esi_str : addr_buf, + "", mac->loc_seq, mac->rem_seq); + } else { + json_object_string_add(json_mac, "type", "remote"); + json_object_string_addf(json_mac, "remoteVtep", "%pI4", + &mac->fwd_info.r_vtep_ip); + json_object_object_add(json_mac_hdr, buf1, json_mac); + json_object_int_add(json_mac, "localSequence", + mac->loc_seq); + json_object_int_add(json_mac, "remoteSequence", + mac->rem_seq); + json_object_int_add(json_mac, "detectionCount", + mac->dad_count); + if (CHECK_FLAG(mac->flags, ZEBRA_MAC_DUPLICATE)) + json_object_boolean_true_add(json_mac, + "isDuplicate"); + else + json_object_boolean_false_add(json_mac, + "isDuplicate"); + } + + wctx->count++; + } +} + +/* + * Print MAC hash entry in detail - called for display of all MACs. + */ +void zebra_evpn_print_mac_hash_detail(struct hash_bucket *bucket, void *ctxt) +{ + struct vty *vty; + json_object *json_mac_hdr = NULL; + struct zebra_mac *mac; + struct mac_walk_ctx *wctx = ctxt; + char buf1[ETHER_ADDR_STRLEN]; + + vty = wctx->vty; + json_mac_hdr = wctx->json; + mac = (struct zebra_mac *)bucket->data; + if (!mac) + return; + + wctx->count++; + prefix_mac2str(&mac->macaddr, buf1, sizeof(buf1)); + + zebra_evpn_print_mac(mac, vty, json_mac_hdr); +} + +/* + * Inform BGP about local MACIP. + */ +int zebra_evpn_macip_send_msg_to_client(vni_t vni, + const struct ethaddr *macaddr, + const struct ipaddr *ip, uint8_t flags, + uint32_t seq, int state, + struct zebra_evpn_es *es, uint16_t cmd) +{ + int ipa_len; + struct zserv *client = NULL; + struct stream *s = NULL; + esi_t *esi = es ? &es->esi : zero_esi; + + client = zserv_find_client(ZEBRA_ROUTE_BGP, 0); + /* BGP may not be running. 
*/ + if (!client) + return 0; + + s = stream_new(ZEBRA_MAX_PACKET_SIZ); + + zclient_create_header(s, cmd, zebra_vrf_get_evpn_id()); + stream_putl(s, vni); + stream_put(s, macaddr->octet, ETH_ALEN); + if (ip) { + ipa_len = 0; + if (IS_IPADDR_V4(ip)) + ipa_len = IPV4_MAX_BYTELEN; + else if (IS_IPADDR_V6(ip)) + ipa_len = IPV6_MAX_BYTELEN; + + stream_putl(s, ipa_len); /* IP address length */ + if (ipa_len) + stream_put(s, &ip->ip.addr, ipa_len); /* IP address */ + } else + stream_putl(s, 0); /* Just MAC. */ + + if (cmd == ZEBRA_MACIP_ADD) { + stream_putc(s, flags); /* sticky mac/gateway mac */ + stream_putl(s, seq); /* sequence number */ + stream_put(s, esi, sizeof(esi_t)); + } else { + stream_putl(s, state); /* state - active/inactive */ + } + + + /* Write packet size. */ + stream_putw_at(s, 0, stream_get_endp(s)); + + if (IS_ZEBRA_DEBUG_VXLAN) { + char flag_buf[MACIP_BUF_SIZE]; + + zlog_debug( + "Send MACIP %s f %s state %u MAC %pEA IP %pIA seq %u L2-VNI %u ESI %s to %s", + (cmd == ZEBRA_MACIP_ADD) ? "Add" : "Del", + zclient_evpn_dump_macip_flags(flags, flag_buf, + sizeof(flag_buf)), + state, macaddr, ip, seq, vni, es ? es->esi_str : "-", + zebra_route_string(client->proto)); + } + + if (cmd == ZEBRA_MACIP_ADD) + client->macipadd_cnt++; + else + client->macipdel_cnt++; + + return zserv_send_message(client, s); +} + +static unsigned int mac_hash_keymake(const void *p) +{ + const struct zebra_mac *pmac = p; + const void *pnt = (void *)pmac->macaddr.octet; + + return jhash(pnt, ETH_ALEN, 0xa5a5a55a); +} + +/* + * Compare two MAC addresses. + */ +static bool mac_cmp(const void *p1, const void *p2) +{ + const struct zebra_mac *pmac1 = p1; + const struct zebra_mac *pmac2 = p2; + + if (pmac1 == NULL && pmac2 == NULL) + return true; + + if (pmac1 == NULL || pmac2 == NULL) + return false; + + return (memcmp(pmac1->macaddr.octet, pmac2->macaddr.octet, ETH_ALEN) + == 0); +} + +/* + * Callback to allocate MAC hash entry. 
+ */ +static void *zebra_evpn_mac_alloc(void *p) +{ + const struct zebra_mac *tmp_mac = p; + struct zebra_mac *mac; + + mac = XCALLOC(MTYPE_MAC, sizeof(struct zebra_mac)); + *mac = *tmp_mac; + + return ((void *)mac); +} + +/* + * Add MAC entry. + */ +struct zebra_mac *zebra_evpn_mac_add(struct zebra_evpn *zevpn, + const struct ethaddr *macaddr) +{ + struct zebra_mac tmp_mac; + struct zebra_mac *mac = NULL; + + memset(&tmp_mac, 0, sizeof(tmp_mac)); + memcpy(&tmp_mac.macaddr, macaddr, ETH_ALEN); + mac = hash_get(zevpn->mac_table, &tmp_mac, zebra_evpn_mac_alloc); + + mac->zevpn = zevpn; + mac->dad_mac_auto_recovery_timer = NULL; + + mac->neigh_list = list_new(); + mac->neigh_list->cmp = neigh_list_cmp; + + mac->uptime = monotime(NULL); + if (IS_ZEBRA_DEBUG_VXLAN || IS_ZEBRA_DEBUG_EVPN_MH_MAC) { + char mac_buf[MAC_BUF_SIZE]; + + zlog_debug("%s: MAC %pEA flags %s", __func__, + &mac->macaddr, + zebra_evpn_zebra_mac_flag_dump(mac, mac_buf, + sizeof(mac_buf))); + } + return mac; +} + +/* + * Delete MAC entry. + */ +int zebra_evpn_mac_del(struct zebra_evpn *zevpn, struct zebra_mac *mac) +{ + struct zebra_mac *tmp_mac; + + if (IS_ZEBRA_DEBUG_VXLAN || IS_ZEBRA_DEBUG_EVPN_MH_MAC) { + char mac_buf[MAC_BUF_SIZE]; + + zlog_debug("%s: MAC %pEA flags %s", __func__, + &mac->macaddr, + zebra_evpn_zebra_mac_flag_dump(mac, mac_buf, + sizeof(mac_buf))); + } + + /* force de-ref any ES entry linked to the MAC */ + zebra_evpn_es_mac_deref_entry(mac); + + /* remove links to the destination access port */ + zebra_evpn_mac_clear_fwd_info(mac); + + /* Cancel proxy hold timer */ + zebra_evpn_mac_stop_hold_timer(mac); + + /* Cancel auto recovery */ + THREAD_OFF(mac->dad_mac_auto_recovery_timer); + + /* If the MAC is freed before the neigh we will end up + * with a stale pointer against the neigh. + * The situation can arise when a MAC is in remote state + * and its associated neigh is local state. + * zebra_evpn_cfg_cleanup() cleans up remote neighs and MACs. 
+ * Instead of deleting remote MAC, if its neigh list is non-empty + * (associated to local neighs), mark the MAC as AUTO. + */ + if (!list_isempty(mac->neigh_list)) { + if (IS_ZEBRA_DEBUG_VXLAN) + zlog_debug( + "MAC %pEA (flags 0x%x vni %u) has non-empty neigh list " + "count %u, mark MAC as AUTO", + &mac->macaddr, mac->flags, zevpn->vni, + listcount(mac->neigh_list)); + + SET_FLAG(mac->flags, ZEBRA_MAC_AUTO); + return 0; + } + + list_delete(&mac->neigh_list); + + /* Free the VNI hash entry and allocated memory. */ + tmp_mac = hash_release(zevpn->mac_table, mac); + XFREE(MTYPE_MAC, tmp_mac); + + return 0; +} + +static bool zebra_evpn_check_mac_del_from_db(struct mac_walk_ctx *wctx, + struct zebra_mac *mac) +{ + if ((wctx->flags & DEL_LOCAL_MAC) && (mac->flags & ZEBRA_MAC_LOCAL)) + return true; + else if ((wctx->flags & DEL_REMOTE_MAC) + && (mac->flags & ZEBRA_MAC_REMOTE)) + return true; + else if ((wctx->flags & DEL_REMOTE_MAC_FROM_VTEP) + && (mac->flags & ZEBRA_MAC_REMOTE) + && IPV4_ADDR_SAME(&mac->fwd_info.r_vtep_ip, &wctx->r_vtep_ip)) + return true; + else if ((wctx->flags & DEL_LOCAL_MAC) && (mac->flags & ZEBRA_MAC_AUTO) + && !listcount(mac->neigh_list)) { + if (IS_ZEBRA_DEBUG_VXLAN) { + char mac_buf[MAC_BUF_SIZE]; + + zlog_debug( + "%s: Del MAC %pEA flags %s", __func__, + &mac->macaddr, + zebra_evpn_zebra_mac_flag_dump( + mac, mac_buf, sizeof(mac_buf))); + } + wctx->uninstall = 0; + + return true; + } + + return false; +} + +/* + * Free MAC hash entry (callback) + */ +static void zebra_evpn_mac_del_hash_entry(struct hash_bucket *bucket, void *arg) +{ + struct mac_walk_ctx *wctx = arg; + struct zebra_mac *mac = bucket->data; + + if (zebra_evpn_check_mac_del_from_db(wctx, mac)) { + if (wctx->upd_client && (mac->flags & ZEBRA_MAC_LOCAL)) { + zebra_evpn_mac_send_del_to_client(wctx->zevpn->vni, + &mac->macaddr, + mac->flags, false); + } + if (wctx->uninstall) { + if (zebra_evpn_mac_is_static(mac)) + zebra_evpn_sync_mac_dp_install( + mac, false /* set_inactive */, 
+ true /* force_clear_static */, + __func__); + + if (mac->flags & ZEBRA_MAC_REMOTE) + zebra_evpn_rem_mac_uninstall(wctx->zevpn, mac, + false /*force*/); + } + + zebra_evpn_mac_del(wctx->zevpn, mac); + } + + return; +} + +/* + * Delete all MAC entries for this EVPN. + */ +void zebra_evpn_mac_del_all(struct zebra_evpn *zevpn, int uninstall, + int upd_client, uint32_t flags) +{ + struct mac_walk_ctx wctx; + + if (!zevpn->mac_table) + return; + + memset(&wctx, 0, sizeof(wctx)); + wctx.zevpn = zevpn; + wctx.uninstall = uninstall; + wctx.upd_client = upd_client; + wctx.flags = flags; + + hash_iterate(zevpn->mac_table, zebra_evpn_mac_del_hash_entry, &wctx); +} + +/* + * Look up MAC hash entry. + */ +struct zebra_mac *zebra_evpn_mac_lookup(struct zebra_evpn *zevpn, + const struct ethaddr *mac) +{ + struct zebra_mac tmp; + struct zebra_mac *pmac; + + memset(&tmp, 0, sizeof(tmp)); + memcpy(&tmp.macaddr, mac, ETH_ALEN); + pmac = hash_lookup(zevpn->mac_table, &tmp); + + return pmac; +} + +/* + * Inform BGP about local MAC addition. + */ +int zebra_evpn_mac_send_add_to_client(vni_t vni, const struct ethaddr *macaddr, + uint32_t mac_flags, uint32_t seq, + struct zebra_evpn_es *es) +{ + uint8_t flags = 0; + + if (CHECK_FLAG(mac_flags, ZEBRA_MAC_LOCAL_INACTIVE)) { + /* host reachability has not been verified locally */ + + /* if no ES peer is claiming reachability we can't advertise the + * entry + */ + if (!CHECK_FLAG(mac_flags, ZEBRA_MAC_ES_PEER_ACTIVE)) + return 0; + + /* ES peers are claiming reachability; we will + * advertise the entry but with a proxy flag + */ + SET_FLAG(flags, ZEBRA_MACIP_TYPE_PROXY_ADVERT); + } + + if (CHECK_FLAG(mac_flags, ZEBRA_MAC_STICKY)) + SET_FLAG(flags, ZEBRA_MACIP_TYPE_STICKY); + if (CHECK_FLAG(mac_flags, ZEBRA_MAC_DEF_GW)) + SET_FLAG(flags, ZEBRA_MACIP_TYPE_GW); + + return zebra_evpn_macip_send_msg_to_client(vni, macaddr, NULL, flags, + seq, ZEBRA_NEIGH_ACTIVE, es, + ZEBRA_MACIP_ADD); +} + +/* + * Inform BGP about local MAC deletion. 
+ */ +int zebra_evpn_mac_send_del_to_client(vni_t vni, const struct ethaddr *macaddr, + uint32_t flags, bool force) +{ + int state = ZEBRA_NEIGH_ACTIVE; + + if (!force) { + if (CHECK_FLAG(flags, ZEBRA_MAC_LOCAL_INACTIVE) + && !CHECK_FLAG(flags, ZEBRA_MAC_ES_PEER_ACTIVE)) + /* the host was not advertised - nothing to delete */ + return 0; + + /* MAC is LOCAL and DUP_DETECTED, this local mobility event + * is not known to bgpd. Upon receiving local delete + * ask bgp to reinstall the best route (remote entry). + */ + if (CHECK_FLAG(flags, ZEBRA_MAC_LOCAL) && + CHECK_FLAG(flags, ZEBRA_MAC_DUPLICATE)) + state = ZEBRA_NEIGH_INACTIVE; + } + + return zebra_evpn_macip_send_msg_to_client( + vni, macaddr, NULL, 0 /* flags */, 0 /* seq */, state, NULL, + ZEBRA_MACIP_DEL); +} + +/* + * wrapper to create a MAC hash table + */ +struct hash *zebra_mac_db_create(const char *desc) +{ + return hash_create_size(8, mac_hash_keymake, mac_cmp, desc); +} + +/* program sync mac flags in the dataplane */ +int zebra_evpn_sync_mac_dp_install(struct zebra_mac *mac, bool set_inactive, + bool force_clear_static, const char *caller) +{ + struct interface *ifp; + bool sticky; + bool set_static; + struct zebra_evpn *zevpn = mac->zevpn; + vlanid_t vid; + struct zebra_if *zif; + struct interface *br_ifp; + + /* If the ES-EVI doesn't exist defer install. When the ES-EVI is + * created we will attempt to install the mac entry again + */ + if (mac->es) { + struct zebra_evpn_es_evi *es_evi; + + es_evi = zebra_evpn_es_evi_find(mac->es, mac->zevpn); + if (!es_evi) { + if (IS_ZEBRA_DEBUG_EVPN_MH_MAC) + zlog_debug( + "%s: dp-install sync-mac vni %u mac %pEA es %s 0x%x %sskipped, no es-evi", + caller, zevpn->vni, &mac->macaddr, + mac->es ? mac->es->esi_str : "-", + mac->flags, + set_inactive ? 
"inactive " : ""); + return -1; + } + } + + /* get the access vlan from the vxlan_device */ + zebra_evpn_mac_get_access_info(mac, &ifp, &vid); + + if (!ifp) { + if (IS_ZEBRA_DEBUG_EVPN_MH_MAC) { + char mac_buf[MAC_BUF_SIZE]; + + zlog_debug( + "%s: dp-install sync-mac vni %u mac %pEA es %s %s%sskipped, no access-port", + caller, zevpn->vni, &mac->macaddr, + mac->es ? mac->es->esi_str : "-", + zebra_evpn_zebra_mac_flag_dump(mac, mac_buf, + sizeof(mac_buf)), + set_inactive ? "inactive " : ""); + } + return -1; + } + + zif = ifp->info; + br_ifp = zif->brslave_info.br_if; + if (!br_ifp) { + if (IS_ZEBRA_DEBUG_EVPN_MH_MAC) { + char mac_buf[MAC_BUF_SIZE]; + + zlog_debug( + "%s: dp-install sync-mac vni %u mac %pEA es %s %s%sskipped, no br", + caller, zevpn->vni, &mac->macaddr, + mac->es ? mac->es->esi_str : "-", + zebra_evpn_zebra_mac_flag_dump(mac, mac_buf, + sizeof(mac_buf)), + set_inactive ? "inactive " : ""); + } + return -1; + } + + sticky = !!CHECK_FLAG(mac->flags, ZEBRA_MAC_STICKY); + if (force_clear_static) + set_static = false; + else + set_static = zebra_evpn_mac_is_static(mac); + + /* We can install a local mac that has been synced from the peer + * over the VxLAN-overlay/network-port if fast failover is not + * supported and if the local ES is oper-down. + */ + if (mac->es && zebra_evpn_es_local_mac_via_network_port(mac->es)) { + if (IS_ZEBRA_DEBUG_EVPN_MH_MAC) { + char mac_buf[MAC_BUF_SIZE]; + + zlog_debug( + "dp-%s sync-nw-mac vni %u mac %pEA es %s %s%s", + set_static ? "install" : "uninstall", + zevpn->vni, &mac->macaddr, + mac->es ? mac->es->esi_str : "-", + zebra_evpn_zebra_mac_flag_dump(mac, mac_buf, + sizeof(mac_buf)), + set_inactive ? 
"inactive " : ""); + } + if (set_static) + /* XXX - old_static needs to be computed more + * accurately + */ + zebra_evpn_rem_mac_install(zevpn, mac, + true /* old_static */); + else + zebra_evpn_rem_mac_uninstall(zevpn, mac, + false /* force */); + + return 0; + } + + if (IS_ZEBRA_DEBUG_EVPN_MH_MAC) { + char mac_buf[MAC_BUF_SIZE]; + + zlog_debug("dp-install sync-mac vni %u mac %pEA es %s %s%s%s", + zevpn->vni, &mac->macaddr, + mac->es ? mac->es->esi_str : "-", + zebra_evpn_zebra_mac_flag_dump(mac, mac_buf, + sizeof(mac_buf)), + set_static ? "static " : "", + set_inactive ? "inactive " : ""); + } + + dplane_local_mac_add(ifp, br_ifp, vid, &mac->macaddr, sticky, + set_static, set_inactive); + return 0; +} + +void zebra_evpn_mac_send_add_del_to_client(struct zebra_mac *mac, + bool old_bgp_ready, + bool new_bgp_ready) +{ + if (new_bgp_ready) + zebra_evpn_mac_send_add_to_client(mac->zevpn->vni, + &mac->macaddr, mac->flags, + mac->loc_seq, mac->es); + else if (old_bgp_ready) + zebra_evpn_mac_send_del_to_client(mac->zevpn->vni, + &mac->macaddr, mac->flags, + true /* force */); +} + +/* MAC hold timer is used to age out peer-active flag. + * + * During this wait time we expect the dataplane component or an + * external neighmgr daemon to probe existing hosts to independently + * establish their presence on the ES. 
+ */ +static void zebra_evpn_mac_hold_exp_cb(struct thread *t) +{ + struct zebra_mac *mac; + bool old_bgp_ready; + bool new_bgp_ready; + bool old_static; + bool new_static; + + mac = THREAD_ARG(t); + /* the purpose of the hold timer is to age out the peer-active + * flag + */ + if (!CHECK_FLAG(mac->flags, ZEBRA_MAC_ES_PEER_ACTIVE)) + return; + + old_bgp_ready = zebra_evpn_mac_is_ready_for_bgp(mac->flags); + old_static = zebra_evpn_mac_is_static(mac); + UNSET_FLAG(mac->flags, ZEBRA_MAC_ES_PEER_ACTIVE); + new_bgp_ready = zebra_evpn_mac_is_ready_for_bgp(mac->flags); + new_static = zebra_evpn_mac_is_static(mac); + + if (IS_ZEBRA_DEBUG_EVPN_MH_MAC) { + char mac_buf[MAC_BUF_SIZE]; + + zlog_debug( + "sync-mac vni %u mac %pEA es %s %shold expired", + mac->zevpn->vni, &mac->macaddr, + mac->es ? mac->es->esi_str : "-", + zebra_evpn_zebra_mac_flag_dump(mac, mac_buf, + sizeof(mac_buf))); + } + + /* re-program the local mac in the dataplane if the mac is no + * longer static + */ + if (old_static != new_static) + zebra_evpn_sync_mac_dp_install(mac, false /* set_inactive */, + false /* force_clear_static */, + __func__); + + /* inform bgp if needed */ + if (old_bgp_ready != new_bgp_ready) + zebra_evpn_mac_send_add_del_to_client(mac, old_bgp_ready, + new_bgp_ready); +} + +static inline void zebra_evpn_mac_start_hold_timer(struct zebra_mac *mac) +{ + if (mac->hold_timer) + return; + + if (IS_ZEBRA_DEBUG_EVPN_MH_MAC) { + char mac_buf[MAC_BUF_SIZE]; + + zlog_debug( + "sync-mac vni %u mac %pEA es %s %shold started", + mac->zevpn->vni, &mac->macaddr, + mac->es ? 
mac->es->esi_str : "-", + zebra_evpn_zebra_mac_flag_dump(mac, mac_buf, + sizeof(mac_buf))); + } + thread_add_timer(zrouter.master, zebra_evpn_mac_hold_exp_cb, mac, + zmh_info->mac_hold_time, &mac->hold_timer); +} + +void zebra_evpn_mac_stop_hold_timer(struct zebra_mac *mac) +{ + if (!mac->hold_timer) + return; + + if (IS_ZEBRA_DEBUG_EVPN_MH_MAC) { + char mac_buf[MAC_BUF_SIZE]; + + zlog_debug( + "sync-mac vni %u mac %pEA es %s %shold stopped", + mac->zevpn->vni, &mac->macaddr, + mac->es ? mac->es->esi_str : "-", + zebra_evpn_zebra_mac_flag_dump(mac, mac_buf, + sizeof(mac_buf))); + } + + THREAD_OFF(mac->hold_timer); +} + +void zebra_evpn_sync_mac_del(struct zebra_mac *mac) +{ + bool old_static; + bool new_static; + + if (IS_ZEBRA_DEBUG_EVPN_MH_MAC) { + char mac_buf[MAC_BUF_SIZE]; + + zlog_debug( + "sync-mac del vni %u mac %pEA es %s seq %d f %s", + mac->zevpn->vni, &mac->macaddr, + mac->es ? mac->es->esi_str : "-", mac->loc_seq, + zebra_evpn_zebra_mac_flag_dump(mac, mac_buf, + sizeof(mac_buf))); + } + + old_static = zebra_evpn_mac_is_static(mac); + UNSET_FLAG(mac->flags, ZEBRA_MAC_ES_PEER_PROXY); + if (CHECK_FLAG(mac->flags, ZEBRA_MAC_ES_PEER_ACTIVE)) + zebra_evpn_mac_start_hold_timer(mac); + new_static = zebra_evpn_mac_is_static(mac); + + if (old_static != new_static) + /* program the local mac in the kernel */ + zebra_evpn_sync_mac_dp_install(mac, false /* set_inactive */, + false /* force_clear_static */, + __func__); +} + +static inline bool zebra_evpn_mac_is_bgp_seq_ok(struct zebra_evpn *zevpn, + struct zebra_mac *mac, + uint32_t seq, uint16_t ipa_len, + const struct ipaddr *ipaddr, + bool sync) +{ + char ipbuf[INET6_ADDRSTRLEN]; + uint32_t tmp_seq; + const char *n_type; + + if (CHECK_FLAG(mac->flags, ZEBRA_MAC_LOCAL)) { + tmp_seq = mac->loc_seq; + n_type = "local"; + } else { + tmp_seq = mac->rem_seq; + n_type = "remote"; + } + + if (seq < tmp_seq) { + /* if the mac was never advertised to bgp we must accept + * whatever sequence number bgp sends + * XXX - 
check with Vivek + */ + if (CHECK_FLAG(mac->flags, ZEBRA_MAC_LOCAL) + && !zebra_evpn_mac_is_ready_for_bgp(mac->flags)) { + if (IS_ZEBRA_DEBUG_EVPN_MH_MAC + || IS_ZEBRA_DEBUG_VXLAN) { + char mac_buf[MAC_BUF_SIZE]; + + zlog_debug( + "%s-macip accept vni %u %s-mac %pEA%s%s lower seq %u f %s", + sync ? "sync" : "rem", zevpn->vni, + n_type, + &mac->macaddr, + ipa_len ? " IP " : "", + ipa_len ? ipaddr2str(ipaddr, ipbuf, + sizeof(ipbuf)) + : "", + tmp_seq, + zebra_evpn_zebra_mac_flag_dump( + mac, mac_buf, sizeof(mac_buf))); + } + + return true; + } + + if (IS_ZEBRA_DEBUG_EVPN_MH_MAC || IS_ZEBRA_DEBUG_VXLAN) { + char mac_buf[MAC_BUF_SIZE]; + + zlog_debug( + "%s-macip ignore vni %u %s-mac %pEA%s%s as existing has higher seq %u f %s", + sync ? "sync" : "rem", zevpn->vni, n_type, + &mac->macaddr, + ipa_len ? " IP " : "", + ipa_len ? ipaddr2str(ipaddr, ipbuf, + sizeof(ipbuf)) + : "", + tmp_seq, + zebra_evpn_zebra_mac_flag_dump( + mac, mac_buf, sizeof(mac_buf))); + } + return false; + } + + return true; +} + +struct zebra_mac *zebra_evpn_proc_sync_mac_update( + struct zebra_evpn *zevpn, const struct ethaddr *macaddr, + uint16_t ipa_len, const struct ipaddr *ipaddr, uint8_t flags, + uint32_t seq, const esi_t *esi, struct sync_mac_ip_ctx *ctx) +{ + struct zebra_mac *mac; + bool inform_bgp = false; + bool inform_dataplane = false; + bool seq_change = false; + bool es_change = false; + uint32_t tmp_seq; + char ipbuf[INET6_ADDRSTRLEN]; + bool old_local = false; + bool old_bgp_ready; + bool new_bgp_ready; + + mac = zebra_evpn_mac_lookup(zevpn, macaddr); + if (!mac) { + /* if it is a new local path we need to inform both + * the control protocol and the data-plane + */ + inform_bgp = true; + inform_dataplane = true; + ctx->mac_created = true; + ctx->mac_inactive = true; + + /* create the MAC and associate it with the dest ES */ + mac = zebra_evpn_mac_add(zevpn, macaddr); + zebra_evpn_es_mac_ref(mac, esi); + + /* local mac activated by an ES peer */ + SET_FLAG(mac->flags, 
ZEBRA_MAC_LOCAL); + /* if mac-only route setup peer flags */ + if (!ipa_len) { + if (CHECK_FLAG(flags, ZEBRA_MACIP_TYPE_PROXY_ADVERT)) + SET_FLAG(mac->flags, ZEBRA_MAC_ES_PEER_PROXY); + else + SET_FLAG(mac->flags, ZEBRA_MAC_ES_PEER_ACTIVE); + } + SET_FLAG(mac->flags, ZEBRA_MAC_LOCAL_INACTIVE); + old_bgp_ready = false; + new_bgp_ready = zebra_evpn_mac_is_ready_for_bgp(mac->flags); + } else { + uint32_t old_flags; + uint32_t new_flags; + bool old_static; + bool new_static; + bool sticky; + bool remote_gw; + + mac->uptime = monotime(NULL); + + old_flags = mac->flags; + sticky = !!CHECK_FLAG(old_flags, ZEBRA_MAC_STICKY); + remote_gw = !!CHECK_FLAG(old_flags, ZEBRA_MAC_REMOTE_DEF_GW); + if (sticky || remote_gw) { + if (IS_ZEBRA_DEBUG_EVPN_MH_NEIGH) + zlog_debug( + "Ignore sync-macip vni %u mac %pEA%s%s%s%s", + zevpn->vni, macaddr, + ipa_len ? " IP " : "", + ipa_len ? ipaddr2str(ipaddr, ipbuf, + sizeof(ipbuf)) + : "", + sticky ? " sticky" : "", + remote_gw ? " remote_gw" : ""); + ctx->ignore_macip = true; + return NULL; + } + if (!zebra_evpn_mac_is_bgp_seq_ok(zevpn, mac, seq, ipa_len, + ipaddr, true)) { + ctx->ignore_macip = true; + return NULL; + } + + old_local = !!CHECK_FLAG(old_flags, ZEBRA_MAC_LOCAL); + old_static = zebra_evpn_mac_is_static(mac); + + /* re-build the mac flags */ + new_flags = 0; + SET_FLAG(new_flags, ZEBRA_MAC_LOCAL); + /* retain old local activity flag */ + if (old_flags & ZEBRA_MAC_LOCAL) { + new_flags |= (old_flags & ZEBRA_MAC_LOCAL_INACTIVE); + } else { + new_flags |= ZEBRA_MAC_LOCAL_INACTIVE; + ctx->mac_inactive = true; + } + if (ipa_len) { + /* if mac-ip route do NOT update the peer flags + * i.e. retain only flags as is + */ + new_flags |= (old_flags & ZEBRA_MAC_ALL_PEER_FLAGS); + } else { + /* if mac-only route update peer flags */ + if (CHECK_FLAG(flags, ZEBRA_MACIP_TYPE_PROXY_ADVERT)) { + SET_FLAG(new_flags, ZEBRA_MAC_ES_PEER_PROXY); + /* if the mac was peer-active previously we + * need to keep the flag and start the + * holdtimer on it. 
the peer-active flag is + * cleared on holdtimer expiry. + */ + if (CHECK_FLAG(old_flags, + ZEBRA_MAC_ES_PEER_ACTIVE)) { + SET_FLAG(new_flags, + ZEBRA_MAC_ES_PEER_ACTIVE); + zebra_evpn_mac_start_hold_timer(mac); + } + } else { + SET_FLAG(new_flags, ZEBRA_MAC_ES_PEER_ACTIVE); + /* stop hold timer if a peer has verified + * reachability + */ + zebra_evpn_mac_stop_hold_timer(mac); + } + } + mac->rem_seq = 0; + zebra_evpn_mac_clear_fwd_info(mac); + mac->flags = new_flags; + + if (IS_ZEBRA_DEBUG_EVPN_MH_MAC && (old_flags != new_flags)) { + char mac_buf[MAC_BUF_SIZE], omac_buf[MAC_BUF_SIZE]; + struct zebra_mac omac; + + omac.flags = old_flags; + zlog_debug( + "sync-mac vni %u mac %pEA old_f %snew_f %s", + zevpn->vni, macaddr, + zebra_evpn_zebra_mac_flag_dump( + &omac, omac_buf, sizeof(omac_buf)), + zebra_evpn_zebra_mac_flag_dump( + mac, mac_buf, sizeof(mac_buf))); + } + + /* update es */ + es_change = zebra_evpn_es_mac_ref(mac, esi); + /* if mac dest change - inform both sides */ + if (es_change) { + inform_bgp = true; + inform_dataplane = true; + ctx->mac_inactive = true; + } + + /* if peer-flag is being set notify dataplane that the + * entry must not be expired because of local inactivity + */ + new_static = zebra_evpn_mac_is_static(mac); + if (old_static != new_static) + inform_dataplane = true; + + old_bgp_ready = zebra_evpn_mac_is_ready_for_bgp(old_flags); + new_bgp_ready = zebra_evpn_mac_is_ready_for_bgp(mac->flags); + if (old_bgp_ready != new_bgp_ready) + inform_bgp = true; + } + + + /* update sequence number; if that results in a new local sequence + * inform bgp + */ + tmp_seq = MAX(mac->loc_seq, seq); + if (tmp_seq != mac->loc_seq) { + mac->loc_seq = tmp_seq; + seq_change = true; + inform_bgp = true; + } + + if (IS_ZEBRA_DEBUG_EVPN_MH_MAC) { + char mac_buf[MAC_BUF_SIZE]; + + zlog_debug("sync-mac %s vni %u mac %pEA es %s seq %d f %s%s%s", + ctx->mac_created ? "created" : "updated", + zevpn->vni, macaddr, + mac->es ? 
mac->es->esi_str : "-", mac->loc_seq, + zebra_evpn_zebra_mac_flag_dump(mac, mac_buf, + sizeof(mac_buf)), + inform_bgp ? "inform_bgp" : "", + inform_dataplane ? " inform_dp" : ""); + } + + if (inform_bgp) + zebra_evpn_mac_send_add_del_to_client(mac, old_bgp_ready, + new_bgp_ready); + + /* neighs using the mac may need to be re-sent to + * bgp with updated info + */ + if (seq_change || es_change || !old_local) + zebra_evpn_process_neigh_on_local_mac_change( + zevpn, mac, seq_change, es_change); + + if (inform_dataplane) { + if (ipa_len) + /* if the mac is being created as a part of MAC-IP + * route wait for the neigh to be updated or + * created before programming the mac + */ + ctx->mac_dp_update_deferred = true; + else + /* program the local mac in the kernel. when the ES + * change we need to force the dataplane to reset + * the activity as we are yet to establish activity + * locally + */ + zebra_evpn_sync_mac_dp_install( + mac, ctx->mac_inactive, + false /* force_clear_static */, __func__); + } + + return mac; +} + +/* update local forwarding info. 
return true if a dest-ES change + * is detected + */ +static bool zebra_evpn_local_mac_update_fwd_info(struct zebra_mac *mac, + struct interface *ifp, + vlanid_t vid) +{ + struct zebra_if *zif = ifp->info; + bool es_change; + ns_id_t local_ns_id = NS_DEFAULT; + struct zebra_vrf *zvrf; + struct zebra_evpn_es *es; + + zvrf = ifp->vrf->info; + if (zvrf && zvrf->zns) + local_ns_id = zvrf->zns->ns_id; + + zebra_evpn_mac_clear_fwd_info(mac); + + es = zif->es_info.es; + if (es && (es->flags & ZEBRA_EVPNES_BYPASS)) + es = NULL; + es_change = zebra_evpn_es_mac_ref_entry(mac, es); + + if (!mac->es) { + /* if es is set fwd_info is not-relevant/taped-out */ + mac->fwd_info.local.ifindex = ifp->ifindex; + mac->fwd_info.local.ns_id = local_ns_id; + mac->fwd_info.local.vid = vid; + zebra_evpn_mac_ifp_link(mac, ifp); + } + + return es_change; +} + +/* Notify Local MACs to the clienti, skips GW MAC */ +static void zebra_evpn_send_mac_hash_entry_to_client(struct hash_bucket *bucket, + void *arg) +{ + struct mac_walk_ctx *wctx = arg; + struct zebra_mac *zmac = bucket->data; + + if (CHECK_FLAG(zmac->flags, ZEBRA_MAC_DEF_GW)) + return; + + if (CHECK_FLAG(zmac->flags, ZEBRA_MAC_LOCAL)) + zebra_evpn_mac_send_add_to_client(wctx->zevpn->vni, + &zmac->macaddr, zmac->flags, + zmac->loc_seq, zmac->es); +} + +/* Iterator to Notify Local MACs of a EVPN */ +void zebra_evpn_send_mac_list_to_client(struct zebra_evpn *zevpn) +{ + struct mac_walk_ctx wctx; + + if (!zevpn->mac_table) + return; + + memset(&wctx, 0, sizeof(wctx)); + wctx.zevpn = zevpn; + + hash_iterate(zevpn->mac_table, zebra_evpn_send_mac_hash_entry_to_client, + &wctx); +} + +void zebra_evpn_rem_mac_del(struct zebra_evpn *zevpn, struct zebra_mac *mac) +{ + zebra_evpn_process_neigh_on_remote_mac_del(zevpn, mac); + /* the remote sequence number in the auto mac entry + * needs to be reset to 0 as the mac entry may have + * been removed on all VTEPs (including + * the originating one) + */ + mac->rem_seq = 0; + + /* If all remote 
neighbors referencing a remote MAC + * go away, we need to uninstall the MAC. + */ + if (remote_neigh_count(mac) == 0) { + zebra_evpn_rem_mac_uninstall(zevpn, mac, false /*force*/); + zebra_evpn_es_mac_deref_entry(mac); + UNSET_FLAG(mac->flags, ZEBRA_MAC_REMOTE); + } + + if (list_isempty(mac->neigh_list)) + zebra_evpn_mac_del(zevpn, mac); + else + SET_FLAG(mac->flags, ZEBRA_MAC_AUTO); +} + +/* Print Duplicate MAC */ +void zebra_evpn_print_dad_mac_hash(struct hash_bucket *bucket, void *ctxt) +{ + struct zebra_mac *mac; + + mac = (struct zebra_mac *)bucket->data; + if (!mac) + return; + + if (CHECK_FLAG(mac->flags, ZEBRA_MAC_DUPLICATE)) + zebra_evpn_print_mac_hash(bucket, ctxt); +} + +/* Print Duplicate MAC in detail */ +void zebra_evpn_print_dad_mac_hash_detail(struct hash_bucket *bucket, + void *ctxt) +{ + struct zebra_mac *mac; + + mac = (struct zebra_mac *)bucket->data; + if (!mac) + return; + + if (CHECK_FLAG(mac->flags, ZEBRA_MAC_DUPLICATE)) + zebra_evpn_print_mac_hash_detail(bucket, ctxt); +} + +int zebra_evpn_mac_remote_macip_add( + struct zebra_evpn *zevpn, struct zebra_vrf *zvrf, + const struct ethaddr *macaddr, uint16_t ipa_len, + const struct ipaddr *ipaddr, struct zebra_mac **macp, + struct in_addr vtep_ip, uint8_t flags, uint32_t seq, const esi_t *esi) +{ + char buf1[INET6_ADDRSTRLEN]; + bool sticky; + bool remote_gw; + int update_mac = 0; + bool do_dad = false; + bool is_dup_detect = false; + esi_t *old_esi; + bool old_static = false; + struct zebra_mac *mac; + bool old_es_present; + bool new_es_present; + + sticky = !!CHECK_FLAG(flags, ZEBRA_MACIP_TYPE_STICKY); + remote_gw = !!CHECK_FLAG(flags, ZEBRA_MACIP_TYPE_GW); + + mac = zebra_evpn_mac_lookup(zevpn, macaddr); + + /* Ignore if the mac is already present as a gateway mac */ + if (mac && CHECK_FLAG(mac->flags, ZEBRA_MAC_DEF_GW) + && CHECK_FLAG(flags, ZEBRA_MACIP_TYPE_GW)) { + if (IS_ZEBRA_DEBUG_VXLAN) + zlog_debug( + "Ignore remote MACIP ADD VNI %u MAC %pEA%s%s as MAC is already configured as 
gateway MAC", + zevpn->vni, macaddr, + ipa_len ? " IP " : "", + ipa_len ? ipaddr2str(ipaddr, buf1, sizeof(buf1)) + : ""); + return -1; + } + + old_esi = (mac && mac->es) ? &mac->es->esi : zero_esi; + + /* check if the remote MAC is unknown or has a change. + * If so, that needs to be updated first. Note that client could + * install MAC and MACIP separately or just install the latter. + */ + if (!mac || !CHECK_FLAG(mac->flags, ZEBRA_MAC_REMOTE) + || sticky != !!CHECK_FLAG(mac->flags, ZEBRA_MAC_STICKY) + || remote_gw != !!CHECK_FLAG(mac->flags, ZEBRA_MAC_REMOTE_DEF_GW) + || !IPV4_ADDR_SAME(&mac->fwd_info.r_vtep_ip, &vtep_ip) + || memcmp(old_esi, esi, sizeof(esi_t)) || seq != mac->rem_seq) + update_mac = 1; + + if (update_mac) { + if (!mac) { + mac = zebra_evpn_mac_add(zevpn, macaddr); + zebra_evpn_es_mac_ref(mac, esi); + + /* Is this MAC created for a MACIP? */ + if (ipa_len) + SET_FLAG(mac->flags, ZEBRA_MAC_AUTO); + } else { + /* When host moves but changes its (MAC,IP) + * binding, BGP may install a MACIP entry that + * corresponds to "older" location of the host + * in transient situations (because {IP1,M1} + * is a different route from {IP1,M2}). Check + * the sequence number and ignore this update + * if appropriate. + */ + if (!zebra_evpn_mac_is_bgp_seq_ok( + zevpn, mac, seq, ipa_len, ipaddr, false)) + return -1; + + old_es_present = !!mac->es; + zebra_evpn_es_mac_ref(mac, esi); + new_es_present = !!mac->es; + /* XXX - dataplane is curently not able to handle a MAC + * replace if the destination changes from L2-NHG to + * single VTEP and vice-versa. So delete the old entry + * and re-install + */ + if (old_es_present != new_es_present) + zebra_evpn_rem_mac_uninstall(zevpn, mac, false); + } + + /* Check MAC's curent state is local (this is the case + * where MAC has moved from L->R) and check previous + * detection started via local learning. + * RFC-7432: A PE/VTEP that detects a MAC mobility + * event via local learning starts an M-second timer. 
+ * + * VTEP-IP or seq. change alone is not considered + * for dup. detection. + * + * MAC is already marked duplicate set dad, then + * is_dup_detect will be set to not install the entry. + */ + if ((!CHECK_FLAG(mac->flags, ZEBRA_MAC_REMOTE) + && mac->dad_count) + || CHECK_FLAG(mac->flags, ZEBRA_MAC_DUPLICATE)) + do_dad = true; + + /* Remove local MAC from BGP. */ + if (CHECK_FLAG(mac->flags, ZEBRA_MAC_LOCAL)) { + /* force drop the sync flags */ + old_static = zebra_evpn_mac_is_static(mac); + if (IS_ZEBRA_DEBUG_EVPN_MH_MAC) { + char mac_buf[MAC_BUF_SIZE]; + + zlog_debug( + "sync-mac->remote vni %u mac %pEA es %s seq %d f %s", + zevpn->vni, macaddr, + mac->es ? mac->es->esi_str : "-", + mac->loc_seq, + zebra_evpn_zebra_mac_flag_dump( + mac, mac_buf, sizeof(mac_buf))); + } + + zebra_evpn_mac_clear_sync_info(mac); + zebra_evpn_mac_send_del_to_client(zevpn->vni, macaddr, + mac->flags, + false /* force */); + } + + /* Set "auto" and "remote" forwarding info. */ + zebra_evpn_mac_clear_fwd_info(mac); + UNSET_FLAG(mac->flags, ZEBRA_MAC_ALL_LOCAL_FLAGS); + SET_FLAG(mac->flags, ZEBRA_MAC_REMOTE); + mac->fwd_info.r_vtep_ip = vtep_ip; + + if (sticky) + SET_FLAG(mac->flags, ZEBRA_MAC_STICKY); + else + UNSET_FLAG(mac->flags, ZEBRA_MAC_STICKY); + + if (remote_gw) + SET_FLAG(mac->flags, ZEBRA_MAC_REMOTE_DEF_GW); + else + UNSET_FLAG(mac->flags, ZEBRA_MAC_REMOTE_DEF_GW); + + zebra_evpn_dup_addr_detect_for_mac( + zvrf, mac, mac->fwd_info.r_vtep_ip, do_dad, + &is_dup_detect, false); + + if (!is_dup_detect) { + zebra_evpn_process_neigh_on_remote_mac_add(zevpn, mac); + /* Install the entry. */ + zebra_evpn_rem_mac_install(zevpn, mac, old_static); + } + } + + /* Update seq number. */ + mac->rem_seq = seq; + + /* If there is no IP, return after clearing AUTO flag of MAC. 
*/ + if (!ipa_len) { + UNSET_FLAG(mac->flags, ZEBRA_MAC_AUTO); + return -1; + } + *macp = mac; + return 0; +} + +int zebra_evpn_add_update_local_mac(struct zebra_vrf *zvrf, + struct zebra_evpn *zevpn, + struct interface *ifp, + const struct ethaddr *macaddr, vlanid_t vid, + bool sticky, bool local_inactive, + bool dp_static, struct zebra_mac *mac) +{ + bool mac_sticky = false; + bool inform_client = false; + bool upd_neigh = false; + bool is_dup_detect = false; + struct in_addr vtep_ip = {.s_addr = 0}; + bool es_change = false; + bool new_bgp_ready; + /* assume inactive if not present or if not local */ + bool old_local_inactive = true; + bool old_bgp_ready = false; + bool inform_dataplane = false; + bool new_static = false; + + assert(ifp); + /* Check if we need to create or update or it is a NO-OP. */ + if (!mac) + mac = zebra_evpn_mac_lookup(zevpn, macaddr); + if (!mac) { + if (IS_ZEBRA_DEBUG_VXLAN || IS_ZEBRA_DEBUG_EVPN_MH_MAC) + zlog_debug( + "ADD %sMAC %pEA intf %s(%u) VID %u -> VNI %u%s", + sticky ? "sticky " : "", macaddr, + ifp->name, ifp->ifindex, vid, zevpn->vni, + local_inactive ? " local-inactive" : ""); + + mac = zebra_evpn_mac_add(zevpn, macaddr); + SET_FLAG(mac->flags, ZEBRA_MAC_LOCAL); + es_change = zebra_evpn_local_mac_update_fwd_info(mac, ifp, vid); + if (sticky) + SET_FLAG(mac->flags, ZEBRA_MAC_STICKY); + inform_client = true; + } else { + if (IS_ZEBRA_DEBUG_VXLAN || IS_ZEBRA_DEBUG_EVPN_MH_MAC) { + char mac_buf[MAC_BUF_SIZE]; + + zlog_debug( + "UPD %sMAC %pEA intf %s(%u) VID %u -> VNI %u %scurFlags %s", + sticky ? "sticky " : "", macaddr, + ifp->name, ifp->ifindex, vid, zevpn->vni, + local_inactive ? 
"local-inactive " : "", + zebra_evpn_zebra_mac_flag_dump( + mac, mac_buf, sizeof(mac_buf))); + } + + if (CHECK_FLAG(mac->flags, ZEBRA_MAC_LOCAL)) { + struct interface *old_ifp; + vlanid_t old_vid; + bool old_static; + + zebra_evpn_mac_get_access_info(mac, &old_ifp, &old_vid); + old_bgp_ready = + zebra_evpn_mac_is_ready_for_bgp(mac->flags); + old_local_inactive = + !!(mac->flags & ZEBRA_MAC_LOCAL_INACTIVE); + old_static = zebra_evpn_mac_is_static(mac); + if (CHECK_FLAG(mac->flags, ZEBRA_MAC_STICKY)) + mac_sticky = true; + es_change = zebra_evpn_local_mac_update_fwd_info( + mac, ifp, vid); + + /* + * Update any changes and if changes are relevant to + * BGP, note it. + */ + if (mac_sticky == sticky && old_ifp == ifp + && old_vid == vid + && old_local_inactive == local_inactive + && dp_static == old_static && !es_change) { + if (IS_ZEBRA_DEBUG_VXLAN) + zlog_debug( + " Add/Update %sMAC %pEA intf %s(%u) VID %u -> VNI %u%s, " + "entry exists and has not changed ", + sticky ? "sticky " : "", + macaddr, ifp->name, + ifp->ifindex, vid, zevpn->vni, + local_inactive + ? " local_inactive" + : ""); + return 0; + } + if (mac_sticky != sticky) { + if (sticky) + SET_FLAG(mac->flags, ZEBRA_MAC_STICKY); + else + UNSET_FLAG(mac->flags, + ZEBRA_MAC_STICKY); + inform_client = true; + } + + /* If an es_change is detected we need to advertise + * the route with a sequence that is one + * greater. 
This is need to indicate a mac-move + * to the ES peers + */ + if (es_change) { + /* update the sequence number only if the entry + * is locally active + */ + if (!local_inactive) + mac->loc_seq = mac->loc_seq + 1; + /* force drop the peer/sync info as it is + * simply no longer relevant + */ + if (CHECK_FLAG(mac->flags, + ZEBRA_MAC_ALL_PEER_FLAGS)) { + zebra_evpn_mac_clear_sync_info(mac); + new_static = + zebra_evpn_mac_is_static(mac); + /* if we clear peer-flags we + * also need to notify the dataplane + * to drop the static flag + */ + if (old_static != new_static) + inform_dataplane = true; + } + } + } else if (CHECK_FLAG(mac->flags, ZEBRA_MAC_REMOTE) + || CHECK_FLAG(mac->flags, ZEBRA_MAC_AUTO)) { + bool do_dad = false; + + /* + * MAC has either moved or was "internally" created due + * to a neighbor learn and is now actually learnt. If + * it was learnt as a remote sticky MAC, this is an + * operator error. + */ + if (CHECK_FLAG(mac->flags, ZEBRA_MAC_STICKY)) { + flog_warn( + EC_ZEBRA_STICKY_MAC_ALREADY_LEARNT, + "MAC %pEA already learnt as remote sticky MAC behind VTEP %pI4 VNI %u", + macaddr, + &mac->fwd_info.r_vtep_ip, + zevpn->vni); + return 0; + } + + /* If an actual move, compute MAC's seq number */ + if (CHECK_FLAG(mac->flags, ZEBRA_MAC_REMOTE)) { + mac->loc_seq = + MAX(mac->rem_seq + 1, mac->loc_seq); + vtep_ip = mac->fwd_info.r_vtep_ip; + /* Trigger DAD for remote MAC */ + do_dad = true; + } + + UNSET_FLAG(mac->flags, ZEBRA_MAC_REMOTE); + UNSET_FLAG(mac->flags, ZEBRA_MAC_AUTO); + SET_FLAG(mac->flags, ZEBRA_MAC_LOCAL); + es_change = zebra_evpn_local_mac_update_fwd_info( + mac, ifp, vid); + if (sticky) + SET_FLAG(mac->flags, ZEBRA_MAC_STICKY); + else + UNSET_FLAG(mac->flags, ZEBRA_MAC_STICKY); + /* + * We have to inform BGP of this MAC as well as process + * all neighbors. 
+ */ + inform_client = true; + upd_neigh = true; + + zebra_evpn_dup_addr_detect_for_mac( + zvrf, mac, vtep_ip, do_dad, &is_dup_detect, + true); + if (is_dup_detect) { + inform_client = false; + upd_neigh = false; + es_change = false; + } + } + } + + /* if the dataplane thinks the entry is sync but it is + * not sync in zebra (or vice-versa) we need to re-install + * to fixup + */ + new_static = zebra_evpn_mac_is_static(mac); + if (dp_static != new_static) + inform_dataplane = true; + + if (local_inactive) + SET_FLAG(mac->flags, ZEBRA_MAC_LOCAL_INACTIVE); + else + UNSET_FLAG(mac->flags, ZEBRA_MAC_LOCAL_INACTIVE); + + new_bgp_ready = zebra_evpn_mac_is_ready_for_bgp(mac->flags); + /* if local-activity has changed we need update bgp + * even if bgp already knows about the mac + */ + if ((old_local_inactive != local_inactive) + || (new_bgp_ready != old_bgp_ready)) { + if (IS_ZEBRA_DEBUG_EVPN_MH_MAC) { + char mac_buf[MAC_BUF_SIZE]; + + zlog_debug( + "local mac vni %u mac %pEA es %s seq %d f %s%s", + zevpn->vni, macaddr, + mac->es ? mac->es->esi_str : "", mac->loc_seq, + zebra_evpn_zebra_mac_flag_dump(mac, mac_buf, + sizeof(mac_buf)), + local_inactive ? "local-inactive" : ""); + } + + if (!is_dup_detect) + inform_client = true; + } + + if (es_change) { + inform_client = true; + upd_neigh = true; + } + + /* Inform dataplane if required. */ + if (inform_dataplane) + zebra_evpn_sync_mac_dp_install(mac, false /* set_inactive */, + false /* force_clear_static */, + __func__); + + /* Inform BGP if required. */ + if (inform_client) + zebra_evpn_mac_send_add_del_to_client(mac, old_bgp_ready, + new_bgp_ready); + + /* Process all neighbors associated with this MAC, if required. 
*/ + if (upd_neigh) + zebra_evpn_process_neigh_on_local_mac_change(zevpn, mac, 0, + es_change); + + return 0; +} + +int zebra_evpn_del_local_mac(struct zebra_evpn *zevpn, struct zebra_mac *mac, + bool clear_static) +{ + bool old_bgp_ready; + bool new_bgp_ready; + + if (IS_ZEBRA_DEBUG_VXLAN) + zlog_debug("DEL MAC %pEA VNI %u seq %u flags 0x%x nbr count %u", + &mac->macaddr, zevpn->vni, mac->loc_seq, mac->flags, + listcount(mac->neigh_list)); + + old_bgp_ready = zebra_evpn_mac_is_ready_for_bgp(mac->flags); + if (!clear_static && zebra_evpn_mac_is_static(mac)) { + /* this is a synced entry and can only be removed when the + * es-peers stop advertising it. + */ + zebra_evpn_mac_clear_fwd_info(mac); + + if (IS_ZEBRA_DEBUG_EVPN_MH_MAC) { + char mac_buf[MAC_BUF_SIZE]; + + zlog_debug( + "re-add sync-mac vni %u mac %pEA es %s seq %d f %s", + zevpn->vni, &mac->macaddr, + mac->es ? mac->es->esi_str : "-", mac->loc_seq, + zebra_evpn_zebra_mac_flag_dump( + mac, mac_buf, sizeof(mac_buf))); + } + + /* inform-bgp about change in local-activity if any */ + if (!CHECK_FLAG(mac->flags, ZEBRA_MAC_LOCAL_INACTIVE)) { + SET_FLAG(mac->flags, ZEBRA_MAC_LOCAL_INACTIVE); + new_bgp_ready = + zebra_evpn_mac_is_ready_for_bgp(mac->flags); + zebra_evpn_mac_send_add_del_to_client( + mac, old_bgp_ready, new_bgp_ready); + } + + /* re-install the inactive entry in the kernel */ + zebra_evpn_sync_mac_dp_install(mac, true /* set_inactive */, + false /* force_clear_static */, + __func__); + + return 0; + } + + /* flush the peer info */ + zebra_evpn_mac_clear_sync_info(mac); + + /* Update all the neigh entries associated with this mac */ + zebra_evpn_process_neigh_on_local_mac_del(zevpn, mac); + + /* Remove MAC from BGP. 
*/ + zebra_evpn_mac_send_del_to_client(zevpn->vni, &mac->macaddr, mac->flags, + clear_static /* force */); + + zebra_evpn_es_mac_deref_entry(mac); + + /* remove links to the destination access port */ + zebra_evpn_mac_clear_fwd_info(mac); + + /* + * If there are no neigh associated with the mac delete the mac + * else mark it as AUTO for forward reference + */ + if (!listcount(mac->neigh_list)) { + zebra_evpn_mac_del(zevpn, mac); + } else { + UNSET_FLAG(mac->flags, ZEBRA_MAC_ALL_LOCAL_FLAGS); + UNSET_FLAG(mac->flags, ZEBRA_MAC_STICKY); + SET_FLAG(mac->flags, ZEBRA_MAC_AUTO); + } + + return 0; +} + +void zebra_evpn_mac_gw_macip_add(struct interface *ifp, + struct zebra_evpn *zevpn, + const struct ipaddr *ip, + struct zebra_mac **macp, + const struct ethaddr *macaddr, + vlanid_t vlan_id, bool def_gw) +{ + struct zebra_mac *mac; + ns_id_t local_ns_id = NS_DEFAULT; + struct zebra_vrf *zvrf; + + zvrf = ifp->vrf->info; + if (zvrf && zvrf->zns) + local_ns_id = zvrf->zns->ns_id; + + if (!*macp) { + mac = zebra_evpn_mac_lookup(zevpn, macaddr); + if (!mac) + mac = zebra_evpn_mac_add(zevpn, macaddr); + *macp = mac; + } else + mac = *macp; + + /* Set "local" forwarding info. 
*/ + zebra_evpn_mac_clear_fwd_info(mac); + SET_FLAG(mac->flags, ZEBRA_MAC_LOCAL); + SET_FLAG(mac->flags, ZEBRA_MAC_AUTO); + if (def_gw) + SET_FLAG(mac->flags, ZEBRA_MAC_DEF_GW); + else + SET_FLAG(mac->flags, ZEBRA_MAC_SVI); + mac->fwd_info.local.ifindex = ifp->ifindex; + mac->fwd_info.local.ns_id = local_ns_id; + mac->fwd_info.local.vid = vlan_id; +} + +void zebra_evpn_mac_svi_del(struct interface *ifp, struct zebra_evpn *zevpn) +{ + struct zebra_mac *mac; + struct ethaddr macaddr; + bool old_bgp_ready; + + if (!zebra_evpn_mh_do_adv_svi_mac()) + return; + + memcpy(&macaddr.octet, ifp->hw_addr, ETH_ALEN); + mac = zebra_evpn_mac_lookup(zevpn, &macaddr); + if (mac && CHECK_FLAG(mac->flags, ZEBRA_MAC_SVI)) { + if (IS_ZEBRA_DEBUG_EVPN_MH_ES) + zlog_debug("SVI %s mac free", ifp->name); + + old_bgp_ready = zebra_evpn_mac_is_ready_for_bgp(mac->flags); + UNSET_FLAG(mac->flags, ZEBRA_MAC_SVI); + zebra_evpn_mac_send_add_del_to_client(mac, old_bgp_ready, + false); + zebra_evpn_deref_ip2mac(mac->zevpn, mac); + } +} + +void zebra_evpn_mac_svi_add(struct interface *ifp, struct zebra_evpn *zevpn) +{ + struct zebra_mac *mac = NULL; + struct ethaddr macaddr; + struct zebra_if *zif = ifp->info; + bool old_bgp_ready; + bool new_bgp_ready; + + if (!zebra_evpn_mh_do_adv_svi_mac() + || !zebra_evpn_send_to_client_ok(zevpn)) + return; + + memcpy(&macaddr.octet, ifp->hw_addr, ETH_ALEN); + + /* dup check */ + mac = zebra_evpn_mac_lookup(zevpn, &macaddr); + if (mac && CHECK_FLAG(mac->flags, ZEBRA_MAC_SVI)) + return; + + /* add/update mac */ + if (IS_ZEBRA_DEBUG_EVPN_MH_ES) + zlog_debug("SVI %s mac add", zif->ifp->name); + + old_bgp_ready = (mac && zebra_evpn_mac_is_ready_for_bgp(mac->flags)) + ? 
true + : false; + + zebra_evpn_mac_gw_macip_add(ifp, zevpn, NULL, &mac, &macaddr, 0, false); + + new_bgp_ready = zebra_evpn_mac_is_ready_for_bgp(mac->flags); + zebra_evpn_mac_send_add_del_to_client(mac, old_bgp_ready, + new_bgp_ready); +} diff --git a/zebra/zebra_evpn_mac.h b/zebra/zebra_evpn_mac.h new file mode 100644 index 0000000..b727ac1 --- /dev/null +++ b/zebra/zebra_evpn_mac.h @@ -0,0 +1,296 @@ +/* + * Zebra EVPN MAC Data structures and definitions + * These are "internal" to this function. + * Copyright (C) 2016, 2017 Cumulus Networks, Inc. + * Copyright (C) 2020 Volta Networks. + * + * This file is part of FRR. + * + * FRR is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License as published by the + * Free Software Foundation; either version 2, or (at your option) any + * later version. + * + * FRR is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with FRR; see the file COPYING. If not, write to the Free + * Software Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA + * 02111-1307, USA. + */ + +#ifndef _ZEBRA_EVPN_MAC_H +#define _ZEBRA_EVPN_MAC_H + +#ifdef __cplusplus +extern "C" { +#endif + + +struct host_rb_entry { + RB_ENTRY(host_rb_entry) hl_entry; + + struct prefix p; +}; + +RB_HEAD(host_rb_tree_entry, host_rb_entry); +RB_PROTOTYPE(host_rb_tree_entry, host_rb_entry, hl_entry, + host_rb_entry_compare); +/* + * MAC hash table. + * + * This table contains the MAC addresses pertaining to this VNI. + * This includes local MACs learnt on an attached VLAN that maps + * to this VNI as well as remote MACs learnt and installed by BGP. 
+ * Local MACs will be known either on a VLAN sub-interface or + * on (port, VLAN); however, it is sufficient for zebra to maintain + * against the VNI i.e., it does not need to retain the local "port" + * information. The correct VNI will be obtained as zebra maintains + * the mapping (of VLAN to VNI). + */ +struct zebra_mac { + /* MAC address. */ + struct ethaddr macaddr; + + /* When modifying flags please fixup zebra_evpn_zebra_mac_flag_dump */ + uint32_t flags; +#define ZEBRA_MAC_LOCAL 0x01 +#define ZEBRA_MAC_REMOTE 0x02 +#define ZEBRA_MAC_AUTO 0x04 /* Auto created for neighbor. */ +#define ZEBRA_MAC_STICKY 0x08 /* Static MAC */ +#define ZEBRA_MAC_REMOTE_RMAC 0x10 /* remote router mac */ +#define ZEBRA_MAC_DEF_GW 0x20 +/* remote VTEP advertised MAC as default GW */ +#define ZEBRA_MAC_REMOTE_DEF_GW 0x40 +#define ZEBRA_MAC_DUPLICATE 0x80 +#define ZEBRA_MAC_FPM_SENT 0x100 /* whether or not this entry was sent. */ +/* MAC is locally active on an ethernet segment peer */ +#define ZEBRA_MAC_ES_PEER_ACTIVE 0x200 +/* MAC has been proxy-advertised by peers. This means we need to + * keep the entry for forwarding but cannot advertise it + */ +#define ZEBRA_MAC_ES_PEER_PROXY 0x400 +/* We have not been able to independently establish that the host is + * local connected but one or more ES peers claims it is. + * We will maintain the entry for forwarding purposes and continue + * to advertise it as locally attached but with a "proxy" flag + */ +#define ZEBRA_MAC_LOCAL_INACTIVE 0x800 +/* The MAC entry was created because of advertise_svi_mac */ +#define ZEBRA_MAC_SVI 0x1000 + +#define ZEBRA_MAC_ALL_LOCAL_FLAGS (ZEBRA_MAC_LOCAL | ZEBRA_MAC_LOCAL_INACTIVE) +#define ZEBRA_MAC_ALL_PEER_FLAGS \ + (ZEBRA_MAC_ES_PEER_PROXY | ZEBRA_MAC_ES_PEER_ACTIVE) + + /* back pointer to zevpn */ + struct zebra_evpn *zevpn; + + /* Local or remote info. + * Note: fwd_info is only relevant if mac->es is NULL. 
+ */ + union { + struct { + ifindex_t ifindex; + ns_id_t ns_id; + vlanid_t vid; + } local; + + struct in_addr r_vtep_ip; + } fwd_info; + + /* Local or remote ES */ + struct zebra_evpn_es *es; + /* memory used to link the mac to the es */ + struct listnode es_listnode; + + /* access-port/bridge member. only relevant for local macs that + * are associated with a zero-ESI, + * XXX - this belongs in fwd_info.local; however fwd_info is + * being cleared and memset to zero in different ways that can + * mess up the links. + */ + struct interface *ifp; + /* memory used to link the mac to the ifp */ + struct listnode ifp_listnode; + + /* Mobility sequence numbers associated with this entry. */ + uint32_t rem_seq; + uint32_t loc_seq; + + /* List of neigh associated with this mac */ + struct list *neigh_list; + + /* List of nexthop associated with this RMAC */ + struct list *nh_list; + + /* Duplicate mac detection */ + uint32_t dad_count; + + struct thread *dad_mac_auto_recovery_timer; + + struct timeval detect_start_time; + + time_t dad_dup_detect_time; + + /* used for ageing out the PEER_ACTIVE flag */ + struct thread *hold_timer; + + /* number of neigh entries (using this mac) that have + * ZEBRA_MAC_ES_PEER_ACTIVE or ZEBRA_NEIGH_ES_PEER_PROXY + */ + uint32_t sync_neigh_cnt; + + time_t uptime; +}; + +/* + * Context for MAC hash walk - used by callbacks. + */ +struct mac_walk_ctx { + struct zebra_evpn *zevpn; /* EVPN hash */ + struct zebra_vrf *zvrf; /* VRF - for client notification. */ + int uninstall; /* uninstall from kernel? */ + int upd_client; /* uninstall from client? 
*/ + + uint32_t flags; +#define DEL_LOCAL_MAC 0x1 +#define DEL_REMOTE_MAC 0x2 +#define DEL_ALL_MAC (DEL_LOCAL_MAC | DEL_REMOTE_MAC) +#define DEL_REMOTE_MAC_FROM_VTEP 0x4 +#define SHOW_REMOTE_MAC_FROM_VTEP 0x8 + + struct in_addr r_vtep_ip; /* To walk MACs from specific VTEP */ + + struct vty *vty; /* Used by VTY handlers */ + uint32_t count; /* Used by VTY handlers */ + struct json_object *json; /* Used for JSON Output */ + bool print_dup; /* Used to print dup addr list */ +}; + +struct rmac_walk_ctx { + struct vty *vty; + struct json_object *json; +}; + +/* temporary datastruct to pass info between the mac-update and + * neigh-update while handling mac-ip routes + */ +struct sync_mac_ip_ctx { + bool ignore_macip; + bool mac_created; + bool mac_inactive; + bool mac_dp_update_deferred; + struct zebra_mac *mac; +}; + +/**************************** SYNC MAC handling *****************************/ +/* if the mac has been added of a mac-route from the peer + * or if it is being referenced by a neigh added by the + * peer we cannot let it age out i.e. 
we set the static bit + * in the dataplane + */ +static inline bool zebra_evpn_mac_is_static(struct zebra_mac *mac) +{ + return ((mac->flags & ZEBRA_MAC_ALL_PEER_FLAGS) || mac->sync_neigh_cnt); +} + +/* mac needs to be locally active or active on an ES peer */ +static inline bool zebra_evpn_mac_is_ready_for_bgp(uint32_t flags) +{ + return (flags & ZEBRA_MAC_LOCAL) + && (!(flags & ZEBRA_MAC_LOCAL_INACTIVE) + || (flags & ZEBRA_MAC_ES_PEER_ACTIVE)); +} + +void zebra_evpn_mac_stop_hold_timer(struct zebra_mac *mac); + +static inline void zebra_evpn_mac_clear_sync_info(struct zebra_mac *mac) +{ + UNSET_FLAG(mac->flags, ZEBRA_MAC_ALL_PEER_FLAGS); + zebra_evpn_mac_stop_hold_timer(mac); +} + +static inline bool zebra_evpn_mac_in_use(struct zebra_mac *mac) +{ + return !list_isempty(mac->neigh_list) + || CHECK_FLAG(mac->flags, ZEBRA_MAC_SVI); +} + +struct hash *zebra_mac_db_create(const char *desc); +uint32_t num_valid_macs(struct zebra_evpn *zevi); +uint32_t num_dup_detected_macs(struct zebra_evpn *zevi); +int zebra_evpn_rem_mac_uninstall(struct zebra_evpn *zevi, struct zebra_mac *mac, + bool force); +int zebra_evpn_rem_mac_install(struct zebra_evpn *zevi, struct zebra_mac *mac, + bool was_static); +void zebra_evpn_deref_ip2mac(struct zebra_evpn *zevi, struct zebra_mac *mac); +struct zebra_mac *zebra_evpn_mac_lookup(struct zebra_evpn *zevi, + const struct ethaddr *mac); +struct zebra_mac *zebra_evpn_mac_add(struct zebra_evpn *zevi, + const struct ethaddr *macaddr); +int zebra_evpn_mac_del(struct zebra_evpn *zevi, struct zebra_mac *mac); +int zebra_evpn_macip_send_msg_to_client(uint32_t id, + const struct ethaddr *macaddr, + const struct ipaddr *ip, uint8_t flags, + uint32_t seq, int state, + struct zebra_evpn_es *es, uint16_t cmd); +void zebra_evpn_print_mac(struct zebra_mac *mac, void *ctxt, json_object *json); +void zebra_evpn_print_mac_hash(struct hash_bucket *bucket, void *ctxt); +void zebra_evpn_print_mac_hash_detail(struct hash_bucket *bucket, void *ctxt); +int 
zebra_evpn_sync_mac_dp_install(struct zebra_mac *mac, bool set_inactive, + bool force_clear_static, const char *caller); +void zebra_evpn_mac_send_add_del_to_client(struct zebra_mac *mac, + bool old_bgp_ready, + bool new_bgp_ready); + +void zebra_evpn_mac_del_all(struct zebra_evpn *zevi, int uninstall, + int upd_client, uint32_t flags); +int zebra_evpn_mac_send_add_to_client(vni_t vni, const struct ethaddr *macaddr, + uint32_t mac_flags, uint32_t seq, + struct zebra_evpn_es *es); +int zebra_evpn_mac_send_del_to_client(vni_t vni, const struct ethaddr *macaddr, + uint32_t flags, bool force); +void zebra_evpn_send_mac_list_to_client(struct zebra_evpn *zevi); +struct zebra_mac *zebra_evpn_proc_sync_mac_update( + struct zebra_evpn *zevi, const struct ethaddr *macaddr, + uint16_t ipa_len, const struct ipaddr *ipaddr, uint8_t flags, + uint32_t seq, const esi_t *esi, struct sync_mac_ip_ctx *ctx); +void zebra_evpn_sync_mac_del(struct zebra_mac *mac); +void zebra_evpn_rem_mac_del(struct zebra_evpn *zevi, struct zebra_mac *mac); +void zebra_evpn_print_dad_mac_hash(struct hash_bucket *bucket, void *ctxt); +void zebra_evpn_print_dad_mac_hash_detail(struct hash_bucket *bucket, + void *ctxt); +int zebra_evpn_mac_remote_macip_add( + struct zebra_evpn *zevpn, struct zebra_vrf *zvrf, + const struct ethaddr *macaddr, uint16_t ipa_len, + const struct ipaddr *ipaddr, struct zebra_mac **macp, + struct in_addr vtep_ip, uint8_t flags, uint32_t seq, const esi_t *esi); + +int zebra_evpn_add_update_local_mac(struct zebra_vrf *zvrf, + struct zebra_evpn *zevpn, + struct interface *ifp, + const struct ethaddr *macaddr, vlanid_t vid, + bool sticky, bool local_inactive, + bool dp_static, struct zebra_mac *mac); +int zebra_evpn_del_local_mac(struct zebra_evpn *zevpn, struct zebra_mac *mac, + bool clear_static); +void zebra_evpn_mac_gw_macip_add(struct interface *ifp, + struct zebra_evpn *zevpn, + const struct ipaddr *ip, + struct zebra_mac **macp, + const struct ethaddr *macaddr, + vlanid_t 
vlan_id, bool def_gw); +void zebra_evpn_mac_svi_add(struct interface *ifp, struct zebra_evpn *zevpn); +void zebra_evpn_mac_svi_del(struct interface *ifp, struct zebra_evpn *zevpn); +void zebra_evpn_mac_ifp_del(struct interface *ifp); +void zebra_evpn_mac_clear_fwd_info(struct zebra_mac *zmac); + +#ifdef __cplusplus +} +#endif + +#endif /*_ZEBRA_EVPN_MAC_H */ diff --git a/zebra/zebra_evpn_mh.c b/zebra/zebra_evpn_mh.c new file mode 100644 index 0000000..6f6ba25 --- /dev/null +++ b/zebra/zebra_evpn_mh.c @@ -0,0 +1,4039 @@ +/* + * Zebra EVPN multihoming code + * + * Copyright (C) 2019 Cumulus Networks, Inc. + * Anuradha Karuppiah + * + * This file is part of FRR. + * + * FRR is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License as published by the + * Free Software Foundation; either version 2, or (at your option) any + * later version. + * + * FRR is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. 
+ */ + +#include <zebra.h> + +#include "command.h" +#include "hash.h" +#include "if.h" +#include "jhash.h" +#include "linklist.h" +#include "log.h" +#include "memory.h" +#include "prefix.h" +#include "stream.h" +#include "table.h" +#include "vlan.h" +#include "vxlan.h" + +#include "zebra/zebra_router.h" +#include "zebra/debug.h" +#include "zebra/interface.h" +#include "zebra/rib.h" +#include "zebra/rt.h" +#include "zebra/rt_netlink.h" +#include "zebra/if_netlink.h" +#include "zebra/zebra_errors.h" +#include "zebra/zebra_l2.h" +#include "zebra/zebra_ns.h" +#include "zebra/zebra_vrf.h" +#include "zebra/zebra_vxlan.h" +#include "zebra/zebra_evpn.h" +#include "zebra/zebra_evpn_mac.h" +#include "zebra/zebra_vxlan_private.h" +#include "zebra/zebra_router.h" +#include "zebra/zebra_evpn_mh.h" +#include "zebra/zebra_nhg.h" + +DEFINE_MTYPE_STATIC(ZEBRA, ZACC_BD, "Access Broadcast Domain"); +DEFINE_MTYPE_STATIC(ZEBRA, ZES, "Ethernet Segment"); +DEFINE_MTYPE_STATIC(ZEBRA, ZES_EVI, "ES info per-EVI"); +DEFINE_MTYPE_STATIC(ZEBRA, ZMH_INFO, "MH global info"); +DEFINE_MTYPE_STATIC(ZEBRA, ZES_VTEP, "VTEP attached to the ES"); +DEFINE_MTYPE_STATIC(ZEBRA, L2_NH, "L2 nexthop"); + +static void zebra_evpn_es_get_one_base_evpn(void); +static int zebra_evpn_es_evi_send_to_client(struct zebra_evpn_es *es, + struct zebra_evpn *zevpn, bool add); +static void zebra_evpn_local_es_del(struct zebra_evpn_es **esp); +static int zebra_evpn_local_es_update(struct zebra_if *zif, esi_t *esi); +static bool zebra_evpn_es_br_port_dplane_update(struct zebra_evpn_es *es, + const char *caller); +static void zebra_evpn_mh_uplink_cfg_update(struct zebra_if *zif, bool set); +static void zebra_evpn_mh_update_protodown_es(struct zebra_evpn_es *es, + bool resync_dplane); +static void zebra_evpn_mh_clear_protodown_es(struct zebra_evpn_es *es); +static void zebra_evpn_mh_startup_delay_timer_start(const char *rc); + +esi_t zero_esi_buf, *zero_esi = &zero_esi_buf; + 
+/*****************************************************************************/
+/* Ethernet Segment to EVI association -
+ * 1. The ES-EVI entry is maintained as a RB tree per L2-VNI
+ * (struct zebra_evpn.es_evi_rb_tree).
+ * 2. Each local ES-EVI entry is sent to BGP which advertises it as an
+ * EAD-EVI (Type-1 EVPN) route
+ * 3. Local ES-EVI setup is re-evaluated on the following triggers -
+ * a. When an ESI is set or cleared on an access port.
+ * b. When an access port associated with an ESI is deleted.
+ * c. When VLAN membership changes on an access port.
+ * d. When a VXLAN_IF is set or cleared on an access broadcast domain.
+ * e. When a L2-VNI is added or deleted for a VxLAN_IF.
+ * 4. Currently zebra doesn't track remote ES-EVIs. Those are managed and
+ * maintained entirely in BGP which consolidates them into a remote ES.
+ * The remote ES is then sent to zebra which allocates a NHG for it.
+ */
+
+/* compare ES-IDs for the ES-EVI RB tree maintained per-EVPN.
+ * Only the ESI is keyed on; the VNI is implicit in the per-EVPN tree.
+ */
+static int zebra_es_evi_rb_cmp(const struct zebra_evpn_es_evi *es_evi1,
+			       const struct zebra_evpn_es_evi *es_evi2)
+{
+	return memcmp(&es_evi1->es->esi, &es_evi2->es->esi, ESI_BYTES);
+}
+RB_GENERATE(zebra_es_evi_rb_head, zebra_evpn_es_evi,
+	    rb_node, zebra_es_evi_rb_cmp);
+
+/* allocate a new ES-EVI and insert it into the per-L2-VNI and per-ES
+ * tables. 
+ */
+static struct zebra_evpn_es_evi *zebra_evpn_es_evi_new(struct zebra_evpn_es *es,
+		struct zebra_evpn *zevpn)
+{
+	struct zebra_evpn_es_evi *es_evi;
+
+	es_evi = XCALLOC(MTYPE_ZES_EVI, sizeof(struct zebra_evpn_es_evi));
+
+	es_evi->es = es;
+	es_evi->zevpn = zevpn;
+
+	/* insert into the EVPN-ESI rb tree */
+	RB_INSERT(zebra_es_evi_rb_head, &zevpn->es_evi_rb_tree, es_evi);
+
+	/* add to the ES's VNI list; the listnode memory is embedded in the
+	 * es_evi itself (freed together in zebra_evpn_es_evi_free)
+	 */
+	listnode_init(&es_evi->es_listnode, es_evi);
+	listnode_add(es->es_evi_list, &es_evi->es_listnode);
+
+	if (IS_ZEBRA_DEBUG_EVPN_MH_ES)
+		zlog_debug("es %s evi %d new",
+				es_evi->es->esi_str, es_evi->zevpn->vni);
+
+	return es_evi;
+}
+
+/* Evaluate if the es_evi is ready to be sent to BGP -
+ * 1. If it is ready an add is sent to BGP
+ * 2. If it is not ready a del is sent (if the ES had been previously added
+ * to BGP).
+ */
+static void zebra_evpn_es_evi_re_eval_send_to_client(
+		struct zebra_evpn_es_evi *es_evi)
+{
+	bool old_ready;
+	bool new_ready;
+
+	old_ready = !!(es_evi->flags & ZEBRA_EVPNES_EVI_READY_FOR_BGP);
+
+	/* ES and L2-VNI have to be individually ready for BGP */
+	if ((es_evi->flags & ZEBRA_EVPNES_EVI_LOCAL) &&
+	    (es_evi->es->flags & ZEBRA_EVPNES_READY_FOR_BGP) &&
+	    zebra_evpn_send_to_client_ok(es_evi->zevpn))
+		es_evi->flags |= ZEBRA_EVPNES_EVI_READY_FOR_BGP;
+	else
+		es_evi->flags &= ~ZEBRA_EVPNES_EVI_READY_FOR_BGP;
+
+	new_ready = !!(es_evi->flags & ZEBRA_EVPNES_EVI_READY_FOR_BGP);
+
+	/* only notify BGP on a readiness transition */
+	if (old_ready == new_ready)
+		return;
+
+	if (new_ready)
+		zebra_evpn_es_evi_send_to_client(es_evi->es, es_evi->zevpn,
+				true /* add */);
+	else
+		zebra_evpn_es_evi_send_to_client(es_evi->es, es_evi->zevpn,
+				false /* add */);
+}
+
+/* remove the ES-EVI from the per-L2-VNI and per-ES tables and free
+ * up the memory. 
+ */
+static void zebra_evpn_es_evi_free(struct zebra_evpn_es_evi *es_evi)
+{
+	struct zebra_evpn_es *es = es_evi->es;
+	struct zebra_evpn *zevpn = es_evi->zevpn;
+
+	if (IS_ZEBRA_DEBUG_EVPN_MH_ES)
+		zlog_debug("es %s evi %d free",
+			es_evi->es->esi_str, es_evi->zevpn->vni);
+
+	/* remove from the ES's VNI list */
+	list_delete_node(es->es_evi_list, &es_evi->es_listnode);
+
+	/* remove from the VNI-ESI rb tree */
+	RB_REMOVE(zebra_es_evi_rb_head, &zevpn->es_evi_rb_tree, es_evi);
+
+	/* free the es_evi memory (also carries the embedded listnode) */
+	XFREE(MTYPE_ZES_EVI, es_evi);
+}
+
+/* find the ES-EVI in the per-L2-VNI RB tree.
+ * Only es_evi.es needs to be filled in for the lookup key as the RB
+ * compare function keys solely on the ESI.
+ */
+struct zebra_evpn_es_evi *zebra_evpn_es_evi_find(struct zebra_evpn_es *es,
+					struct zebra_evpn *zevpn)
+{
+	struct zebra_evpn_es_evi es_evi;
+
+	es_evi.es = es;
+
+	return RB_FIND(zebra_es_evi_rb_head, &zevpn->es_evi_rb_tree, &es_evi);
+}
+
+/* Tell BGP about an ES-EVI deletion and then delete it */
+static void zebra_evpn_local_es_evi_do_del(struct zebra_evpn_es_evi *es_evi)
+{
+	if (!(es_evi->flags & ZEBRA_EVPNES_EVI_LOCAL))
+		return;
+
+	if (IS_ZEBRA_DEBUG_EVPN_MH_ES)
+		zlog_debug("local es %s evi %d del",
+			es_evi->es->esi_str, es_evi->zevpn->vni);
+
+	if (es_evi->flags & ZEBRA_EVPNES_EVI_READY_FOR_BGP) {
+		/* send a del only if add was sent for it earlier */
+		zebra_evpn_es_evi_send_to_client(es_evi->es,
+				es_evi->zevpn, false /* add */);
+	}
+
+	/* delete it from the EVPN's local list */
+	list_delete_node(es_evi->zevpn->local_es_evi_list,
+			&es_evi->l2vni_listnode);
+
+	es_evi->flags &= ~ZEBRA_EVPNES_EVI_LOCAL;
+	zebra_evpn_es_evi_free(es_evi);
+}
+static void zebra_evpn_local_es_evi_del(struct zebra_evpn_es *es,
+		struct zebra_evpn *zevpn)
+{
+	struct zebra_evpn_es_evi *es_evi;
+
+	es_evi = zebra_evpn_es_evi_find(es, zevpn);
+	if (es_evi)
+		zebra_evpn_local_es_evi_do_del(es_evi);
+}
+
+/* If there are any existing MAC entries for this es/zevpn we need
+ * to install it in the dataplane. 
+ * + * Note: primary purpose of this is to handle es del/re-add windows where + * sync MAC entries may be added by bgpd before the es-evi membership is + * created in the dataplane and in zebra + */ +static void zebra_evpn_es_evi_mac_install(struct zebra_evpn_es_evi *es_evi) +{ + struct zebra_mac *mac; + struct listnode *node; + struct zebra_evpn_es *es = es_evi->es; + + if (listcount(es->mac_list) && IS_ZEBRA_DEBUG_EVPN_MH_ES) + zlog_debug("dp-mac install on es %s evi %d add", es->esi_str, + es_evi->zevpn->vni); + + for (ALL_LIST_ELEMENTS_RO(es->mac_list, node, mac)) { + if (mac->zevpn != es_evi->zevpn) + continue; + + if (!CHECK_FLAG(mac->flags, ZEBRA_MAC_LOCAL)) + continue; + + zebra_evpn_sync_mac_dp_install(mac, false, false, __func__); + } +} + +/* Create an ES-EVI if it doesn't already exist and tell BGP */ +static void zebra_evpn_local_es_evi_add(struct zebra_evpn_es *es, + struct zebra_evpn *zevpn) +{ + struct zebra_evpn_es_evi *es_evi; + + es_evi = zebra_evpn_es_evi_find(es, zevpn); + if (!es_evi) { + es_evi = zebra_evpn_es_evi_new(es, zevpn); + if (!es_evi) + return; + + if (IS_ZEBRA_DEBUG_EVPN_MH_ES) + zlog_debug("local es %s evi %d add", + es_evi->es->esi_str, es_evi->zevpn->vni); + es_evi->flags |= ZEBRA_EVPNES_EVI_LOCAL; + /* add to the EVPN's local list */ + listnode_init(&es_evi->l2vni_listnode, es_evi); + listnode_add(zevpn->local_es_evi_list, &es_evi->l2vni_listnode); + + zebra_evpn_es_evi_re_eval_send_to_client(es_evi); + + zebra_evpn_es_evi_mac_install(es_evi); + } +} + +static void zebra_evpn_es_evi_show_entry(struct vty *vty, + struct zebra_evpn_es_evi *es_evi, + json_object *json_array) +{ + char type_str[4]; + + if (json_array) { + json_object *json; + json_object *json_types; + + /* Separate JSON object for each es-evi entry */ + json = json_object_new_object(); + + json_object_string_add(json, "esi", es_evi->es->esi_str); + json_object_int_add(json, "vni", es_evi->zevpn->vni); + if (es_evi->flags & ZEBRA_EVPNES_EVI_LOCAL) { + json_types = 
json_object_new_array(); + if (es_evi->flags & ZEBRA_EVPNES_EVI_LOCAL) + json_array_string_add(json_types, "local"); + json_object_object_add(json, "type", json_types); + } + + /* Add es-evi entry to json array */ + json_object_array_add(json_array, json); + } else { + type_str[0] = '\0'; + if (es_evi->flags & ZEBRA_EVPNES_EVI_LOCAL) + strlcat(type_str, "L", sizeof(type_str)); + + vty_out(vty, "%-8d %-30s %-4s\n", + es_evi->zevpn->vni, es_evi->es->esi_str, + type_str); + } +} + +static void +zebra_evpn_es_evi_show_entry_detail(struct vty *vty, + struct zebra_evpn_es_evi *es_evi, + json_object *json_array) +{ + char type_str[4]; + + if (json_array) { + json_object *json; + json_object *json_flags; + + /* Separate JSON object for each es-evi entry */ + json = json_object_new_object(); + + json_object_string_add(json, "esi", es_evi->es->esi_str); + json_object_int_add(json, "vni", es_evi->zevpn->vni); + if (es_evi->flags + & (ZEBRA_EVPNES_EVI_LOCAL + | ZEBRA_EVPNES_EVI_READY_FOR_BGP)) { + json_flags = json_object_new_array(); + if (es_evi->flags & ZEBRA_EVPNES_EVI_LOCAL) + json_array_string_add(json_flags, "local"); + if (es_evi->flags & ZEBRA_EVPNES_EVI_READY_FOR_BGP) + json_array_string_add(json_flags, + "readyForBgp"); + json_object_object_add(json, "flags", json_flags); + } + + /* Add es-evi entry to json array */ + json_object_array_add(json_array, json); + } else { + type_str[0] = '\0'; + if (es_evi->flags & ZEBRA_EVPNES_EVI_LOCAL) + strlcat(type_str, "L", sizeof(type_str)); + + vty_out(vty, "VNI %d ESI: %s\n", + es_evi->zevpn->vni, es_evi->es->esi_str); + vty_out(vty, " Type: %s\n", type_str); + vty_out(vty, " Ready for BGP: %s\n", + (es_evi->flags & + ZEBRA_EVPNES_EVI_READY_FOR_BGP) ? 
+ "yes" : "no"); + vty_out(vty, "\n"); + } +} + +static void zebra_evpn_es_evi_show_one_evpn(struct zebra_evpn *zevpn, + struct vty *vty, + json_object *json_array, int detail) +{ + struct zebra_evpn_es_evi *es_evi; + + RB_FOREACH(es_evi, zebra_es_evi_rb_head, &zevpn->es_evi_rb_tree) { + if (detail) + zebra_evpn_es_evi_show_entry_detail(vty, es_evi, + json_array); + else + zebra_evpn_es_evi_show_entry(vty, es_evi, json_array); + } +} + +struct evpn_mh_show_ctx { + struct vty *vty; + json_object *json; + int detail; +}; + +static void zebra_evpn_es_evi_show_one_evpn_hash_cb(struct hash_bucket *bucket, + void *ctxt) +{ + struct zebra_evpn *zevpn = (struct zebra_evpn *)bucket->data; + struct evpn_mh_show_ctx *wctx = (struct evpn_mh_show_ctx *)ctxt; + + zebra_evpn_es_evi_show_one_evpn(zevpn, wctx->vty, + wctx->json, wctx->detail); +} + +void zebra_evpn_es_evi_show(struct vty *vty, bool uj, int detail) +{ + json_object *json_array = NULL; + struct zebra_vrf *zvrf; + struct evpn_mh_show_ctx wctx; + + zvrf = zebra_vrf_get_evpn(); + if (uj) + json_array = json_object_new_array(); + + memset(&wctx, 0, sizeof(wctx)); + wctx.vty = vty; + wctx.json = json_array; + wctx.detail = detail; + + if (!detail && !json_array) { + vty_out(vty, "Type: L local, R remote\n"); + vty_out(vty, "%-8s %-30s %-4s\n", "VNI", "ESI", "Type"); + } + /* Display all L2-VNIs */ + hash_iterate(zvrf->evpn_table, zebra_evpn_es_evi_show_one_evpn_hash_cb, + &wctx); + + if (uj) + vty_json(vty, json_array); +} + +void zebra_evpn_es_evi_show_vni(struct vty *vty, bool uj, vni_t vni, int detail) +{ + json_object *json_array = NULL; + struct zebra_evpn *zevpn; + + zevpn = zebra_evpn_lookup(vni); + if (uj) + json_array = json_object_new_array(); + + if (zevpn) { + if (!detail && !json_array) { + vty_out(vty, "Type: L local, R remote\n"); + vty_out(vty, "%-8s %-30s %-4s\n", "VNI", "ESI", "Type"); + } + zebra_evpn_es_evi_show_one_evpn(zevpn, vty, json_array, detail); + } else { + if (!uj) + vty_out(vty, "VNI %d 
doesn't exist\n", vni);
+	}
+
+	if (uj)
+		vty_json(vty, json_array);
+}
+
+/* Initialize the ES tables maintained per-L2_VNI */
+void zebra_evpn_es_evi_init(struct zebra_evpn *zevpn)
+{
+	/* Initialize the ES-EVI RB tree */
+	RB_INIT(zebra_es_evi_rb_head, &zevpn->es_evi_rb_tree);
+
+	/* Initialize the local and remote ES lists maintained for quick
+	 * walks by type
+	 */
+	zevpn->local_es_evi_list = list_new();
+	listset_app_node_mem(zevpn->local_es_evi_list);
+}
+
+/* Cleanup the ES info maintained per-EVPN */
+void zebra_evpn_es_evi_cleanup(struct zebra_evpn *zevpn)
+{
+	struct zebra_evpn_es_evi *es_evi;
+	struct zebra_evpn_es_evi *es_evi_next;
+
+	/* SAFE walk - zebra_evpn_local_es_evi_do_del frees each entry */
+	RB_FOREACH_SAFE(es_evi, zebra_es_evi_rb_head,
+			&zevpn->es_evi_rb_tree, es_evi_next) {
+		zebra_evpn_local_es_evi_do_del(es_evi);
+	}
+
+	list_delete(&zevpn->local_es_evi_list);
+	zebra_evpn_es_clear_base_evpn(zevpn);
+}
+
+/* called when the oper state or bridge membership changes for the
+ * vxlan device
+ */
+void zebra_evpn_update_all_es(struct zebra_evpn *zevpn)
+{
+	struct zebra_evpn_es_evi *es_evi;
+	struct listnode *node;
+	struct interface *vlan_if;
+	struct interface *vxlan_if;
+	struct zebra_if *vxlan_zif;
+
+	/* the EVPN is now eligible as a base for EVPN-MH */
+	if (zebra_evpn_send_to_client_ok(zevpn))
+		zebra_evpn_es_set_base_evpn(zevpn);
+	else
+		zebra_evpn_es_clear_base_evpn(zevpn);
+
+	for (ALL_LIST_ELEMENTS_RO(zevpn->local_es_evi_list, node, es_evi))
+		zebra_evpn_es_evi_re_eval_send_to_client(es_evi);
+
+	/* reinstall SVI MAC */
+	vxlan_if = zevpn->vxlan_if;
+	if (vxlan_if) {
+		vxlan_zif = vxlan_if->info;
+		if (if_is_operative(vxlan_if)
+		    && vxlan_zif->brslave_info.br_if) {
+			vlan_if = zvni_map_to_svi(
+					vxlan_zif->l2info.vxl.access_vlan,
+					vxlan_zif->brslave_info.br_if);
+			if (vlan_if)
+				zebra_evpn_acc_bd_svi_mac_add(vlan_if);
+		}
+	}
+}
+
+/*****************************************************************************/
+/* Access broadcast domains (BD)
+ * 1. 
These broadcast domains can be VLAN aware (in which case + * the key is VID) or VLAN unaware (in which case the key is + * 2. A VID-BD is created when a VLAN is associated with an access port or + * when the VLAN is associated with VXLAN_IF + * 3. A BD is translated into ES-EVI entries when a VNI is associated + * with the broadcast domain + */ +/* Hash key for VLAN based broadcast domains */ +static unsigned int zebra_evpn_acc_vl_hash_keymake(const void *p) +{ + const struct zebra_evpn_access_bd *acc_bd = p; + + return jhash_1word(acc_bd->vid, 0); +} + +/* Compare two VLAN based broadcast domains */ +static bool zebra_evpn_acc_vl_cmp(const void *p1, const void *p2) +{ + const struct zebra_evpn_access_bd *acc_bd1 = p1; + const struct zebra_evpn_access_bd *acc_bd2 = p2; + + if (acc_bd1 == NULL && acc_bd2 == NULL) + return true; + + if (acc_bd1 == NULL || acc_bd2 == NULL) + return false; + + return (acc_bd1->vid == acc_bd2->vid); +} + +/* Lookup VLAN based broadcast domain */ +static struct zebra_evpn_access_bd *zebra_evpn_acc_vl_find(vlanid_t vid) +{ + struct zebra_evpn_access_bd *acc_bd; + struct zebra_evpn_access_bd tmp; + + tmp.vid = vid; + acc_bd = hash_lookup(zmh_info->evpn_vlan_table, &tmp); + + return acc_bd; +} + +/* A new broadcast domain can be created when a VLAN member or VLAN<=>VxLAN_IF + * mapping is added. 
+ */ +static struct zebra_evpn_access_bd * +zebra_evpn_acc_vl_new(vlanid_t vid, struct interface *br_if) +{ + struct zebra_evpn_access_bd *acc_bd; + struct interface *vlan_if; + + if (IS_ZEBRA_DEBUG_EVPN_MH_ES) + zlog_debug("access vlan %d add", vid); + + acc_bd = XCALLOC(MTYPE_ZACC_BD, sizeof(struct zebra_evpn_access_bd)); + + acc_bd->vid = vid; + + /* Initialize the mbr list */ + acc_bd->mbr_zifs = list_new(); + + /* Add to hash */ + (void)hash_get(zmh_info->evpn_vlan_table, acc_bd, hash_alloc_intern); + + /* check if an svi exists for the vlan */ + if (br_if) { + vlan_if = zvni_map_to_svi(vid, br_if); + if (vlan_if) { + if (IS_ZEBRA_DEBUG_EVPN_MH_ES) + zlog_debug("vlan %d SVI %s set", vid, + vlan_if->name); + acc_bd->vlan_zif = vlan_if->info; + } + } + return acc_bd; +} + +/* Free VLAN based broadcast domain - + * This just frees appropriate memory, caller should have taken other + * needed actions. + */ +static void zebra_evpn_acc_vl_free(struct zebra_evpn_access_bd *acc_bd) +{ + if (IS_ZEBRA_DEBUG_EVPN_MH_ES) + zlog_debug("access vlan %d del", acc_bd->vid); + + if (acc_bd->vlan_zif && acc_bd->zevpn && acc_bd->zevpn->mac_table) + zebra_evpn_mac_svi_del(acc_bd->vlan_zif->ifp, acc_bd->zevpn); + + /* cleanup resources maintained against the ES */ + list_delete(&acc_bd->mbr_zifs); + + /* remove EVI from various tables */ + hash_release(zmh_info->evpn_vlan_table, acc_bd); + + XFREE(MTYPE_ZACC_BD, acc_bd); +} + +static void zebra_evpn_acc_vl_cleanup_all(struct hash_bucket *bucket, void *arg) +{ + struct zebra_evpn_access_bd *acc_bd = bucket->data; + + zebra_evpn_acc_vl_free(acc_bd); +} + +/* called when a bd mbr is removed or VxLAN_IF is diassociated from the access + * VLAN + */ +static void zebra_evpn_acc_bd_free_on_deref(struct zebra_evpn_access_bd *acc_bd) +{ + if (!list_isempty(acc_bd->mbr_zifs) || acc_bd->vxlan_zif) + return; + + /* if there are no references free the EVI */ + zebra_evpn_acc_vl_free(acc_bd); +} + +/* called when a SVI is goes up/down */ +void 
zebra_evpn_acc_bd_svi_set(struct zebra_if *vlan_zif, + struct zebra_if *br_zif, bool is_up) +{ + struct zebra_evpn_access_bd *acc_bd; + struct zebra_l2info_bridge *br; + uint16_t vid; + struct zebra_if *tmp_br_zif = br_zif; + + if (!tmp_br_zif) { + if (!vlan_zif->link || !vlan_zif->link->info) + return; + + tmp_br_zif = vlan_zif->link->info; + } + + br = &tmp_br_zif->l2info.br; + /* ignore vlan unaware bridges */ + if (!br->vlan_aware) + return; + + vid = vlan_zif->l2info.vl.vid; + acc_bd = zebra_evpn_acc_vl_find(vid); + if (!acc_bd) + return; + + if (is_up) { + if (IS_ZEBRA_DEBUG_EVPN_MH_ES) + zlog_debug("vlan %d SVI %s set", vid, + vlan_zif->ifp->name); + + acc_bd->vlan_zif = vlan_zif; + if (acc_bd->zevpn) + zebra_evpn_mac_svi_add(acc_bd->vlan_zif->ifp, + acc_bd->zevpn); + } else if (acc_bd->vlan_zif) { + if (IS_ZEBRA_DEBUG_EVPN_MH_ES) + zlog_debug("vlan %d SVI clear", vid); + acc_bd->vlan_zif = NULL; + if (acc_bd->zevpn && acc_bd->zevpn->mac_table) + zebra_evpn_mac_svi_del(vlan_zif->ifp, acc_bd->zevpn); + } +} + +/* On some events macs are force-flushed. This api can be used to reinstate + * the svi-mac after such cleanup-events. + */ +void zebra_evpn_acc_bd_svi_mac_add(struct interface *vlan_if) +{ + zebra_evpn_acc_bd_svi_set(vlan_if->info, NULL, + if_is_operative(vlan_if)); +} + +/* called when a EVPN-L2VNI is set or cleared against a BD */ +static void zebra_evpn_acc_bd_evpn_set(struct zebra_evpn_access_bd *acc_bd, + struct zebra_evpn *zevpn, + struct zebra_evpn *old_zevpn) +{ + struct zebra_if *zif; + struct listnode *node; + + if (IS_ZEBRA_DEBUG_EVPN_MH_ES) + zlog_debug("access vlan %d l2-vni %u set", + acc_bd->vid, zevpn ? 
zevpn->vni : 0); + + for (ALL_LIST_ELEMENTS_RO(acc_bd->mbr_zifs, node, zif)) { + if (!zif->es_info.es) + continue; + + if (zevpn) + zebra_evpn_local_es_evi_add(zif->es_info.es, zevpn); + else if (old_zevpn) + zebra_evpn_local_es_evi_del(zif->es_info.es, old_zevpn); + } + + if (acc_bd->vlan_zif) { + if (zevpn) + zebra_evpn_mac_svi_add(acc_bd->vlan_zif->ifp, + acc_bd->zevpn); + else if (old_zevpn && old_zevpn->mac_table) + zebra_evpn_mac_svi_del(acc_bd->vlan_zif->ifp, + old_zevpn); + } +} + +/* handle VLAN->VxLAN_IF association */ +void zebra_evpn_vl_vxl_ref(uint16_t vid, struct zebra_if *vxlan_zif) +{ + struct zebra_evpn_access_bd *acc_bd; + struct zebra_if *old_vxlan_zif; + struct zebra_evpn *old_zevpn; + + if (!vid) + return; + + acc_bd = zebra_evpn_acc_vl_find(vid); + if (!acc_bd) + acc_bd = zebra_evpn_acc_vl_new(vid, + vxlan_zif->brslave_info.br_if); + + old_vxlan_zif = acc_bd->vxlan_zif; + acc_bd->vxlan_zif = vxlan_zif; + if (vxlan_zif == old_vxlan_zif) + return; + + old_zevpn = acc_bd->zevpn; + acc_bd->zevpn = zebra_evpn_lookup(vxlan_zif->l2info.vxl.vni); + if (acc_bd->zevpn == old_zevpn) + return; + + if (IS_ZEBRA_DEBUG_EVPN_MH_ES) + zlog_debug("access vlan %d vni %u ref", + acc_bd->vid, vxlan_zif->l2info.vxl.vni); + + if (old_zevpn) + zebra_evpn_acc_bd_evpn_set(acc_bd, NULL, old_zevpn); + + if (acc_bd->zevpn) + zebra_evpn_acc_bd_evpn_set(acc_bd, acc_bd->zevpn, NULL); +} + +/* handle VLAN->VxLAN_IF deref */ +void zebra_evpn_vl_vxl_deref(uint16_t vid, struct zebra_if *vxlan_zif) +{ + struct zebra_evpn_access_bd *acc_bd; + + if (!vid) + return; + + acc_bd = zebra_evpn_acc_vl_find(vid); + if (!acc_bd) + return; + + /* clear vxlan_if only if it matches */ + if (acc_bd->vxlan_zif != vxlan_zif) + return; + + if (IS_ZEBRA_DEBUG_EVPN_MH_ES) + zlog_debug("access vlan %d vni %u deref", + acc_bd->vid, vxlan_zif->l2info.vxl.vni); + + if (acc_bd->zevpn) + zebra_evpn_acc_bd_evpn_set(acc_bd, NULL, acc_bd->zevpn); + + acc_bd->zevpn = NULL; + acc_bd->vxlan_zif = NULL; + + /* 
if there are no other references the access_bd can be freed */ + zebra_evpn_acc_bd_free_on_deref(acc_bd); +} + +/* handle EVPN add/del */ +void zebra_evpn_vxl_evpn_set(struct zebra_if *zif, struct zebra_evpn *zevpn, + bool set) +{ + struct zebra_l2info_vxlan *vxl; + struct zebra_evpn_access_bd *acc_bd; + + if (!zif) + return; + + /* locate access_bd associated with the vxlan device */ + vxl = &zif->l2info.vxl; + acc_bd = zebra_evpn_acc_vl_find(vxl->access_vlan); + if (!acc_bd) + return; + + if (set) { + zebra_evpn_es_set_base_evpn(zevpn); + if (acc_bd->zevpn != zevpn) { + acc_bd->zevpn = zevpn; + zebra_evpn_acc_bd_evpn_set(acc_bd, zevpn, NULL); + } + } else { + if (acc_bd->zevpn) { + struct zebra_evpn *old_zevpn = acc_bd->zevpn; + acc_bd->zevpn = NULL; + zebra_evpn_acc_bd_evpn_set(acc_bd, NULL, old_zevpn); + } + } +} + +/* handle addition of new VLAN members */ +void zebra_evpn_vl_mbr_ref(uint16_t vid, struct zebra_if *zif) +{ + struct zebra_evpn_access_bd *acc_bd; + + if (!vid) + return; + + acc_bd = zebra_evpn_acc_vl_find(vid); + if (!acc_bd) + acc_bd = zebra_evpn_acc_vl_new(vid, zif->brslave_info.br_if); + + if (listnode_lookup(acc_bd->mbr_zifs, zif)) + return; + + if (IS_ZEBRA_DEBUG_EVPN_MH_ES) + zlog_debug("access vlan %d mbr %s ref", + vid, zif->ifp->name); + + listnode_add(acc_bd->mbr_zifs, zif); + if (acc_bd->zevpn && zif->es_info.es) + zebra_evpn_local_es_evi_add(zif->es_info.es, acc_bd->zevpn); +} + +/* handle deletion of VLAN members */ +void zebra_evpn_vl_mbr_deref(uint16_t vid, struct zebra_if *zif) +{ + struct zebra_evpn_access_bd *acc_bd; + struct listnode *node; + + if (!vid) + return; + + acc_bd = zebra_evpn_acc_vl_find(vid); + if (!acc_bd) + return; + + node = listnode_lookup(acc_bd->mbr_zifs, zif); + if (!node) + return; + + if (IS_ZEBRA_DEBUG_EVPN_MH_ES) + zlog_debug("access vlan %d mbr %s deref", + vid, zif->ifp->name); + + list_delete_node(acc_bd->mbr_zifs, node); + + if (acc_bd->zevpn && zif->es_info.es) + 
zebra_evpn_local_es_evi_del(zif->es_info.es, acc_bd->zevpn); + + /* if there are no other references the access_bd can be freed */ + zebra_evpn_acc_bd_free_on_deref(acc_bd); +} + +static void zebra_evpn_acc_vl_adv_svi_mac_cb(struct hash_bucket *bucket, + void *ctxt) +{ + struct zebra_evpn_access_bd *acc_bd = bucket->data; + + if (acc_bd->vlan_zif && acc_bd->zevpn) + zebra_evpn_mac_svi_add(acc_bd->vlan_zif->ifp, acc_bd->zevpn); +} + +/* called when advertise SVI MAC is enabled on the switch */ +static void zebra_evpn_acc_vl_adv_svi_mac_all(void) +{ + hash_iterate(zmh_info->evpn_vlan_table, + zebra_evpn_acc_vl_adv_svi_mac_cb, NULL); +} + +static void zebra_evpn_acc_vl_json_fill(struct zebra_evpn_access_bd *acc_bd, + json_object *json, bool detail) +{ + json_object_int_add(json, "vlan", acc_bd->vid); + if (acc_bd->vxlan_zif) + json_object_string_add(json, "vxlanIf", + acc_bd->vxlan_zif->ifp->name); + if (acc_bd->zevpn) + json_object_int_add(json, "vni", acc_bd->zevpn->vni); + if (acc_bd->mbr_zifs) + json_object_int_add(json, "memberIfCount", + listcount(acc_bd->mbr_zifs)); + + if (detail) { + json_object *json_mbrs; + json_object *json_mbr; + struct zebra_if *zif; + struct listnode *node; + + + json_mbrs = json_object_new_array(); + for (ALL_LIST_ELEMENTS_RO(acc_bd->mbr_zifs, node, zif)) { + json_mbr = json_object_new_object(); + json_object_string_add(json_mbr, "ifName", + zif->ifp->name); + json_object_array_add(json_mbrs, json_mbr); + } + json_object_object_add(json, "members", json_mbrs); + } +} + +static void zebra_evpn_acc_vl_show_entry_detail(struct vty *vty, + struct zebra_evpn_access_bd *acc_bd, json_object *json) +{ + struct zebra_if *zif; + struct listnode *node; + + if (json) { + zebra_evpn_acc_vl_json_fill(acc_bd, json, true); + } else { + vty_out(vty, "VLAN: %u\n", acc_bd->vid); + vty_out(vty, " VxLAN Interface: %s\n", + acc_bd->vxlan_zif ? + acc_bd->vxlan_zif->ifp->name : "-"); + vty_out(vty, " SVI: %s\n", + acc_bd->vlan_zif ? 
acc_bd->vlan_zif->ifp->name : "-"); + vty_out(vty, " L2-VNI: %d\n", + acc_bd->zevpn ? acc_bd->zevpn->vni : 0); + vty_out(vty, " Member Count: %d\n", + listcount(acc_bd->mbr_zifs)); + vty_out(vty, " Members: \n"); + for (ALL_LIST_ELEMENTS_RO(acc_bd->mbr_zifs, node, zif)) + vty_out(vty, " %s\n", zif->ifp->name); + vty_out(vty, "\n"); + } +} + +static void zebra_evpn_acc_vl_show_entry(struct vty *vty, + struct zebra_evpn_access_bd *acc_bd, json_object *json) +{ + if (json) { + zebra_evpn_acc_vl_json_fill(acc_bd, json, false); + } else { + vty_out(vty, "%-5u %-15s %-8d %-15s %u\n", acc_bd->vid, + acc_bd->vlan_zif ? acc_bd->vlan_zif->ifp->name : "-", + acc_bd->zevpn ? acc_bd->zevpn->vni : 0, + acc_bd->vxlan_zif ? acc_bd->vxlan_zif->ifp->name : "-", + listcount(acc_bd->mbr_zifs)); + } +} + +static void zebra_evpn_acc_vl_show_hash(struct hash_bucket *bucket, void *ctxt) +{ + struct evpn_mh_show_ctx *wctx = ctxt; + struct zebra_evpn_access_bd *acc_bd = bucket->data; + json_object *json = NULL; + + if (wctx->json) + json = json_object_new_object(); + if (wctx->detail) + zebra_evpn_acc_vl_show_entry_detail(wctx->vty, acc_bd, json); + else + zebra_evpn_acc_vl_show_entry(wctx->vty, acc_bd, json); + if (json) + json_object_array_add(wctx->json, json); +} + +void zebra_evpn_acc_vl_show(struct vty *vty, bool uj) +{ + struct evpn_mh_show_ctx wctx; + json_object *json_array = NULL; + + if (uj) + json_array = json_object_new_array(); + + memset(&wctx, 0, sizeof(wctx)); + wctx.vty = vty; + wctx.json = json_array; + wctx.detail = false; + + if (!uj) + vty_out(vty, "%-5s %-15s %-8s %-15s %s\n", "VLAN", "SVI", + "L2-VNI", "VXLAN-IF", "# Members"); + + hash_iterate(zmh_info->evpn_vlan_table, zebra_evpn_acc_vl_show_hash, + &wctx); + + if (uj) + vty_json(vty, json_array); +} + +void zebra_evpn_acc_vl_show_detail(struct vty *vty, bool uj) +{ + struct evpn_mh_show_ctx wctx; + json_object *json_array = NULL; + + if (uj) + json_array = json_object_new_array(); + memset(&wctx, 0, sizeof(wctx)); 
+ wctx.vty = vty; + wctx.json = json_array; + wctx.detail = true; + + hash_iterate(zmh_info->evpn_vlan_table, zebra_evpn_acc_vl_show_hash, + &wctx); + + if (uj) + vty_json(vty, json_array); +} + +void zebra_evpn_acc_vl_show_vid(struct vty *vty, bool uj, vlanid_t vid) +{ + json_object *json = NULL; + struct zebra_evpn_access_bd *acc_bd; + + if (uj) + json = json_object_new_object(); + + acc_bd = zebra_evpn_acc_vl_find(vid); + if (acc_bd) { + zebra_evpn_acc_vl_show_entry_detail(vty, acc_bd, json); + } else { + if (!json) + vty_out(vty, "VLAN %u not present\n", vid); + } + + if (uj) + vty_json(vty, json); +} + +/* Initialize VLAN member bitmap on an interface. Although VLAN membership + * is independent of EVPN we only process it if its of interest to EVPN-MH + * i.e. on access ports that can be setup as Ethernet Segments. And that is + * intended as an optimization. + */ +void zebra_evpn_if_init(struct zebra_if *zif) +{ + if (!zebra_evpn_is_if_es_capable(zif)) + return; + + if (!bf_is_inited(zif->vlan_bitmap)) + bf_init(zif->vlan_bitmap, IF_VLAN_BITMAP_MAX); + + /* if an es_id and sysmac are already present against the interface + * activate it + */ + zebra_evpn_local_es_update(zif, &zif->es_info.esi); +} + +/* handle deletion of an access port by removing it from all associated + * broadcast domains. + */ +void zebra_evpn_if_cleanup(struct zebra_if *zif) +{ + vlanid_t vid; + struct zebra_evpn_es *es; + + if (bf_is_inited(zif->vlan_bitmap)) { + bf_for_each_set_bit(zif->vlan_bitmap, vid, IF_VLAN_BITMAP_MAX) + { + zebra_evpn_vl_mbr_deref(vid, zif); + } + + bf_free(zif->vlan_bitmap); + } + + /* Delete associated Ethernet Segment */ + es = zif->es_info.es; + if (es) + zebra_evpn_local_es_del(&es); +} + +/***************************************************************************** + * L2 NH/NHG Management + * A L2 NH entry is programmed in the kernel for every ES-VTEP entry. This + * NH is then added to the L2-ECMP-NHG associated with the ES. 
+ */ +static uint32_t zebra_evpn_nhid_alloc(struct zebra_evpn_es *es) +{ + uint32_t id; + uint32_t nh_id; + + bf_assign_index(zmh_info->nh_id_bitmap, id); + + if (!id) + return 0; + + if (es) { + nh_id = id | EVPN_NHG_ID_TYPE_BIT; + /* Add to NHG hash */ + es->nhg_id = nh_id; + (void)hash_get(zmh_info->nhg_table, es, hash_alloc_intern); + } else { + nh_id = id | EVPN_NH_ID_TYPE_BIT; + } + + return nh_id; +} + +static void zebra_evpn_nhid_free(uint32_t nh_id, struct zebra_evpn_es *es) +{ + uint32_t id = (nh_id & EVPN_NH_ID_VAL_MASK); + + if (!id) + return; + + if (es) { + hash_release(zmh_info->nhg_table, es); + es->nhg_id = 0; + } + + bf_release_index(zmh_info->nh_id_bitmap, id); +} + +static unsigned int zebra_evpn_nh_ip_hash_keymake(const void *p) +{ + const struct zebra_evpn_l2_nh *nh = p; + + return jhash_1word(nh->vtep_ip.s_addr, 0); +} + +static bool zebra_evpn_nh_ip_cmp(const void *p1, const void *p2) +{ + const struct zebra_evpn_l2_nh *nh1 = p1; + const struct zebra_evpn_l2_nh *nh2 = p2; + + if (nh1 == NULL && nh2 == NULL) + return true; + + if (nh1 == NULL || nh2 == NULL) + return false; + + return (nh1->vtep_ip.s_addr == nh2->vtep_ip.s_addr); +} + +static unsigned int zebra_evpn_nhg_hash_keymake(const void *p) +{ + const struct zebra_evpn_es *es = p; + + return jhash_1word(es->nhg_id, 0); +} + +static bool zebra_evpn_nhg_cmp(const void *p1, const void *p2) +{ + const struct zebra_evpn_es *es1 = p1; + const struct zebra_evpn_es *es2 = p2; + + if (es1 == NULL && es2 == NULL) + return true; + + if (es1 == NULL || es2 == NULL) + return false; + + return (es1->nhg_id == es2->nhg_id); +} + +/* Lookup ES using the NHG id associated with it */ +static struct zebra_evpn_es *zebra_evpn_nhg_find(uint32_t nhg_id) +{ + struct zebra_evpn_es *es; + struct zebra_evpn_es tmp; + + tmp.nhg_id = nhg_id; + es = hash_lookup(zmh_info->nhg_table, &tmp); + + return es; +} + +/* Returns TRUE if the NHG is associated with a local ES */ +bool zebra_evpn_nhg_is_local_es(uint32_t 
nhg_id, + struct zebra_evpn_es **local_es) +{ + struct zebra_evpn_es *es; + + es = zebra_evpn_nhg_find(nhg_id); + if (es && (es->flags & ZEBRA_EVPNES_LOCAL)) { + *local_es = es; + return true; + } + + *local_es = NULL; + return false; +} + +/* update remote macs associated with the ES */ +static void zebra_evpn_nhg_mac_update(struct zebra_evpn_es *es) +{ + struct zebra_mac *mac; + struct listnode *node; + bool local_via_nw; + + local_via_nw = zebra_evpn_es_local_mac_via_network_port(es); + if (IS_ZEBRA_DEBUG_EVPN_MH_ES || IS_ZEBRA_DEBUG_EVPN_MH_MAC) + zlog_debug("mac update on es %s nhg %s", es->esi_str, + (es->flags & ZEBRA_EVPNES_NHG_ACTIVE) + ? "activate" + : "de-activate"); + + for (ALL_LIST_ELEMENTS_RO(es->mac_list, node, mac)) { + if (CHECK_FLAG(mac->flags, ZEBRA_MAC_REMOTE) + || (local_via_nw && CHECK_FLAG(mac->flags, ZEBRA_MAC_LOCAL) + && zebra_evpn_mac_is_static(mac))) { + if (es->flags & ZEBRA_EVPNES_NHG_ACTIVE) { + if (IS_ZEBRA_DEBUG_EVPN_MH_MAC) + zlog_debug( + "%smac %pEA install via es %s nhg 0x%x", + (mac->flags & ZEBRA_MAC_REMOTE) + ? "rem" + : "local-nw", + &mac->macaddr, es->esi_str, + es->nhg_id); + zebra_evpn_rem_mac_install( + mac->zevpn, mac, false /*was_static*/); + } else { + if (IS_ZEBRA_DEBUG_EVPN_MH_MAC) + zlog_debug( + "%smac %pEA un-install es %s", + (mac->flags & ZEBRA_MAC_REMOTE) + ? 
"rem" + : "local-nw", + &mac->macaddr, es->esi_str); + zebra_evpn_rem_mac_uninstall(mac->zevpn, mac, + true /*force*/); + } + } + } +} + +/* The MAC ECMP group is activated on the first VTEP */ +static void zebra_evpn_nhg_update(struct zebra_evpn_es *es) +{ + uint32_t nh_cnt = 0; + struct nh_grp nh_ids[ES_VTEP_MAX_CNT]; + struct zebra_evpn_es_vtep *es_vtep; + struct listnode *node; + + if (!es->nhg_id) + return; + + for (ALL_LIST_ELEMENTS_RO(es->es_vtep_list, node, es_vtep)) { + if (!es_vtep->nh) + continue; + + if (nh_cnt >= ES_VTEP_MAX_CNT) + break; + + memset(&nh_ids[nh_cnt], 0, sizeof(struct nh_grp)); + nh_ids[nh_cnt].id = es_vtep->nh->nh_id; + ++nh_cnt; + } + + if (nh_cnt) { + if (IS_ZEBRA_DEBUG_EVPN_MH_NH) { + char nh_str[ES_VTEP_LIST_STR_SZ]; + uint32_t i; + char nh_buf[16]; + + nh_str[0] = '\0'; + for (i = 0; i < nh_cnt; ++i) { + snprintf(nh_buf, sizeof(nh_buf), "%u ", + nh_ids[i].id); + strlcat(nh_str, nh_buf, sizeof(nh_str)); + } + zlog_debug("es %s nhg %u add %s", es->esi_str, + es->nhg_id, nh_str); + } + + kernel_upd_mac_nhg(es->nhg_id, nh_cnt, nh_ids); + if (!(es->flags & ZEBRA_EVPNES_NHG_ACTIVE)) { + es->flags |= ZEBRA_EVPNES_NHG_ACTIVE; + /* add backup NHG to the br-port */ + if ((es->flags & ZEBRA_EVPNES_LOCAL)) + zebra_evpn_es_br_port_dplane_update(es, + __func__); + zebra_evpn_nhg_mac_update(es); + } + } else { + if (es->flags & ZEBRA_EVPNES_NHG_ACTIVE) { + if (IS_ZEBRA_DEBUG_EVPN_MH_NH) + zlog_debug("es %s nhg %u del", es->esi_str, + es->nhg_id); + es->flags &= ~ZEBRA_EVPNES_NHG_ACTIVE; + /* remove backup NHG from the br-port */ + if ((es->flags & ZEBRA_EVPNES_LOCAL)) + zebra_evpn_es_br_port_dplane_update(es, + __func__); + zebra_evpn_nhg_mac_update(es); + kernel_del_mac_nhg(es->nhg_id); + } + } + +} + +static void zebra_evpn_es_l2_nh_show_entry(struct zebra_evpn_l2_nh *nh, + struct vty *vty, + json_object *json_array) +{ + if (json_array) { + json_object *json = NULL; + + json = json_object_new_object(); + json_object_string_addf(json, "vtep", 
"%pI4", &nh->vtep_ip); + json_object_int_add(json, "nhId", nh->nh_id); + json_object_int_add(json, "refCnt", nh->ref_cnt); + + json_object_array_add(json_array, json); + } else { + vty_out(vty, "%-16pI4 %-10u %u\n", &nh->vtep_ip, nh->nh_id, + nh->ref_cnt); + } +} + +static void zebra_evpn_l2_nh_show_cb(struct hash_bucket *bucket, void *ctxt) +{ + struct zebra_evpn_l2_nh *nh = (struct zebra_evpn_l2_nh *)bucket->data; + struct evpn_mh_show_ctx *wctx = (struct evpn_mh_show_ctx *)ctxt; + + zebra_evpn_es_l2_nh_show_entry(nh, wctx->vty, wctx->json); +} + +void zebra_evpn_l2_nh_show(struct vty *vty, bool uj) +{ + struct evpn_mh_show_ctx wctx; + json_object *json_array = NULL; + + if (uj) { + json_array = json_object_new_array(); + } else { + vty_out(vty, "%-16s %-10s %s\n", "VTEP", "NH id", "#ES"); + } + + memset(&wctx, 0, sizeof(wctx)); + wctx.vty = vty; + wctx.json = json_array; + + hash_iterate(zmh_info->nh_ip_table, zebra_evpn_l2_nh_show_cb, &wctx); + + if (uj) + vty_json(vty, json_array); +} + +static struct zebra_evpn_l2_nh *zebra_evpn_l2_nh_find(struct in_addr vtep_ip) +{ + struct zebra_evpn_l2_nh *nh; + struct zebra_evpn_l2_nh tmp; + + tmp.vtep_ip.s_addr = vtep_ip.s_addr; + nh = hash_lookup(zmh_info->nh_ip_table, &tmp); + + return nh; +} + +static struct zebra_evpn_l2_nh *zebra_evpn_l2_nh_alloc(struct in_addr vtep_ip) +{ + struct zebra_evpn_l2_nh *nh; + + nh = XCALLOC(MTYPE_L2_NH, sizeof(*nh)); + nh->vtep_ip = vtep_ip; + (void)hash_get(zmh_info->nh_ip_table, nh, hash_alloc_intern); + + nh->nh_id = zebra_evpn_nhid_alloc(NULL); + if (!nh->nh_id) { + hash_release(zmh_info->nh_ip_table, nh); + XFREE(MTYPE_L2_NH, nh); + return NULL; + } + + /* install the NH in the dataplane */ + kernel_upd_mac_nh(nh->nh_id, nh->vtep_ip); + + return nh; +} + +static void zebra_evpn_l2_nh_free(struct zebra_evpn_l2_nh *nh) +{ + /* delete the NH from the dataplane */ + kernel_del_mac_nh(nh->nh_id); + + zebra_evpn_nhid_free(nh->nh_id, NULL); + hash_release(zmh_info->nh_ip_table, nh); + 
XFREE(MTYPE_L2_NH, nh); +} + +static void zebra_evpn_l2_nh_es_vtep_ref(struct zebra_evpn_es_vtep *es_vtep) +{ + if (es_vtep->nh) + return; + + es_vtep->nh = zebra_evpn_l2_nh_find(es_vtep->vtep_ip); + if (!es_vtep->nh) + es_vtep->nh = zebra_evpn_l2_nh_alloc(es_vtep->vtep_ip); + + if (!es_vtep->nh) { + zlog_warn("es %s vtep %pI4 nh ref failed", es_vtep->es->esi_str, + &es_vtep->vtep_ip); + return; + } + + ++es_vtep->nh->ref_cnt; + + if (IS_ZEBRA_DEBUG_EVPN_MH_NH) + zlog_debug("es %s vtep %pI4 nh %u ref %u", es_vtep->es->esi_str, + &es_vtep->vtep_ip, es_vtep->nh->nh_id, + es_vtep->nh->ref_cnt); + + /* add the NH to the parent NHG */ + zebra_evpn_nhg_update(es_vtep->es); +} + +static void zebra_evpn_l2_nh_es_vtep_deref(struct zebra_evpn_es_vtep *es_vtep) +{ + struct zebra_evpn_l2_nh *nh = es_vtep->nh; + + if (!nh) + return; + + es_vtep->nh = NULL; + if (nh->ref_cnt) + --nh->ref_cnt; + + if (IS_ZEBRA_DEBUG_EVPN_MH_NH) + zlog_debug("es %s vtep %pI4 nh %u deref %u", + es_vtep->es->esi_str, &es_vtep->vtep_ip, nh->nh_id, + nh->ref_cnt); + + /* remove the NH from the parent NHG */ + zebra_evpn_nhg_update(es_vtep->es); + + /* uninstall the NH */ + if (!nh->ref_cnt) + zebra_evpn_l2_nh_free(nh); +} + +/*****************************************************************************/ +/* Ethernet Segment Management + * 1. Ethernet Segment is a collection of links attached to the same + * server (MHD) or switch (MHN) + * 2. An Ethernet Segment can span multiple PEs and is identified by the + * 10-byte ES-ID. + * 3. Zebra manages the local ESI configuration. + * 4. It also maintains the aliasing that maps an ESI (local or remote) + * to one or more PEs/VTEPs. + * 5. remote ESs are added by BGP (on rxing EAD Type-1 routes) + */ +/* A list of remote VTEPs is maintained for each ES. This list includes - + * 1. VTEPs for which we have imported the ESR i.e. ES-peers + * 2. VTEPs that have an "active" ES-EVI VTEP i.e. 
EAD-per-ES and EAD-per-EVI + * have been imported into one or more EVPNs + */ +static int zebra_evpn_es_vtep_cmp(void *p1, void *p2) +{ + const struct zebra_evpn_es_vtep *es_vtep1 = p1; + const struct zebra_evpn_es_vtep *es_vtep2 = p2; + + return es_vtep1->vtep_ip.s_addr - es_vtep2->vtep_ip.s_addr; +} + +static struct zebra_evpn_es_vtep *zebra_evpn_es_vtep_new( + struct zebra_evpn_es *es, struct in_addr vtep_ip) +{ + struct zebra_evpn_es_vtep *es_vtep; + + es_vtep = XCALLOC(MTYPE_ZES_VTEP, sizeof(*es_vtep)); + + es_vtep->es = es; + es_vtep->vtep_ip.s_addr = vtep_ip.s_addr; + listnode_init(&es_vtep->es_listnode, es_vtep); + listnode_add_sort(es->es_vtep_list, &es_vtep->es_listnode); + + return es_vtep; +} + +static void zebra_evpn_es_vtep_free(struct zebra_evpn_es_vtep *es_vtep) +{ + struct zebra_evpn_es *es = es_vtep->es; + + list_delete_node(es->es_vtep_list, &es_vtep->es_listnode); + /* update the L2-NHG associated with the ES */ + zebra_evpn_l2_nh_es_vtep_deref(es_vtep); + XFREE(MTYPE_ZES_VTEP, es_vtep); +} + + +/* check if VTEP is already part of the list */ +static struct zebra_evpn_es_vtep *zebra_evpn_es_vtep_find( + struct zebra_evpn_es *es, struct in_addr vtep_ip) +{ + struct listnode *node = NULL; + struct zebra_evpn_es_vtep *es_vtep; + + for (ALL_LIST_ELEMENTS_RO(es->es_vtep_list, node, es_vtep)) { + if (es_vtep->vtep_ip.s_addr == vtep_ip.s_addr) + return es_vtep; + } + return NULL; +} + +/* flush all the dataplane br-port info associated with the ES */ +static bool zebra_evpn_es_br_port_dplane_clear(struct zebra_evpn_es *es) +{ + struct in_addr sph_filters[ES_VTEP_MAX_CNT]; + + if (!(es->flags & ZEBRA_EVPNES_BR_PORT)) + return false; + + if (IS_ZEBRA_DEBUG_EVPN_MH_ES) + zlog_debug("es %s br-port dplane clear", es->esi_str); + + memset(&sph_filters, 0, sizeof(sph_filters)); + dplane_br_port_update(es->zif->ifp, false /* non_df */, 0, sph_filters, + 0 /* backup_nhg_id */); + return true; +} + +static inline bool 
+zebra_evpn_es_br_port_dplane_update_needed(struct zebra_evpn_es *es) +{ + return (es->flags & ZEBRA_EVPNES_NON_DF) + || (es->flags & ZEBRA_EVPNES_NHG_ACTIVE) + || listcount(es->es_vtep_list); +} + +/* returns TRUE if dplane entry was updated */ +static bool zebra_evpn_es_br_port_dplane_update(struct zebra_evpn_es *es, + const char *caller) +{ + uint32_t backup_nhg_id; + struct in_addr sph_filters[ES_VTEP_MAX_CNT]; + struct listnode *node = NULL; + struct zebra_evpn_es_vtep *es_vtep; + uint32_t sph_filter_cnt = 0; + + if (!(es->flags & ZEBRA_EVPNES_LOCAL)) + return zebra_evpn_es_br_port_dplane_clear(es); + + /* If the ES is not a bridge port there is nothing + * in the dataplane + */ + if (!(es->flags & ZEBRA_EVPNES_BR_PORT)) + return false; + + if (IS_ZEBRA_DEBUG_EVPN_MH_ES) + zlog_debug("es %s br-port dplane update by %s", es->esi_str, + caller); + backup_nhg_id = (es->flags & ZEBRA_EVPNES_NHG_ACTIVE) ? es->nhg_id : 0; + + memset(&sph_filters, 0, sizeof(sph_filters)); + if (es->flags & ZEBRA_EVPNES_BYPASS) { + if (IS_ZEBRA_DEBUG_EVPN_MH_ES) + zlog_debug( + "es %s SPH filter disabled as it is in bypass", + es->esi_str); + } else { + if (listcount(es->es_vtep_list) > ES_VTEP_MAX_CNT) { + zlog_warn("es %s vtep count %d exceeds filter cnt %d", + es->esi_str, listcount(es->es_vtep_list), + ES_VTEP_MAX_CNT); + } else { + for (ALL_LIST_ELEMENTS_RO(es->es_vtep_list, node, + es_vtep)) { + if (es_vtep->flags + & ZEBRA_EVPNES_VTEP_DEL_IN_PROG) + continue; + sph_filters[sph_filter_cnt] = es_vtep->vtep_ip; + ++sph_filter_cnt; + } + } + } + + dplane_br_port_update(es->zif->ifp, !!(es->flags & ZEBRA_EVPNES_NON_DF), + sph_filter_cnt, sph_filters, backup_nhg_id); + + return true; +} + +/* returns TRUE if dplane entry was updated */ +static bool zebra_evpn_es_df_change(struct zebra_evpn_es *es, bool new_non_df, + const char *caller, const char *reason) +{ + bool old_non_df; + + old_non_df = !!(es->flags & ZEBRA_EVPNES_NON_DF); + + if (IS_ZEBRA_DEBUG_EVPN_MH_ES) + 
zlog_debug("df-change es %s %s to %s; %s: %s", es->esi_str, + old_non_df ? "non-df" : "df", + new_non_df ? "non-df" : "df", caller, reason); + + if (old_non_df == new_non_df) + return false; + + if (new_non_df) + es->flags |= ZEBRA_EVPNES_NON_DF; + else + es->flags &= ~ZEBRA_EVPNES_NON_DF; + + /* update non-DF block filter in the dataplane */ + return zebra_evpn_es_br_port_dplane_update(es, __func__); +} + + +/* returns TRUE if dplane entry was updated */ +static bool zebra_evpn_es_run_df_election(struct zebra_evpn_es *es, + const char *caller) +{ + struct listnode *node = NULL; + struct zebra_evpn_es_vtep *es_vtep; + bool new_non_df = false; + + /* If the ES is not ready (i.e. not completely configured) there + * is no need to setup the BUM block filter + */ + if (!(es->flags & ZEBRA_EVPNES_LOCAL) + || (es->flags & ZEBRA_EVPNES_BYPASS) + || !zmh_info->es_originator_ip.s_addr) + return zebra_evpn_es_df_change(es, new_non_df, caller, + "not-ready"); + + /* if oper-state is down DF filtering must be on. when the link comes + * up again dataplane should block BUM till FRR has had the chance + * to run DF election again + */ + if (!(es->flags & ZEBRA_EVPNES_OPER_UP)) { + new_non_df = true; + return zebra_evpn_es_df_change(es, new_non_df, caller, + "oper-down"); + } + + /* ES was just created; we need to wait for the peers to rx the + * our Type-4 routes and for the switch to import the peers' Type-4 + * routes + */ + if (es->df_delay_timer) { + new_non_df = true; + return zebra_evpn_es_df_change(es, new_non_df, caller, + "df-delay"); + } + + for (ALL_LIST_ELEMENTS_RO(es->es_vtep_list, node, es_vtep)) { + /* Only VTEPs that have advertised the ESR can participate + * in DF election + */ + if (!(es_vtep->flags & ZEBRA_EVPNES_VTEP_RXED_ESR)) + continue; + + /* If the DF alg is not the same we should fall back to + * service-carving. 
But as service-carving is not supported + * we will stop forwarding BUM + */ + if (es_vtep->df_alg != EVPN_MH_DF_ALG_PREF) { + new_non_df = true; + break; + } + + /* Peer VTEP wins DF election if - + * the peer-VTEP has higher preference (or) + * the pref is the same but peer's IP address is lower + */ + if ((es_vtep->df_pref > es->df_pref) + || ((es_vtep->df_pref == es->df_pref) + && (es_vtep->vtep_ip.s_addr + < zmh_info->es_originator_ip.s_addr))) { + new_non_df = true; + break; + } + } + + return zebra_evpn_es_df_change(es, new_non_df, caller, "elected"); +} + +static void zebra_evpn_es_vtep_add(struct zebra_evpn_es *es, + struct in_addr vtep_ip, bool esr_rxed, + uint8_t df_alg, uint16_t df_pref) +{ + struct zebra_evpn_es_vtep *es_vtep; + bool old_esr_rxed; + bool dplane_updated = false; + + es_vtep = zebra_evpn_es_vtep_find(es, vtep_ip); + + if (!es_vtep) { + if (IS_ZEBRA_DEBUG_EVPN_MH_ES) + zlog_debug("es %s vtep %pI4 add", + es->esi_str, &vtep_ip); + es_vtep = zebra_evpn_es_vtep_new(es, vtep_ip); + /* update the L2-NHG associated with the ES */ + zebra_evpn_l2_nh_es_vtep_ref(es_vtep); + } + + old_esr_rxed = !!(es_vtep->flags & ZEBRA_EVPNES_VTEP_RXED_ESR); + if ((old_esr_rxed != esr_rxed) || (es_vtep->df_alg != df_alg) + || (es_vtep->df_pref != df_pref)) { + /* If any of the DF election params changed we need to re-run + * DF election + */ + if (esr_rxed) + es_vtep->flags |= ZEBRA_EVPNES_VTEP_RXED_ESR; + else + es_vtep->flags &= ~ZEBRA_EVPNES_VTEP_RXED_ESR; + es_vtep->df_alg = df_alg; + es_vtep->df_pref = df_pref; + dplane_updated = zebra_evpn_es_run_df_election(es, __func__); + } + /* add the vtep to the SPH list */ + if (!dplane_updated && (es->flags & ZEBRA_EVPNES_LOCAL)) + zebra_evpn_es_br_port_dplane_update(es, __func__); +} + +static void zebra_evpn_es_vtep_del(struct zebra_evpn_es *es, + struct in_addr vtep_ip) +{ + struct zebra_evpn_es_vtep *es_vtep; + bool dplane_updated = false; + + es_vtep = zebra_evpn_es_vtep_find(es, vtep_ip); + + if (es_vtep) { + 
if (IS_ZEBRA_DEBUG_EVPN_MH_ES) + zlog_debug("es %s vtep %pI4 del", + es->esi_str, &vtep_ip); + es_vtep->flags |= ZEBRA_EVPNES_VTEP_DEL_IN_PROG; + if (es_vtep->flags & ZEBRA_EVPNES_VTEP_RXED_ESR) { + es_vtep->flags &= ~ZEBRA_EVPNES_VTEP_RXED_ESR; + dplane_updated = + zebra_evpn_es_run_df_election(es, __func__); + } + /* remove the vtep from the SPH list */ + if (!dplane_updated && (es->flags & ZEBRA_EVPNES_LOCAL)) + zebra_evpn_es_br_port_dplane_update(es, __func__); + zebra_evpn_es_vtep_free(es_vtep); + } +} + +/* compare ES-IDs for the global ES RB tree */ +static int zebra_es_rb_cmp(const struct zebra_evpn_es *es1, + const struct zebra_evpn_es *es2) +{ + return memcmp(&es1->esi, &es2->esi, ESI_BYTES); +} +RB_GENERATE(zebra_es_rb_head, zebra_evpn_es, rb_node, zebra_es_rb_cmp); + +/* Lookup ES */ +struct zebra_evpn_es *zebra_evpn_es_find(const esi_t *esi) +{ + struct zebra_evpn_es tmp; + + memcpy(&tmp.esi, esi, sizeof(esi_t)); + return RB_FIND(zebra_es_rb_head, &zmh_info->es_rb_tree, &tmp); +} + +/* A new local es is created when a local-es-id and sysmac is configured + * against an interface. 
+ */ +static struct zebra_evpn_es *zebra_evpn_es_new(const esi_t *esi) +{ + struct zebra_evpn_es *es; + + if (!memcmp(esi, zero_esi, sizeof(esi_t))) + return NULL; + + es = XCALLOC(MTYPE_ZES, sizeof(struct zebra_evpn_es)); + + /* fill in ESI */ + memcpy(&es->esi, esi, sizeof(esi_t)); + esi_to_str(&es->esi, es->esi_str, sizeof(es->esi_str)); + + /* Add to rb_tree */ + RB_INSERT(zebra_es_rb_head, &zmh_info->es_rb_tree, es); + + /* Initialise the ES-EVI list */ + es->es_evi_list = list_new(); + listset_app_node_mem(es->es_evi_list); + + /* Initialise the VTEP list */ + es->es_vtep_list = list_new(); + listset_app_node_mem(es->es_vtep_list); + es->es_vtep_list->cmp = zebra_evpn_es_vtep_cmp; + + /* mac entries associated with the ES */ + es->mac_list = list_new(); + listset_app_node_mem(es->mac_list); + + /* reserve a NHG */ + es->nhg_id = zebra_evpn_nhid_alloc(es); + + if (IS_ZEBRA_DEBUG_EVPN_MH_ES) + zlog_debug("es %s nhg %u new", es->esi_str, es->nhg_id); + + return es; +} + +/* Free a given ES - + * This just frees appropriate memory, caller should have taken other + * needed actions. + */ +static void zebra_evpn_es_free(struct zebra_evpn_es **esp) +{ + struct zebra_evpn_es *es = *esp; + + /* If the ES has a local or remote reference it cannot be freed. + * Free is also prevented if there are MAC entries referencing + * it. 
+ */ + if ((es->flags & (ZEBRA_EVPNES_LOCAL | ZEBRA_EVPNES_REMOTE)) || + listcount(es->mac_list)) + return; + + if (IS_ZEBRA_DEBUG_EVPN_MH_ES) + zlog_debug("es %s free", es->esi_str); + + /* If the NHG is still installed uninstall it and free the id */ + if (es->flags & ZEBRA_EVPNES_NHG_ACTIVE) { + es->flags &= ~ZEBRA_EVPNES_NHG_ACTIVE; + kernel_del_mac_nhg(es->nhg_id); + } + zebra_evpn_nhid_free(es->nhg_id, es); + + /* cleanup resources maintained against the ES */ + list_delete(&es->es_evi_list); + list_delete(&es->es_vtep_list); + list_delete(&es->mac_list); + + /* remove from the VNI-ESI rb tree */ + RB_REMOVE(zebra_es_rb_head, &zmh_info->es_rb_tree, es); + + XFREE(MTYPE_ZES, es); + + *esp = NULL; +} + +/* Inform BGP about local ES addition */ +static int zebra_evpn_es_send_add_to_client(struct zebra_evpn_es *es) +{ + struct zserv *client; + struct stream *s; + uint8_t oper_up; + bool bypass; + + client = zserv_find_client(ZEBRA_ROUTE_BGP, 0); + /* BGP may not be running. */ + if (!client) + return 0; + + s = stream_new(ZEBRA_MAX_PACKET_SIZ); + + zclient_create_header(s, ZEBRA_LOCAL_ES_ADD, zebra_vrf_get_evpn_id()); + stream_put(s, &es->esi, sizeof(esi_t)); + stream_put_ipv4(s, zmh_info->es_originator_ip.s_addr); + oper_up = !!(es->flags & ZEBRA_EVPNES_OPER_UP); + stream_putc(s, oper_up); + stream_putw(s, es->df_pref); + bypass = !!(es->flags & ZEBRA_EVPNES_BYPASS); + stream_putc(s, bypass); + + /* Write packet size. */ + stream_putw_at(s, 0, stream_get_endp(s)); + + if (IS_ZEBRA_DEBUG_EVPN_MH_ES) + zlog_debug( + "send add local es %s %pI4 active %u df_pref %u%s to %s", + es->esi_str, &zmh_info->es_originator_ip, oper_up, + es->df_pref, bypass ? 
" bypass" : "", + zebra_route_string(client->proto)); + + client->local_es_add_cnt++; + return zserv_send_message(client, s); +} + +/* Inform BGP about local ES deletion */ +static int zebra_evpn_es_send_del_to_client(struct zebra_evpn_es *es) +{ + struct zserv *client; + struct stream *s; + + client = zserv_find_client(ZEBRA_ROUTE_BGP, 0); + /* BGP may not be running. */ + if (!client) + return 0; + + s = stream_new(ZEBRA_MAX_PACKET_SIZ); + stream_reset(s); + + zclient_create_header(s, ZEBRA_LOCAL_ES_DEL, zebra_vrf_get_evpn_id()); + stream_put(s, &es->esi, sizeof(esi_t)); + + /* Write packet size. */ + stream_putw_at(s, 0, stream_get_endp(s)); + + if (IS_ZEBRA_DEBUG_EVPN_MH_ES) + zlog_debug("send del local es %s to %s", es->esi_str, + zebra_route_string(client->proto)); + + client->local_es_del_cnt++; + return zserv_send_message(client, s); +} + +static void zebra_evpn_es_re_eval_send_to_client(struct zebra_evpn_es *es, + bool es_evi_re_reval) +{ + bool old_ready; + bool new_ready; + struct listnode *node; + struct zebra_evpn_es_evi *es_evi; + + old_ready = !!(es->flags & ZEBRA_EVPNES_READY_FOR_BGP); + + if ((es->flags & ZEBRA_EVPNES_LOCAL) && + zmh_info->es_originator_ip.s_addr) + es->flags |= ZEBRA_EVPNES_READY_FOR_BGP; + else + es->flags &= ~ZEBRA_EVPNES_READY_FOR_BGP; + + new_ready = !!(es->flags & ZEBRA_EVPNES_READY_FOR_BGP); + if (old_ready == new_ready) + return; + + if (new_ready) + zebra_evpn_es_send_add_to_client(es); + else + zebra_evpn_es_send_del_to_client(es); + + /* re-eval associated EVIs */ + if (es_evi_re_reval) { + for (ALL_LIST_ELEMENTS_RO(es->es_evi_list, node, es_evi)) { + if (!(es_evi->flags & ZEBRA_EVPNES_EVI_LOCAL)) + continue; + zebra_evpn_es_evi_re_eval_send_to_client(es_evi); + } + } +} + +void zebra_evpn_es_send_all_to_client(bool add) +{ + struct listnode *es_node; + struct listnode *evi_node; + struct zebra_evpn_es *es; + struct zebra_evpn_es_evi *es_evi; + + if (!zmh_info) + return; + + for 
(ALL_LIST_ELEMENTS_RO(zmh_info->local_es_list, es_node, es)) { + if (es->flags & ZEBRA_EVPNES_READY_FOR_BGP) { + if (add) + zebra_evpn_es_send_add_to_client(es); + for (ALL_LIST_ELEMENTS_RO(es->es_evi_list, + evi_node, es_evi)) { + if (!(es_evi->flags & + ZEBRA_EVPNES_EVI_READY_FOR_BGP)) + continue; + + if (add) + zebra_evpn_es_evi_send_to_client( + es, es_evi->zevpn, + true /* add */); + else + zebra_evpn_es_evi_send_to_client( + es, es_evi->zevpn, + false /* add */); + } + if (!add) + zebra_evpn_es_send_del_to_client(es); + } + } +} + +/* walk the vlan bitmap associated with the zif and create or delete + * es_evis for all vlans associated with a VNI. + * XXX: This API is really expensive. optimize later if possible. + */ +static void zebra_evpn_es_setup_evis(struct zebra_evpn_es *es) +{ + struct zebra_if *zif = es->zif; + uint16_t vid; + struct zebra_evpn_access_bd *acc_bd; + + if (!bf_is_inited(zif->vlan_bitmap)) + return; + + bf_for_each_set_bit(zif->vlan_bitmap, vid, IF_VLAN_BITMAP_MAX) { + acc_bd = zebra_evpn_acc_vl_find(vid); + if (acc_bd->zevpn) + zebra_evpn_local_es_evi_add(es, acc_bd->zevpn); + } +} + +static void zebra_evpn_flush_local_mac(struct zebra_mac *mac, + struct interface *ifp) +{ + struct zebra_if *zif; + struct interface *br_ifp; + vlanid_t vid; + + zif = ifp->info; + br_ifp = zif->brslave_info.br_if; + if (!br_ifp) + return; + + if (mac->zevpn->vxlan_if) { + zif = mac->zevpn->vxlan_if->info; + vid = zif->l2info.vxl.access_vlan; + } else { + vid = 0; + } + + /* delete the local mac from the dataplane */ + dplane_local_mac_del(ifp, br_ifp, vid, &mac->macaddr); + /* delete the local mac in zebra */ + zebra_evpn_del_local_mac(mac->zevpn, mac, true); +} + +static void zebra_evpn_es_flush_local_macs(struct zebra_evpn_es *es, + struct interface *ifp, bool add) +{ + struct zebra_mac *mac; + struct listnode *node; + struct listnode *nnode; + + for (ALL_LIST_ELEMENTS(es->mac_list, node, nnode, mac)) { + if (!CHECK_FLAG(mac->flags, ZEBRA_MAC_LOCAL)) + 
continue; + + /* If ES is being attached/detached from the access port we + * need to clear local activity and peer activity and start + * over */ + if (IS_ZEBRA_DEBUG_EVPN_MH_MAC) + zlog_debug("VNI %u mac %pEA update; local ES %s %s", + mac->zevpn->vni, + &mac->macaddr, + es->esi_str, add ? "add" : "del"); + zebra_evpn_flush_local_mac(mac, ifp); + } +} + +void zebra_evpn_es_local_br_port_update(struct zebra_if *zif) +{ + struct zebra_evpn_es *es = zif->es_info.es; + bool old_br_port = !!(es->flags & ZEBRA_EVPNES_BR_PORT); + bool new_br_port; + + if (zif->brslave_info.bridge_ifindex != IFINDEX_INTERNAL) + es->flags |= ZEBRA_EVPNES_BR_PORT; + else + es->flags &= ~ZEBRA_EVPNES_BR_PORT; + + new_br_port = !!(es->flags & ZEBRA_EVPNES_BR_PORT); + if (old_br_port == new_br_port) + return; + + if (IS_ZEBRA_DEBUG_EVPN_MH_ES) + zlog_debug("es %s br_port change old %u new %u", es->esi_str, + old_br_port, new_br_port); + + /* update the dataplane br_port attrs */ + if (new_br_port && zebra_evpn_es_br_port_dplane_update_needed(es)) + zebra_evpn_es_br_port_dplane_update(es, __func__); +} + +/* On config of first local-ES turn off DAD */ +static void zebra_evpn_mh_dup_addr_detect_off(void) +{ + struct zebra_vrf *zvrf; + bool old_detect; + bool new_detect; + + if (zmh_info->flags & ZEBRA_EVPN_MH_DUP_ADDR_DETECT_OFF) + return; + + zvrf = zebra_vrf_get_evpn(); + old_detect = zebra_evpn_do_dup_addr_detect(zvrf); + zmh_info->flags |= ZEBRA_EVPN_MH_DUP_ADDR_DETECT_OFF; + new_detect = zebra_evpn_do_dup_addr_detect(zvrf); + + if (old_detect && !new_detect) { + if (IS_ZEBRA_DEBUG_EVPN_MH_ES) + zlog_debug( + "evpn-mh config caused DAD addr detect chg from %s to %s", + old_detect ? "on" : "off", + new_detect ? 
"on" : "off"); + zebra_vxlan_clear_dup_detect_vni_all(zvrf); + } +} + +/* On config of first local-ES turn off advertisement of STALE/DELAY/PROBE + * neighbors + */ +static void zebra_evpn_mh_advertise_reach_neigh_only(void) +{ + if (zmh_info->flags & ZEBRA_EVPN_MH_ADV_REACHABLE_NEIGH_ONLY) + return; + + zmh_info->flags |= ZEBRA_EVPN_MH_ADV_REACHABLE_NEIGH_ONLY; + if (IS_ZEBRA_DEBUG_EVPN_MH_ES) + zlog_debug("evpn-mh: only REACHABLE neigh advertised"); + + /* XXX - if STALE/DELAY/PROBE neighs were previously advertised we + * need to withdraw them + */ +} + +/* On config of first local-ES turn on advertisement of local SVI-MAC */ +static void zebra_evpn_mh_advertise_svi_mac(void) +{ + if (zmh_info->flags & ZEBRA_EVPN_MH_ADV_SVI_MAC) + return; + + zmh_info->flags |= ZEBRA_EVPN_MH_ADV_SVI_MAC; + if (IS_ZEBRA_DEBUG_EVPN_MH_ES) + zlog_debug("evpn-mh: advertise SVI MAC"); + + /* walk through all SVIs and see if we need to advertise the MAC */ + zebra_evpn_acc_vl_adv_svi_mac_all(); +} + +static void zebra_evpn_es_df_delay_exp_cb(struct thread *t) +{ + struct zebra_evpn_es *es; + + es = THREAD_ARG(t); + + if (IS_ZEBRA_DEBUG_EVPN_MH_ES) + zlog_debug("es %s df-delay expired", es->esi_str); + + zebra_evpn_es_run_df_election(es, __func__); +} + +/* currently there is no global config to turn on MH instead we use + * the addition of the first local Ethernet Segment as the trigger to + * init MH specific processing + */ +static void zebra_evpn_mh_on_first_local_es(void) +{ + zebra_evpn_mh_dup_addr_detect_off(); + zebra_evpn_mh_advertise_reach_neigh_only(); + zebra_evpn_mh_advertise_svi_mac(); +} + +static void zebra_evpn_es_local_info_set(struct zebra_evpn_es *es, + struct zebra_if *zif) +{ + if (es->flags & ZEBRA_EVPNES_LOCAL) + return; + + if (IS_ZEBRA_DEBUG_EVPN_MH_ES) + zlog_debug("local es %s add; nhg %u if %s", es->esi_str, + es->nhg_id, zif->ifp->name); + + zebra_evpn_mh_on_first_local_es(); + + es->flags |= ZEBRA_EVPNES_LOCAL; + listnode_init(&es->local_es_listnode, es); 
	listnode_add(zmh_info->local_es_list, &es->local_es_listnode);

	/* attach es to interface */
	zif->es_info.es = es;
	es->df_pref = zif->es_info.df_pref ? zif->es_info.df_pref
					   : EVPN_MH_DF_PREF_DEFAULT;

	/* attach interface to es */
	es->zif = zif;
	if (if_is_operative(zif->ifp))
		es->flags |= ZEBRA_EVPNES_OPER_UP;

	if (zif->brslave_info.bridge_ifindex != IFINDEX_INTERNAL)
		es->flags |= ZEBRA_EVPNES_BR_PORT;

	/* inherit the bypass flag from the interface */
	if (zif->flags & ZIF_FLAG_LACP_BYPASS)
		es->flags |= ZEBRA_EVPNES_BYPASS;

	/* setup base-vni if one doesn't already exist; the ES will get sent
	 * to BGP as a part of that process
	 */
	if (!zmh_info->es_base_evpn)
		zebra_evpn_es_get_one_base_evpn();
	else
		/* send notification to bgp */
		zebra_evpn_es_re_eval_send_to_client(es,
			false /* es_evi_re_reval */);

	/* Start the DF delay timer on the local ES */
	if (!es->df_delay_timer)
		thread_add_timer(zrouter.master, zebra_evpn_es_df_delay_exp_cb,
				 es, ZEBRA_EVPN_MH_DF_DELAY_TIME,
				 &es->df_delay_timer);

	/* See if the local VTEP can function as DF on the ES */
	if (!zebra_evpn_es_run_df_election(es, __func__)) {
		/* check if the dplane entry needs to be re-programmed as a
		 * result of some thing other than DF status change
		 */
		if (zebra_evpn_es_br_port_dplane_update_needed(es))
			zebra_evpn_es_br_port_dplane_update(es, __func__);
	}


	/* Setup ES-EVIs for all VxLAN stretched VLANs associated with
	 * the zif
	 */
	zebra_evpn_es_setup_evis(es);
	/* if there any local macs referring to the ES as dest we
	 * need to clear the contents and start over
	 */
	zebra_evpn_es_flush_local_macs(es, zif->ifp, true);

	/* inherit EVPN protodown flags on the access port */
	zebra_evpn_mh_update_protodown_es(es, true /*resync_dplane*/);
}

/* Tear down the local state of an ES - inverse of
 * zebra_evpn_es_local_info_set. May free the ES (via zebra_evpn_es_free)
 * if no remote reference remains, in which case *esp is set to NULL.
 */
static void zebra_evpn_es_local_info_clear(struct zebra_evpn_es **esp)
{
	struct zebra_if *zif;
	struct zebra_evpn_es *es = *esp;
	bool dplane_updated = false;

	if (!(es->flags & ZEBRA_EVPNES_LOCAL))
		return;

	zif = es->zif;

	/* if there any local macs referring to the ES as dest we
	 * need to clear the contents and start over
	 */
	zebra_evpn_es_flush_local_macs(es, zif->ifp, false);

	es->flags &= ~(ZEBRA_EVPNES_LOCAL | ZEBRA_EVPNES_READY_FOR_BGP);

	THREAD_OFF(es->df_delay_timer);

	/* clear EVPN protodown flags on the access port */
	zebra_evpn_mh_clear_protodown_es(es);

	/* remove the DF filter */
	dplane_updated = zebra_evpn_es_run_df_election(es, __func__);

	/* flush the BUM filters and backup NHG */
	if (!dplane_updated)
		zebra_evpn_es_br_port_dplane_clear(es);

	/* clear the es from the parent interface */
	zif->es_info.es = NULL;
	es->zif = NULL;

	/* clear all local flags associated with the ES */
	es->flags &= ~(ZEBRA_EVPNES_OPER_UP | ZEBRA_EVPNES_BR_PORT
		       | ZEBRA_EVPNES_BYPASS);

	/* remove from the ES list */
	list_delete_node(zmh_info->local_es_list, &es->local_es_listnode);

	/* free up the ES if there is no remote reference */
	zebra_evpn_es_free(esp);
}

/* Delete an ethernet segment and inform BGP */
static void zebra_evpn_local_es_del(struct zebra_evpn_es **esp)
{
	struct zebra_evpn_es_evi *es_evi;
	struct listnode *node = NULL;
	struct listnode *nnode = NULL;
	struct zebra_if *zif;
	struct zebra_evpn_es *es = *esp;

	if (!CHECK_FLAG(es->flags, ZEBRA_EVPNES_LOCAL))
		return;

	if (IS_ZEBRA_DEBUG_EVPN_MH_ES) {
		zif = es->zif;
		zlog_debug("local es %s del; nhg %u if %s", es->esi_str,
			   es->nhg_id, zif ? zif->ifp->name : "-");
	}

	/* remove all ES-EVIs associated with the ES */
	for (ALL_LIST_ELEMENTS(es->es_evi_list, node, nnode, es_evi))
		zebra_evpn_local_es_evi_do_del(es_evi);

	/* send a del if the ES had been sent to BGP earlier */
	if (es->flags & ZEBRA_EVPNES_READY_FOR_BGP)
		zebra_evpn_es_send_del_to_client(es);

	zebra_evpn_es_local_info_clear(esp);
}

/* eval remote info associated with the ES */
static void zebra_evpn_es_remote_info_re_eval(struct zebra_evpn_es **esp)
{
	struct zebra_evpn_es *es = *esp;

	/* if there are remote VTEPs the ES-EVI is classified as "remote" */
	if (listcount(es->es_vtep_list)) {
		if (!(es->flags & ZEBRA_EVPNES_REMOTE)) {
			es->flags |= ZEBRA_EVPNES_REMOTE;
			if (IS_ZEBRA_DEBUG_EVPN_MH_ES)
				zlog_debug("remote es %s add; nhg %u",
					   es->esi_str, es->nhg_id);
		}
	} else {
		if (es->flags & ZEBRA_EVPNES_REMOTE) {
			es->flags &= ~ZEBRA_EVPNES_REMOTE;
			if (IS_ZEBRA_DEBUG_EVPN_MH_ES)
				zlog_debug("remote es %s del; nhg %u",
					   es->esi_str, es->nhg_id);
			/* may free the ES if no local reference remains */
			zebra_evpn_es_free(esp);
		}
	}
}

/* A new local es is created when a local-es-id and sysmac is configured
 * against an interface.
 */
static int zebra_evpn_local_es_update(struct zebra_if *zif, esi_t *esi)
{
	struct zebra_evpn_es *old_es = zif->es_info.es;
	struct zebra_evpn_es *es;

	if (old_es && !memcmp(&old_es->esi, esi, sizeof(*esi)))
		/* dup - nothing to be done */
		return 0;

	/* release the old_es against the zif */
	if (old_es)
		zebra_evpn_local_es_del(&old_es);

	es = zebra_evpn_es_find(esi);
	if (es) {
		/* if it exists against another interface flag an error */
		if (es->zif && es->zif != zif)
			return -1;
	} else {
		/* create new es */
		es = zebra_evpn_es_new(esi);
	}

	memcpy(&zif->es_info.esi, esi, sizeof(*esi));
	if (es)
		zebra_evpn_es_local_info_set(es, zif);

	return 0;
}

/* Derive a type-3 ESI from the configured local-id and system MAC and
 * apply it to the interface. A zero lid or sysmac clears the ESI (and
 * deletes any ES attached to the zif).
 */
static int zebra_evpn_type3_esi_update(struct zebra_if *zif, uint32_t lid,
				       struct ethaddr *sysmac)
{
	struct zebra_evpn_es *old_es = zif->es_info.es;
	esi_t esi;
	int offset = 0;
	int field_bytes = 0;

	/* Complete config of the ES-ID bootstraps the ES */
	if (!lid || is_zero_mac(sysmac)) {
		/* clear old esi */
		memset(&zif->es_info.esi, 0, sizeof(zif->es_info.esi));
		/* if in ES is attached to zif delete it */
		if (old_es)
			zebra_evpn_local_es_del(&old_es);
		return 0;
	}

	/* build 10-byte type-3-ESI -
	 * Type(1-byte), MAC(6-bytes), ES-LID (3-bytes)
	 */
	field_bytes = 1;
	esi.val[offset] = ESI_TYPE_MAC;
	offset += field_bytes;

	field_bytes = ETH_ALEN;
	memcpy(&esi.val[offset], (uint8_t *)sysmac, field_bytes);
	offset += field_bytes;

	/* ES-LID is stored big-endian in the last 3 bytes */
	esi.val[offset++] = (uint8_t)(lid >> 16);
	esi.val[offset++] = (uint8_t)(lid >> 8);
	esi.val[offset++] = (uint8_t)lid;

	return zebra_evpn_local_es_update(zif, &esi);
}

/* Remove a remote VTEP from the ES; returns -1 if the ES doesn't exist */
int zebra_evpn_remote_es_del(const esi_t *esi, struct in_addr vtep_ip)
{
	char buf[ESI_STR_LEN];
	struct zebra_evpn_es *es;

	if (IS_ZEBRA_DEBUG_EVPN_MH_ES)
		zlog_debug("remote es %s vtep %pI4 del",
			   esi_to_str(esi, buf, sizeof(buf)), &vtep_ip);

	es = zebra_evpn_es_find(esi);
	if (!es) {
		zlog_warn("remote es %s vtep %pI4 del failed, es missing",
			  esi_to_str(esi, buf, sizeof(buf)), &vtep_ip);
		return -1;
	}

	zebra_evpn_es_vtep_del(es, vtep_ip);
	zebra_evpn_es_remote_info_re_eval(&es);

	return 0;
}

/* force delete a remote ES on the way down */
static void zebra_evpn_remote_es_flush(struct zebra_evpn_es **esp)
{
	struct zebra_evpn_es_vtep *es_vtep;
	struct listnode *node;
	struct listnode *nnode;
	struct zebra_evpn_es *es = *esp;

	for (ALL_LIST_ELEMENTS(es->es_vtep_list, node, nnode, es_vtep)) {
		if (IS_ZEBRA_DEBUG_EVPN_MH_ES)
			zlog_debug("es %s vtep %pI4 flush",
				   es->esi_str,
				   &es_vtep->vtep_ip);
		zebra_evpn_es_vtep_free(es_vtep);
	}
	zebra_evpn_es_remote_info_re_eval(esp);
}

/* Add a remote VTEP to the ES, implicitly creating the ES if needed */
int zebra_evpn_remote_es_add(const esi_t *esi, struct in_addr vtep_ip,
			     bool esr_rxed, uint8_t df_alg, uint16_t df_pref)
{
	char buf[ESI_STR_LEN];
	struct zebra_evpn_es *es;

	if (IS_ZEBRA_DEBUG_EVPN_MH_ES)
		zlog_debug("remote es %s vtep %pI4 add %s df_alg %d df_pref %d",
			   esi_to_str(esi, buf, sizeof(buf)),
			   &vtep_ip, esr_rxed ? "esr" : "", df_alg,
			   df_pref);

	es = zebra_evpn_es_find(esi);
	if (!es) {
		es = zebra_evpn_es_new(esi);
		if (!es) {
			zlog_warn(
				"remote es %s vtep %pI4 add failed, es missing",
				esi_to_str(esi, buf, sizeof(buf)), &vtep_ip);
			return -1;
		}
	}

	/* only the preference based DF algorithm is supported; warn but
	 * still record the VTEP
	 */
	if (df_alg != EVPN_MH_DF_ALG_PREF)
		zlog_warn(
			"remote es %s vtep %pI4 add %s with unsupported df_alg %d",
			esi_to_str(esi, buf, sizeof(buf)), &vtep_ip,
			esr_rxed ? "esr" : "", df_alg);

	zebra_evpn_es_vtep_add(es, vtep_ip, esr_rxed, df_alg, df_pref);
	zebra_evpn_es_remote_info_re_eval(&es);

	return 0;
}

/* ZAPI handler for remote ES VTEP add/del from BGP; decodes the stream
 * and queues the update on the rib workqueue
 */
void zebra_evpn_proc_remote_es(ZAPI_HANDLER_ARGS)
{
	struct stream *s;
	struct in_addr vtep_ip;
	esi_t esi;

	if (!is_evpn_enabled()) {
		zlog_debug(
			"%s: EVPN not enabled yet we received a es_add zapi call",
			__func__);
		return;
	}

	memset(&esi, 0, sizeof(esi_t));
	s = msg;

	STREAM_GET(&esi, s, sizeof(esi_t));
	STREAM_GET(&vtep_ip.s_addr, s, sizeof(vtep_ip.s_addr));

	if (hdr->command == ZEBRA_REMOTE_ES_VTEP_ADD) {
		uint32_t zapi_flags;
		uint8_t df_alg;
		uint16_t df_pref;
		bool esr_rxed;

		STREAM_GETL(s, zapi_flags);
		esr_rxed = (zapi_flags & ZAPI_ES_VTEP_FLAG_ESR_RXED) ? true
								     : false;
		STREAM_GETC(s, df_alg);
		STREAM_GETW(s, df_pref);
		zebra_rib_queue_evpn_rem_es_add(&esi, &vtep_ip, esr_rxed,
						df_alg, df_pref);
	} else {
		zebra_rib_queue_evpn_rem_es_del(&esi, &vtep_ip);
	}

stream_failure:
	return;
}

/* Drop the MAC's reference on its ES; frees the ES if this was the
 * last MAC referencing it
 */
void zebra_evpn_es_mac_deref_entry(struct zebra_mac *mac)
{
	struct zebra_evpn_es *es = mac->es;

	mac->es = NULL;
	if (!es)
		return;

	list_delete_node(es->mac_list, &mac->es_listnode);
	if (!listcount(es->mac_list))
		zebra_evpn_es_free(&es);
}

/* Associate a MAC entry with a local or remote ES. Returns false if there
 * was no ES change.
 */
bool zebra_evpn_es_mac_ref_entry(struct zebra_mac *mac,
				 struct zebra_evpn_es *es)
{
	if (mac->es == es)
		return false;

	if (mac->es)
		zebra_evpn_es_mac_deref_entry(mac);

	if (!es)
		return true;

	mac->es = es;
	listnode_init(&mac->es_listnode, mac);
	listnode_add(es->mac_list, &mac->es_listnode);

	return true;
}

/* Reference the MAC against the ES identified by esi, auto-creating the
 * ES if it doesn't exist and the esi is non-zero. Returns true if the
 * MAC's ES association changed.
 */
bool zebra_evpn_es_mac_ref(struct zebra_mac *mac, const esi_t *esi)
{
	struct zebra_evpn_es *es;

	es = zebra_evpn_es_find(esi);
	if (!es) {
		/* If non-zero esi implicitly create a new ES */
		if (memcmp(esi, zero_esi, sizeof(esi_t))) {
			es = zebra_evpn_es_new(esi);
			if (IS_ZEBRA_DEBUG_EVPN_MH_ES)
				zlog_debug("auto es %s add on mac ref",
					   es->esi_str);
		}
	}

	return zebra_evpn_es_mac_ref_entry(mac, es);
}

/* Inform BGP about local ES-EVI add or del */
static int zebra_evpn_es_evi_send_to_client(struct zebra_evpn_es *es,
		struct zebra_evpn *zevpn, bool add)
{
	struct zserv *client;
	struct stream *s;

	client = zserv_find_client(ZEBRA_ROUTE_BGP, 0);
	/* BGP may not be running. */
	if (!client)
		return 0;

	s = stream_new(ZEBRA_MAX_PACKET_SIZ);

	zclient_create_header(s,
			add ? ZEBRA_LOCAL_ES_EVI_ADD : ZEBRA_LOCAL_ES_EVI_DEL,
			zebra_vrf_get_evpn_id());
	stream_put(s, &es->esi, sizeof(esi_t));
	stream_putl(s, zevpn->vni);

	/* Write packet size. */
	stream_putw_at(s, 0, stream_get_endp(s));

	if (IS_ZEBRA_DEBUG_EVPN_MH_ES)
		zlog_debug("send %s local es %s evi %u to %s",
				add ? "add" : "del",
				es->esi_str, zevpn->vni,
				zebra_route_string(client->proto));

	client->local_es_add_cnt++;
	return zserv_send_message(client, s);
}

/* sysmac part of a local ESI has changed */
static int zebra_evpn_es_sys_mac_update(struct zebra_if *zif,
					struct ethaddr *sysmac)
{
	int rv;

	rv = zebra_evpn_type3_esi_update(zif, zif->es_info.lid, sysmac);
	if (!rv)
		memcpy(&zif->es_info.sysmac, sysmac, sizeof(struct ethaddr));

	return rv;
}

/* local-ID part of ESI has changed */
static int zebra_evpn_es_lid_update(struct zebra_if *zif, uint32_t lid)
{
	int rv;

	rv = zebra_evpn_type3_esi_update(zif, lid, &zif->es_info.sysmac);
	if (!rv)
		zif->es_info.lid = lid;

	return rv;
}

/* type-0 esi has changed */
static int zebra_evpn_es_type0_esi_update(struct zebra_if *zif, esi_t *esi)
{
	int rv;

	rv = zebra_evpn_local_es_update(zif, esi);

	/* clear the old es_lid, es_sysmac - type-0 is being set so old
	 * type-3 params need to be flushed
	 */
	memset(&zif->es_info.sysmac, 0, sizeof(struct ethaddr));
	zif->es_info.lid = 0;

	return rv;
}

/* Walk the ES RB tree and delete all local and remote state; used on
 * daemon shutdown/cleanup
 */
void zebra_evpn_es_cleanup(void)
{
	struct zebra_evpn_es *es;
	struct zebra_evpn_es *es_next;

	RB_FOREACH_SAFE(es, zebra_es_rb_head,
			&zmh_info->es_rb_tree, es_next) {
		zebra_evpn_local_es_del(&es);
		/* es is NULLed if the local del freed it */
		if (es)
			zebra_evpn_remote_es_flush(&es);
	}
}

/* Apply a changed DF preference config to the interface and, if an ES is
 * attached, re-run DF election and notify BGP
 */
static void zebra_evpn_es_df_pref_update(struct zebra_if *zif, uint16_t df_pref)
{
	struct zebra_evpn_es *es;
	uint16_t tmp_pref;

	if (zif->es_info.df_pref == df_pref)
		return;

	zif->es_info.df_pref = df_pref;
	es = zif->es_info.es;

	if (!es)
		return;

	tmp_pref = zif->es_info.df_pref ? zif->es_info.df_pref
					: EVPN_MH_DF_PREF_DEFAULT;

	if (es->df_pref == tmp_pref)
		return;

	es->df_pref = tmp_pref;
	/* run df election */
	zebra_evpn_es_run_df_election(es, __func__);
	/* notify bgp */
	if (es->flags & ZEBRA_EVPNES_READY_FOR_BGP)
		zebra_evpn_es_send_add_to_client(es);
}

/* If bypass mode on an es changed we set all local macs to
 * inactive and drop the sync info
 */
static void zebra_evpn_es_bypass_update_macs(struct zebra_evpn_es *es,
					     struct interface *ifp, bool bypass)
{
	struct zebra_mac *mac;
	struct listnode *node;
	struct listnode *nnode;
	struct zebra_if *zif;

	/* Flush all MACs linked to the ES */
	for (ALL_LIST_ELEMENTS(es->mac_list, node, nnode, mac)) {
		if (!CHECK_FLAG(mac->flags, ZEBRA_MAC_LOCAL))
			continue;

		if (IS_ZEBRA_DEBUG_EVPN_MH_MAC)
			zlog_debug("VNI %u mac %pEA %s update es %s",
				   mac->zevpn->vni,
				   &mac->macaddr,
				   bypass ? "bypass" : "non-bypass",
				   es->esi_str);
		zebra_evpn_flush_local_mac(mac, ifp);
	}

	/* While in bypass-mode locally learnt MACs are linked
	 * to the access port instead of the ES
	 */
	zif = ifp->info;
	if (!zif->mac_list)
		return;

	for (ALL_LIST_ELEMENTS(zif->mac_list, node, nnode, mac)) {
		if (!CHECK_FLAG(mac->flags, ZEBRA_MAC_LOCAL))
			continue;

		if (IS_ZEBRA_DEBUG_EVPN_MH_MAC)
			zlog_debug("VNI %u mac %pEA %s update ifp %s",
				   mac->zevpn->vni,
				   &mac->macaddr,
				   bypass ? "bypass" : "non-bypass", ifp->name);
		zebra_evpn_flush_local_mac(mac, ifp);
	}
}

/* Apply an operational LACP-bypass change to the ES: notify BGP, flush
 * local MACs and re-run DF election
 */
void zebra_evpn_es_bypass_update(struct zebra_evpn_es *es,
				 struct interface *ifp, bool bypass)
{
	bool old_bypass;
	bool dplane_updated;

	old_bypass = !!(es->flags & ZEBRA_EVPNES_BYPASS);
	if (old_bypass == bypass)
		return;

	if (bypass)
		es->flags |= ZEBRA_EVPNES_BYPASS;
	else
		es->flags &= ~ZEBRA_EVPNES_BYPASS;

	if (IS_ZEBRA_DEBUG_EVPN_MH_ES)
		zlog_debug("bond %s es %s lacp bypass changed to %s", ifp->name,
			   es->esi_str, bypass ? "on" : "off");

	/* send bypass update to BGP */
	if (es->flags & ZEBRA_EVPNES_READY_FOR_BGP)
		zebra_evpn_es_send_add_to_client(es);

	zebra_evpn_es_bypass_update_macs(es, ifp, bypass);

	/* re-run DF election */
	dplane_updated = zebra_evpn_es_run_df_election(es, __func__);

	/* disable SPH filter */
	if (!dplane_updated && (es->flags & ZEBRA_EVPNES_LOCAL)
	    && (listcount(es->es_vtep_list) > ES_VTEP_MAX_CNT))
		zebra_evpn_es_br_port_dplane_update(es, __func__);
}

/* Record the configured bypass flag on the interface and push it to the
 * attached ES (if any)
 */
static void zebra_evpn_es_bypass_cfg_update(struct zebra_if *zif, bool bypass)
{
	bool old_bypass = !!(zif->es_info.flags & ZIF_CFG_ES_FLAG_BYPASS);

	if (old_bypass == bypass)
		return;

	if (bypass)
		zif->es_info.flags |= ZIF_CFG_ES_FLAG_BYPASS;
	else
		zif->es_info.flags &= ~ZIF_CFG_ES_FLAG_BYPASS;


	if (zif->es_info.es)
		zebra_evpn_es_bypass_update(zif->es_info.es, zif->ifp, bypass);
}


/* Only certain types of access ports can be setup as an Ethernet Segment */
bool zebra_evpn_is_if_es_capable(struct zebra_if *zif)
{
	if (zif->zif_type == ZEBRA_IF_BOND)
		return true;

	/* relax the checks to allow config to be applied in zebra
	 * before interface is rxed from the kernel
	 */
	if (zif->ifp->ifindex == IFINDEX_INTERNAL)
		return true;

	/* XXX: allow swpX i.e.
a regular ethernet port to be an ES link too */
	return false;
}

/* Append the interface's EVPN-MH attributes (ES id/sysmac, uplink state)
 * to the "show interface" output, in JSON or text form
 */
void zebra_evpn_if_es_print(struct vty *vty, json_object *json,
			    struct zebra_if *zif)
{
	char buf[ETHER_ADDR_STRLEN];
	char esi_buf[ESI_STR_LEN];

	if (json) {
		json_object *json_evpn;

		json_evpn = json_object_new_object();
		json_object_object_add(json, "evpnMh", json_evpn);

		if (zif->es_info.lid || !is_zero_mac(&zif->es_info.sysmac)) {
			json_object_int_add(json_evpn, "esId",
					    zif->es_info.lid);
			json_object_string_add(
				json_evpn, "esSysmac",
				prefix_mac2str(&zif->es_info.sysmac, buf,
					       sizeof(buf)));
		} else if (memcmp(&zif->es_info.esi, zero_esi,
				  sizeof(*zero_esi))) {
			json_object_string_add(json_evpn, "esId",
					       esi_to_str(&zif->es_info.esi,
							  esi_buf,
							  sizeof(esi_buf)));
		}

		if (zif->flags & ZIF_FLAG_EVPN_MH_UPLINK)
			json_object_string_add(
				json_evpn, "uplink",
				CHECK_FLAG(zif->flags,
					   ZIF_FLAG_EVPN_MH_UPLINK_OPER_UP)
					? "up"
					: "down");
	} else {
		char mh_buf[80];
		bool vty_print = false;

		mh_buf[0] = '\0';
		strlcat(mh_buf, "  EVPN-MH:", sizeof(mh_buf));
		if (zif->es_info.lid || !is_zero_mac(&zif->es_info.sysmac)) {
			vty_print = true;
			snprintf(mh_buf + strlen(mh_buf),
				 sizeof(mh_buf) - strlen(mh_buf),
				 " ES id %u ES sysmac %s", zif->es_info.lid,
				 prefix_mac2str(&zif->es_info.sysmac, buf,
						sizeof(buf)));
		} else if (memcmp(&zif->es_info.esi, zero_esi,
				  sizeof(*zero_esi))) {
			vty_print = true;
			snprintf(mh_buf + strnlen(mh_buf, sizeof(mh_buf)),
				 sizeof(mh_buf)
					 - strnlen(mh_buf, sizeof(mh_buf)),
				 " ES id %s",
				 esi_to_str(&zif->es_info.esi, esi_buf,
					    sizeof(esi_buf)));
		}

		if (zif->flags & ZIF_FLAG_EVPN_MH_UPLINK) {
			vty_print = true;
			if (zif->flags & ZIF_FLAG_EVPN_MH_UPLINK_OPER_UP)
				strlcat(mh_buf, " uplink (up)", sizeof(mh_buf));
			else
				strlcat(mh_buf, " uplink (down)",
					sizeof(mh_buf));
		}

		if (vty_print)
			vty_out(vty, "%s\n", mh_buf);
	}
}

/* On an ES oper-state change, move static local MACs between the access
 * port and the ES nexthop-group - only needed when fast-failover via an
 * ES backup NHG is not available (ZEBRA_EVPN_MH_REDIRECT_OFF)
 */
static void zebra_evpn_local_mac_oper_state_change(struct zebra_evpn_es *es)
{
	struct zebra_mac *mac;
	struct listnode *node;

	/* If fast-failover is supported by the dataplane via the use
	 * of an ES backup NHG there is nothing to be done in the
	 * control plane
	 */
	if (!(zmh_info->flags & ZEBRA_EVPN_MH_REDIRECT_OFF))
		return;

	if (IS_ZEBRA_DEBUG_EVPN_MH_ES || IS_ZEBRA_DEBUG_EVPN_MH_MAC)
		zlog_debug("mac slow-fail on es %s %s ", es->esi_str,
			   (es->flags & ZEBRA_EVPNES_OPER_UP) ? "up" : "down");

	for (ALL_LIST_ELEMENTS_RO(es->mac_list, node, mac)) {
		if (!(mac->flags & ZEBRA_MAC_LOCAL)
		    || !zebra_evpn_mac_is_static(mac))
			continue;

		if (es->flags & ZEBRA_EVPNES_OPER_UP) {
			if (IS_ZEBRA_DEBUG_EVPN_MH_MAC)
				zlog_debug(
					"VNI %u mac %pEA move to acc %s es %s %s ",
					mac->zevpn->vni,
					&mac->macaddr,
					es->zif->ifp->name, es->esi_str,
					(es->flags & ZEBRA_EVPNES_OPER_UP)
						? "up"
						: "down");
			/* switch the local macs to access port */
			if (zebra_evpn_sync_mac_dp_install(
				    mac, false /*set_inactive*/,
				    false /*force_clear_static*/, __func__)
			    < 0)
				/* if the local mac install fails get rid of the
				 * old rem entry
				 */
				zebra_evpn_rem_mac_uninstall(mac->zevpn, mac,
							     true /*force*/);
		} else {
			/* switch the local macs to network port. if there
			 * is no active NHG we don't bother deleting the MAC;
			 * that is left up to the dataplane to handle.
			 */
			if (!(es->flags & ZEBRA_EVPNES_NHG_ACTIVE))
				continue;
			if (IS_ZEBRA_DEBUG_EVPN_MH_MAC)
				zlog_debug(
					"VNI %u mac %pEA move to nhg %u es %s %s ",
					mac->zevpn->vni,
					&mac->macaddr,
					es->nhg_id, es->esi_str,
					(es->flags & ZEBRA_EVPNES_OPER_UP)
						? "up"
						: "down");
			zebra_evpn_rem_mac_install(mac->zevpn, mac,
						   true /*was_static*/);
		}
	}
}

/* Propagate an access-interface oper-state change to the attached ES:
 * update the OPER_UP flag, re-run DF election and inform BGP
 */
void zebra_evpn_es_if_oper_state_change(struct zebra_if *zif, bool up)
{
	struct zebra_evpn_es *es = zif->es_info.es;
	bool old_up = !!(es->flags & ZEBRA_EVPNES_OPER_UP);

	if (old_up == up)
		return;

	if (IS_ZEBRA_DEBUG_EVPN_MH_ES)
		zlog_debug("es %s state changed to %s ",
			   es->esi_str,
			   up ?
"up" : "down");
	if (up)
		es->flags |= ZEBRA_EVPNES_OPER_UP;
	else
		es->flags &= ~ZEBRA_EVPNES_OPER_UP;

	zebra_evpn_es_run_df_election(es, __func__);
	zebra_evpn_local_mac_oper_state_change(es);

	/* inform BGP of the ES oper state change */
	if (es->flags & ZEBRA_EVPNES_READY_FOR_BGP)
		zebra_evpn_es_send_add_to_client(es);
}

/* Render the ES's VTEP list as a comma separated IPv4 string into
 * vtep_str; returns vtep_str for convenience
 */
static char *zebra_evpn_es_vtep_str(char *vtep_str, struct zebra_evpn_es *es,
				    uint8_t vtep_str_size)
{
	struct zebra_evpn_es_vtep *zvtep;
	struct listnode *node;
	bool first = true;
	char ip_buf[INET6_ADDRSTRLEN];

	vtep_str[0] = '\0';
	for (ALL_LIST_ELEMENTS_RO(es->es_vtep_list, node, zvtep)) {
		if (first) {
			first = false;
			strlcat(vtep_str,
				inet_ntop(AF_INET, &zvtep->vtep_ip, ip_buf,
					  sizeof(ip_buf)),
				vtep_str_size);
		} else {
			strlcat(vtep_str, ",", vtep_str_size);
			strlcat(vtep_str,
				inet_ntop(AF_INET, &zvtep->vtep_ip, ip_buf,
					  sizeof(ip_buf)),
				vtep_str_size);
		}
	}
	return vtep_str;
}

/* Fill a JSON array with one entry per remote VTEP on the ES */
static void zebra_evpn_es_json_vtep_fill(struct zebra_evpn_es *es,
					 json_object *json_vteps)
{
	struct zebra_evpn_es_vtep *es_vtep;
	struct listnode *node;
	json_object *json_vtep_entry;
	char alg_buf[EVPN_DF_ALG_STR_LEN];

	for (ALL_LIST_ELEMENTS_RO(es->es_vtep_list, node, es_vtep)) {
		json_vtep_entry = json_object_new_object();
		json_object_string_addf(json_vtep_entry, "vtep", "%pI4",
					&es_vtep->vtep_ip);
		if (es_vtep->flags & ZEBRA_EVPNES_VTEP_RXED_ESR) {
			json_object_string_add(
				json_vtep_entry, "dfAlgorithm",
				evpn_es_df_alg2str(es_vtep->df_alg, alg_buf,
						   sizeof(alg_buf)));
			json_object_int_add(json_vtep_entry, "dfPreference",
					    es_vtep->df_pref);
		}
		if (es_vtep->nh)
			json_object_int_add(json_vtep_entry, "nexthopId",
					    es_vtep->nh->nh_id);
		json_object_array_add(json_vteps, json_vtep_entry);
	}
}

/* One-line (or one JSON object) summary of an ES for "show evpn es" */
static void zebra_evpn_es_show_entry(struct vty *vty, struct zebra_evpn_es *es,
				     json_object *json_array)
{
	char type_str[5];
	char vtep_str[ES_VTEP_LIST_STR_SZ];

	if (json_array) {
		json_object *json = NULL;
		json_object *json_vteps;
		json_object *json_flags;

		json = json_object_new_object();
		json_object_string_add(json, "esi", es->esi_str);

		if (es->flags
		    & (ZEBRA_EVPNES_LOCAL | ZEBRA_EVPNES_REMOTE
		       | ZEBRA_EVPNES_NON_DF)) {
			json_flags = json_object_new_array();
			if (es->flags & ZEBRA_EVPNES_LOCAL)
				json_array_string_add(json_flags, "local");
			if (es->flags & ZEBRA_EVPNES_REMOTE)
				json_array_string_add(json_flags, "remote");
			if (es->flags & ZEBRA_EVPNES_NON_DF)
				json_array_string_add(json_flags, "nonDF");
			if (es->flags & ZEBRA_EVPNES_BYPASS)
				json_array_string_add(json_flags, "bypass");
			json_object_object_add(json, "flags", json_flags);
		}

		if (es->zif)
			json_object_string_add(json, "accessPort",
					       es->zif->ifp->name);

		if (listcount(es->es_vtep_list)) {
			json_vteps = json_object_new_array();
			zebra_evpn_es_json_vtep_fill(es, json_vteps);
			json_object_object_add(json, "vteps", json_vteps);
		}
		json_object_array_add(json_array, json);
	} else {
		type_str[0] = '\0';
		if (es->flags & ZEBRA_EVPNES_LOCAL)
			strlcat(type_str, "L", sizeof(type_str));
		if (es->flags & ZEBRA_EVPNES_REMOTE)
			strlcat(type_str, "R", sizeof(type_str));
		if (es->flags & ZEBRA_EVPNES_NON_DF)
			strlcat(type_str, "N", sizeof(type_str));
		if (es->flags & ZEBRA_EVPNES_BYPASS)
			strlcat(type_str, "B", sizeof(type_str));

		zebra_evpn_es_vtep_str(vtep_str, es, sizeof(vtep_str));

		vty_out(vty, "%-30s %-4s %-21s %s\n",
			es->esi_str, type_str,
			es->zif ? es->zif->ifp->name : "-",
			vtep_str);
	}
}

/* Detailed per-ES output for "show evpn es detail" / "show evpn es ESI" */
static void zebra_evpn_es_show_entry_detail(struct vty *vty,
		struct zebra_evpn_es *es, json_object *json)
{
	char type_str[80];
	char alg_buf[EVPN_DF_ALG_STR_LEN];
	struct zebra_evpn_es_vtep *es_vtep;
	struct listnode *node;
	char thread_buf[THREAD_TIMER_STRLEN];

	if (json) {
		json_object *json_vteps;
		json_object *json_flags;

		json_object_string_add(json, "esi", es->esi_str);
		if (es->zif)
			json_object_string_add(json, "accessPort",
					       es->zif->ifp->name);


		if (es->flags) {
			json_flags = json_object_new_array();
			if (es->flags & ZEBRA_EVPNES_LOCAL)
				json_array_string_add(json_flags, "local");
			if (es->flags & ZEBRA_EVPNES_REMOTE)
				json_array_string_add(json_flags, "remote");
			if (es->flags & ZEBRA_EVPNES_NON_DF)
				json_array_string_add(json_flags, "nonDF");
			if (es->flags & ZEBRA_EVPNES_BYPASS)
				json_array_string_add(json_flags, "bypass");
			if (es->flags & ZEBRA_EVPNES_READY_FOR_BGP)
				json_array_string_add(json_flags,
						      "readyForBgp");
			if (es->flags & ZEBRA_EVPNES_BR_PORT)
				json_array_string_add(json_flags, "bridgePort");
			if (es->flags & ZEBRA_EVPNES_OPER_UP)
				json_array_string_add(json_flags, "operUp");
			if (es->flags & ZEBRA_EVPNES_NHG_ACTIVE)
				json_array_string_add(json_flags,
						      "nexthopGroupActive");
			json_object_object_add(json, "flags", json_flags);
		}

		json_object_int_add(json, "vniCount",
				    listcount(es->es_evi_list));
		json_object_int_add(json, "macCount", listcount(es->mac_list));
		json_object_int_add(json, "dfPreference", es->df_pref);
		if (es->df_delay_timer)
			json_object_string_add(
				json, "dfDelayTimer",
				thread_timer_to_hhmmss(thread_buf,
						       sizeof(thread_buf),
						       es->df_delay_timer));
		json_object_int_add(json, "nexthopGroup", es->nhg_id);
		if (listcount(es->es_vtep_list)) {
			json_vteps = json_object_new_array();
			zebra_evpn_es_json_vtep_fill(es, json_vteps);
			json_object_object_add(json, "vteps", json_vteps);
		}
	} else {
		type_str[0] = '\0';
		if (es->flags & ZEBRA_EVPNES_LOCAL)
			strlcat(type_str, "Local", sizeof(type_str));
		if (es->flags & ZEBRA_EVPNES_REMOTE) {
			if (strnlen(type_str, sizeof(type_str)))
				strlcat(type_str, ",", sizeof(type_str));
			strlcat(type_str, "Remote", sizeof(type_str));
		}

		vty_out(vty, "ESI: %s\n", es->esi_str);
		vty_out(vty, " Type: %s\n", type_str);
		vty_out(vty, " Interface: %s\n",
			(es->zif) ?
			es->zif->ifp->name : "-");
		if (es->flags & ZEBRA_EVPNES_LOCAL) {
			vty_out(vty, " State: %s\n",
				(es->flags & ZEBRA_EVPNES_OPER_UP) ? "up"
								   : "down");
			vty_out(vty, " Bridge port: %s\n",
				(es->flags & ZEBRA_EVPNES_BR_PORT) ? "yes"
								   : "no");
		}
		vty_out(vty, " Ready for BGP: %s\n",
			(es->flags & ZEBRA_EVPNES_READY_FOR_BGP) ?
			"yes" : "no");
		if (es->flags & ZEBRA_EVPNES_BYPASS)
			vty_out(vty, " LACP bypass: on\n");
		vty_out(vty, " VNI Count: %d\n", listcount(es->es_evi_list));
		vty_out(vty, " MAC Count: %d\n", listcount(es->mac_list));
		if (es->flags & ZEBRA_EVPNES_LOCAL)
			vty_out(vty, " DF status: %s \n",
				(es->flags & ZEBRA_EVPNES_NON_DF) ? "non-df"
								  : "df");
		if (es->df_delay_timer)
			vty_out(vty, " DF delay: %s\n",
				thread_timer_to_hhmmss(thread_buf,
						       sizeof(thread_buf),
						       es->df_delay_timer));
		vty_out(vty, " DF preference: %u\n", es->df_pref);
		vty_out(vty, " Nexthop group: %u\n", es->nhg_id);
		vty_out(vty, " VTEPs:\n");
		for (ALL_LIST_ELEMENTS_RO(es->es_vtep_list, node, es_vtep)) {
			vty_out(vty, "     %pI4",
				&es_vtep->vtep_ip);
			if (es_vtep->flags & ZEBRA_EVPNES_VTEP_RXED_ESR)
				vty_out(vty, " df_alg: %s df_pref: %d",
					evpn_es_df_alg2str(es_vtep->df_alg,
							   alg_buf,
							   sizeof(alg_buf)),
					es_vtep->df_pref);
			vty_out(vty, " nh: %u\n",
				es_vtep->nh ? es_vtep->nh->nh_id : 0);
		}

		vty_out(vty, "\n");
	}
}

/* "show evpn es" - summary of all Ethernet Segments */
void zebra_evpn_es_show(struct vty *vty, bool uj)
{
	struct zebra_evpn_es *es;
	json_object *json_array = NULL;

	if (uj) {
		json_array = json_object_new_array();
	} else {
		vty_out(vty, "Type: B bypass, L local, R remote, N non-DF\n");
		vty_out(vty, "%-30s %-4s %-21s %s\n",
			"ESI", "Type", "ES-IF", "VTEPs");
	}

	RB_FOREACH(es, zebra_es_rb_head, &zmh_info->es_rb_tree)
		zebra_evpn_es_show_entry(vty, es, json_array);

	if (uj)
		vty_json(vty, json_array);
}

/* "show evpn es detail" - detailed dump of every Ethernet Segment */
void zebra_evpn_es_show_detail(struct vty *vty, bool uj)
{
	struct zebra_evpn_es *es;
	json_object *json_array = NULL;

	if (uj)
		json_array = json_object_new_array();

	RB_FOREACH (es, zebra_es_rb_head, &zmh_info->es_rb_tree) {
		json_object *json = NULL;

		if (uj)
			json = json_object_new_object();
		zebra_evpn_es_show_entry_detail(vty, es, json);
		if (uj)
			json_object_array_add(json_array, json);
	}

	if (uj)
		vty_json(vty, json_array);
}

/* "show evpn es ESI" - detailed dump of a single Ethernet Segment */
void zebra_evpn_es_show_esi(struct vty *vty, bool uj, esi_t *esi)
{
	struct zebra_evpn_es *es;
	char esi_str[ESI_STR_LEN];
	json_object *json = NULL;

	if (uj)
		json = json_object_new_object();

	es = zebra_evpn_es_find(esi);

	if (es) {
		zebra_evpn_es_show_entry_detail(vty, es, json);
	} else {
		if (!uj) {
			esi_to_str(esi, esi_str, sizeof(esi_str));
			vty_out(vty, "ESI %s does not exist\n", esi_str);
		}
	}

	if (uj)
		vty_json(vty, json);
}

/* Emit the interface's EVPN-MH running-config lines */
int zebra_evpn_mh_if_write(struct vty *vty, struct interface *ifp)
{
	struct zebra_if *zif = ifp->info;
	char buf[ETHER_ADDR_STRLEN];
	bool type_3_esi = false;
	char esi_buf[ESI_STR_LEN];

	if (zif->es_info.lid) {
		vty_out(vty, " evpn mh es-id %u\n", zif->es_info.lid);
		type_3_esi = true;
	}

	if (!is_zero_mac(&zif->es_info.sysmac)) {
		vty_out(vty, " evpn mh es-sys-mac %s\n",
			prefix_mac2str(&zif->es_info.sysmac,
				       buf, sizeof(buf)));
		type_3_esi = true;
	}

	if (!type_3_esi
	    &&
memcmp(&zif->es_info.esi, zero_esi, sizeof(*zero_esi)))
		vty_out(vty, " evpn mh es-id %s\n",
			esi_to_str(&zif->es_info.esi, esi_buf, sizeof(esi_buf)));

	if (zif->es_info.df_pref)
		vty_out(vty, " evpn mh es-df-pref %u\n", zif->es_info.df_pref);

	if (zif->flags & ZIF_FLAG_EVPN_MH_UPLINK)
		vty_out(vty, " evpn mh uplink\n");

	return 0;
}

#ifndef VTYSH_EXTRACT_PL
#include "zebra/zebra_evpn_mh_clippy.c"
#endif
/* CLI for setting an ES in bypass mode */
DEFPY_HIDDEN(zebra_evpn_es_bypass, zebra_evpn_es_bypass_cmd,
	     "[no] evpn mh bypass",
	     NO_STR "EVPN\n" EVPN_MH_VTY_STR "set bypass mode\n")
{
	VTY_DECLVAR_CONTEXT(interface, ifp);
	struct zebra_if *zif;

	zif = ifp->info;

	if (no) {
		zebra_evpn_es_bypass_cfg_update(zif, false);
	} else {
		if (!zebra_evpn_is_if_es_capable(zif)) {
			vty_out(vty,
				"%% DF bypass cannot be associated with this interface type\n");
			return CMD_WARNING;
		}
		zebra_evpn_es_bypass_cfg_update(zif, true);
	}
	return CMD_SUCCESS;
}

/* CLI for configuring DF preference part for an ES */
DEFPY(zebra_evpn_es_pref, zebra_evpn_es_pref_cmd,
      "[no$no] evpn mh es-df-pref [(1-65535)$df_pref]",
      NO_STR "EVPN\n" EVPN_MH_VTY_STR
	     "preference value used for DF election\n"
	     "pref\n")
{
	VTY_DECLVAR_CONTEXT(interface, ifp);
	struct zebra_if *zif;

	zif = ifp->info;

	if (no) {
		zebra_evpn_es_df_pref_update(zif, 0);
	} else {
		if (!zebra_evpn_is_if_es_capable(zif)) {
			vty_out(vty,
				"%% DF preference cannot be associated with this interface type\n");
			return CMD_WARNING;
		}
		zebra_evpn_es_df_pref_update(zif, df_pref);
	}
	return CMD_SUCCESS;
}

/* CLI for setting up sysmac part of ESI on an access port */
DEFPY(zebra_evpn_es_sys_mac,
      zebra_evpn_es_sys_mac_cmd,
      "[no$no] evpn mh es-sys-mac [X:X:X:X:X:X$mac]",
      NO_STR
      "EVPN\n"
      EVPN_MH_VTY_STR
      "Ethernet segment system MAC\n"
      MAC_STR
)
{
	VTY_DECLVAR_CONTEXT(interface, ifp);
	struct zebra_if *zif;
	int ret = 0;

	zif = ifp->info;

	if (no) {
		/* a zero sysmac clears the ESI */
		static struct ethaddr zero_mac;

		ret = zebra_evpn_es_sys_mac_update(zif, &zero_mac);
		if (ret == -1) {
			vty_out(vty, "%% Failed to clear ES sysmac\n");
			return CMD_WARNING;
		}
	} else {

		if (!zebra_evpn_is_if_es_capable(zif)) {
			vty_out(vty,
				"%% ESI cannot be associated with this interface type\n");
			return CMD_WARNING;
		}

		if (!mac || is_zero_mac(&mac->eth_addr)) {
			vty_out(vty, "%% ES sysmac value is invalid\n");
			return CMD_WARNING;
		}

		ret = zebra_evpn_es_sys_mac_update(zif, &mac->eth_addr);
		if (ret == -1) {
			vty_out(vty,
				"%% ESI already exists on a different interface\n");
			return CMD_WARNING;
		}
	}
	return CMD_SUCCESS;
}

/* CLI for setting up local-ID part of ESI on an access port */
DEFPY(zebra_evpn_es_id,
      zebra_evpn_es_id_cmd,
      "[no$no] evpn mh es-id [(1-16777215)$es_lid | NAME$esi_str]",
      NO_STR
      "EVPN\n"
      EVPN_MH_VTY_STR
      "Ethernet segment identifier\n"
      "local discriminator\n"
      "10-byte ID - 00:AA:BB:CC:DD:EE:FF:GG:HH:II\n"
)
{
	VTY_DECLVAR_CONTEXT(interface, ifp);
	struct zebra_if *zif;
	int ret = 0;
	esi_t esi;

	zif = ifp->info;

	if (no) {
		/* clear whichever form (type-3 lid or type-0 ESI) is set */
		if (zif->es_info.lid)
			ret = zebra_evpn_es_lid_update(zif, 0);
		else if (memcmp(&zif->es_info.esi, zero_esi, sizeof(*zero_esi)))
			ret = zebra_evpn_es_type0_esi_update(zif, zero_esi);

		if (ret == -1) {
			vty_out(vty,
				"%% Failed to clear ES local id or ESI name\n");
			return CMD_WARNING;
		}
	} else {
		if (!zebra_evpn_is_if_es_capable(zif)) {
			vty_out(vty,
				"%% ESI cannot be associated with this interface type\n");
			return CMD_WARNING;
		}

		if (esi_str) {
			if (!str_to_esi(esi_str, &esi)) {
				vty_out(vty, "%% Malformed ESI name\n");
				return CMD_WARNING;
			}
			ret = zebra_evpn_es_type0_esi_update(zif, &esi);
		} else {
			if (!es_lid) {
				vty_out(vty,
					"%% Specify ES local id or ESI name\n");
				return CMD_WARNING;
			}
			ret = zebra_evpn_es_lid_update(zif, es_lid);
		}

		if (ret == -1) {
			vty_out(vty,
				"%% ESI already exists on a different interface\n");
			return CMD_WARNING;
		}
	}
	return CMD_SUCCESS;
}

/* CLI for tagging an interface as an uplink */
DEFPY(zebra_evpn_mh_uplink, zebra_evpn_mh_uplink_cmd, "[no] evpn mh uplink",
      NO_STR "EVPN\n" EVPN_MH_VTY_STR "uplink to the VxLAN core\n")
{
	VTY_DECLVAR_CONTEXT(interface, ifp);
	struct zebra_if *zif;

	zif = ifp->info;
	zebra_evpn_mh_uplink_cfg_update(zif, no ? false : true);

	return CMD_SUCCESS;
}

/* Fill global EVPN-MH info (hold times, startup delay, uplink counts,
 * protodown reasons) into a JSON object for "show evpn"
 */
void zebra_evpn_mh_json(json_object *json)
{
	json_object *json_array;
	char thread_buf[THREAD_TIMER_STRLEN];

	json_object_int_add(json, "macHoldtime", zmh_info->mac_hold_time);
	json_object_int_add(json, "neighHoldtime", zmh_info->neigh_hold_time);
	json_object_int_add(json, "startupDelay", zmh_info->startup_delay_time);
	json_object_string_add(
		json, "startupDelayTimer",
		thread_timer_to_hhmmss(thread_buf, sizeof(thread_buf),
				       zmh_info->startup_delay_timer));
	json_object_int_add(json, "uplinkConfigCount",
			    zmh_info->uplink_cfg_cnt);
	json_object_int_add(json, "uplinkActiveCount",
			    zmh_info->uplink_oper_up_cnt);

	if (zmh_info->protodown_rc) {
		json_array = json_object_new_array();
		if (CHECK_FLAG(zmh_info->protodown_rc,
			       ZEBRA_PROTODOWN_EVPN_STARTUP_DELAY))
			json_object_array_add(
				json_array,
				json_object_new_string("startupDelay"));
		if (CHECK_FLAG(zmh_info->protodown_rc,
			       ZEBRA_PROTODOWN_EVPN_UPLINK_DOWN))
			json_object_array_add(
				json_array,
				json_object_new_string("uplinkDown"));
		json_object_object_add(json, "protodownReasons", json_array);
	}
}

/* Text counterpart of zebra_evpn_mh_json for "show evpn" */
void zebra_evpn_mh_print(struct vty *vty)
{
	char pd_buf[ZEBRA_PROTODOWN_RC_STR_LEN];
	char thread_buf[THREAD_TIMER_STRLEN];

	vty_out(vty, "EVPN MH:\n");
	vty_out(vty, "  mac-holdtime: %ds, neigh-holdtime: %ds\n",
		zmh_info->mac_hold_time, zmh_info->neigh_hold_time);
	vty_out(vty, "  startup-delay: %ds, start-delay-timer: %s\n",
		zmh_info->startup_delay_time,
		thread_timer_to_hhmmss(thread_buf, sizeof(thread_buf),
zmh_info->startup_delay_timer)); + vty_out(vty, " uplink-cfg-cnt: %u, uplink-active-cnt: %u\n", + zmh_info->uplink_cfg_cnt, zmh_info->uplink_oper_up_cnt); + if (zmh_info->protodown_rc) + vty_out(vty, " protodown reasons: %s\n", + zebra_protodown_rc_str(zmh_info->protodown_rc, pd_buf, + sizeof(pd_buf))); +} + +/*****************************************************************************/ +/* A base L2-VNI is maintained to derive parameters such as ES originator-IP. + * XXX: once single vxlan device model becomes available this will not be + * necessary + */ +/* called when a new vni is added or becomes oper up or becomes a bridge port */ +void zebra_evpn_es_set_base_evpn(struct zebra_evpn *zevpn) +{ + struct listnode *node; + struct zebra_evpn_es *es; + + if (zmh_info->es_base_evpn) { + if (zmh_info->es_base_evpn != zevpn) { + /* unrelated EVPN; ignore it */ + return; + } + /* check if the local vtep-ip has changed */ + } else { + /* check if the EVPN can be used as base EVPN */ + if (!zebra_evpn_send_to_client_ok(zevpn)) + return; + + if (IS_ZEBRA_DEBUG_EVPN_MH_ES) + zlog_debug("es base vni set to %d", + zevpn->vni); + zmh_info->es_base_evpn = zevpn; + } + + /* update local VTEP-IP */ + if (zmh_info->es_originator_ip.s_addr == + zmh_info->es_base_evpn->local_vtep_ip.s_addr) + return; + + zmh_info->es_originator_ip.s_addr = + zmh_info->es_base_evpn->local_vtep_ip.s_addr; + + if (IS_ZEBRA_DEBUG_EVPN_MH_ES) + zlog_debug("es originator ip set to %pI4", + &zmh_info->es_base_evpn->local_vtep_ip); + + /* if originator ip changes we need to update bgp */ + for (ALL_LIST_ELEMENTS_RO(zmh_info->local_es_list, node, es)) { + zebra_evpn_es_run_df_election(es, __func__); + + if (es->flags & ZEBRA_EVPNES_READY_FOR_BGP) + zebra_evpn_es_send_add_to_client(es); + else + zebra_evpn_es_re_eval_send_to_client(es, + true /* es_evi_re_reval */); + } +} + +/* called when a vni is removed or becomes oper down or is removed from a + * bridge + */ +void zebra_evpn_es_clear_base_evpn(struct 
zebra_evpn *zevpn) +{ + struct listnode *node; + struct zebra_evpn_es *es; + + if (zmh_info->es_base_evpn != zevpn) + return; + + zmh_info->es_base_evpn = NULL; + /* lost current base EVPN; try to find a new one */ + zebra_evpn_es_get_one_base_evpn(); + + /* couldn't locate an eligible base evpn */ + if (!zmh_info->es_base_evpn && zmh_info->es_originator_ip.s_addr) { + if (IS_ZEBRA_DEBUG_EVPN_MH_ES) + zlog_debug("es originator ip cleared"); + + zmh_info->es_originator_ip.s_addr = 0; + /* lost originator ip */ + for (ALL_LIST_ELEMENTS_RO(zmh_info->local_es_list, node, es)) { + zebra_evpn_es_re_eval_send_to_client(es, + true /* es_evi_re_reval */); + } + } +} + +/* Locate an "eligible" L2-VNI to follow */ +static int zebra_evpn_es_get_one_base_evpn_cb(struct hash_bucket *b, void *data) +{ + struct zebra_evpn *zevpn = b->data; + + zebra_evpn_es_set_base_evpn(zevpn); + + if (zmh_info->es_base_evpn) + return HASHWALK_ABORT; + + return HASHWALK_CONTINUE; +} + +/* locate a base_evpn to follow for the purposes of common params like + * originator IP + */ +static void zebra_evpn_es_get_one_base_evpn(void) +{ + struct zebra_vrf *zvrf; + + zvrf = zebra_vrf_get_evpn(); + hash_walk(zvrf->evpn_table, zebra_evpn_es_get_one_base_evpn_cb, NULL); +} + +/***************************************************************************** + * local ethernet segments can be error-disabled if the switch is not + * ready to start transmitting traffic via the VxLAN overlay + */ +bool zebra_evpn_is_es_bond(struct interface *ifp) +{ + struct zebra_if *zif = ifp->info; + + return !!(struct zebra_if *)zif->es_info.es; +} + +bool zebra_evpn_is_es_bond_member(struct interface *ifp) +{ + struct zebra_if *zif = ifp->info; + + return IS_ZEBRA_IF_BOND_SLAVE(zif->ifp) && zif->bondslave_info.bond_if + && ((struct zebra_if *)zif->bondslave_info.bond_if->info) + ->es_info.es; +} + +void zebra_evpn_mh_update_protodown_bond_mbr(struct zebra_if *zif, bool clear, + const char *caller) +{ + bool new_protodown; + 
uint32_t old_protodown_rc = 0; + uint32_t new_protodown_rc = 0; + uint32_t protodown_rc = 0; + + if (!clear) { + struct zebra_if *bond_zif; + + bond_zif = zif->bondslave_info.bond_if->info; + protodown_rc = bond_zif->protodown_rc; + } + + old_protodown_rc = zif->protodown_rc; + new_protodown_rc = (old_protodown_rc & ~ZEBRA_PROTODOWN_EVPN_ALL); + new_protodown_rc |= (protodown_rc & ZEBRA_PROTODOWN_EVPN_ALL); + new_protodown = !!new_protodown_rc; + + if (IS_ZEBRA_DEBUG_EVPN_MH_ES && (new_protodown_rc != old_protodown_rc)) + zlog_debug( + "%s bond mbr %s protodown_rc changed; old 0x%x new 0x%x", + caller, zif->ifp->name, old_protodown_rc, + new_protodown_rc); + + if (zebra_if_update_protodown_rc(zif->ifp, new_protodown, + new_protodown_rc) == 0) { + if (IS_ZEBRA_DEBUG_EVPN_MH_ES) + zlog_debug("%s protodown %s", zif->ifp->name, + new_protodown ? "on" : "off"); + } +} + +/* The bond members inherit the protodown reason code from the bond */ +static void zebra_evpn_mh_update_protodown_bond(struct zebra_if *bond_zif) +{ + struct zebra_if *zif; + struct listnode *node; + + if (!bond_zif->bond_info.mbr_zifs) + return; + + for (ALL_LIST_ELEMENTS_RO(bond_zif->bond_info.mbr_zifs, node, zif)) { + zebra_evpn_mh_update_protodown_bond_mbr(zif, false /*clear*/, + __func__); + } +} + +/* The global EVPN MH protodown rc is applied to all local ESs */ +static void zebra_evpn_mh_update_protodown_es(struct zebra_evpn_es *es, + bool resync_dplane) +{ + struct zebra_if *zif; + uint32_t old_protodown_rc; + + zif = es->zif; + /* if the reason code is the same bail unless it is a new + * ES bond in that case we would need to ensure that the + * dplane is really in sync with zebra + */ + if (!resync_dplane + && (zif->protodown_rc & ZEBRA_PROTODOWN_EVPN_ALL) + == (zmh_info->protodown_rc & ZEBRA_PROTODOWN_EVPN_ALL)) + return; + + old_protodown_rc = zif->protodown_rc; + zif->protodown_rc &= ~ZEBRA_PROTODOWN_EVPN_ALL; + zif->protodown_rc |= + (zmh_info->protodown_rc & ZEBRA_PROTODOWN_EVPN_ALL); + 
+ if (IS_ZEBRA_DEBUG_EVPN_MH_ES + && (old_protodown_rc != zif->protodown_rc)) + zlog_debug( + "es %s ifp %s protodown_rc changed; old 0x%x new 0x%x", + es->esi_str, zif->ifp->name, old_protodown_rc, + zif->protodown_rc); + + /* update dataplane with the new protodown setting */ + zebra_evpn_mh_update_protodown_bond(zif); +} + +static void zebra_evpn_mh_clear_protodown_es(struct zebra_evpn_es *es) +{ + struct zebra_if *zif; + uint32_t old_protodown_rc; + + zif = es->zif; + if (!(zif->protodown_rc & ZEBRA_PROTODOWN_EVPN_ALL)) + return; + + old_protodown_rc = zif->protodown_rc; + zif->protodown_rc &= ~ZEBRA_PROTODOWN_EVPN_ALL; + + if (IS_ZEBRA_DEBUG_EVPN_MH_ES) + zlog_debug( + "clear: es %s ifp %s protodown_rc cleared; old 0x%x new 0x%x", + es->esi_str, zif->ifp->name, old_protodown_rc, + zif->protodown_rc); + + /* update dataplane with the new protodown setting */ + zebra_evpn_mh_update_protodown_bond(zif); +} + +static void zebra_evpn_mh_update_protodown_es_all(void) +{ + struct listnode *node; + struct zebra_evpn_es *es; + + for (ALL_LIST_ELEMENTS_RO(zmh_info->local_es_list, node, es)) + zebra_evpn_mh_update_protodown_es(es, false /*resync_dplane*/); +} + +static void zebra_evpn_mh_update_protodown(uint32_t protodown_rc, bool set) +{ + uint32_t old_protodown_rc = zmh_info->protodown_rc; + + if (set) { + if ((protodown_rc & zmh_info->protodown_rc) == protodown_rc) + return; + + zmh_info->protodown_rc |= protodown_rc; + } else { + if (!(protodown_rc & zmh_info->protodown_rc)) + return; + zmh_info->protodown_rc &= ~protodown_rc; + } + + if (IS_ZEBRA_DEBUG_EVPN_MH_ES) + zlog_debug("mh protodown_rc changed; old 0x%x new 0x%x", + old_protodown_rc, zmh_info->protodown_rc); + zebra_evpn_mh_update_protodown_es_all(); +} + +static inline bool zebra_evpn_mh_is_all_uplinks_down(void) +{ + return zmh_info->uplink_cfg_cnt && !zmh_info->uplink_oper_up_cnt; +} + +static void zebra_evpn_mh_uplink_oper_flags_update(struct zebra_if *zif, + bool set) +{ + if (set && 
if_is_operative(zif->ifp)) { + if (!(zif->flags & ZIF_FLAG_EVPN_MH_UPLINK_OPER_UP)) { + zif->flags |= ZIF_FLAG_EVPN_MH_UPLINK_OPER_UP; + ++zmh_info->uplink_oper_up_cnt; + } + } else { + if (zif->flags & ZIF_FLAG_EVPN_MH_UPLINK_OPER_UP) { + zif->flags &= ~ZIF_FLAG_EVPN_MH_UPLINK_OPER_UP; + if (zmh_info->uplink_oper_up_cnt) + --zmh_info->uplink_oper_up_cnt; + } + } +} + +static void zebra_evpn_mh_uplink_cfg_update(struct zebra_if *zif, bool set) +{ + bool old_protodown = zebra_evpn_mh_is_all_uplinks_down(); + bool new_protodown; + + if (set) { + if (zif->flags & ZIF_FLAG_EVPN_MH_UPLINK) + return; + + zif->flags |= ZIF_FLAG_EVPN_MH_UPLINK; + ++zmh_info->uplink_cfg_cnt; + } else { + if (!(zif->flags & ZIF_FLAG_EVPN_MH_UPLINK)) + return; + + zif->flags &= ~ZIF_FLAG_EVPN_MH_UPLINK; + if (zmh_info->uplink_cfg_cnt) + --zmh_info->uplink_cfg_cnt; + } + + zebra_evpn_mh_uplink_oper_flags_update(zif, set); + new_protodown = zebra_evpn_mh_is_all_uplinks_down(); + if (old_protodown == new_protodown) + return; + + if (IS_ZEBRA_DEBUG_EVPN_MH_ES) + zlog_debug( + "mh-uplink-cfg-chg on if %s/%d %s uplinks cfg %u up %u", + zif->ifp->name, zif->ifp->ifindex, set ? "set" : "down", + zmh_info->uplink_cfg_cnt, zmh_info->uplink_oper_up_cnt); + + zebra_evpn_mh_update_protodown(ZEBRA_PROTODOWN_EVPN_UPLINK_DOWN, + new_protodown); +} + +void zebra_evpn_mh_uplink_oper_update(struct zebra_if *zif) +{ + bool old_protodown = zebra_evpn_mh_is_all_uplinks_down(); + bool new_protodown; + + zebra_evpn_mh_uplink_oper_flags_update(zif, true /*set*/); + + if (IS_ZEBRA_DEBUG_EVPN_MH_ES) + zlog_debug( + "mh-uplink-oper-chg on if %s/%d %s; uplinks cfg %u up %u", + zif->ifp->name, zif->ifp->ifindex, + if_is_operative(zif->ifp) ? 
"up" : "down", + zmh_info->uplink_cfg_cnt, zmh_info->uplink_oper_up_cnt); + + new_protodown = zebra_evpn_mh_is_all_uplinks_down(); + if (old_protodown == new_protodown) + return; + + /* if protodown_rc XXX_UPLINK_DOWN is about to be cleared + * fire up the start-up delay timer to allow the EVPN network + * to converge (Type-2 routes need to be advertised and processed) + */ + if (!new_protodown && (zmh_info->uplink_oper_up_cnt == 1)) + zebra_evpn_mh_startup_delay_timer_start("uplink-up"); + + zebra_evpn_mh_update_protodown(ZEBRA_PROTODOWN_EVPN_UPLINK_DOWN, + new_protodown); +} + +static void zebra_evpn_mh_startup_delay_exp_cb(struct thread *t) +{ + if (IS_ZEBRA_DEBUG_EVPN_MH_ES) + zlog_debug("startup-delay expired"); + + zebra_evpn_mh_update_protodown(ZEBRA_PROTODOWN_EVPN_STARTUP_DELAY, + false /* set */); +} + +static void zebra_evpn_mh_startup_delay_timer_start(const char *rc) +{ + if (zmh_info->startup_delay_timer) { + if (IS_ZEBRA_DEBUG_EVPN_MH_ES) + zlog_debug("startup-delay timer cancelled"); + THREAD_OFF(zmh_info->startup_delay_timer); + } + + if (zmh_info->startup_delay_time) { + if (IS_ZEBRA_DEBUG_EVPN_MH_ES) + zlog_debug( + "startup-delay timer started for %d sec on %s", + zmh_info->startup_delay_time, rc); + thread_add_timer(zrouter.master, + zebra_evpn_mh_startup_delay_exp_cb, NULL, + zmh_info->startup_delay_time, + &zmh_info->startup_delay_timer); + zebra_evpn_mh_update_protodown( + ZEBRA_PROTODOWN_EVPN_STARTUP_DELAY, true /* set */); + } else { + zebra_evpn_mh_update_protodown( + ZEBRA_PROTODOWN_EVPN_STARTUP_DELAY, false /* set */); + } +} + +/***************************************************************************** + * Nexthop management: nexthops associated with Type-2 routes that have + * an ES as destination are consolidated by BGP into a per-VRF nh->rmac + * mapping which is the installed as a remote neigh/fdb entry with a + * dummy (type-1) prefix referencing it. 
+ * This handling is needed because Type-2 routes with ES as dest use NHG + * that are setup using EAD routes (i.e. such NHGs do not include the + * RMAC info). + ****************************************************************************/ +void zebra_evpn_proc_remote_nh(ZAPI_HANDLER_ARGS) +{ + struct stream *s; + vrf_id_t vrf_id; + struct ipaddr nh; + struct ethaddr rmac; + struct prefix_evpn dummy_prefix; + size_t min_len = 4 + sizeof(nh); + + s = msg; + + /* + * Ensure that the stream sent to us is long enough + */ + if (hdr->command == ZEBRA_EVPN_REMOTE_NH_ADD) + min_len += sizeof(rmac); + if (hdr->length < min_len) + return; + + vrf_id = stream_getl(s); + stream_get(&nh, s, sizeof(nh)); + + memset(&dummy_prefix, 0, sizeof(dummy_prefix)); + dummy_prefix.family = AF_EVPN; + dummy_prefix.prefixlen = (sizeof(struct evpn_addr) * 8); + dummy_prefix.prefix.route_type = 1; /* XXX - fixup to type-1 def */ + dummy_prefix.prefix.ead_addr.ip.ipa_type = nh.ipa_type; + + if (hdr->command == ZEBRA_EVPN_REMOTE_NH_ADD) { + stream_get(&rmac, s, sizeof(rmac)); + if (IS_ZEBRA_DEBUG_EVPN_MH_ES) + zlog_debug( + "evpn remote nh %d %pIA rmac %pEA add pfx %pFX", + vrf_id, &nh, &rmac, &dummy_prefix); + zebra_rib_queue_evpn_route_add(vrf_id, &rmac, &nh, + (struct prefix *)&dummy_prefix); + } else { + if (IS_ZEBRA_DEBUG_EVPN_MH_ES) + zlog_debug("evpn remote nh %d %pIA del pfx %pFX", + vrf_id, &nh, &dummy_prefix); + zebra_rib_queue_evpn_route_del(vrf_id, &nh, + (struct prefix *)&dummy_prefix); + } +} + +/*****************************************************************************/ +void zebra_evpn_mh_config_write(struct vty *vty) +{ + if (zmh_info->mac_hold_time != ZEBRA_EVPN_MH_MAC_HOLD_TIME_DEF) + vty_out(vty, "evpn mh mac-holdtime %d\n", + zmh_info->mac_hold_time); + + if (zmh_info->neigh_hold_time != ZEBRA_EVPN_MH_NEIGH_HOLD_TIME_DEF) + vty_out(vty, "evpn mh neigh-holdtime %d\n", + zmh_info->neigh_hold_time); + + if (zmh_info->startup_delay_time != ZEBRA_EVPN_MH_STARTUP_DELAY_DEF) + 
vty_out(vty, "evpn mh startup-delay %d\n", + zmh_info->startup_delay_time); + + if (zmh_info->flags & ZEBRA_EVPN_MH_REDIRECT_OFF) + vty_out(vty, "evpn mh redirect-off\n"); +} + +int zebra_evpn_mh_neigh_holdtime_update(struct vty *vty, + uint32_t duration, bool set_default) +{ + if (set_default) + duration = ZEBRA_EVPN_MH_NEIGH_HOLD_TIME_DEF; + + zmh_info->neigh_hold_time = duration; + + return 0; +} + +int zebra_evpn_mh_mac_holdtime_update(struct vty *vty, + uint32_t duration, bool set_default) +{ + if (set_default) + duration = ZEBRA_EVPN_MH_MAC_HOLD_TIME_DEF; + + zmh_info->mac_hold_time = duration; + + return 0; +} + +int zebra_evpn_mh_startup_delay_update(struct vty *vty, uint32_t duration, + bool set_default) +{ + if (set_default) + duration = ZEBRA_EVPN_MH_STARTUP_DELAY_DEF; + + zmh_info->startup_delay_time = duration; + + /* if startup_delay_timer is running allow it to be adjusted + * up or down + */ + if (zmh_info->startup_delay_timer) + zebra_evpn_mh_startup_delay_timer_start("config"); + + return 0; +} + +int zebra_evpn_mh_redirect_off(struct vty *vty, bool redirect_off) +{ + /* This knob needs to be set before ESs are configured + * i.e. 
cannot be changed on the fly + */ + if (redirect_off) + zmh_info->flags |= ZEBRA_EVPN_MH_REDIRECT_OFF; + else + zmh_info->flags &= ~ZEBRA_EVPN_MH_REDIRECT_OFF; + + return 0; +} + +void zebra_evpn_interface_init(void) +{ + install_element(INTERFACE_NODE, &zebra_evpn_es_id_cmd); + install_element(INTERFACE_NODE, &zebra_evpn_es_sys_mac_cmd); + install_element(INTERFACE_NODE, &zebra_evpn_es_pref_cmd); + install_element(INTERFACE_NODE, &zebra_evpn_es_bypass_cmd); + install_element(INTERFACE_NODE, &zebra_evpn_mh_uplink_cmd); +} + +void zebra_evpn_mh_init(void) +{ + zrouter.mh_info = XCALLOC(MTYPE_ZMH_INFO, sizeof(*zrouter.mh_info)); + + zmh_info->mac_hold_time = ZEBRA_EVPN_MH_MAC_HOLD_TIME_DEF; + zmh_info->neigh_hold_time = ZEBRA_EVPN_MH_NEIGH_HOLD_TIME_DEF; + /* setup ES tables */ + RB_INIT(zebra_es_rb_head, &zmh_info->es_rb_tree); + zmh_info->local_es_list = list_new(); + listset_app_node_mem(zmh_info->local_es_list); + + bf_init(zmh_info->nh_id_bitmap, EVPN_NH_ID_MAX); + bf_assign_zero_index(zmh_info->nh_id_bitmap); + zmh_info->nhg_table = hash_create(zebra_evpn_nhg_hash_keymake, + zebra_evpn_nhg_cmp, "l2 NHG table"); + zmh_info->nh_ip_table = + hash_create(zebra_evpn_nh_ip_hash_keymake, zebra_evpn_nh_ip_cmp, + "l2 NH IP table"); + + /* setup broadcast domain tables */ + zmh_info->evpn_vlan_table = hash_create(zebra_evpn_acc_vl_hash_keymake, + zebra_evpn_acc_vl_cmp, "access VLAN hash table"); + + zmh_info->startup_delay_time = ZEBRA_EVPN_MH_STARTUP_DELAY_DEF; + zebra_evpn_mh_startup_delay_timer_start("init"); +} + +void zebra_evpn_mh_terminate(void) +{ + list_delete(&zmh_info->local_es_list); + + hash_iterate(zmh_info->evpn_vlan_table, + zebra_evpn_acc_vl_cleanup_all, NULL); + hash_free(zmh_info->evpn_vlan_table); + hash_free(zmh_info->nhg_table); + hash_free(zmh_info->nh_ip_table); + bf_free(zmh_info->nh_id_bitmap); + + XFREE(MTYPE_ZMH_INFO, zrouter.mh_info); +} diff --git a/zebra/zebra_evpn_mh.h b/zebra/zebra_evpn_mh.h new file mode 100644 index 0000000..0376483 --- 
/dev/null +++ b/zebra/zebra_evpn_mh.h @@ -0,0 +1,382 @@ +/* + * Zebra EVPN MH Data structures and definitions + * + * Copyright (C) 2019 Cumulus Networks, Inc. + * Anuradha Karuppiah + * + * This file is part of FRR. + * + * FRR is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License as published by the + * Free Software Foundation; either version 2, or (at your option) any + * later version. + * + * FRR is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. + */ + +#ifndef _ZEBRA_EVPN_MH_H +#define _ZEBRA_EVPN_MH_H + +#include <zebra.h> + +#include "if.h" +#include "linklist.h" +#include "bitfield.h" +#include "zebra_vxlan.h" +#include "zebra_vxlan_private.h" +#include "zebra_nhg.h" + +#define EVPN_MH_VTY_STR "Multihoming\n" + +/* Ethernet Segment entry - + * - Local and remote ESs are maintained in a global RB tree, + * zmh_info->es_rb_tree using ESI as key + * - Local ESs are added via zebra config (ZEBRA_EVPNES_LOCAL) when an + * access port is associated with an ES-ID + * - Remotes ESs are added by BGP based on received/remote EAD/Type-1 routes + * (ZEBRA_EVPNES_REMOTE) + * - An ES can be simultaneously LOCAL and REMOTE; infact all LOCAL ESs are + * expected to have REMOTE ES peers. 
+ */ +struct zebra_evpn_es { + esi_t esi; + char esi_str[ESI_STR_LEN]; + + /* ES flags */ + uint32_t flags; +#define ZEBRA_EVPNES_LOCAL (1 << 0) /* configured in zebra */ +#define ZEBRA_EVPNES_REMOTE (1 << 1) /* added by bgp */ +#define ZEBRA_EVPNES_OPER_UP (1 << 2) /* es->ifp is oper-up */ +#define ZEBRA_EVPNES_READY_FOR_BGP (1 << 3) /* ready to be sent to BGP */ +#define ZEBRA_EVPNES_NHG_ACTIVE (1 << 4) /* NHG has been installed */ +/* This flag is only applicable to local ESs and signifies that this + * VTEP is not the DF + */ +#define ZEBRA_EVPNES_NON_DF (1 << 5) +/* When the ES becomes a bridge port we need to activate the BUM non-DF + * filter, SPH filter and backup NHG for fast-failover + */ +#define ZEBRA_EVPNES_BR_PORT (1 << 6) +/* ES is in bypass mode i.e. must not be advertised. ES-bypass is set + * when the associated host bond goes into LACP bypass + */ +#define ZEBRA_EVPNES_BYPASS (1 << 7) + + /* memory used for adding the es to zmh_info->es_rb_tree */ + RB_ENTRY(zebra_evpn_es) rb_node; + + /* [EVPNES_LOCAL] memory used for linking the es to + * zmh_info->local_es_list + */ + struct listnode local_es_listnode; + + /* [EVPNES_LOCAL] corresponding interface */ + struct zebra_if *zif; + + /* list of ES-EVIs associated with the ES */ + struct list *es_evi_list; + + /* [!EVPNES_LOCAL] List of remote VTEPs (zebra_evpn_es_vtep) */ + struct list *es_vtep_list; + + /* list of zebra_mac entries using this ES as destination */ + struct list *mac_list; + + /* Nexthop group id */ + uint32_t nhg_id; + + /* Preference config for BUM-DF election. Sent to BGP and + * advertised via the ESR + */ + uint16_t df_pref; + + /* When a new ES is configured it is held in a non-DF state + * for 3 seconds. This allows the peer Type-4 routes to be + * imported before running the DF election. 
+ */ +#define ZEBRA_EVPN_MH_DF_DELAY_TIME 3 /* seconds */ + struct thread *df_delay_timer; +}; +RB_HEAD(zebra_es_rb_head, zebra_evpn_es); +RB_PROTOTYPE(zebra_es_rb_head, zebra_evpn_es, rb_node, zebra_es_rb_cmp); + +/* ES per-EVI info + * - ES-EVIs are maintained per-EVPN (vni->es_evi_rb_tree) + * - Local ES-EVIs are linked to per-EVPN list for quick access + * - Although some infrastucture is present for remote ES-EVIs, currently + * BGP does NOT send remote ES-EVIs to zebra. This may change in the + * future (but must be changed thoughtfully and only if needed as ES-EVI + * can get prolific and come in the way of rapid failovers) + */ +struct zebra_evpn_es_evi { + struct zebra_evpn_es *es; + struct zebra_evpn *zevpn; + + /* ES-EVI flags */ + uint32_t flags; + /* local ES-EVI */ +#define ZEBRA_EVPNES_EVI_LOCAL (1 << 0) /* created by zebra */ +#define ZEBRA_EVPNES_EVI_READY_FOR_BGP (1 << 1) /* ready to be sent to BGP */ + + /* memory used for adding the es_evi to + * es_evi->zevpn->es_evi_rb_tree + */ + RB_ENTRY(zebra_evpn_es_evi) rb_node; + /* memory used for linking the es_evi to + * es_evi->zevpn->local_es_evi_list + */ + struct listnode l2vni_listnode; + /* memory used for linking the es_evi to + * es_evi->es->es_evi_list + */ + struct listnode es_listnode; +}; + +/* A single L2 nexthop is allocated across all ESs with the same PE/VTEP + * nexthop + */ +struct zebra_evpn_l2_nh { + struct in_addr vtep_ip; + + /* MAC nexthop id */ + uint32_t nh_id; + + /* es_vtep entries using this nexthop */ + uint32_t ref_cnt; +}; + +/* PE attached to an ES */ +struct zebra_evpn_es_vtep { + struct zebra_evpn_es *es; /* parent ES */ + struct in_addr vtep_ip; + + uint32_t flags; + /* Rxed Type-4 route from this VTEP */ +#define ZEBRA_EVPNES_VTEP_RXED_ESR (1 << 0) +#define ZEBRA_EVPNES_VTEP_DEL_IN_PROG (1 << 1) + + /* MAC nexthop info */ + struct zebra_evpn_l2_nh *nh; + + /* memory used for adding the entry to es->es_vtep_list */ + struct listnode es_listnode; + + /* Parameters for 
DF election */ + uint8_t df_alg; + uint32_t df_pref; + + /* XXX - maintain a backpointer to struct zebra_vtep */ +}; + +/* Local/access-side broadcast domain - zebra_evpn_access_bd is added to - + * zrouter->evpn_vlan_table (for VLAN aware bridges) OR + * zrouter->evpn_bridge_table (for VLAN unaware bridges) + * XXX - support for VLAN unaware bridges is yet to be flushed out + */ +struct zebra_evpn_access_bd { + vlanid_t vid; + + struct zebra_if *vxlan_zif; /* vxlan device */ + /* list of members associated with the BD i.e. (potential) ESs */ + struct list *mbr_zifs; + /* presence of zevpn activates the EVI on all the ESs in mbr_zifs */ + struct zebra_evpn *zevpn; + /* SVI associated with the VLAN */ + struct zebra_if *vlan_zif; +}; + +/* multihoming information stored in zrouter */ +#define zmh_info (zrouter.mh_info) +struct zebra_evpn_mh_info { + uint32_t flags; +/* If the dataplane is not capable of handling a backup NHG on an access + * port we will need to explicitly failover each MAC entry on + * local ES down + */ +#define ZEBRA_EVPN_MH_REDIRECT_OFF (1 << 0) +/* DAD support for EVPN-MH is yet to be added. So on detection of + * first local ES, DAD is turned off + */ +#define ZEBRA_EVPN_MH_DUP_ADDR_DETECT_OFF (1 << 1) +/* If EVPN MH is enabled we only advertise REACHABLE neigh entries as Type-2 + * routes. As there is no global config knob for enabling EVPN MH we turn + * this flag when the first local ES is detected. 
+ */ +#define ZEBRA_EVPN_MH_ADV_REACHABLE_NEIGH_ONLY (1 << 2) +/* If EVPN MH is enabled we advertise the SVI MAC address to avoid + * flooding of ARP replies rxed from the multi-homed host + */ +#define ZEBRA_EVPN_MH_ADV_SVI_MAC (1 << 3) + + /* RB tree of Ethernet segments (used for EVPN-MH) */ + struct zebra_es_rb_head es_rb_tree; + /* List of local ESs */ + struct list *local_es_list; + + /* EVPN MH broadcast domains indexed by the VID */ + struct hash *evpn_vlan_table; + + /* A base L2-VNI is maintained to derive parameters such as + * ES originator-IP. + * XXX: once single vxlan device model becomes available this will + * not be necessary + */ + struct zebra_evpn *es_base_evpn; + struct in_addr es_originator_ip; + + /* L2 NH and NHG ids - + * Most significant 4 bits is type. Lower 28 bits is the value + * allocated from the nh_id_bitmap. + */ + bitfield_t nh_id_bitmap; +#define EVPN_NH_ID_MAX (16*1024) +#define EVPN_NH_ID_VAL_MASK 0xffffff +/* The purpose of using different types for NHG and NH is NOT to manage the + * id space separately. It is simply to make debugging easier. 
+ */ +#define EVPN_NH_ID_TYPE_BIT (NHG_TYPE_L2_NH << NHG_ID_TYPE_POS) +#define EVPN_NHG_ID_TYPE_BIT (NHG_TYPE_L2 << NHG_ID_TYPE_POS) + /* L2-NHG table - key: nhg_id, data: zebra_evpn_es */ + struct hash *nhg_table; + /* L2-NH table - key: vtep_up, data: zebra_evpn_nh */ + struct hash *nh_ip_table; + + /* XXX - re-visit the default hold timer value */ + int mac_hold_time; +#define ZEBRA_EVPN_MH_MAC_HOLD_TIME_DEF (18 * 60) + int neigh_hold_time; +#define ZEBRA_EVPN_MH_NEIGH_HOLD_TIME_DEF (18 * 60) + + /* During this period access ports will be held in a protodown + * state + */ + int startup_delay_time; /* seconds */ +#define ZEBRA_EVPN_MH_STARTUP_DELAY_DEF (3 * 60) + struct thread *startup_delay_timer; + + /* Number of configured uplinks */ + uint32_t uplink_cfg_cnt; + /* Number of operationally-up uplinks */ + uint32_t uplink_oper_up_cnt; + + /* These protodown bits are inherited by all ES bonds */ + uint32_t protodown_rc; +}; + +/* returns TRUE if the EVPN is ready to be sent to BGP */ +static inline bool zebra_evpn_send_to_client_ok(struct zebra_evpn *zevpn) +{ + return !!(zevpn->flags & ZEVPN_READY_FOR_BGP); +} + +static inline bool zebra_evpn_mac_is_es_local(struct zebra_mac *mac) +{ + return mac->es && (mac->es->flags & ZEBRA_EVPNES_LOCAL); +} + +/* Returns true if the id is of L2-NHG or L2-NH type */ +static inline bool zebra_evpn_mh_is_fdb_nh(uint32_t id) +{ + return ((id & EVPN_NHG_ID_TYPE_BIT) || + (id & EVPN_NH_ID_TYPE_BIT)); +} + +static inline bool +zebra_evpn_es_local_mac_via_network_port(struct zebra_evpn_es *es) +{ + return !(es->flags & ZEBRA_EVPNES_OPER_UP) + && (zmh_info->flags & ZEBRA_EVPN_MH_REDIRECT_OFF); +} + +static inline bool zebra_evpn_mh_do_dup_addr_detect(void) +{ + return !(zmh_info->flags & ZEBRA_EVPN_MH_DUP_ADDR_DETECT_OFF); +} + +static inline bool zebra_evpn_mh_do_adv_reachable_neigh_only(void) +{ + return !!(zmh_info->flags & ZEBRA_EVPN_MH_ADV_REACHABLE_NEIGH_ONLY); +} + +static inline bool zebra_evpn_mh_do_adv_svi_mac(void) +{ + 
return zmh_info && (zmh_info->flags & ZEBRA_EVPN_MH_ADV_SVI_MAC); +} + +/*****************************************************************************/ +extern esi_t *zero_esi; +extern void zebra_evpn_mh_init(void); +extern void zebra_evpn_mh_terminate(void); +extern bool zebra_evpn_is_if_es_capable(struct zebra_if *zif); +extern void zebra_evpn_if_init(struct zebra_if *zif); +extern void zebra_evpn_if_cleanup(struct zebra_if *zif); +extern void zebra_evpn_es_evi_init(struct zebra_evpn *zevpn); +extern void zebra_evpn_es_evi_cleanup(struct zebra_evpn *zevpn); +extern void zebra_evpn_vxl_evpn_set(struct zebra_if *zif, + struct zebra_evpn *zevpn, bool set); +extern void zebra_evpn_es_set_base_evpn(struct zebra_evpn *zevpn); +extern void zebra_evpn_es_clear_base_evpn(struct zebra_evpn *zevpn); +extern void zebra_evpn_vl_vxl_ref(uint16_t vid, struct zebra_if *vxlan_zif); +extern void zebra_evpn_vl_vxl_deref(uint16_t vid, struct zebra_if *vxlan_zif); +extern void zebra_evpn_vl_mbr_ref(uint16_t vid, struct zebra_if *zif); +extern void zebra_evpn_vl_mbr_deref(uint16_t vid, struct zebra_if *zif); +extern void zebra_evpn_es_send_all_to_client(bool add); +extern void zebra_evpn_es_if_oper_state_change(struct zebra_if *zif, bool up); +extern void zebra_evpn_es_show(struct vty *vty, bool uj); +extern void zebra_evpn_es_show_detail(struct vty *vty, bool uj); +extern void zebra_evpn_es_show_esi(struct vty *vty, bool uj, esi_t *esi); +extern void zebra_evpn_update_all_es(struct zebra_evpn *zevpn); +extern void zebra_evpn_proc_remote_es(ZAPI_HANDLER_ARGS); +int zebra_evpn_remote_es_add(const esi_t *esi, struct in_addr vtep_ip, + bool esr_rxed, uint8_t df_alg, uint16_t df_pref); +int zebra_evpn_remote_es_del(const esi_t *esi, struct in_addr vtep_ip); +extern void zebra_evpn_es_evi_show(struct vty *vty, bool uj, int detail); +extern void zebra_evpn_es_evi_show_vni(struct vty *vty, bool uj, + vni_t vni, int detail); +extern void zebra_evpn_es_mac_deref_entry(struct zebra_mac *mac); 
+extern bool zebra_evpn_es_mac_ref_entry(struct zebra_mac *mac, + struct zebra_evpn_es *es); +extern bool zebra_evpn_es_mac_ref(struct zebra_mac *mac, const esi_t *esi); +extern struct zebra_evpn_es *zebra_evpn_es_find(const esi_t *esi); +extern void zebra_evpn_interface_init(void); +extern int zebra_evpn_mh_if_write(struct vty *vty, struct interface *ifp); +extern void zebra_evpn_acc_vl_show(struct vty *vty, bool uj); +extern void zebra_evpn_acc_vl_show_detail(struct vty *vty, bool uj); +extern void zebra_evpn_acc_vl_show_vid(struct vty *vty, bool uj, vlanid_t vid); +extern void zebra_evpn_if_es_print(struct vty *vty, json_object *json, + struct zebra_if *zif); +extern void zebra_evpn_es_cleanup(void); +extern int zebra_evpn_mh_mac_holdtime_update(struct vty *vty, + uint32_t duration, bool set_default); +void zebra_evpn_mh_config_write(struct vty *vty); +int zebra_evpn_mh_neigh_holdtime_update(struct vty *vty, + uint32_t duration, bool set_default); +void zebra_evpn_es_local_br_port_update(struct zebra_if *zif); +extern int zebra_evpn_mh_startup_delay_update(struct vty *vty, + uint32_t duration, + bool set_default); +extern void zebra_evpn_mh_uplink_oper_update(struct zebra_if *zif); +extern void zebra_evpn_mh_update_protodown_bond_mbr(struct zebra_if *zif, + bool clear, + const char *caller); +extern bool zebra_evpn_is_es_bond(struct interface *ifp); +extern bool zebra_evpn_is_es_bond_member(struct interface *ifp); +extern void zebra_evpn_mh_print(struct vty *vty); +extern void zebra_evpn_mh_json(json_object *json); +extern bool zebra_evpn_nhg_is_local_es(uint32_t nhg_id, + struct zebra_evpn_es **local_es); +extern int zebra_evpn_mh_redirect_off(struct vty *vty, bool redirect_off); +extern void zebra_evpn_l2_nh_show(struct vty *vty, bool uj); +extern void zebra_evpn_acc_bd_svi_set(struct zebra_if *vlan_zif, + struct zebra_if *br_zif, bool is_up); +extern void zebra_evpn_acc_bd_svi_mac_add(struct interface *vlan_if); +extern void zebra_evpn_es_bypass_update(struct 
zebra_evpn_es *es, + struct interface *ifp, bool bypass); +extern void zebra_evpn_proc_remote_nh(ZAPI_HANDLER_ARGS); +extern struct zebra_evpn_es_evi * +zebra_evpn_es_evi_find(struct zebra_evpn_es *es, struct zebra_evpn *zevpn); + +#endif /* _ZEBRA_EVPN_MH_H */ diff --git a/zebra/zebra_evpn_neigh.c b/zebra/zebra_evpn_neigh.c new file mode 100644 index 0000000..6d90a60 --- /dev/null +++ b/zebra/zebra_evpn_neigh.c @@ -0,0 +1,2324 @@ +/* + * Zebra EVPN Neighbor code + * Copyright (C) 2016, 2017 Cumulus Networks, Inc. + * + * This file is part of FRR. + * + * FRR is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License as published by the + * Free Software Foundation; either version 2, or (at your option) any + * later version. + * + * FRR is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with FRR; see the file COPYING. If not, write to the Free + * Software Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA + * 02111-1307, USA. + */ + +#include <zebra.h> + +#include "hash.h" +#include "interface.h" +#include "jhash.h" +#include "memory.h" +#include "prefix.h" +#include "vlan.h" +#include "json.h" + +#include "zebra/zserv.h" +#include "zebra/debug.h" +#include "zebra/zebra_router.h" +#include "zebra/rt.h" +#include "zebra/zebra_errors.h" +#include "zebra/zebra_vrf.h" +#include "zebra/zebra_evpn.h" +#include "zebra/zebra_evpn_mh.h" +#include "zebra/zebra_evpn_neigh.h" +#include "zebra/zebra_evpn_mac.h" + +DEFINE_MTYPE_STATIC(ZEBRA, NEIGH, "EVI Neighbor"); + +/* + * Make hash key for neighbors. 
 */
/* Hash key for a neighbor: the IP address only (v4 hashes the single
 * 32-bit word, v6 hashes all four words). MAC is deliberately not part
 * of the key — lookups are by IP.
 */
static unsigned int neigh_hash_keymake(const void *p)
{
	const struct zebra_neigh *n = p;
	const struct ipaddr *ip = &n->ip;

	if (IS_IPADDR_V4(ip))
		return jhash_1word(ip->ipaddr_v4.s_addr, 0);

	return jhash2(ip->ipaddr_v6.s6_addr32,
		      array_size(ip->ipaddr_v6.s6_addr32), 0);
}

/*
 * Compare two neighbor hash structures.
 * Equality is IP-address equality; must stay consistent with
 * neigh_hash_keymake above.
 */
static bool neigh_cmp(const void *p1, const void *p2)
{
	const struct zebra_neigh *n1 = p1;
	const struct zebra_neigh *n2 = p2;

	if (n1 == NULL && n2 == NULL)
		return true;

	if (n1 == NULL || n2 == NULL)
		return false;

	return ipaddr_cmp(&n1->ip, &n2->ip) == 0;
}

/* List-sort comparator for a MAC's neigh_list: orders neighbors by IP. */
int neigh_list_cmp(void *p1, void *p2)
{
	const struct zebra_neigh *n1 = p1;
	const struct zebra_neigh *n2 = p2;

	return ipaddr_cmp(&n1->ip, &n2->ip);
}

/* Create a per-EVPN neighbor table (initial size 8, grows as needed). */
struct hash *zebra_neigh_db_create(const char *desc)
{
	return hash_create_size(8, neigh_hash_keymake, neigh_cmp, desc);
}

/* Count neighbors in this EVPN currently marked duplicate by DAD.
 * Walks the raw hash buckets directly rather than using hash_iterate.
 */
uint32_t num_dup_detected_neighs(struct zebra_evpn *zevpn)
{
	unsigned int i;
	uint32_t num_neighs = 0;
	struct hash *hash;
	struct hash_bucket *hb;
	struct zebra_neigh *nbr;

	hash = zevpn->neigh_table;
	if (!hash)
		return num_neighs;
	for (i = 0; i < hash->size; i++) {
		for (hb = hash->index[i]; hb; hb = hb->next) {
			nbr = (struct zebra_neigh *)hb->data;
			if (CHECK_FLAG(nbr->flags, ZEBRA_NEIGH_DUPLICATE))
				num_neighs++;
		}
	}

	return num_neighs;
}

/*
 * Helper function to determine maximum width of neighbor IP address for
 * display - just because we're dealing with IPv6 addresses that can
 * widely vary.
 * hash_iterate callback: ctxt is a struct neigh_walk_ctx whose
 * addr_width field accumulates the maximum string width seen.
 */
void zebra_evpn_find_neigh_addr_width(struct hash_bucket *bucket, void *ctxt)
{
	struct zebra_neigh *n;
	char buf[INET6_ADDRSTRLEN];
	struct neigh_walk_ctx *wctx = ctxt;
	int width;

	n = (struct zebra_neigh *)bucket->data;

	ipaddr2str(&n->ip, buf, sizeof(buf));
	width = strlen(buf);
	if (width > wctx->addr_width)
		wctx->addr_width = width;
}

/*
 * Count of remote neighbors referencing this MAC.
+ */ +int remote_neigh_count(struct zebra_mac *zmac) +{ + struct zebra_neigh *n = NULL; + struct listnode *node = NULL; + int count = 0; + + for (ALL_LIST_ELEMENTS_RO(zmac->neigh_list, node, n)) { + if (CHECK_FLAG(n->flags, ZEBRA_NEIGH_REMOTE)) + count++; + } + + return count; +} + +/* + * Install remote neighbor into the kernel. + */ +int zebra_evpn_rem_neigh_install(struct zebra_evpn *zevpn, + struct zebra_neigh *n, bool was_static) +{ + struct interface *vlan_if; + int flags; + int ret = 0; + + if (!(n->flags & ZEBRA_NEIGH_REMOTE)) + return 0; + + vlan_if = zevpn_map_to_svi(zevpn); + if (!vlan_if) + return -1; + + flags = DPLANE_NTF_EXT_LEARNED; + if (n->flags & ZEBRA_NEIGH_ROUTER_FLAG) + flags |= DPLANE_NTF_ROUTER; + ZEBRA_NEIGH_SET_ACTIVE(n); + + dplane_rem_neigh_add(vlan_if, &n->ip, &n->emac, flags, was_static); + + return ret; +} + +/* + * Install neighbor hash entry - called upon access VLAN change. + */ +void zebra_evpn_install_neigh_hash(struct hash_bucket *bucket, void *ctxt) +{ + struct zebra_neigh *n; + struct neigh_walk_ctx *wctx = ctxt; + + n = (struct zebra_neigh *)bucket->data; + + if (CHECK_FLAG(n->flags, ZEBRA_NEIGH_REMOTE)) + zebra_evpn_rem_neigh_install(wctx->zevpn, n, + false /*was_static*/); +} + +/* + * Callback to allocate neighbor hash entry. 
+ */ +static void *zebra_evpn_neigh_alloc(void *p) +{ + const struct zebra_neigh *tmp_n = p; + struct zebra_neigh *n; + + n = XCALLOC(MTYPE_NEIGH, sizeof(struct zebra_neigh)); + *n = *tmp_n; + + return ((void *)n); +} + +static void zebra_evpn_local_neigh_ref_mac(struct zebra_neigh *n, + const struct ethaddr *macaddr, + struct zebra_mac *mac, + bool send_mac_update) +{ + bool old_static; + bool new_static; + + memcpy(&n->emac, macaddr, ETH_ALEN); + n->mac = mac; + + /* Link to new MAC */ + if (!mac) + return; + + listnode_add_sort(mac->neigh_list, n); + if (n->flags & ZEBRA_NEIGH_ALL_PEER_FLAGS) { + old_static = zebra_evpn_mac_is_static(mac); + ++mac->sync_neigh_cnt; + new_static = zebra_evpn_mac_is_static(mac); + if (IS_ZEBRA_DEBUG_EVPN_MH_NEIGH) + zlog_debug( + "sync-neigh ref mac vni %u ip %pIA mac %pEA ref %d", + n->zevpn->vni, &n->ip, &n->emac, + mac->sync_neigh_cnt); + if ((old_static != new_static) && send_mac_update) + /* program the local mac in the kernel */ + zebra_evpn_sync_mac_dp_install( + mac, false /*set_inactive*/, + false /*force_clear_static*/, __func__); + } +} + +/* sync-path that is active on an ES peer */ +static void zebra_evpn_sync_neigh_dp_install(struct zebra_neigh *n, + bool set_inactive, + bool force_clear_static, + const char *caller) +{ + struct zebra_ns *zns; + struct interface *ifp; + bool set_static; + bool set_router; + + zns = zebra_ns_lookup(NS_DEFAULT); + ifp = if_lookup_by_index_per_ns(zns, n->ifindex); + if (!ifp) { + if (IS_ZEBRA_DEBUG_EVPN_MH_NEIGH) + zlog_debug( + "%s: dp-install sync-neigh vni %u ip %pIA mac %pEA if %d f 0x%x skipped", + caller, n->zevpn->vni, &n->ip, &n->emac, + n->ifindex, n->flags); + return; + } + + if (force_clear_static) + set_static = false; + else + set_static = zebra_evpn_neigh_is_static(n); + + set_router = !!CHECK_FLAG(n->flags, ZEBRA_NEIGH_ROUTER_FLAG); + + /* XXX - this will change post integration with the new kernel */ + if (CHECK_FLAG(n->flags, ZEBRA_NEIGH_LOCAL_INACTIVE)) + set_inactive = 
true; + + if (IS_ZEBRA_DEBUG_EVPN_MH_NEIGH) + zlog_debug( + "%s: dp-install sync-neigh vni %u ip %pIA mac %pEA if %s(%d) f 0x%x%s%s%s", + caller, n->zevpn->vni, &n->ip, &n->emac, + ifp->name, n->ifindex, n->flags, + set_router ? " router" : "", + set_static ? " static" : "", + set_inactive ? " inactive" : ""); + dplane_local_neigh_add(ifp, &n->ip, &n->emac, set_router, set_static, + set_inactive); +} + +/* + * Inform BGP about local neighbor addition. + */ +int zebra_evpn_neigh_send_add_to_client(vni_t vni, const struct ipaddr *ip, + const struct ethaddr *macaddr, + struct zebra_mac *zmac, + uint32_t neigh_flags, uint32_t seq) +{ + uint8_t flags = 0; + + if (CHECK_FLAG(neigh_flags, ZEBRA_NEIGH_LOCAL_INACTIVE)) { + /* host reachability has not been verified locally */ + + /* if no ES peer is claiming reachability we can't advertise + * the entry + */ + if (!CHECK_FLAG(neigh_flags, ZEBRA_NEIGH_ES_PEER_ACTIVE)) + return 0; + + /* ES peers are claiming reachability; we will + * advertise the entry but with a proxy flag + */ + SET_FLAG(flags, ZEBRA_MACIP_TYPE_PROXY_ADVERT); + } + + if (CHECK_FLAG(neigh_flags, ZEBRA_NEIGH_DEF_GW)) + SET_FLAG(flags, ZEBRA_MACIP_TYPE_GW); + /* Set router flag (R-bit) based on local neigh entry add */ + if (CHECK_FLAG(neigh_flags, ZEBRA_NEIGH_ROUTER_FLAG)) + SET_FLAG(flags, ZEBRA_MACIP_TYPE_ROUTER_FLAG); + if (CHECK_FLAG(neigh_flags, ZEBRA_NEIGH_SVI_IP)) + SET_FLAG(flags, ZEBRA_MACIP_TYPE_SVI_IP); + + return zebra_evpn_macip_send_msg_to_client(vni, macaddr, ip, flags, seq, + ZEBRA_NEIGH_ACTIVE, zmac->es, + ZEBRA_MACIP_ADD); +} + +/* + * Inform BGP about local neighbor deletion. 
 */
/* Withdraw a neighbor MACIP route from BGP.
 * Unless 'force' is set, skip the withdraw for a local-inactive neigh
 * with no ES peer claiming reachability — such a neigh was never
 * advertised (see zebra_evpn_neigh_send_add_to_client), so there is
 * nothing to delete.
 */
int zebra_evpn_neigh_send_del_to_client(vni_t vni, struct ipaddr *ip,
					struct ethaddr *macaddr, uint32_t flags,
					int state, bool force)
{
	if (!force) {
		if (CHECK_FLAG(flags, ZEBRA_NEIGH_LOCAL_INACTIVE)
		    && !CHECK_FLAG(flags, ZEBRA_NEIGH_ES_PEER_ACTIVE))
			/* the neigh was not advertised - nothing to delete */
			return 0;
	}

	return zebra_evpn_macip_send_msg_to_client(
		vni, macaddr, ip, flags, 0, state, NULL, ZEBRA_MACIP_DEL);
}

/* Advertise the neigh to BGP if it is now ready; otherwise withdraw it
 * if it had previously been advertised. No-op when readiness is false
 * both before and after.
 */
static void zebra_evpn_neigh_send_add_del_to_client(struct zebra_neigh *n,
						    bool old_bgp_ready,
						    bool new_bgp_ready)
{
	if (new_bgp_ready)
		zebra_evpn_neigh_send_add_to_client(n->zevpn->vni, &n->ip,
						    &n->emac, n->mac, n->flags,
						    n->loc_seq);
	else if (old_bgp_ready)
		zebra_evpn_neigh_send_del_to_client(n->zevpn->vni, &n->ip,
						    &n->emac, n->flags,
						    n->state, true /*force*/);
}

/* if the static flag associated with the neigh changes we need
 * to update the sync-neigh references against the MAC
 * and inform the dataplane about the static flag changes.
 */
void zebra_evpn_sync_neigh_static_chg(struct zebra_neigh *n, bool old_n_static,
				      bool new_n_static, bool defer_n_dp,
				      bool defer_mac_dp, const char *caller)
{
	struct zebra_mac *mac = n->mac;
	bool old_mac_static;
	bool new_mac_static;

	/* Nothing to do if the neigh's static state did not change. */
	if (old_n_static == new_n_static)
		return;

	/* update the neigh sync references in the dataplane. if
	 * the neigh is in the middle of updates the caller can
	 * request for a defer
	 */
	if (!defer_n_dp)
		zebra_evpn_sync_neigh_dp_install(n, false /* set_inactive */,
						 false /* force_clear_static */,
						 __func__);

	if (!mac)
		return;

	/* update the mac sync ref cnt.
	 * The MAC is considered static while sync_neigh_cnt is non-zero,
	 * so the refcount change below may flip the MAC's static state.
	 */
	old_mac_static = zebra_evpn_mac_is_static(mac);
	if (new_n_static) {
		++mac->sync_neigh_cnt;
	} else if (old_n_static) {
		/* guard against underflow; count may already be zero */
		if (mac->sync_neigh_cnt)
			--mac->sync_neigh_cnt;
	}
	new_mac_static = zebra_evpn_mac_is_static(mac);

	/* update the mac sync references in the dataplane */
	if ((old_mac_static != new_mac_static) && !defer_mac_dp)
		zebra_evpn_sync_mac_dp_install(mac, false /* set_inactive */,
					       false /* force_clear_static */,
					       __func__);

	if (IS_ZEBRA_DEBUG_EVPN_MH_NEIGH)
		zlog_debug(
			"sync-neigh ref-chg vni %u ip %pIA mac %pEA f 0x%x %d%s%s%s%s by %s",
			n->zevpn->vni, &n->ip, &n->emac, n->flags,
			mac->sync_neigh_cnt,
			old_n_static ? " old_n_static" : "",
			new_n_static ? " new_n_static" : "",
			old_mac_static ? " old_mac_static" : "",
			new_mac_static ? " new_mac_static" : "", caller);
}

/* Neigh hold timer is used to age out peer-active flag.
 *
 * During this wait time we expect the dataplane component or an
 * external neighmgr daemon to probe existing hosts to independently
 * establish their presence on the ES.
+ */ +static void zebra_evpn_neigh_hold_exp_cb(struct thread *t) +{ + struct zebra_neigh *n; + bool old_bgp_ready; + bool new_bgp_ready; + bool old_n_static; + bool new_n_static; + + n = THREAD_ARG(t); + /* the purpose of the hold timer is to age out the peer-active + * flag + */ + if (!CHECK_FLAG(n->flags, ZEBRA_NEIGH_ES_PEER_ACTIVE)) + return; + + old_bgp_ready = zebra_evpn_neigh_is_ready_for_bgp(n); + old_n_static = zebra_evpn_neigh_is_static(n); + UNSET_FLAG(n->flags, ZEBRA_NEIGH_ES_PEER_ACTIVE); + new_bgp_ready = zebra_evpn_neigh_is_ready_for_bgp(n); + new_n_static = zebra_evpn_neigh_is_static(n); + + if (IS_ZEBRA_DEBUG_EVPN_MH_NEIGH) + zlog_debug("sync-neigh vni %u ip %pIA mac %pEA 0x%x hold expired", + n->zevpn->vni, &n->ip, &n->emac, n->flags); + + /* re-program the local neigh in the dataplane if the neigh is no + * longer static + */ + if (old_n_static != new_n_static) + zebra_evpn_sync_neigh_static_chg( + n, old_n_static, new_n_static, false /*defer_n_dp*/, + false /*defer_mac_dp*/, __func__); + + /* inform bgp if needed */ + if (old_bgp_ready != new_bgp_ready) + zebra_evpn_neigh_send_add_del_to_client(n, old_bgp_ready, + new_bgp_ready); +} + +static inline void zebra_evpn_neigh_start_hold_timer(struct zebra_neigh *n) +{ + if (n->hold_timer) + return; + + if (IS_ZEBRA_DEBUG_EVPN_MH_NEIGH) + zlog_debug("sync-neigh vni %u ip %pIA mac %pEA 0x%x hold start", + n->zevpn->vni, &n->ip, &n->emac, n->flags); + thread_add_timer(zrouter.master, zebra_evpn_neigh_hold_exp_cb, n, + zmh_info->neigh_hold_time, &n->hold_timer); +} + +static void zebra_evpn_local_neigh_deref_mac(struct zebra_neigh *n, + bool send_mac_update) +{ + struct zebra_mac *mac = n->mac; + struct zebra_evpn *zevpn = n->zevpn; + bool old_static; + bool new_static; + + n->mac = NULL; + if (!mac) + return; + + if ((n->flags & ZEBRA_NEIGH_ALL_PEER_FLAGS) && mac->sync_neigh_cnt) { + old_static = zebra_evpn_mac_is_static(mac); + --mac->sync_neigh_cnt; + new_static = zebra_evpn_mac_is_static(mac); + if 
(IS_ZEBRA_DEBUG_EVPN_MH_NEIGH) + zlog_debug( + "sync-neigh deref mac vni %u ip %pIA mac %pEA ref %d", + n->zevpn->vni, &n->ip, &n->emac, + mac->sync_neigh_cnt); + if ((old_static != new_static) && send_mac_update) + /* program the local mac in the kernel */ + zebra_evpn_sync_mac_dp_install( + mac, false /* set_inactive */, + false /* force_clear_static */, __func__); + } + + listnode_delete(mac->neigh_list, n); + zebra_evpn_deref_ip2mac(zevpn, mac); +} + +bool zebra_evpn_neigh_is_bgp_seq_ok(struct zebra_evpn *zevpn, + struct zebra_neigh *n, + const struct ethaddr *macaddr, uint32_t seq, + bool sync) +{ + uint32_t tmp_seq; + const char *n_type; + + if (CHECK_FLAG(n->flags, ZEBRA_NEIGH_LOCAL)) { + tmp_seq = n->loc_seq; + n_type = "local"; + } else { + tmp_seq = n->rem_seq; + n_type = "remote"; + } + + if (seq < tmp_seq) { + /* if the neigh was never advertised to bgp we must accept + * whatever sequence number bgp sends + * XXX - check with Vivek + */ + if (CHECK_FLAG(n->flags, ZEBRA_NEIGH_LOCAL) + && !zebra_evpn_neigh_is_ready_for_bgp(n)) { + if (IS_ZEBRA_DEBUG_EVPN_MH_NEIGH + || IS_ZEBRA_DEBUG_VXLAN) + zlog_debug( + "%s-macip accept vni %u %s mac %pEA IP %pIA lower seq %u f 0x%x", + sync ? "sync" : "remote", zevpn->vni, + n_type, macaddr, &n->ip, + tmp_seq, n->flags); + return true; + } + + if (IS_ZEBRA_DEBUG_EVPN_MH_NEIGH || IS_ZEBRA_DEBUG_VXLAN) + zlog_debug( + "%s-macip ignore vni %u %s mac %pEA IP %pIA as existing has higher seq %u f 0x%x", + sync ? "sync" : "remote", zevpn->vni, n_type, + macaddr, &n->ip, tmp_seq, n->flags); + return false; + } + + return true; +} + +/* + * Add neighbor entry. 
+ */ +static struct zebra_neigh *zebra_evpn_neigh_add(struct zebra_evpn *zevpn, + const struct ipaddr *ip, + const struct ethaddr *mac, + struct zebra_mac *zmac, + uint32_t n_flags) +{ + struct zebra_neigh tmp_n; + struct zebra_neigh *n = NULL; + + memset(&tmp_n, 0, sizeof(tmp_n)); + memcpy(&tmp_n.ip, ip, sizeof(struct ipaddr)); + n = hash_get(zevpn->neigh_table, &tmp_n, zebra_evpn_neigh_alloc); + + n->state = ZEBRA_NEIGH_INACTIVE; + n->zevpn = zevpn; + n->dad_ip_auto_recovery_timer = NULL; + n->flags = n_flags; + n->uptime = monotime(NULL); + + if (!zmac) + zmac = zebra_evpn_mac_lookup(zevpn, mac); + zebra_evpn_local_neigh_ref_mac(n, mac, zmac, + false /* send_mac_update */); + + return n; +} + +/* + * Delete neighbor entry. + */ +int zebra_evpn_neigh_del(struct zebra_evpn *zevpn, struct zebra_neigh *n) +{ + struct zebra_neigh *tmp_n; + + if (n->mac) + listnode_delete(n->mac->neigh_list, n); + + /* Cancel auto recovery */ + THREAD_OFF(n->dad_ip_auto_recovery_timer); + + /* Cancel proxy hold timer */ + zebra_evpn_neigh_stop_hold_timer(n); + + /* Free the VNI hash entry and allocated memory. 
*/ + tmp_n = hash_release(zevpn->neigh_table, n); + XFREE(MTYPE_NEIGH, tmp_n); + + return 0; +} + +void zebra_evpn_sync_neigh_del(struct zebra_neigh *n) +{ + bool old_n_static; + bool new_n_static; + + if (IS_ZEBRA_DEBUG_EVPN_MH_NEIGH) + zlog_debug("sync-neigh del vni %u ip %pIA mac %pEA f 0x%x", + n->zevpn->vni, &n->ip, &n->emac, n->flags); + + old_n_static = zebra_evpn_neigh_is_static(n); + UNSET_FLAG(n->flags, ZEBRA_NEIGH_ES_PEER_PROXY); + if (CHECK_FLAG(n->flags, ZEBRA_NEIGH_ES_PEER_ACTIVE)) + zebra_evpn_neigh_start_hold_timer(n); + new_n_static = zebra_evpn_neigh_is_static(n); + + if (old_n_static != new_n_static) + zebra_evpn_sync_neigh_static_chg( + n, old_n_static, new_n_static, false /*defer-dp*/, + false /*defer_mac_dp*/, __func__); +} + +struct zebra_neigh *zebra_evpn_proc_sync_neigh_update( + struct zebra_evpn *zevpn, struct zebra_neigh *n, uint16_t ipa_len, + const struct ipaddr *ipaddr, uint8_t flags, uint32_t seq, + const esi_t *esi, struct sync_mac_ip_ctx *ctx) +{ + struct interface *ifp = NULL; + bool is_router; + struct zebra_mac *mac = ctx->mac; + uint32_t tmp_seq; + bool old_router = false; + bool old_bgp_ready = false; + bool new_bgp_ready; + bool inform_dataplane = false; + bool inform_bgp = false; + bool old_mac_static; + bool new_mac_static; + bool set_dp_inactive = false; + bool created; + ifindex_t ifindex = 0; + + /* locate l3-svi */ + ifp = zevpn_map_to_svi(zevpn); + if (ifp) + ifindex = ifp->ifindex; + + is_router = !!CHECK_FLAG(flags, ZEBRA_MACIP_TYPE_ROUTER_FLAG); + old_mac_static = zebra_evpn_mac_is_static(mac); + + if (!n) { + uint32_t n_flags = 0; + + /* New neighbor - create */ + SET_FLAG(n_flags, ZEBRA_NEIGH_LOCAL); + if (CHECK_FLAG(flags, ZEBRA_MACIP_TYPE_PROXY_ADVERT)) + SET_FLAG(n_flags, ZEBRA_NEIGH_ES_PEER_PROXY); + else + SET_FLAG(n_flags, ZEBRA_NEIGH_ES_PEER_ACTIVE); + SET_FLAG(n_flags, ZEBRA_NEIGH_LOCAL_INACTIVE); + + n = zebra_evpn_neigh_add(zevpn, ipaddr, &mac->macaddr, mac, + n_flags); + n->ifindex = ifindex; + 
ZEBRA_NEIGH_SET_ACTIVE(n); + + created = true; + inform_dataplane = true; + inform_bgp = true; + set_dp_inactive = true; + } else { + bool mac_change; + uint32_t old_flags = n->flags; + bool old_n_static; + bool new_n_static; + + created = false; + old_n_static = zebra_evpn_neigh_is_static(n); + old_bgp_ready = zebra_evpn_neigh_is_ready_for_bgp(n); + old_router = !!CHECK_FLAG(n->flags, ZEBRA_NEIGH_ROUTER_FLAG); + + mac_change = !!memcmp(&n->emac, &mac->macaddr, ETH_ALEN); + + /* deref and clear old info */ + if (mac_change) { + if (old_bgp_ready) { + zebra_evpn_neigh_send_del_to_client( + zevpn->vni, &n->ip, &n->emac, n->flags, + n->state, false /*force*/); + old_bgp_ready = false; + } + zebra_evpn_local_neigh_deref_mac(n, + false /*send_mac_update*/); + } + /* clear old fwd info */ + n->rem_seq = 0; + n->r_vtep_ip.s_addr = 0; + + /* setup new flags */ + n->flags = 0; + SET_FLAG(n->flags, ZEBRA_NEIGH_LOCAL); + /* retain activity flag if the neigh was + * previously local + */ + if (old_flags & ZEBRA_NEIGH_LOCAL) { + n->flags |= (old_flags & ZEBRA_NEIGH_LOCAL_INACTIVE); + } else { + inform_dataplane = true; + set_dp_inactive = true; + n->flags |= ZEBRA_NEIGH_LOCAL_INACTIVE; + } + + if (CHECK_FLAG(flags, ZEBRA_MACIP_TYPE_PROXY_ADVERT)) { + SET_FLAG(n->flags, ZEBRA_NEIGH_ES_PEER_PROXY); + /* if the neigh was peer-active previously we + * need to keep the flag and start the + * holdtimer on it. the peer-active flag is + * cleared on holdtimer expiry. 
+ */ + if (CHECK_FLAG(old_flags, ZEBRA_NEIGH_ES_PEER_ACTIVE)) { + SET_FLAG(n->flags, ZEBRA_NEIGH_ES_PEER_ACTIVE); + zebra_evpn_neigh_start_hold_timer(n); + } + } else { + SET_FLAG(n->flags, ZEBRA_NEIGH_ES_PEER_ACTIVE); + /* stop hold timer if a peer has verified + * reachability + */ + zebra_evpn_neigh_stop_hold_timer(n); + } + ZEBRA_NEIGH_SET_ACTIVE(n); + + if (IS_ZEBRA_DEBUG_EVPN_MH_NEIGH && (old_flags != n->flags)) + zlog_debug( + "sync-neigh vni %u ip %pIA mac %pEA old_f 0x%x new_f 0x%x", + n->zevpn->vni, &n->ip, &n->emac, + old_flags, n->flags); + + new_n_static = zebra_evpn_neigh_is_static(n); + if (mac_change) { + set_dp_inactive = true; + n->flags |= ZEBRA_NEIGH_LOCAL_INACTIVE; + inform_dataplane = true; + zebra_evpn_local_neigh_ref_mac( + n, &mac->macaddr, mac, + false /*send_mac_update*/); + } else if (old_n_static != new_n_static) { + inform_dataplane = true; + /* if static flags have changed without a mac change + * we need to create the correct sync-refs against + * the existing mac + */ + zebra_evpn_sync_neigh_static_chg( + n, old_n_static, new_n_static, + true /*defer_dp*/, true /*defer_mac_dp*/, + __func__); + } + + /* Update the forwarding info. */ + if (n->ifindex != ifindex) { + n->ifindex = ifindex; + inform_dataplane = true; + } + + n->uptime = monotime(NULL); + } + + /* update the neigh seq. 
we don't bother with the mac seq as + * sync_mac_update already took care of that + */ + tmp_seq = MAX(n->loc_seq, seq); + if (tmp_seq != n->loc_seq) { + n->loc_seq = tmp_seq; + inform_bgp = true; + } + + /* Mark Router flag (R-bit) */ + if (is_router) + SET_FLAG(n->flags, ZEBRA_NEIGH_ROUTER_FLAG); + else + UNSET_FLAG(n->flags, ZEBRA_NEIGH_ROUTER_FLAG); + + if (old_router != is_router) + inform_dataplane = true; + + new_bgp_ready = zebra_evpn_neigh_is_ready_for_bgp(n); + if (old_bgp_ready != new_bgp_ready) + inform_bgp = true; + + new_mac_static = zebra_evpn_mac_is_static(mac); + if ((old_mac_static != new_mac_static) || ctx->mac_dp_update_deferred) + zebra_evpn_sync_mac_dp_install(mac, ctx->mac_inactive, + false /* force_clear_static */, + __func__); + + if (IS_ZEBRA_DEBUG_EVPN_MH_NEIGH) + zlog_debug( + "sync-neigh %s vni %u ip %pIA mac %pEA if %s(%d) seq %d f 0x%x%s%s", + created ? "created" : "updated", n->zevpn->vni, + &n->ip, &n->emac, + ifp ? ifp->name : "", ifindex, n->loc_seq, n->flags, + inform_bgp ? " inform_bgp" : "", + inform_dataplane ? " inform_dp" : ""); + + if (inform_dataplane) + zebra_evpn_sync_neigh_dp_install(n, set_dp_inactive, + false /* force_clear_static */, + __func__); + + if (inform_bgp) + zebra_evpn_neigh_send_add_del_to_client(n, old_bgp_ready, + new_bgp_ready); + + return n; +} + +/* + * Uninstall remote neighbor from the kernel. 
+ */ +static int zebra_evpn_neigh_uninstall(struct zebra_evpn *zevpn, + struct zebra_neigh *n) +{ + struct interface *vlan_if; + + if (!(n->flags & ZEBRA_NEIGH_REMOTE)) + return 0; + + vlan_if = zevpn_map_to_svi(zevpn); + if (!vlan_if) + return -1; + + ZEBRA_NEIGH_SET_INACTIVE(n); + n->loc_seq = 0; + + dplane_rem_neigh_delete(vlan_if, &n->ip); + + return 0; +} + +/* + * Free neighbor hash entry (callback) + */ +static void zebra_evpn_neigh_del_hash_entry(struct hash_bucket *bucket, + void *arg) +{ + struct neigh_walk_ctx *wctx = arg; + struct zebra_neigh *n = bucket->data; + + if (((wctx->flags & DEL_LOCAL_NEIGH) && (n->flags & ZEBRA_NEIGH_LOCAL)) + || ((wctx->flags & DEL_REMOTE_NEIGH) + && (n->flags & ZEBRA_NEIGH_REMOTE)) + || ((wctx->flags & DEL_REMOTE_NEIGH_FROM_VTEP) + && (n->flags & ZEBRA_NEIGH_REMOTE) + && IPV4_ADDR_SAME(&n->r_vtep_ip, &wctx->r_vtep_ip))) { + if (wctx->upd_client && (n->flags & ZEBRA_NEIGH_LOCAL)) + zebra_evpn_neigh_send_del_to_client( + wctx->zevpn->vni, &n->ip, &n->emac, n->flags, + n->state, false /*force*/); + + if (wctx->uninstall) { + if (zebra_evpn_neigh_is_static(n)) + zebra_evpn_sync_neigh_dp_install( + n, false /* set_inactive */, + true /* force_clear_static */, + __func__); + if ((n->flags & ZEBRA_NEIGH_REMOTE)) + zebra_evpn_neigh_uninstall(wctx->zevpn, n); + } + + zebra_evpn_neigh_del(wctx->zevpn, n); + } + + return; +} + +/* + * Delete all neighbor entries for this EVPN. + */ +void zebra_evpn_neigh_del_all(struct zebra_evpn *zevpn, int uninstall, + int upd_client, uint32_t flags) +{ + struct neigh_walk_ctx wctx; + + if (!zevpn->neigh_table) + return; + + memset(&wctx, 0, sizeof(wctx)); + wctx.zevpn = zevpn; + wctx.uninstall = uninstall; + wctx.upd_client = upd_client; + wctx.flags = flags; + + hash_iterate(zevpn->neigh_table, zebra_evpn_neigh_del_hash_entry, + &wctx); +} + +/* + * Look up neighbor hash entry. 
+ */ +struct zebra_neigh *zebra_evpn_neigh_lookup(struct zebra_evpn *zevpn, + const struct ipaddr *ip) +{ + struct zebra_neigh tmp; + struct zebra_neigh *n; + + memset(&tmp, 0, sizeof(tmp)); + memcpy(&tmp.ip, ip, sizeof(struct ipaddr)); + n = hash_lookup(zevpn->neigh_table, &tmp); + + return n; +} + +/* + * Process all neighbors associated with a MAC upon the MAC being learnt + * locally or undergoing any other change (such as sequence number). + */ +void zebra_evpn_process_neigh_on_local_mac_change(struct zebra_evpn *zevpn, + struct zebra_mac *zmac, + bool seq_change, + bool es_change) +{ + struct zebra_neigh *n = NULL; + struct listnode *node = NULL; + struct zebra_vrf *zvrf = NULL; + + zvrf = zevpn->vxlan_if->vrf->info; + + if (IS_ZEBRA_DEBUG_VXLAN) + zlog_debug("Processing neighbors on local MAC %pEA %s, VNI %u", + &zmac->macaddr, seq_change ? "CHANGE" : "ADD", + zevpn->vni); + + /* Walk all neighbors and mark any inactive local neighbors as + * active and/or update sequence number upon a move, and inform BGP. + * The action for remote neighbors is TBD. + * NOTE: We can't simply uninstall remote neighbors as the kernel may + * accidentally end up deleting a just-learnt local neighbor. + */ + for (ALL_LIST_ELEMENTS_RO(zmac->neigh_list, node, n)) { + if (CHECK_FLAG(n->flags, ZEBRA_NEIGH_LOCAL)) { + if (IS_ZEBRA_NEIGH_INACTIVE(n) || seq_change + || es_change) { + ZEBRA_NEIGH_SET_ACTIVE(n); + n->loc_seq = zmac->loc_seq; + if (!(zebra_evpn_do_dup_addr_detect(zvrf) + && zvrf->dad_freeze + && !!CHECK_FLAG(n->flags, + ZEBRA_NEIGH_DUPLICATE))) + zebra_evpn_neigh_send_add_to_client( + zevpn->vni, &n->ip, &n->emac, + n->mac, n->flags, n->loc_seq); + } + } + } +} + +/* + * Process all neighbors associated with a local MAC upon the MAC being + * deleted. 
+ */ +void zebra_evpn_process_neigh_on_local_mac_del(struct zebra_evpn *zevpn, + struct zebra_mac *zmac) +{ + struct zebra_neigh *n = NULL; + struct listnode *node = NULL; + + if (IS_ZEBRA_DEBUG_VXLAN) + zlog_debug("Processing neighbors on local MAC %pEA DEL, VNI %u", + &zmac->macaddr, zevpn->vni); + + /* Walk all local neighbors and mark as inactive and inform + * BGP, if needed. + * TBD: There is currently no handling for remote neighbors. We + * don't expect them to exist, if they do, do we install the MAC + * as a remote MAC and the neighbor as remote? + */ + for (ALL_LIST_ELEMENTS_RO(zmac->neigh_list, node, n)) { + if (CHECK_FLAG(n->flags, ZEBRA_NEIGH_LOCAL)) { + if (IS_ZEBRA_NEIGH_ACTIVE(n)) { + ZEBRA_NEIGH_SET_INACTIVE(n); + n->loc_seq = 0; + zebra_evpn_neigh_send_del_to_client( + zevpn->vni, &n->ip, &n->emac, n->flags, + ZEBRA_NEIGH_ACTIVE, false /*force*/); + } + } + } +} + +/* + * Process all neighbors associated with a MAC upon the MAC being remotely + * learnt. + */ +void zebra_evpn_process_neigh_on_remote_mac_add(struct zebra_evpn *zevpn, + struct zebra_mac *zmac) +{ + struct zebra_neigh *n = NULL; + struct listnode *node = NULL; + + if (IS_ZEBRA_DEBUG_VXLAN) + zlog_debug("Processing neighbors on remote MAC %pEA ADD, VNI %u", + &zmac->macaddr, zevpn->vni); + + /* Walk all local neighbors and mark as inactive and inform + * BGP, if needed. + */ + for (ALL_LIST_ELEMENTS_RO(zmac->neigh_list, node, n)) { + if (CHECK_FLAG(n->flags, ZEBRA_NEIGH_LOCAL)) { + if (IS_ZEBRA_NEIGH_ACTIVE(n)) { + ZEBRA_NEIGH_SET_INACTIVE(n); + n->loc_seq = 0; + zebra_evpn_neigh_send_del_to_client( + zevpn->vni, &n->ip, &n->emac, n->flags, + ZEBRA_NEIGH_ACTIVE, false /* force */); + } + } + } +} + +/* + * Process all neighbors associated with a remote MAC upon the MAC being + * deleted. + */ +void zebra_evpn_process_neigh_on_remote_mac_del(struct zebra_evpn *zevpn, + struct zebra_mac *zmac) +{ + /* NOTE: Currently a NO-OP. 
*/ +} + +static inline void zebra_evpn_local_neigh_update_log( + const char *pfx, struct zebra_neigh *n, bool is_router, + bool local_inactive, bool old_bgp_ready, bool new_bgp_ready, + bool inform_dataplane, bool inform_bgp, const char *sfx) +{ + if (!IS_ZEBRA_DEBUG_EVPN_MH_NEIGH) + return; + + zlog_debug("%s neigh vni %u ip %pIA mac %pEA f 0x%x%s%s%s%s%s%s %s", pfx, + n->zevpn->vni, &n->ip, &n->emac, n->flags, + is_router ? " router" : "", + local_inactive ? " local-inactive" : "", + old_bgp_ready ? " old_bgp_ready" : "", + new_bgp_ready ? " new_bgp_ready" : "", + inform_dataplane ? " inform_dp" : "", + inform_bgp ? " inform_bgp" : "", sfx); +} + +/* As part Duplicate Address Detection (DAD) for IP mobility + * MAC binding changes, ensure to inherit duplicate flag + * from MAC. + */ +static int zebra_evpn_ip_inherit_dad_from_mac(struct zebra_vrf *zvrf, + struct zebra_mac *old_zmac, + struct zebra_mac *new_zmac, + struct zebra_neigh *nbr) +{ + bool is_old_mac_dup = false; + bool is_new_mac_dup = false; + + if (!zebra_evpn_do_dup_addr_detect(zvrf)) + return 0; + /* Check old or new MAC is detected as duplicate + * mark this neigh as duplicate + */ + if (old_zmac) + is_old_mac_dup = + CHECK_FLAG(old_zmac->flags, ZEBRA_MAC_DUPLICATE); + if (new_zmac) + is_new_mac_dup = + CHECK_FLAG(new_zmac->flags, ZEBRA_MAC_DUPLICATE); + /* Old and/or new MAC can be in duplicate state, + * based on that IP/Neigh Inherits the flag. + * If New MAC is marked duplicate, inherit to the IP. + * If old MAC is duplicate but new MAC is not, clear + * duplicate flag for IP and reset detection params + * and let IP DAD retrigger. 
+ */ + if (is_new_mac_dup && !CHECK_FLAG(nbr->flags, ZEBRA_NEIGH_DUPLICATE)) { + SET_FLAG(nbr->flags, ZEBRA_NEIGH_DUPLICATE); + /* Capture Duplicate detection time */ + nbr->dad_dup_detect_time = monotime(NULL); + /* Mark neigh inactive */ + ZEBRA_NEIGH_SET_INACTIVE(nbr); + + return 1; + } else if (is_old_mac_dup && !is_new_mac_dup) { + UNSET_FLAG(nbr->flags, ZEBRA_NEIGH_DUPLICATE); + nbr->dad_count = 0; + nbr->detect_start_time.tv_sec = 0; + nbr->detect_start_time.tv_usec = 0; + } + return 0; +} + +static void zebra_evpn_dad_ip_auto_recovery_exp(struct thread *t) +{ + struct zebra_vrf *zvrf = NULL; + struct zebra_neigh *nbr = NULL; + struct zebra_evpn *zevpn = NULL; + + nbr = THREAD_ARG(t); + + /* since this is asynchronous we need sanity checks*/ + zvrf = vrf_info_lookup(nbr->zevpn->vrf_id); + if (!zvrf) + return; + + zevpn = zebra_evpn_lookup(nbr->zevpn->vni); + if (!zevpn) + return; + + nbr = zebra_evpn_neigh_lookup(zevpn, &nbr->ip); + if (!nbr) + return; + + if (IS_ZEBRA_DEBUG_VXLAN) + zlog_debug( + "%s: duplicate addr MAC %pEA IP %pIA flags 0x%x learn count %u vni %u auto recovery expired", + __func__, &nbr->emac, &nbr->ip, nbr->flags, + nbr->dad_count, zevpn->vni); + + UNSET_FLAG(nbr->flags, ZEBRA_NEIGH_DUPLICATE); + nbr->dad_count = 0; + nbr->detect_start_time.tv_sec = 0; + nbr->detect_start_time.tv_usec = 0; + nbr->dad_dup_detect_time = 0; + nbr->dad_ip_auto_recovery_timer = NULL; + ZEBRA_NEIGH_SET_ACTIVE(nbr); + + /* Send to BGP */ + if (CHECK_FLAG(nbr->flags, ZEBRA_NEIGH_LOCAL)) { + zebra_evpn_neigh_send_add_to_client(zevpn->vni, &nbr->ip, + &nbr->emac, nbr->mac, + nbr->flags, nbr->loc_seq); + } else if (!!CHECK_FLAG(nbr->flags, ZEBRA_NEIGH_REMOTE)) { + zebra_evpn_rem_neigh_install(zevpn, nbr, false /*was_static*/); + } +} + +static void zebra_evpn_dup_addr_detect_for_neigh( + struct zebra_vrf *zvrf, struct zebra_neigh *nbr, struct in_addr vtep_ip, + bool do_dad, bool *is_dup_detect, bool is_local) +{ + + struct timeval elapsed = {0, 0}; + bool 
reset_params = false; + + if (!zebra_evpn_do_dup_addr_detect(zvrf)) + return; + + /* IP is detected as duplicate or inherit dup + * state, hold on to install as remote entry + * only if freeze is enabled. + */ + if (CHECK_FLAG(nbr->flags, ZEBRA_NEIGH_DUPLICATE)) { + if (IS_ZEBRA_DEBUG_VXLAN) + zlog_debug( + "%s: duplicate addr MAC %pEA IP %pIA flags 0x%x skip installing, learn count %u recover time %u", + __func__, &nbr->emac, &nbr->ip, + nbr->flags, nbr->dad_count, + zvrf->dad_freeze_time); + + if (zvrf->dad_freeze) + *is_dup_detect = true; + + /* warn-only action, neigh will be installed. + * freeze action, it wil not be installed. + */ + return; + } + + if (!do_dad) + return; + + /* Check if detection time (M-secs) expired. + * Reset learn count and detection start time. + * During remote mac add, count should already be 1 + * via local learning. + */ + monotime_since(&nbr->detect_start_time, &elapsed); + reset_params = (elapsed.tv_sec > zvrf->dad_time); + + if (is_local && !reset_params) { + /* RFC-7432: A PE/VTEP that detects a MAC mobility + * event via LOCAL learning starts an M-second timer. + * + * NOTE: This is the START of the probe with count is + * 0 during LOCAL learn event. + */ + reset_params = !nbr->dad_count; + } + + if (reset_params) { + if (IS_ZEBRA_DEBUG_VXLAN) + zlog_debug( + "%s: duplicate addr MAC %pEA IP %pIA flags 0x%x detection time passed, reset learn count %u", + __func__, &nbr->emac, &nbr->ip, + nbr->flags, nbr->dad_count); + /* Reset learn count but do not start detection + * during REMOTE learn event. + */ + nbr->dad_count = 0; + /* Start dup. addr detection (DAD) start time, + * ONLY during LOCAL learn. + */ + if (is_local) + monotime(&nbr->detect_start_time); + + } else if (!is_local) { + /* For REMOTE IP/Neigh, increment detection count + * ONLY while in probe window, once window passed, + * next local learn event should trigger DAD. 
+ */ + nbr->dad_count++; + } + + /* For LOCAL IP/Neigh learn event, once count is reset above via either + * initial/start detection time or passed the probe time, the count + * needs to be incremented. + */ + if (is_local) + nbr->dad_count++; + + if (nbr->dad_count >= zvrf->dad_max_moves) { + flog_warn( + EC_ZEBRA_DUP_IP_DETECTED, + "VNI %u: MAC %pEA IP %pIA detected as duplicate during %s VTEP %pI4", + nbr->zevpn->vni, &nbr->emac, &nbr->ip, + is_local ? "local update, last" : "remote update, from", + &vtep_ip); + + SET_FLAG(nbr->flags, ZEBRA_NEIGH_DUPLICATE); + + /* Capture Duplicate detection time */ + nbr->dad_dup_detect_time = monotime(NULL); + + /* Start auto recovery timer for this IP */ + THREAD_OFF(nbr->dad_ip_auto_recovery_timer); + if (zvrf->dad_freeze && zvrf->dad_freeze_time) { + if (IS_ZEBRA_DEBUG_VXLAN) + zlog_debug( + "%s: duplicate addr MAC %pEA IP %pIA flags 0x%x auto recovery time %u start", + __func__, &nbr->emac, &nbr->ip, + nbr->flags, zvrf->dad_freeze_time); + + thread_add_timer(zrouter.master, + zebra_evpn_dad_ip_auto_recovery_exp, + nbr, zvrf->dad_freeze_time, + &nbr->dad_ip_auto_recovery_timer); + } + if (zvrf->dad_freeze) + *is_dup_detect = true; + } +} + +int zebra_evpn_local_neigh_update(struct zebra_evpn *zevpn, + struct interface *ifp, + const struct ipaddr *ip, + const struct ethaddr *macaddr, bool is_router, + bool local_inactive, bool dp_static) +{ + struct zebra_vrf *zvrf; + struct zebra_neigh *n = NULL; + struct zebra_mac *zmac = NULL, *old_zmac = NULL; + uint32_t old_mac_seq = 0, mac_new_seq = 0; + bool upd_mac_seq = false; + bool neigh_mac_change = false; + bool neigh_on_hold = false; + bool neigh_was_remote = false; + bool do_dad = false; + struct in_addr vtep_ip = {.s_addr = 0}; + bool inform_dataplane = false; + bool created = false; + bool new_static = false; + bool old_bgp_ready = false; + bool new_bgp_ready; + + /* Check if the MAC exists. 
*/ + zmac = zebra_evpn_mac_lookup(zevpn, macaddr); + if (!zmac) { + /* create a dummy MAC if the MAC is not already present */ + if (IS_ZEBRA_DEBUG_VXLAN) + zlog_debug("AUTO MAC %pEA created for neigh %pIA on VNI %u", + macaddr, ip, zevpn->vni); + + zmac = zebra_evpn_mac_add(zevpn, macaddr); + zebra_evpn_mac_clear_fwd_info(zmac); + memset(&zmac->flags, 0, sizeof(uint32_t)); + SET_FLAG(zmac->flags, ZEBRA_MAC_AUTO); + } else { + if (CHECK_FLAG(zmac->flags, ZEBRA_MAC_REMOTE)) { + /* + * We don't change the MAC to local upon a neighbor + * learn event, we wait for the explicit local MAC + * learn. However, we have to compute its sequence + * number in preparation for when it actually turns + * local. + */ + upd_mac_seq = true; + } + } + + zvrf = zevpn->vxlan_if->vrf->info; + if (!zvrf) { + if (IS_ZEBRA_DEBUG_VXLAN) + zlog_debug(" Unable to find vrf for: %d", + zevpn->vxlan_if->vrf->vrf_id); + return -1; + } + + /* Check if the neighbor exists. */ + n = zebra_evpn_neigh_lookup(zevpn, ip); + if (!n) { + /* New neighbor - create */ + n = zebra_evpn_neigh_add(zevpn, ip, macaddr, zmac, 0); + + /* Set "local" forwarding info. */ + SET_FLAG(n->flags, ZEBRA_NEIGH_LOCAL); + n->ifindex = ifp->ifindex; + created = true; + } else { + if (CHECK_FLAG(n->flags, ZEBRA_NEIGH_LOCAL)) { + bool mac_different; + bool cur_is_router; + bool old_local_inactive; + + old_local_inactive = !!CHECK_FLAG( + n->flags, ZEBRA_NEIGH_LOCAL_INACTIVE); + + old_bgp_ready = zebra_evpn_neigh_is_ready_for_bgp(n); + + /* Note any changes and see if of interest to BGP. 
*/ + mac_different = !!memcmp(&n->emac, macaddr, ETH_ALEN); + cur_is_router = + !!CHECK_FLAG(n->flags, ZEBRA_NEIGH_ROUTER_FLAG); + new_static = zebra_evpn_neigh_is_static(n); + if (!mac_different && is_router == cur_is_router + && old_local_inactive == local_inactive + && dp_static != new_static) { + if (IS_ZEBRA_DEBUG_VXLAN) + zlog_debug( + " Ignoring entry mac is the same and is_router == cur_is_router"); + n->ifindex = ifp->ifindex; + return 0; + } + + old_zmac = n->mac; + if (!mac_different) { + /* XXX - cleanup this code duplication */ + bool is_neigh_freezed = false; + + /* Only the router flag has changed. */ + if (is_router) + SET_FLAG(n->flags, + ZEBRA_NEIGH_ROUTER_FLAG); + else + UNSET_FLAG(n->flags, + ZEBRA_NEIGH_ROUTER_FLAG); + + if (local_inactive) + SET_FLAG(n->flags, + ZEBRA_NEIGH_LOCAL_INACTIVE); + else + UNSET_FLAG(n->flags, + ZEBRA_NEIGH_LOCAL_INACTIVE); + new_bgp_ready = + zebra_evpn_neigh_is_ready_for_bgp(n); + + if (dp_static != new_static) + inform_dataplane = true; + + /* Neigh is in freeze state and freeze action + * is enabled, do not send update to client. + */ + is_neigh_freezed = + (zebra_evpn_do_dup_addr_detect(zvrf) + && zvrf->dad_freeze + && CHECK_FLAG(n->flags, + ZEBRA_NEIGH_DUPLICATE)); + + zebra_evpn_local_neigh_update_log( + "local", n, is_router, local_inactive, + old_bgp_ready, new_bgp_ready, false, + false, "flag-update"); + + if (inform_dataplane) + zebra_evpn_sync_neigh_dp_install( + n, false /* set_inactive */, + false /* force_clear_static */, + __func__); + + /* if the neigh can no longer be advertised + * remove it from bgp + */ + if (!is_neigh_freezed) { + zebra_evpn_neigh_send_add_del_to_client( + n, old_bgp_ready, + new_bgp_ready); + } else { + if (IS_ZEBRA_DEBUG_VXLAN + && IS_ZEBRA_NEIGH_ACTIVE(n)) + zlog_debug( + " Neighbor active and frozen"); + } + return 0; + } + + /* The MAC has changed, need to issue a delete + * first as this means a different MACIP route. + * Also, need to do some unlinking/relinking. 
+ * We also need to update the MAC's sequence number + * in different situations. + */ + if (old_bgp_ready) { + zebra_evpn_neigh_send_del_to_client( + zevpn->vni, &n->ip, &n->emac, n->flags, + n->state, false /*force*/); + old_bgp_ready = false; + } + if (old_zmac) { + old_mac_seq = CHECK_FLAG(old_zmac->flags, + ZEBRA_MAC_REMOTE) + ? old_zmac->rem_seq + : old_zmac->loc_seq; + neigh_mac_change = upd_mac_seq = true; + zebra_evpn_local_neigh_deref_mac( + n, true /* send_mac_update */); + } + + /* if mac changes abandon peer flags and tell + * dataplane to clear the static flag + */ + if (zebra_evpn_neigh_clear_sync_info(n)) + inform_dataplane = true; + /* Update the forwarding info. */ + n->ifindex = ifp->ifindex; + + /* Link to new MAC */ + zebra_evpn_local_neigh_ref_mac( + n, macaddr, zmac, true /* send_mac_update */); + } else if (CHECK_FLAG(n->flags, ZEBRA_NEIGH_REMOTE)) { + /* + * Neighbor has moved from remote to local. Its + * MAC could have also changed as part of the move. + */ + if (memcmp(n->emac.octet, macaddr->octet, ETH_ALEN) + != 0) { + old_zmac = n->mac; + if (old_zmac) { + old_mac_seq = + CHECK_FLAG(old_zmac->flags, + ZEBRA_MAC_REMOTE) + ? old_zmac->rem_seq + : old_zmac->loc_seq; + neigh_mac_change = upd_mac_seq = true; + zebra_evpn_local_neigh_deref_mac( + n, true /* send_update */); + } + + /* Link to new MAC */ + zebra_evpn_local_neigh_ref_mac( + n, macaddr, zmac, true /*send_update*/); + } + /* Based on Mobility event Scenario-B from the + * draft, neigh's previous state was remote treat this + * event for DAD. + */ + neigh_was_remote = true; + vtep_ip = n->r_vtep_ip; + /* Mark appropriately */ + UNSET_FLAG(n->flags, ZEBRA_NEIGH_REMOTE); + n->r_vtep_ip.s_addr = INADDR_ANY; + SET_FLAG(n->flags, ZEBRA_NEIGH_LOCAL); + n->ifindex = ifp->ifindex; + } + } + + /* If MAC was previously remote, or the neighbor had a different + * MAC earlier, recompute the sequence number. 
+ */ + if (upd_mac_seq) { + uint32_t seq1, seq2; + + seq1 = CHECK_FLAG(zmac->flags, ZEBRA_MAC_REMOTE) + ? zmac->rem_seq + 1 + : zmac->loc_seq; + seq2 = neigh_mac_change ? old_mac_seq + 1 : 0; + mac_new_seq = zmac->loc_seq < MAX(seq1, seq2) ? MAX(seq1, seq2) + : zmac->loc_seq; + } + + if (local_inactive) + SET_FLAG(n->flags, ZEBRA_NEIGH_LOCAL_INACTIVE); + else + UNSET_FLAG(n->flags, ZEBRA_NEIGH_LOCAL_INACTIVE); + + /* Mark Router flag (R-bit) */ + if (is_router) + SET_FLAG(n->flags, ZEBRA_NEIGH_ROUTER_FLAG); + else + UNSET_FLAG(n->flags, ZEBRA_NEIGH_ROUTER_FLAG); + + /* if zebra and dataplane don't agree this is a sync entry + * re-install in the dataplane */ + new_static = zebra_evpn_neigh_is_static(n); + if (dp_static != new_static) + inform_dataplane = true; + + /* Check old and/or new MAC detected as duplicate mark + * the neigh as duplicate + */ + if (zebra_evpn_ip_inherit_dad_from_mac(zvrf, old_zmac, zmac, n)) { + flog_warn( + EC_ZEBRA_DUP_IP_INHERIT_DETECTED, + "VNI %u: MAC %pEA IP %pIA detected as duplicate during local update, inherit duplicate from MAC", + zevpn->vni, macaddr, &n->ip); + } + + /* For IP Duplicate Address Detection (DAD) is trigger, + * when the event is extended mobility based on scenario-B + * from the draft, IP/Neigh's MAC binding changed and + * neigh's previous state was remote. + */ + if (neigh_mac_change && neigh_was_remote) + do_dad = true; + + zebra_evpn_dup_addr_detect_for_neigh(zvrf, n, vtep_ip, do_dad, + &neigh_on_hold, true); + + if (inform_dataplane) + zebra_evpn_sync_neigh_dp_install(n, false /* set_inactive */, + false /* force_clear_static */, + __func__); + + /* Before we program this in BGP, we need to check if MAC is locally + * learnt. If not, force neighbor to be inactive and reset its seq. 
+ */ + if (!CHECK_FLAG(zmac->flags, ZEBRA_MAC_LOCAL)) { + zebra_evpn_local_neigh_update_log( + "local", n, is_router, local_inactive, false, false, + inform_dataplane, false, "auto-mac"); + ZEBRA_NEIGH_SET_INACTIVE(n); + n->loc_seq = 0; + zmac->loc_seq = mac_new_seq; + return 0; + } + + zebra_evpn_local_neigh_update_log("local", n, is_router, local_inactive, + false, false, inform_dataplane, true, + created ? "created" : "updated"); + + /* If the MAC's sequence number has changed, inform the MAC and all + * neighbors associated with the MAC to BGP, else just inform this + * neighbor. + */ + if (upd_mac_seq && zmac->loc_seq != mac_new_seq) { + if (IS_ZEBRA_DEBUG_VXLAN) + zlog_debug( + "Seq changed for MAC %pEA VNI %u - old %u new %u", + macaddr, zevpn->vni, + zmac->loc_seq, mac_new_seq); + zmac->loc_seq = mac_new_seq; + if (zebra_evpn_mac_send_add_to_client(zevpn->vni, macaddr, + zmac->flags, + zmac->loc_seq, zmac->es)) + return -1; + zebra_evpn_process_neigh_on_local_mac_change(zevpn, zmac, 1, + 0 /*es_change*/); + return 0; + } + + n->loc_seq = zmac->loc_seq; + + if (!neigh_on_hold) { + ZEBRA_NEIGH_SET_ACTIVE(n); + new_bgp_ready = zebra_evpn_neigh_is_ready_for_bgp(n); + zebra_evpn_neigh_send_add_del_to_client(n, old_bgp_ready, + new_bgp_ready); + } else { + if (IS_ZEBRA_DEBUG_VXLAN) + zlog_debug(" Neighbor on hold not sending"); + } + return 0; +} + +int zebra_evpn_remote_neigh_update(struct zebra_evpn *zevpn, + struct interface *ifp, + const struct ipaddr *ip, + const struct ethaddr *macaddr, + uint16_t state) +{ + struct zebra_neigh *n = NULL; + struct zebra_mac *zmac = NULL; + + /* If the neighbor is unknown, there is no further action. 
*/ + n = zebra_evpn_neigh_lookup(zevpn, ip); + if (!n) + return 0; + + /* If a remote entry, see if it needs to be refreshed */ + if (CHECK_FLAG(n->flags, ZEBRA_NEIGH_REMOTE)) { +#ifdef GNU_LINUX + if (state & NUD_STALE) + zebra_evpn_rem_neigh_install(zevpn, n, + false /*was_static*/); +#endif + } else { + /* We got a "remote" neighbor notification for an entry + * we think is local. This can happen in a multihoming + * scenario - but only if the MAC is already "remote". + * Just mark our entry as "remote". + */ + zmac = zebra_evpn_mac_lookup(zevpn, macaddr); + if (!zmac || !CHECK_FLAG(zmac->flags, ZEBRA_MAC_REMOTE)) { + zlog_debug( + "Ignore remote neigh %pIA (MAC %pEA) on L2-VNI %u - MAC unknown or local", + &n->ip, macaddr, zevpn->vni); + return -1; + } + + UNSET_FLAG(n->flags, ZEBRA_NEIGH_ALL_LOCAL_FLAGS); + SET_FLAG(n->flags, ZEBRA_NEIGH_REMOTE); + ZEBRA_NEIGH_SET_ACTIVE(n); + n->r_vtep_ip = zmac->fwd_info.r_vtep_ip; + } + + return 0; +} + +/* Notify Neighbor entries to the Client, skips the GW entry */ +static void +zebra_evpn_send_neigh_hash_entry_to_client(struct hash_bucket *bucket, + void *arg) +{ + struct mac_walk_ctx *wctx = arg; + struct zebra_neigh *zn = bucket->data; + struct zebra_mac *zmac = NULL; + + if (CHECK_FLAG(zn->flags, ZEBRA_NEIGH_DEF_GW)) + return; + + if (CHECK_FLAG(zn->flags, ZEBRA_NEIGH_LOCAL) + && IS_ZEBRA_NEIGH_ACTIVE(zn)) { + zmac = zebra_evpn_mac_lookup(wctx->zevpn, &zn->emac); + if (!zmac) + return; + + zebra_evpn_neigh_send_add_to_client(wctx->zevpn->vni, &zn->ip, + &zn->emac, zn->mac, + zn->flags, zn->loc_seq); + } +} + +/* Iterator of a specific EVPN */ +void zebra_evpn_send_neigh_to_client(struct zebra_evpn *zevpn) +{ + struct neigh_walk_ctx wctx; + + memset(&wctx, 0, sizeof(wctx)); + wctx.zevpn = zevpn; + + hash_iterate(zevpn->neigh_table, + zebra_evpn_send_neigh_hash_entry_to_client, &wctx); +} + +void zebra_evpn_clear_dup_neigh_hash(struct hash_bucket *bucket, void *ctxt) +{ + struct neigh_walk_ctx *wctx = ctxt; + struct 
zebra_neigh *nbr; + struct zebra_evpn *zevpn; + char buf[INET6_ADDRSTRLEN]; + + nbr = (struct zebra_neigh *)bucket->data; + if (!nbr) + return; + + zevpn = wctx->zevpn; + + if (!CHECK_FLAG(nbr->flags, ZEBRA_NEIGH_DUPLICATE)) + return; + + if (IS_ZEBRA_DEBUG_VXLAN) { + ipaddr2str(&nbr->ip, buf, sizeof(buf)); + zlog_debug("%s: clear neigh %s dup state, flags 0x%x seq %u", + __func__, buf, nbr->flags, nbr->loc_seq); + } + + UNSET_FLAG(nbr->flags, ZEBRA_NEIGH_DUPLICATE); + nbr->dad_count = 0; + nbr->detect_start_time.tv_sec = 0; + nbr->detect_start_time.tv_usec = 0; + nbr->dad_dup_detect_time = 0; + THREAD_OFF(nbr->dad_ip_auto_recovery_timer); + + if (CHECK_FLAG(nbr->flags, ZEBRA_NEIGH_LOCAL)) { + zebra_evpn_neigh_send_add_to_client(zevpn->vni, &nbr->ip, + &nbr->emac, nbr->mac, + nbr->flags, nbr->loc_seq); + } else if (CHECK_FLAG(nbr->flags, ZEBRA_NEIGH_REMOTE)) { + zebra_evpn_rem_neigh_install(zevpn, nbr, false /*was_static*/); + } +} + +/* + * Print a specific neighbor entry. + */ +void zebra_evpn_print_neigh(struct zebra_neigh *n, void *ctxt, + json_object *json) +{ + struct vty *vty; + char buf1[ETHER_ADDR_STRLEN]; + char buf2[INET6_ADDRSTRLEN]; + const char *type_str; + const char *state_str; + bool flags_present = false; + struct zebra_vrf *zvrf = NULL; + struct timeval detect_start_time = {0, 0}; + char timebuf[MONOTIME_STRLEN]; + char thread_buf[THREAD_TIMER_STRLEN]; + time_t uptime; + char up_str[MONOTIME_STRLEN]; + + zvrf = zebra_vrf_get_evpn(); + uptime = monotime(NULL); + uptime -= n->uptime; + + frrtime_to_interval(uptime, up_str, sizeof(up_str)); + + ipaddr2str(&n->ip, buf2, sizeof(buf2)); + prefix_mac2str(&n->emac, buf1, sizeof(buf1)); + type_str = CHECK_FLAG(n->flags, ZEBRA_NEIGH_LOCAL) ? "local" : "remote"; + state_str = IS_ZEBRA_NEIGH_ACTIVE(n) ? 
"active" : "inactive"; + vty = (struct vty *)ctxt; + if (json == NULL) { + bool sync_info = false; + + vty_out(vty, "IP: %s\n", + ipaddr2str(&n->ip, buf2, sizeof(buf2))); + vty_out(vty, " Type: %s\n", type_str); + vty_out(vty, " State: %s\n", state_str); + vty_out(vty, " Uptime: %s\n", up_str); + vty_out(vty, " MAC: %s\n", + prefix_mac2str(&n->emac, buf1, sizeof(buf1))); + vty_out(vty, " Sync-info:"); + if (CHECK_FLAG(n->flags, ZEBRA_NEIGH_LOCAL_INACTIVE)) { + vty_out(vty, " local-inactive"); + sync_info = true; + } + if (CHECK_FLAG(n->flags, ZEBRA_NEIGH_ES_PEER_PROXY)) { + vty_out(vty, " peer-proxy"); + sync_info = true; + } + if (CHECK_FLAG(n->flags, ZEBRA_NEIGH_ES_PEER_ACTIVE)) { + vty_out(vty, " peer-active"); + sync_info = true; + } + if (n->hold_timer) { + vty_out(vty, " (ht: %s)", + thread_timer_to_hhmmss(thread_buf, + sizeof(thread_buf), + n->hold_timer)); + sync_info = true; + } + if (!sync_info) + vty_out(vty, " -"); + vty_out(vty, "\n"); + } else { + json_object_string_add(json, "uptime", up_str); + json_object_string_add(json, "ip", buf2); + json_object_string_add(json, "type", type_str); + json_object_string_add(json, "state", state_str); + json_object_string_add(json, "mac", buf1); + if (CHECK_FLAG(n->flags, ZEBRA_NEIGH_LOCAL_INACTIVE)) + json_object_boolean_true_add(json, "localInactive"); + if (CHECK_FLAG(n->flags, ZEBRA_NEIGH_ES_PEER_PROXY)) + json_object_boolean_true_add(json, "peerProxy"); + if (CHECK_FLAG(n->flags, ZEBRA_NEIGH_ES_PEER_ACTIVE)) + json_object_boolean_true_add(json, "peerActive"); + if (n->hold_timer) + json_object_string_add( + json, "peerActiveHold", + thread_timer_to_hhmmss(thread_buf, + sizeof(thread_buf), + n->hold_timer)); + } + if (CHECK_FLAG(n->flags, ZEBRA_NEIGH_REMOTE)) { + if (n->mac->es) { + if (json) + json_object_string_add(json, "remoteEs", + n->mac->es->esi_str); + else + vty_out(vty, " Remote ES: %s\n", + n->mac->es->esi_str); + } else { + if (json) + json_object_string_addf(json, "remoteVtep", + "%pI4", 
&n->r_vtep_ip); + else + vty_out(vty, " Remote VTEP: %pI4\n", + &n->r_vtep_ip); + } + } + if (CHECK_FLAG(n->flags, ZEBRA_NEIGH_DEF_GW)) { + if (!json) { + vty_out(vty, " Flags: Default-gateway"); + flags_present = true; + } else + json_object_boolean_true_add(json, "defaultGateway"); + } + if (CHECK_FLAG(n->flags, ZEBRA_NEIGH_ROUTER_FLAG)) { + if (!json) { + vty_out(vty, + flags_present ? " ,Router" : " Flags: Router"); + flags_present = true; + } + } + if (json == NULL) { + if (flags_present) + vty_out(vty, "\n"); + vty_out(vty, " Local Seq: %u Remote Seq: %u\n", n->loc_seq, + n->rem_seq); + + if (CHECK_FLAG(n->flags, ZEBRA_NEIGH_DUPLICATE)) { + vty_out(vty, " Duplicate, detected at %s", + time_to_string(n->dad_dup_detect_time, + timebuf)); + } else if (n->dad_count) { + monotime_since(&n->detect_start_time, + &detect_start_time); + if (detect_start_time.tv_sec <= zvrf->dad_time) { + time_to_string(n->detect_start_time.tv_sec, + timebuf); + vty_out(vty, + " Duplicate detection started at %s, detection count %u\n", + timebuf, n->dad_count); + } + } + } else { + json_object_int_add(json, "localSequence", n->loc_seq); + json_object_int_add(json, "remoteSequence", n->rem_seq); + json_object_int_add(json, "detectionCount", n->dad_count); + if (CHECK_FLAG(n->flags, ZEBRA_NEIGH_DUPLICATE)) + json_object_boolean_true_add(json, "isDuplicate"); + else + json_object_boolean_false_add(json, "isDuplicate"); + } +} + +void zebra_evpn_print_neigh_hdr(struct vty *vty, struct neigh_walk_ctx *wctx) +{ + vty_out(vty, "Flags: I=local-inactive, P=peer-active, X=peer-proxy\n"); + vty_out(vty, "%*s %-6s %-5s %-8s %-17s %-30s %s\n", -wctx->addr_width, + "Neighbor", "Type", "Flags", "State", "MAC", "Remote ES/VTEP", + "Seq #'s"); +} + +static char *zebra_evpn_print_neigh_flags(struct zebra_neigh *n, + char *flags_buf, + uint32_t flags_buf_sz) +{ + snprintf(flags_buf, flags_buf_sz, "%s%s%s", + (n->flags & ZEBRA_NEIGH_ES_PEER_ACTIVE) ? + "P" : "", + (n->flags & ZEBRA_NEIGH_ES_PEER_PROXY) ? 
+ "X" : "", + (n->flags & ZEBRA_NEIGH_LOCAL_INACTIVE) ? + "I" : ""); + + return flags_buf; +} + +/* + * Print neighbor hash entry - called for display of all neighbors. + */ +void zebra_evpn_print_neigh_hash(struct hash_bucket *bucket, void *ctxt) +{ + struct vty *vty; + json_object *json_evpn = NULL, *json_row = NULL; + struct zebra_neigh *n; + char buf1[ETHER_ADDR_STRLEN]; + char buf2[INET6_ADDRSTRLEN]; + char addr_buf[PREFIX_STRLEN]; + struct neigh_walk_ctx *wctx = ctxt; + const char *state_str; + char flags_buf[6]; + + vty = wctx->vty; + json_evpn = wctx->json; + n = (struct zebra_neigh *)bucket->data; + + if (json_evpn) + json_row = json_object_new_object(); + + prefix_mac2str(&n->emac, buf1, sizeof(buf1)); + ipaddr2str(&n->ip, buf2, sizeof(buf2)); + state_str = IS_ZEBRA_NEIGH_ACTIVE(n) ? "active" : "inactive"; + if (CHECK_FLAG(n->flags, ZEBRA_NEIGH_LOCAL)) { + if (wctx->flags & SHOW_REMOTE_NEIGH_FROM_VTEP) + return; + + if (json_evpn == NULL) { + vty_out(vty, "%*s %-6s %-5s %-8s %-17s %-30s %u/%u\n", + -wctx->addr_width, buf2, "local", + zebra_evpn_print_neigh_flags(n, flags_buf, + sizeof(flags_buf)), state_str, buf1, + "", n->loc_seq, n->rem_seq); + } else { + json_object_string_add(json_row, "type", "local"); + json_object_string_add(json_row, "state", state_str); + json_object_string_add(json_row, "mac", buf1); + if (CHECK_FLAG(n->flags, ZEBRA_NEIGH_DEF_GW)) + json_object_boolean_true_add(json_row, + "defaultGateway"); + json_object_int_add(json_row, "localSequence", + n->loc_seq); + json_object_int_add(json_row, "remoteSequence", + n->rem_seq); + json_object_int_add(json_row, "detectionCount", + n->dad_count); + if (CHECK_FLAG(n->flags, ZEBRA_NEIGH_DUPLICATE)) + json_object_boolean_true_add(json_row, + "isDuplicate"); + else + json_object_boolean_false_add(json_row, + "isDuplicate"); + } + wctx->count++; + } else if (CHECK_FLAG(n->flags, ZEBRA_NEIGH_REMOTE)) { + if ((wctx->flags & SHOW_REMOTE_NEIGH_FROM_VTEP) + && !IPV4_ADDR_SAME(&n->r_vtep_ip, 
&wctx->r_vtep_ip)) + return; + + if (json_evpn == NULL) { + if ((wctx->flags & SHOW_REMOTE_NEIGH_FROM_VTEP) + && (wctx->count == 0)) + zebra_evpn_print_neigh_hdr(vty, wctx); + + if (n->mac->es == NULL) + inet_ntop(AF_INET, &n->r_vtep_ip, + addr_buf, sizeof(addr_buf)); + + vty_out(vty, "%*s %-6s %-5s %-8s %-17s %-30s %u/%u\n", + -wctx->addr_width, buf2, "remote", + zebra_evpn_print_neigh_flags(n, flags_buf, + sizeof(flags_buf)), state_str, buf1, + n->mac->es ? n->mac->es->esi_str : addr_buf, + n->loc_seq, n->rem_seq); + } else { + json_object_string_add(json_row, "type", "remote"); + json_object_string_add(json_row, "state", state_str); + json_object_string_add(json_row, "mac", buf1); + if (n->mac->es) + json_object_string_add(json_row, "remoteEs", + n->mac->es->esi_str); + else + json_object_string_addf(json_row, "remoteVtep", + "%pI4", &n->r_vtep_ip); + if (CHECK_FLAG(n->flags, ZEBRA_NEIGH_DEF_GW)) + json_object_boolean_true_add(json_row, + "defaultGateway"); + json_object_int_add(json_row, "localSequence", + n->loc_seq); + json_object_int_add(json_row, "remoteSequence", + n->rem_seq); + json_object_int_add(json_row, "detectionCount", + n->dad_count); + if (CHECK_FLAG(n->flags, ZEBRA_NEIGH_DUPLICATE)) + json_object_boolean_true_add(json_row, + "isDuplicate"); + else + json_object_boolean_false_add(json_row, + "isDuplicate"); + } + wctx->count++; + } + + if (json_evpn) + json_object_object_add(json_evpn, buf2, json_row); +} + +/* + * Print neighbor hash entry in detail - called for display of all neighbors. 
+ */ +void zebra_evpn_print_neigh_hash_detail(struct hash_bucket *bucket, void *ctxt) +{ + struct vty *vty; + json_object *json_evpn = NULL, *json_row = NULL; + struct zebra_neigh *n; + char buf[INET6_ADDRSTRLEN]; + struct neigh_walk_ctx *wctx = ctxt; + + vty = wctx->vty; + json_evpn = wctx->json; + n = (struct zebra_neigh *)bucket->data; + if (!n) + return; + + ipaddr2str(&n->ip, buf, sizeof(buf)); + if (json_evpn) + json_row = json_object_new_object(); + + zebra_evpn_print_neigh(n, vty, json_row); + + if (json_evpn) + json_object_object_add(json_evpn, buf, json_row); +} + +void zebra_evpn_print_dad_neigh_hash(struct hash_bucket *bucket, void *ctxt) +{ + struct zebra_neigh *nbr; + + nbr = (struct zebra_neigh *)bucket->data; + if (!nbr) + return; + + if (CHECK_FLAG(nbr->flags, ZEBRA_NEIGH_DUPLICATE)) + zebra_evpn_print_neigh_hash(bucket, ctxt); +} + +void zebra_evpn_print_dad_neigh_hash_detail(struct hash_bucket *bucket, + void *ctxt) +{ + struct zebra_neigh *nbr; + + nbr = (struct zebra_neigh *)bucket->data; + if (!nbr) + return; + + if (CHECK_FLAG(nbr->flags, ZEBRA_NEIGH_DUPLICATE)) + zebra_evpn_print_neigh_hash_detail(bucket, ctxt); +} + +void zebra_evpn_neigh_remote_macip_add(struct zebra_evpn *zevpn, + struct zebra_vrf *zvrf, + const struct ipaddr *ipaddr, + struct zebra_mac *mac, + struct in_addr vtep_ip, uint8_t flags, + uint32_t seq) +{ + struct zebra_neigh *n; + int update_neigh = 0; + struct zebra_mac *old_mac = NULL; + bool old_static = false; + bool do_dad = false; + bool is_dup_detect = false; + bool is_router; + + assert(mac); + is_router = !!CHECK_FLAG(flags, ZEBRA_MACIP_TYPE_ROUTER_FLAG); + + /* Check if the remote neighbor itself is unknown or has a + * change. If so, create or update and then install the entry. 
 + */ + n = zebra_evpn_neigh_lookup(zevpn, ipaddr); + if (!n || !CHECK_FLAG(n->flags, ZEBRA_NEIGH_REMOTE) + || is_router != !!CHECK_FLAG(n->flags, ZEBRA_NEIGH_ROUTER_FLAG) + || (memcmp(&n->emac, &mac->macaddr, sizeof(struct ethaddr)) != 0) + || !IPV4_ADDR_SAME(&n->r_vtep_ip, &vtep_ip) || seq != n->rem_seq) + update_neigh = 1; + + if (update_neigh) { + if (!n) { + n = zebra_evpn_neigh_add(zevpn, ipaddr, &mac->macaddr, + mac, 0); + } else { + /* When host moves but changes its (MAC,IP) + * binding, BGP may install a MACIP entry that + * corresponds to "older" location of the host + * in transient situations (because {IP1,M1} + * is a different route from {IP1,M2}). Check + * the sequence number and ignore this update + * if appropriate. + */ + + if (!zebra_evpn_neigh_is_bgp_seq_ok( + zevpn, n, &mac->macaddr, seq, false)) + return; + if (CHECK_FLAG(n->flags, ZEBRA_NEIGH_LOCAL)) { + old_static = zebra_evpn_neigh_is_static(n); + if (IS_ZEBRA_DEBUG_EVPN_MH_NEIGH) + zlog_debug( + "sync->remote neigh vni %u ip %pIA mac %pEA seq %d f0x%x", + n->zevpn->vni, &n->ip, &n->emac, + seq, n->flags); + if (IS_ZEBRA_NEIGH_ACTIVE(n)) + zebra_evpn_neigh_send_del_to_client( + zevpn->vni, &n->ip, &n->emac, + n->flags, n->state, + false /*force*/); + zebra_evpn_neigh_clear_sync_info(n); + } + if (memcmp(&n->emac, &mac->macaddr, + sizeof(struct ethaddr)) + != 0) { + /* update neigh list for macs */ + old_mac = + zebra_evpn_mac_lookup(zevpn, &n->emac); + if (old_mac) { + listnode_delete(old_mac->neigh_list, n); + n->mac = NULL; + zebra_evpn_deref_ip2mac(zevpn, old_mac); + } + n->mac = mac; + listnode_add_sort(mac->neigh_list, n); + memcpy(&n->emac, &mac->macaddr, ETH_ALEN); + + /* Check Neigh's current state is local + * (this is the case where neigh/host has moved + * from L->R) and check previous detection + * started via local learning. + * + * RFC-7432: A PE/VTEP that detects a MAC + * mobility event via local learning starts + * an M-second timer. + * VTEP-IP or seq. 
change alone is not + * considered for dup. detection. + * + * Mobility event scenario-B IP-MAC binding + * changed. + */ + if ((!CHECK_FLAG(n->flags, ZEBRA_NEIGH_REMOTE)) + && n->dad_count) + do_dad = true; + } + } + + /* Set "remote" forwarding info. */ + UNSET_FLAG(n->flags, ZEBRA_NEIGH_ALL_LOCAL_FLAGS); + n->r_vtep_ip = vtep_ip; + SET_FLAG(n->flags, ZEBRA_NEIGH_REMOTE); + + /* Set router flag (R-bit) to this Neighbor entry */ + if (CHECK_FLAG(flags, ZEBRA_MACIP_TYPE_ROUTER_FLAG)) + SET_FLAG(n->flags, ZEBRA_NEIGH_ROUTER_FLAG); + else + UNSET_FLAG(n->flags, ZEBRA_NEIGH_ROUTER_FLAG); + + /* Check old or new MAC detected as duplicate, + * inherit duplicate flag to this neigh. + */ + if (zebra_evpn_ip_inherit_dad_from_mac(zvrf, old_mac, mac, n)) { + flog_warn( + EC_ZEBRA_DUP_IP_INHERIT_DETECTED, + "VNI %u: MAC %pEA IP %pIA detected as duplicate during remote update, inherit duplicate from MAC", + zevpn->vni, &mac->macaddr, &n->ip); + } + + /* Check duplicate address detection for IP */ + zebra_evpn_dup_addr_detect_for_neigh( + zvrf, n, n->r_vtep_ip, do_dad, &is_dup_detect, false); + /* Install the entry. */ + if (!is_dup_detect) + zebra_evpn_rem_neigh_install(zevpn, n, old_static); + } + + /* Update seq number. */ + n->rem_seq = seq; +} + +int zebra_evpn_neigh_gw_macip_add(struct interface *ifp, + struct zebra_evpn *zevpn, struct ipaddr *ip, + struct zebra_mac *mac) +{ + struct zebra_neigh *n; + + assert(mac); + + n = zebra_evpn_neigh_lookup(zevpn, ip); + if (!n) + n = zebra_evpn_neigh_add(zevpn, ip, &mac->macaddr, mac, 0); + + /* Set "local" forwarding info. 
*/ + SET_FLAG(n->flags, ZEBRA_NEIGH_LOCAL); + ZEBRA_NEIGH_SET_ACTIVE(n); + memcpy(&n->emac, &mac->macaddr, ETH_ALEN); + n->ifindex = ifp->ifindex; + + /* Only advertise in BGP if the knob is enabled */ + if (advertise_gw_macip_enabled(zevpn)) { + + SET_FLAG(n->flags, ZEBRA_NEIGH_DEF_GW); + /* Set Router flag (R-bit) */ + if (ip->ipa_type == IPADDR_V6) + SET_FLAG(n->flags, ZEBRA_NEIGH_ROUTER_FLAG); + + if (IS_ZEBRA_DEBUG_VXLAN) + zlog_debug( + "SVI %s(%u) L2-VNI %u, sending GW MAC %pEA IP %pIA add to BGP with flags 0x%x", + ifp->name, ifp->ifindex, zevpn->vni, + &mac->macaddr, ip, n->flags); + + zebra_evpn_neigh_send_add_to_client( + zevpn->vni, ip, &n->emac, n->mac, n->flags, n->loc_seq); + } else if (advertise_svi_macip_enabled(zevpn)) { + + SET_FLAG(n->flags, ZEBRA_NEIGH_SVI_IP); + if (IS_ZEBRA_DEBUG_VXLAN) + zlog_debug( + "SVI %s(%u) L2-VNI %u, sending SVI MAC %pEA IP %pIA add to BGP with flags 0x%x", + ifp->name, ifp->ifindex, zevpn->vni, + &mac->macaddr, ip, n->flags); + + zebra_evpn_neigh_send_add_to_client( + zevpn->vni, ip, &n->emac, n->mac, n->flags, n->loc_seq); + } + + return 0; +} + +void zebra_evpn_neigh_remote_uninstall(struct zebra_evpn *zevpn, + struct zebra_vrf *zvrf, + struct zebra_neigh *n, + struct zebra_mac *mac, + const struct ipaddr *ipaddr) +{ + if (zvrf->dad_freeze && CHECK_FLAG(n->flags, ZEBRA_NEIGH_DUPLICATE) + && CHECK_FLAG(n->flags, ZEBRA_NEIGH_REMOTE) + && (memcmp(n->emac.octet, mac->macaddr.octet, ETH_ALEN) == 0)) { + struct interface *vlan_if; + + vlan_if = zevpn_map_to_svi(zevpn); + if (IS_ZEBRA_DEBUG_VXLAN) + zlog_debug( + "%s: IP %pIA (flags 0x%x intf %s) is remote and duplicate, read kernel for local entry", + __func__, ipaddr, n->flags, + vlan_if ? vlan_if->name : "Unknown"); + if (vlan_if) + neigh_read_specific_ip(ipaddr, vlan_if); + } + + /* When the MAC changes for an IP, it is possible the + * client may update the new MAC before trying to delete the + * "old" neighbor (as these are two different MACIP routes). 
+ * Do the delete only if the MAC matches. + */ + if (!memcmp(n->emac.octet, mac->macaddr.octet, ETH_ALEN)) { + if (CHECK_FLAG(n->flags, ZEBRA_NEIGH_LOCAL)) { + zebra_evpn_sync_neigh_del(n); + } else if (CHECK_FLAG(n->flags, ZEBRA_NEIGH_REMOTE)) { + zebra_evpn_neigh_uninstall(zevpn, n); + zebra_evpn_neigh_del(zevpn, n); + zebra_evpn_deref_ip2mac(zevpn, mac); + } + } +} + +int zebra_evpn_neigh_del_ip(struct zebra_evpn *zevpn, const struct ipaddr *ip) +{ + struct zebra_neigh *n; + struct zebra_mac *zmac; + bool old_bgp_ready; + bool new_bgp_ready; + struct zebra_vrf *zvrf; + + /* If entry doesn't exist, nothing to do. */ + n = zebra_evpn_neigh_lookup(zevpn, ip); + if (!n) + return 0; + + zmac = zebra_evpn_mac_lookup(zevpn, &n->emac); + if (!zmac) { + if (IS_ZEBRA_DEBUG_VXLAN) + zlog_debug( + "Trying to del a neigh %pIA without a mac %pEA on VNI %u", + ip, &n->emac, + zevpn->vni); + + return 0; + } + + /* If it is a remote entry, the kernel has aged this out or someone has + * deleted it, it needs to be re-installed as FRR is the owner. 
+ */ + if (CHECK_FLAG(n->flags, ZEBRA_NEIGH_REMOTE)) { + zebra_evpn_rem_neigh_install(zevpn, n, false /*was_static*/); + return 0; + } + + /* if this is a sync entry it cannot be dropped re-install it in + * the dataplane + */ + old_bgp_ready = zebra_evpn_neigh_is_ready_for_bgp(n); + if (zebra_evpn_neigh_is_static(n)) { + if (IS_ZEBRA_DEBUG_EVPN_MH_NEIGH) + zlog_debug("re-add sync neigh vni %u ip %pIA mac %pEA 0x%x", + n->zevpn->vni, &n->ip, &n->emac, + n->flags); + + if (!CHECK_FLAG(n->flags, ZEBRA_NEIGH_LOCAL_INACTIVE)) + SET_FLAG(n->flags, ZEBRA_NEIGH_LOCAL_INACTIVE); + /* inform-bgp about change in local-activity if any */ + new_bgp_ready = zebra_evpn_neigh_is_ready_for_bgp(n); + zebra_evpn_neigh_send_add_del_to_client(n, old_bgp_ready, + new_bgp_ready); + + /* re-install the entry in the kernel */ + zebra_evpn_sync_neigh_dp_install(n, false /* set_inactive */, + false /* force_clear_static */, + __func__); + + return 0; + } + + zvrf = zevpn->vxlan_if->vrf->info; + if (!zvrf) { + zlog_debug("%s: VNI %u vrf lookup failed.", __func__, + zevpn->vni); + return -1; + } + + /* In case of feeze action, if local neigh is in duplicate state, + * Mark the Neigh as inactive before sending delete request to BGPd, + * If BGPd has remote entry, it will re-install + */ + if (zvrf->dad_freeze && CHECK_FLAG(n->flags, ZEBRA_NEIGH_DUPLICATE)) + ZEBRA_NEIGH_SET_INACTIVE(n); + + /* Remove neighbor from BGP. */ + zebra_evpn_neigh_send_del_to_client(zevpn->vni, &n->ip, &n->emac, + n->flags, n->state, + false /* force */); + + /* Delete this neighbor entry. 
*/ + zebra_evpn_neigh_del(zevpn, n); + + /* see if the AUTO mac needs to be deleted */ + if (CHECK_FLAG(zmac->flags, ZEBRA_MAC_AUTO) + && !zebra_evpn_mac_in_use(zmac)) + zebra_evpn_mac_del(zevpn, zmac); + + return 0; +} diff --git a/zebra/zebra_evpn_neigh.h b/zebra/zebra_evpn_neigh.h new file mode 100644 index 0000000..c779109 --- /dev/null +++ b/zebra/zebra_evpn_neigh.h @@ -0,0 +1,293 @@ +/* + * Zebra EVPN Neighbor Data structures and definitions + * These are "internal" to this function. + * Copyright (C) 2016, 2017 Cumulus Networks, Inc. + * Copyright (C) 2020 Volta Networks. + * + * This file is part of FRR. + * + * FRR is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License as published by the + * Free Software Foundation; either version 2, or (at your option) any + * later version. + * + * FRR is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with FRR; see the file COPYING. If not, write to the Free + * Software Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA + * 02111-1307, USA. + */ + +#ifndef _ZEBRA_EVPN_NEIGH_H +#define _ZEBRA_EVPN_NEIGH_H + +#ifdef __cplusplus +extern "C" { +#endif + +#define IS_ZEBRA_NEIGH_ACTIVE(n) (n->state == ZEBRA_NEIGH_ACTIVE) + +#define IS_ZEBRA_NEIGH_INACTIVE(n) (n->state == ZEBRA_NEIGH_INACTIVE) + +#define ZEBRA_NEIGH_SET_ACTIVE(n) n->state = ZEBRA_NEIGH_ACTIVE + +#define ZEBRA_NEIGH_SET_INACTIVE(n) n->state = ZEBRA_NEIGH_INACTIVE + +/* + * Neighbor hash table. + * + * This table contains the neighbors (IP to MAC bindings) pertaining to + * this VNI. This includes local neighbors learnt on the attached VLAN + * device that maps to this VNI as well as remote neighbors learnt and + * installed by BGP. 
+ * Local neighbors will be known against the VLAN device (SVI); however,
+ * it is sufficient for zebra to maintain against the VNI. The correct
+ * VNI will be obtained as zebra maintains the mapping (of VLAN to VNI).
+ */
+struct zebra_neigh {
+	/* IP address. */
+	struct ipaddr ip;
+
+	/* MAC address. */
+	struct ethaddr emac;
+
+	/* Back pointer to MAC. Only applicable to hosts in a L2-VNI. */
+	struct zebra_mac *mac;
+
+	/* Underlying interface. */
+	ifindex_t ifindex;
+
+	struct zebra_evpn *zevpn;
+
+	uint32_t flags;
+#define ZEBRA_NEIGH_LOCAL 0x01
+#define ZEBRA_NEIGH_REMOTE 0x02
+#define ZEBRA_NEIGH_REMOTE_NH 0x04 /* neigh entry for remote vtep */
+#define ZEBRA_NEIGH_DEF_GW 0x08
+#define ZEBRA_NEIGH_ROUTER_FLAG 0x10
+#define ZEBRA_NEIGH_DUPLICATE 0x20
+#define ZEBRA_NEIGH_SVI_IP 0x40
+/* rxed from an ES peer */
+#define ZEBRA_NEIGH_ES_PEER_ACTIVE 0x80
+/* rxed from an ES peer as a proxy advertisement */
+#define ZEBRA_NEIGH_ES_PEER_PROXY 0x100
+/* We have not been able to independently establish that the host
+ * is local connected
+ */
+#define ZEBRA_NEIGH_LOCAL_INACTIVE 0x200
+#define ZEBRA_NEIGH_ALL_LOCAL_FLAGS                                            \
+	(ZEBRA_NEIGH_LOCAL | ZEBRA_NEIGH_LOCAL_INACTIVE)
+#define ZEBRA_NEIGH_ALL_PEER_FLAGS                                             \
+	(ZEBRA_NEIGH_ES_PEER_PROXY | ZEBRA_NEIGH_ES_PEER_ACTIVE)
+
+	enum zebra_neigh_state state;
+
+	/* Remote VTEP IP - applicable only for remote neighbors. */
+	struct in_addr r_vtep_ip;
+
+	/*
+	 * Mobility sequence numbers associated with this entry. The rem_seq
+	 * represents the sequence number from the client (BGP) for the most
+	 * recent add or update of this entry while the loc_seq represents
+	 * the sequence number informed (or to be informed) by zebra to BGP
+	 * for this entry.
+	 */
+	uint32_t rem_seq;
+	uint32_t loc_seq;
+
+	/* list of hosts pointing to this remote NH entry */
+	struct host_rb_tree_entry host_rb;
+
+	/* Duplicate ip detection */
+	uint32_t dad_count;
+
+	struct thread *dad_ip_auto_recovery_timer;
+
+	struct timeval detect_start_time;
+
+	time_t dad_dup_detect_time;
+
+	time_t uptime;
+
+	/* used for ageing out the PEER_ACTIVE flag */
+	struct thread *hold_timer;
+};
+
+/*
+ * Context for neighbor hash walk - used by callbacks.
+ */
+struct neigh_walk_ctx {
+	struct zebra_evpn *zevpn;     /* VNI hash */
+	struct zebra_vrf *zvrf;	      /* VRF - for client notification. */
+	int uninstall;		      /* uninstall from kernel? */
+	int upd_client;		      /* uninstall from client? */
+
+	uint32_t flags;
+#define DEL_LOCAL_NEIGH 0x1
+#define DEL_REMOTE_NEIGH 0x2
+#define DEL_ALL_NEIGH (DEL_LOCAL_NEIGH | DEL_REMOTE_NEIGH)
+#define DEL_REMOTE_NEIGH_FROM_VTEP 0x4
+#define SHOW_REMOTE_NEIGH_FROM_VTEP 0x8
+
+	struct in_addr r_vtep_ip; /* To walk neighbors from specific VTEP */
+
+	struct vty *vty;	  /* Used by VTY handlers */
+	uint32_t count;		  /* Used by VTY handlers */
+	uint8_t addr_width;	  /* Used by VTY handlers */
+	struct json_object *json; /* Used for JSON Output */
+};
+
+/**************************** SYNC neigh handling **************************/
+/* A neighbor is "static" while any ES peer flag (active or proxy) is set. */
+static inline bool zebra_evpn_neigh_is_static(struct zebra_neigh *neigh)
+{
+	return !!(neigh->flags & ZEBRA_NEIGH_ALL_PEER_FLAGS);
+}
+
+/* A neighbor is advertised to BGP only when its MAC is local and the
+ * neighbor itself is a local, active entry (or held active by an ES peer).
+ */
+static inline bool zebra_evpn_neigh_is_ready_for_bgp(struct zebra_neigh *n)
+{
+	bool mac_ready;
+	bool neigh_ready;
+
+	mac_ready = !!(n->mac->flags & ZEBRA_MAC_LOCAL);
+	neigh_ready =
+		((n->flags & ZEBRA_NEIGH_LOCAL) && IS_ZEBRA_NEIGH_ACTIVE(n)
+		 && (!(n->flags & ZEBRA_NEIGH_LOCAL_INACTIVE)
+		     || (n->flags & ZEBRA_NEIGH_ES_PEER_ACTIVE)))
+			? true
+			: false;
+
+	return mac_ready && neigh_ready;
+}
+
+static inline void zebra_evpn_neigh_stop_hold_timer(struct zebra_neigh *n)
+{
+	if (!n->hold_timer)
+		return;
+
+	if (IS_ZEBRA_DEBUG_EVPN_MH_NEIGH)
+		zlog_debug("sync-neigh vni %u ip %pIA mac %pEA 0x%x hold stop",
+			   n->zevpn->vni, &n->ip, &n->emac, n->flags);
+	THREAD_OFF(n->hold_timer);
+}
+
+void zebra_evpn_sync_neigh_static_chg(struct zebra_neigh *n, bool old_n_static,
+				      bool new_n_static, bool defer_n_dp,
+				      bool defer_mac_dp, const char *caller);
+
+static inline bool zebra_evpn_neigh_clear_sync_info(struct zebra_neigh *n)
+{
+	bool old_n_static = false;
+	bool new_n_static = false;
+
+	if (n->flags & ZEBRA_NEIGH_ALL_PEER_FLAGS) {
+		if (IS_ZEBRA_DEBUG_EVPN_MH_NEIGH)
+			zlog_debug("sync-neigh vni %u ip %pIA mac %pEA 0x%x clear",
+				   n->zevpn->vni, &n->ip, &n->emac, n->flags);
+
+		old_n_static = zebra_evpn_neigh_is_static(n);
+		UNSET_FLAG(n->flags, ZEBRA_NEIGH_ALL_PEER_FLAGS);
+		new_n_static = zebra_evpn_neigh_is_static(n);
+		if (old_n_static != new_n_static)
+			zebra_evpn_sync_neigh_static_chg(
+				n, old_n_static, new_n_static,
+				true /*defer_dp*/, false /*defer_mac_dp*/,
+				__func__);
+	}
+	zebra_evpn_neigh_stop_hold_timer(n);
+
+	/* if the neigh static flag changed inform that a dp
+	 * re-install maybe needed
+	 */
+	return old_n_static != new_n_static;
+}
+
+int remote_neigh_count(struct zebra_mac *zmac);
+
+int neigh_list_cmp(void *p1, void *p2);
+struct hash *zebra_neigh_db_create(const char *desc);
+uint32_t num_dup_detected_neighs(struct zebra_evpn *zevpn);
+void zebra_evpn_find_neigh_addr_width(struct hash_bucket *bucket, void *ctxt);
+int zebra_evpn_rem_neigh_install(struct zebra_evpn *zevpn,
+				 struct zebra_neigh *n, bool was_static);
+void zebra_evpn_install_neigh_hash(struct hash_bucket *bucket, void *ctxt);
+int zebra_evpn_neigh_send_add_to_client(vni_t vni, const struct ipaddr *ip,
+					const struct ethaddr *macaddr,
+					struct zebra_mac *zmac,
+					uint32_t neigh_flags, uint32_t seq);
+int zebra_evpn_neigh_send_del_to_client(vni_t vni, struct ipaddr *ip,
+					struct ethaddr *macaddr, uint32_t flags,
+					int state, bool force);
+bool zebra_evpn_neigh_is_bgp_seq_ok(struct zebra_evpn *zevpn,
+				    struct zebra_neigh *n,
+				    const struct ethaddr *macaddr, uint32_t seq,
+				    bool sync);
+int zebra_evpn_neigh_del(struct zebra_evpn *zevpn, struct zebra_neigh *n);
+void zebra_evpn_sync_neigh_del(struct zebra_neigh *n);
+struct zebra_neigh *zebra_evpn_proc_sync_neigh_update(
+	struct zebra_evpn *zevpn, struct zebra_neigh *n, uint16_t ipa_len,
+	const struct ipaddr *ipaddr, uint8_t flags, uint32_t seq,
+	const esi_t *esi, struct sync_mac_ip_ctx *ctx);
+void zebra_evpn_neigh_del_all(struct zebra_evpn *zevpn, int uninstall,
+			      int upd_client, uint32_t flags);
+struct zebra_neigh *zebra_evpn_neigh_lookup(struct zebra_evpn *zevpn,
+					    const struct ipaddr *ip);
+
+void zebra_evpn_process_neigh_on_remote_mac_add(struct zebra_evpn *zevpn,
+						struct zebra_mac *zmac);
+void zebra_evpn_process_neigh_on_local_mac_del(struct zebra_evpn *zevpn,
+					       struct zebra_mac *zmac);
+void zebra_evpn_process_neigh_on_local_mac_change(struct zebra_evpn *zevpn,
+						  struct zebra_mac *zmac,
+						  bool seq_change,
+						  bool es_change);
+void zebra_evpn_process_neigh_on_remote_mac_del(struct zebra_evpn *zevpn,
+						struct zebra_mac *zmac);
+int zebra_evpn_local_neigh_update(struct zebra_evpn *zevpn,
+				  struct interface *ifp,
+				  const struct ipaddr *ip,
+				  const struct ethaddr *macaddr, bool is_router,
+				  bool local_inactive, bool dp_static);
+int zebra_evpn_remote_neigh_update(struct zebra_evpn *zevpn,
+				   struct interface *ifp,
+				   const struct ipaddr *ip,
+				   const struct ethaddr *macaddr,
+				   uint16_t state);
+void zebra_evpn_send_neigh_to_client(struct zebra_evpn *zevpn);
+void zebra_evpn_clear_dup_neigh_hash(struct hash_bucket *bucket, void *ctxt);
+void zebra_evpn_print_neigh(struct zebra_neigh *n, void *ctxt,
+			    json_object *json);
+void zebra_evpn_print_neigh_hash(struct hash_bucket *bucket, void *ctxt);
+void zebra_evpn_print_neigh_hdr(struct vty *vty, struct neigh_walk_ctx *wctx);
+void zebra_evpn_print_neigh_hash_detail(struct hash_bucket *bucket, void *ctxt);
+void zebra_evpn_print_dad_neigh_hash(struct hash_bucket *bucket, void *ctxt);
+void zebra_evpn_print_dad_neigh_hash_detail(struct hash_bucket *bucket,
+					    void *ctxt);
+void zebra_evpn_neigh_remote_macip_add(struct zebra_evpn *zevpn,
+				       struct zebra_vrf *zvrf,
+				       const struct ipaddr *ipaddr,
+				       struct zebra_mac *mac,
+				       struct in_addr vtep_ip, uint8_t flags,
+				       uint32_t seq);
+int zebra_evpn_neigh_gw_macip_add(struct interface *ifp,
+				  struct zebra_evpn *zevpn, struct ipaddr *ip,
+				  struct zebra_mac *mac);
+void zebra_evpn_neigh_remote_uninstall(struct zebra_evpn *zevpn,
+				       struct zebra_vrf *zvrf,
+				       struct zebra_neigh *n,
+				       struct zebra_mac *mac,
+				       const struct ipaddr *ipaddr);
+int zebra_evpn_neigh_del_ip(struct zebra_evpn *zevpn, const struct ipaddr *ip);
+
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif /*_ZEBRA_EVPN_NEIGH_H */
diff --git a/zebra/zebra_evpn_vxlan.h b/zebra/zebra_evpn_vxlan.h
new file mode 100644
index 0000000..3884a1e
--- /dev/null
+++ b/zebra/zebra_evpn_vxlan.h
@@ -0,0 +1,71 @@
+/*
+ * Zebra EVPN for VxLAN code
+ * Copyright (C) 2016, 2017 Cumulus Networks, Inc.
+ *
+ * This file is part of FRR.
+ *
+ * FRR is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License as published by the
+ * Free Software Foundation; either version 2, or (at your option) any
+ * later version.
+ *
+ * FRR is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * General Public License for more details.
+ * + * You should have received a copy of the GNU General Public License + * along with FRR; see the file COPYING. If not, write to the Free + * Software Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA + * 02111-1307, USA. + */ + +/* Get the VRR interface for SVI if any */ +static inline struct interface * +zebra_get_vrr_intf_for_svi(struct interface *ifp) +{ + struct zebra_vrf *zvrf = NULL; + struct interface *tmp_if = NULL; + struct zebra_if *zif = NULL; + + zvrf = ifp->vrf->info; + assert(zvrf); + + FOR_ALL_INTERFACES (zvrf->vrf, tmp_if) { + zif = tmp_if->info; + if (!zif) + continue; + + if (!IS_ZEBRA_IF_MACVLAN(tmp_if)) + continue; + + if (zif->link == ifp) + return tmp_if; + } + + return NULL; +} + +/* EVPN<=>vxlan_zif association */ +static inline void zevpn_vxlan_if_set(struct zebra_evpn *zevpn, + struct interface *ifp, bool set) +{ + struct zebra_if *zif; + + if (set) { + if (zevpn->vxlan_if == ifp) + return; + zevpn->vxlan_if = ifp; + } else { + if (!zevpn->vxlan_if) + return; + zevpn->vxlan_if = NULL; + } + + if (ifp) + zif = ifp->info; + else + zif = NULL; + + zebra_evpn_vxl_evpn_set(zif, zevpn, set); +} diff --git a/zebra/zebra_fpm.c b/zebra/zebra_fpm.c new file mode 100644 index 0000000..1b27533 --- /dev/null +++ b/zebra/zebra_fpm.c @@ -0,0 +1,2075 @@ +/* + * Main implementation file for interface to Forwarding Plane Manager. + * + * Copyright (C) 2012 by Open Source Routing. + * Copyright (C) 2012 by Internet Systems Consortium, Inc. ("ISC") + * + * This file is part of GNU Zebra. + * + * GNU Zebra is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License as published by the + * Free Software Foundation; either version 2, or (at your option) any + * later version. + * + * GNU Zebra is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. 
See the GNU
+ * General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License along
+ * with this program; see the file COPYING; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+#include <zebra.h>
+
+#include "log.h"
+#include "libfrr.h"
+#include "stream.h"
+#include "thread.h"
+#include "network.h"
+#include "command.h"
+#include "lib/version.h"
+#include "jhash.h"
+
+#include "zebra/rib.h"
+#include "zebra/zserv.h"
+#include "zebra/zebra_ns.h"
+#include "zebra/zebra_vrf.h"
+#include "zebra/zebra_errors.h"
+
+#include "fpm/fpm.h"
+#include "zebra_fpm_private.h"
+#include "zebra/zebra_router.h"
+#include "zebra_vxlan_private.h"
+
+DEFINE_MTYPE_STATIC(ZEBRA, FPM_MAC_INFO, "FPM_MAC_INFO");
+
+/*
+ * Interval at which we attempt to connect to the FPM.
+ */
+#define ZFPM_CONNECT_RETRY_IVL 5
+
+/*
+ * Sizes of outgoing and incoming stream buffers for writing/reading
+ * FPM messages.
+ */
+#define ZFPM_OBUF_SIZE (2 * FPM_MAX_MSG_LEN)
+#define ZFPM_IBUF_SIZE (FPM_MAX_MSG_LEN)
+
+/*
+ * The maximum number of times the FPM socket write callback can call
+ * 'write' before it yields.
+ */
+#define ZFPM_MAX_WRITES_PER_RUN 10
+
+/*
+ * Interval over which we collect statistics.
+ */
+#define ZFPM_STATS_IVL_SECS 10
+#define FPM_MAX_MAC_MSG_LEN 512
+
+static void zfpm_iterate_rmac_table(struct hash_bucket *bucket, void *args);
+
+/*
+ * Structure that holds state for iterating over all route_node
+ * structures that are candidates for being communicated to the FPM.
+ */
+struct zfpm_rnodes_iter {
+	rib_tables_iter_t tables_iter;
+	route_table_iter_t iter;
+};
+
+/*
+ * Statistics.
+ *
+ * NOTE(review): zfpm_stats_compose() below walks this struct as a flat
+ * array of unsigned long, so every member must stay an unsigned long
+ * counter.
+ */
+struct zfpm_stats {
+	unsigned long connect_calls;
+	unsigned long connect_no_sock;
+
+	unsigned long read_cb_calls;
+
+	unsigned long write_cb_calls;
+	unsigned long write_calls;
+	unsigned long partial_writes;
+	unsigned long max_writes_hit;
+	unsigned long t_write_yields;
+
+	unsigned long nop_deletes_skipped;
+	unsigned long route_adds;
+	unsigned long route_dels;
+
+	unsigned long updates_triggered;
+	unsigned long redundant_triggers;
+
+	unsigned long dests_del_after_update;
+
+	unsigned long t_conn_down_starts;
+	unsigned long t_conn_down_dests_processed;
+	unsigned long t_conn_down_yields;
+	unsigned long t_conn_down_finishes;
+
+	unsigned long t_conn_up_starts;
+	unsigned long t_conn_up_dests_processed;
+	unsigned long t_conn_up_yields;
+	unsigned long t_conn_up_aborts;
+	unsigned long t_conn_up_finishes;
+};
+
+/*
+ * States for the FPM state machine.
+ */
+enum zfpm_state {
+
+	/*
+	 * In this state we are not yet ready to connect to the FPM. This
+	 * can happen when this module is disabled, or if we're cleaning up
+	 * after a connection has gone down.
+	 */
+	ZFPM_STATE_IDLE,
+
+	/*
+	 * Ready to talk to the FPM and periodically trying to connect to
+	 * it.
+	 */
+	ZFPM_STATE_ACTIVE,
+
+	/*
+	 * In the middle of bringing up a TCP connection. Specifically,
+	 * waiting for a connect() call to complete asynchronously.
+	 */
+	ZFPM_STATE_CONNECTING,
+
+	/*
+	 * TCP connection to the FPM is up.
+	 */
+	ZFPM_STATE_ESTABLISHED
+
+};
+
+/*
+ * Message format to be used to communicate with the FPM.
+ */
+enum zfpm_msg_format {
+	ZFPM_MSG_FORMAT_NONE,
+	ZFPM_MSG_FORMAT_NETLINK,
+	ZFPM_MSG_FORMAT_PROTOBUF,
+};
+
+/*
+ * Globals.
+ */
+struct zfpm_glob {
+
+	/*
+	 * True if the FPM module has been enabled.
+	 */
+	int enabled;
+
+	/*
+	 * Message format to be used to communicate with the fpm.
+	 */
+	enum zfpm_msg_format message_format;
+
+	struct thread_master *master;
+
+	enum zfpm_state state;
+
+	in_addr_t fpm_server;
+	/*
+	 * Port on which the FPM is running.
+	 */
+	int fpm_port;
+
+	/*
+	 * List of rib_dest_t structures to be processed
+	 */
+	TAILQ_HEAD(zfpm_dest_q, rib_dest_t_) dest_q;
+
+	/*
+	 * List of fpm_mac_info structures to be processed
+	 */
+	TAILQ_HEAD(zfpm_mac_q, fpm_mac_info_t) mac_q;
+
+	/*
+	 * Hash table of fpm_mac_info_t entries
+	 *
+	 * While adding fpm_mac_info_t for a MAC to the mac_q,
+	 * it is possible that another fpm_mac_info_t node for the this MAC
+	 * is already present in the queue.
+	 * This is possible in the case of consecutive add->delete operations.
+	 * To avoid such duplicate insertions in the mac_q,
+	 * define a hash table for fpm_mac_info_t which can be looked up
+	 * to see if an fpm_mac_info_t node for a MAC is already present
+	 * in the mac_q.
+	 */
+	struct hash *fpm_mac_info_table;
+
+	/*
+	 * Stream socket to the FPM.
+	 */
+	int sock;
+
+	/*
+	 * Buffers for messages to/from the FPM.
+	 */
+	struct stream *obuf;
+	struct stream *ibuf;
+
+	/*
+	 * Threads for I/O.
+	 */
+	struct thread *t_connect;
+	struct thread *t_write;
+	struct thread *t_read;
+
+	/*
+	 * Thread to clean up after the TCP connection to the FPM goes down
+	 * and the state that belongs to it.
+	 */
+	struct thread *t_conn_down;
+
+	struct {
+		struct zfpm_rnodes_iter iter;
+	} t_conn_down_state;
+
+	/*
+	 * Thread to take actions once the TCP conn to the FPM comes up, and
+	 * the state that belongs to it.
+	 */
+	struct thread *t_conn_up;
+
+	struct {
+		struct zfpm_rnodes_iter iter;
+	} t_conn_up_state;
+
+	unsigned long connect_calls;
+	time_t last_connect_call_time;
+
+	/*
+	 * Stats from the start of the current statistics interval up to
+	 * now. These are the counters we typically update in the code.
+	 */
+	struct zfpm_stats stats;
+
+	/*
+	 * Statistics that were gathered in the last collection interval.
+	 */
+	struct zfpm_stats last_ivl_stats;
+
+	/*
+	 * Cumulative stats from the last clear to the start of the current
+	 * statistics interval.
+	 */
+	struct zfpm_stats cumulative_stats;
+
+	/*
+	 * Stats interval timer.
+	 */
+	struct thread *t_stats;
+
+	/*
+	 * If non-zero, the last time when statistics were cleared.
+	 */
+	time_t last_stats_clear_time;
+
+	/*
+	 * Flag to track the MAC dump status to FPM
+	 */
+	bool fpm_mac_dump_done;
+};
+
+static struct zfpm_glob zfpm_glob_space;
+static struct zfpm_glob *zfpm_g = &zfpm_glob_space;
+
+static int zfpm_trigger_update(struct route_node *rn, const char *reason);
+
+static void zfpm_read_cb(struct thread *thread);
+static void zfpm_write_cb(struct thread *thread);
+
+static void zfpm_set_state(enum zfpm_state state, const char *reason);
+static void zfpm_start_connect_timer(const char *reason);
+static void zfpm_start_stats_timer(void);
+static void zfpm_mac_info_del(struct fpm_mac_info_t *fpm_mac);
+
+static const char ipv4_ll_buf[16] = "169.254.0.1";
+union g_addr ipv4ll_gateway;
+
+/*
+ * zfpm_thread_should_yield
+ *
+ * Thin wrapper so the yield policy can be changed in one place.
+ */
+static inline int zfpm_thread_should_yield(struct thread *t)
+{
+	return thread_should_yield(t);
+}
+
+/*
+ * zfpm_state_to_str
+ *
+ * Human-readable name for an FPM state machine state (for logs/VTY).
+ */
+static const char *zfpm_state_to_str(enum zfpm_state state)
+{
+	switch (state) {
+
+	case ZFPM_STATE_IDLE:
+		return "idle";
+
+	case ZFPM_STATE_ACTIVE:
+		return "active";
+
+	case ZFPM_STATE_CONNECTING:
+		return "connecting";
+
+	case ZFPM_STATE_ESTABLISHED:
+		return "established";
+
+	default:
+		return "unknown";
+	}
+}
+
+/*
+ * zfpm_get_elapsed_time
+ *
+ * Returns the time elapsed (in seconds) since the given time.
+ */
+static time_t zfpm_get_elapsed_time(time_t reference)
+{
+	time_t now;
+
+	now = monotime(NULL);
+
+	/* Monotonic time cannot run backwards; treat it as a bug. */
+	if (now < reference) {
+		assert(0);
+		return 0;
+	}
+
+	return now - reference;
+}
+
+/*
+ * zfpm_rnodes_iter_init
+ */
+static inline void zfpm_rnodes_iter_init(struct zfpm_rnodes_iter *iter)
+{
+	memset(iter, 0, sizeof(*iter));
+	rib_tables_iter_init(&iter->tables_iter);
+
+	/*
+	 * This is a hack, but it makes implementing 'next' easier by
+	 * ensuring that route_table_iter_next() will return NULL the first
+	 * time we call it.
+	 */
+	route_table_iter_init(&iter->iter, NULL);
+	route_table_iter_cleanup(&iter->iter);
+}
+
+/*
+ * zfpm_rnodes_iter_next
+ *
+ * Returns the next route_node across all RIB tables, or NULL when the
+ * walk is exhausted.
+ */
+static inline struct route_node *
+zfpm_rnodes_iter_next(struct zfpm_rnodes_iter *iter)
+{
+	struct route_node *rn;
+	struct route_table *table;
+
+	while (1) {
+		rn = route_table_iter_next(&iter->iter);
+		if (rn)
+			return rn;
+
+		/*
+		 * We've made our way through this table, go to the next one.
+		 */
+		route_table_iter_cleanup(&iter->iter);
+
+		table = rib_tables_iter_next(&iter->tables_iter);
+
+		if (!table)
+			return NULL;
+
+		route_table_iter_init(&iter->iter, table);
+	}
+
+	return NULL;
+}
+
+/*
+ * zfpm_rnodes_iter_pause
+ */
+static inline void zfpm_rnodes_iter_pause(struct zfpm_rnodes_iter *iter)
+{
+	route_table_iter_pause(&iter->iter);
+}
+
+/*
+ * zfpm_rnodes_iter_cleanup
+ */
+static inline void zfpm_rnodes_iter_cleanup(struct zfpm_rnodes_iter *iter)
+{
+	route_table_iter_cleanup(&iter->iter);
+	rib_tables_iter_cleanup(&iter->tables_iter);
+}
+
+/*
+ * zfpm_stats_init
+ *
+ * Initialize a statistics block.
+ */
+static inline void zfpm_stats_init(struct zfpm_stats *stats)
+{
+	memset(stats, 0, sizeof(*stats));
+}
+
+/*
+ * zfpm_stats_reset
+ */
+static inline void zfpm_stats_reset(struct zfpm_stats *stats)
+{
+	zfpm_stats_init(stats);
+}
+
+/*
+ * zfpm_stats_copy
+ */
+static inline void zfpm_stats_copy(const struct zfpm_stats *src,
+				   struct zfpm_stats *dest)
+{
+	memcpy(dest, src, sizeof(*dest));
+}
+
+/*
+ * zfpm_stats_compose
+ *
+ * Total up the statistics in two stats structures ('s1 and 's2') and
+ * return the result in the third argument, 'result'. Note that the
+ * pointer 'result' may be the same as 's1' or 's2'.
+ *
+ * For simplicity, the implementation below assumes that the stats
+ * structure is composed entirely of counters. This can easily be
+ * changed when necessary.
+ */
+static void zfpm_stats_compose(const struct zfpm_stats *s1,
+			       const struct zfpm_stats *s2,
+			       struct zfpm_stats *result)
+{
+	const unsigned long *p1, *p2;
+	unsigned long *result_p;
+	int i, num_counters;
+
+	p1 = (const unsigned long *)s1;
+	p2 = (const unsigned long *)s2;
+	result_p = (unsigned long *)result;
+
+	num_counters = (sizeof(struct zfpm_stats) / sizeof(unsigned long));
+
+	for (i = 0; i < num_counters; i++) {
+		result_p[i] = p1[i] + p2[i];
+	}
+}
+
+/*
+ * zfpm_read_on
+ *
+ * Schedule the read callback on the FPM socket.
+ */
+static inline void zfpm_read_on(void)
+{
+	assert(!zfpm_g->t_read);
+	assert(zfpm_g->sock >= 0);
+
+	thread_add_read(zfpm_g->master, zfpm_read_cb, 0, zfpm_g->sock,
+			&zfpm_g->t_read);
+}
+
+/*
+ * zfpm_write_on
+ *
+ * Schedule the write callback on the FPM socket.
+ */
+static inline void zfpm_write_on(void)
+{
+	assert(!zfpm_g->t_write);
+	assert(zfpm_g->sock >= 0);
+
+	thread_add_write(zfpm_g->master, zfpm_write_cb, 0, zfpm_g->sock,
+			 &zfpm_g->t_write);
+}
+
+/*
+ * zfpm_read_off
+ */
+static inline void zfpm_read_off(void)
+{
+	THREAD_OFF(zfpm_g->t_read);
+}
+
+/*
+ * zfpm_write_off
+ */
+static inline void zfpm_write_off(void)
+{
+	THREAD_OFF(zfpm_g->t_write);
+}
+
+static inline void zfpm_connect_off(void)
+{
+	THREAD_OFF(zfpm_g->t_connect);
+}
+
+/*
+ * zfpm_conn_up_thread_cb
+ *
+ * Callback for actions to be taken when the connection to the FPM
+ * comes up.
+ */
+static void zfpm_conn_up_thread_cb(struct thread *thread)
+{
+	struct route_node *rnode;
+	struct zfpm_rnodes_iter *iter;
+	rib_dest_t *dest;
+
+	iter = &zfpm_g->t_conn_up_state.iter;
+
+	if (zfpm_g->state != ZFPM_STATE_ESTABLISHED) {
+		zfpm_debug(
+			"Connection not up anymore, conn_up thread aborting");
+		zfpm_g->stats.t_conn_up_aborts++;
+		goto done;
+	}
+
+	if (!zfpm_g->fpm_mac_dump_done) {
+		/* Enqueue FPM updates for all the RMAC entries */
+		hash_iterate(zrouter.l3vni_table, zfpm_iterate_rmac_table,
+			     NULL);
+		/* mark dump done so that its not repeated after yield */
+		zfpm_g->fpm_mac_dump_done = true;
+	}
+
+	while ((rnode = zfpm_rnodes_iter_next(iter))) {
+		dest = rib_dest_from_rnode(rnode);
+
+		if (dest) {
+			zfpm_g->stats.t_conn_up_dests_processed++;
+			zfpm_trigger_update(rnode, NULL);
+		}
+
+		/*
+		 * Yield if need be.
+		 */
+		if (!zfpm_thread_should_yield(thread))
+			continue;
+
+		zfpm_g->stats.t_conn_up_yields++;
+		zfpm_rnodes_iter_pause(iter);
+		thread_add_timer_msec(zfpm_g->master, zfpm_conn_up_thread_cb,
+				      NULL, 0, &zfpm_g->t_conn_up);
+		return;
+	}
+
+	zfpm_g->stats.t_conn_up_finishes++;
+
+done:
+	zfpm_rnodes_iter_cleanup(iter);
+}
+
+/*
+ * zfpm_connection_up
+ *
+ * Called when the connection to the FPM comes up.
+ */
+static void zfpm_connection_up(const char *detail)
+{
+	assert(zfpm_g->sock >= 0);
+	zfpm_read_on();
+	zfpm_write_on();
+	zfpm_set_state(ZFPM_STATE_ESTABLISHED, detail);
+
+	/*
+	 * Start thread to push existing routes to the FPM.
+	 */
+	THREAD_OFF(zfpm_g->t_conn_up);
+
+	zfpm_rnodes_iter_init(&zfpm_g->t_conn_up_state.iter);
+	zfpm_g->fpm_mac_dump_done = false;
+
+	zfpm_debug("Starting conn_up thread");
+
+	thread_add_timer_msec(zfpm_g->master, zfpm_conn_up_thread_cb, NULL, 0,
+			      &zfpm_g->t_conn_up);
+	zfpm_g->stats.t_conn_up_starts++;
+}
+
+/*
+ * zfpm_connect_check
+ *
+ * Check if an asynchronous connect() to the FPM is complete.
+ */
+static void zfpm_connect_check(void)
+{
+	int status;
+	socklen_t slen;
+	int ret;
+
+	zfpm_read_off();
+	zfpm_write_off();
+
+	/* SO_ERROR reports the result of the pending non-blocking connect. */
+	slen = sizeof(status);
+	ret = getsockopt(zfpm_g->sock, SOL_SOCKET, SO_ERROR, (void *)&status,
+			 &slen);
+
+	if (ret >= 0 && status == 0) {
+		zfpm_connection_up("async connect complete");
+		return;
+	}
+
+	/*
+	 * getsockopt() failed or indicated an error on the socket.
+	 */
+	close(zfpm_g->sock);
+	zfpm_g->sock = -1;
+
+	zfpm_start_connect_timer("getsockopt() after async connect failed");
+	return;
+}
+
+/*
+ * zfpm_conn_down_thread_cb
+ *
+ * Callback that is invoked to clean up state after the TCP connection
+ * to the FPM goes down.
+ */
+static void zfpm_conn_down_thread_cb(struct thread *thread)
+{
+	struct route_node *rnode;
+	struct zfpm_rnodes_iter *iter;
+	rib_dest_t *dest;
+	struct fpm_mac_info_t *mac = NULL;
+
+	assert(zfpm_g->state == ZFPM_STATE_IDLE);
+
+	/*
+	 * Delink and free all fpm_mac_info_t nodes
+	 * in the mac_q and fpm_mac_info_hash
+	 */
+	while ((mac = TAILQ_FIRST(&zfpm_g->mac_q)) != NULL)
+		zfpm_mac_info_del(mac);
+
+	zfpm_g->t_conn_down = NULL;
+
+	iter = &zfpm_g->t_conn_down_state.iter;
+
+	while ((rnode = zfpm_rnodes_iter_next(iter))) {
+		dest = rib_dest_from_rnode(rnode);
+
+		if (dest) {
+			if (CHECK_FLAG(dest->flags, RIB_DEST_UPDATE_FPM)) {
+				TAILQ_REMOVE(&zfpm_g->dest_q, dest,
+					     fpm_q_entries);
+			}
+
+			UNSET_FLAG(dest->flags, RIB_DEST_UPDATE_FPM);
+			UNSET_FLAG(dest->flags, RIB_DEST_SENT_TO_FPM);
+
+			zfpm_g->stats.t_conn_down_dests_processed++;
+
+			/*
+			 * Check if the dest should be deleted.
+			 */
+			rib_gc_dest(rnode);
+		}
+
+		/*
+		 * Yield if need be.
+		 */
+		if (!zfpm_thread_should_yield(thread))
+			continue;
+
+		zfpm_g->stats.t_conn_down_yields++;
+		zfpm_rnodes_iter_pause(iter);
+		zfpm_g->t_conn_down = NULL;
+		thread_add_timer_msec(zfpm_g->master, zfpm_conn_down_thread_cb,
+				      NULL, 0, &zfpm_g->t_conn_down);
+		return;
+	}
+
+	zfpm_g->stats.t_conn_down_finishes++;
+	zfpm_rnodes_iter_cleanup(iter);
+
+	/*
+	 * Start the process of connecting to the FPM again.
+	 */
+	zfpm_start_connect_timer("cleanup complete");
+}
+
+/*
+ * zfpm_connection_down
+ *
+ * Called when the connection to the FPM has gone down.
+ */
+static void zfpm_connection_down(const char *detail)
+{
+	if (!detail)
+		detail = "unknown";
+
+	assert(zfpm_g->state == ZFPM_STATE_ESTABLISHED);
+
+	zlog_info("connection to the FPM has gone down: %s", detail);
+
+	zfpm_read_off();
+	zfpm_write_off();
+
+	stream_reset(zfpm_g->ibuf);
+	stream_reset(zfpm_g->obuf);
+
+	if (zfpm_g->sock >= 0) {
+		close(zfpm_g->sock);
+		zfpm_g->sock = -1;
+	}
+
+	/*
+	 * Start thread to clean up state after the connection goes down.
+	 */
+	assert(!zfpm_g->t_conn_down);
+	zfpm_rnodes_iter_init(&zfpm_g->t_conn_down_state.iter);
+	zfpm_g->t_conn_down = NULL;
+	thread_add_timer_msec(zfpm_g->master, zfpm_conn_down_thread_cb, NULL, 0,
+			      &zfpm_g->t_conn_down);
+	zfpm_g->stats.t_conn_down_starts++;
+
+	zfpm_set_state(ZFPM_STATE_IDLE, detail);
+}
+
+/*
+ * zfpm_read_cb
+ *
+ * Reads one FPM message (header first, then payload) from the socket.
+ * Incoming messages are currently discarded after framing validation.
+ */
+static void zfpm_read_cb(struct thread *thread)
+{
+	size_t already;
+	struct stream *ibuf;
+	uint16_t msg_len;
+	fpm_msg_hdr_t *hdr;
+
+	zfpm_g->stats.read_cb_calls++;
+
+	/*
+	 * Check if async connect is now done.
+	 */
+	if (zfpm_g->state == ZFPM_STATE_CONNECTING) {
+		zfpm_connect_check();
+		return;
+	}
+
+	assert(zfpm_g->state == ZFPM_STATE_ESTABLISHED);
+	assert(zfpm_g->sock >= 0);
+
+	ibuf = zfpm_g->ibuf;
+
+	already = stream_get_endp(ibuf);
+	if (already < FPM_MSG_HDR_LEN) {
+		ssize_t nbyte;
+
+		nbyte = stream_read_try(ibuf, zfpm_g->sock,
+					FPM_MSG_HDR_LEN - already);
+		if (nbyte == 0 || nbyte == -1) {
+			if (nbyte == -1) {
+				char buffer[1024];
+
+				snprintf(buffer, sizeof(buffer),
+					 "closed socket in read(%d): %s", errno,
+					 safe_strerror(errno));
+				zfpm_connection_down(buffer);
+			} else
+				zfpm_connection_down("closed socket in read");
+			return;
+		}
+
+		/* Short read: keep the partial header and wait for more. */
+		if (nbyte != (ssize_t)(FPM_MSG_HDR_LEN - already))
+			goto done;
+
+		already = FPM_MSG_HDR_LEN;
+	}
+
+	stream_set_getp(ibuf, 0);
+
+	hdr = (fpm_msg_hdr_t *)stream_pnt(ibuf);
+
+	if (!fpm_msg_hdr_ok(hdr)) {
+		zfpm_connection_down("invalid message header");
+		return;
+	}
+
+	msg_len = fpm_msg_len(hdr);
+
+	/*
+	 * Read out the rest of the packet.
+	 */
+	if (already < msg_len) {
+		ssize_t nbyte;
+
+		nbyte = stream_read_try(ibuf, zfpm_g->sock, msg_len - already);
+
+		if (nbyte == 0 || nbyte == -1) {
+			if (nbyte == -1) {
+				char buffer[1024];
+
+				snprintf(buffer, sizeof(buffer),
+					 "failed to read message(%d) %s", errno,
+					 safe_strerror(errno));
+				zfpm_connection_down(buffer);
+			} else
+				zfpm_connection_down("failed to read message");
+			return;
+		}
+
+		if (nbyte != (ssize_t)(msg_len - already))
+			goto done;
+	}
+
+	/*
+	 * Just throw it away for now.
+	 */
+	stream_reset(ibuf);
+
+done:
+	zfpm_read_on();
+}
+
+/* True if either outbound queue (routes or MACs) has pending entries. */
+static bool zfpm_updates_pending(void)
+{
+	if (!(TAILQ_EMPTY(&zfpm_g->dest_q)) || !(TAILQ_EMPTY(&zfpm_g->mac_q)))
+		return true;
+
+	return false;
+}
+
+/*
+ * zfpm_writes_pending
+ *
+ * Returns true if we may have something to write to the FPM.
+ */
+static int zfpm_writes_pending(void)
+{
+
+	/*
+	 * Check if there is any data in the outbound buffer that has not
+	 * been written to the socket yet.
+	 */
+	if (stream_get_endp(zfpm_g->obuf) - stream_get_getp(zfpm_g->obuf))
+		return 1;
+
+	/*
+	 * Check if there are any updates scheduled on the outbound queues.
+	 */
+	if (zfpm_updates_pending())
+		return 1;
+
+	return 0;
+}
+
+/*
+ * zfpm_encode_route
+ *
+ * Encode a message to the FPM with information about the given route.
+ *
+ * Returns the number of bytes written to the buffer. 0 or a negative
+ * value indicates an error.
+ */
+static inline int zfpm_encode_route(rib_dest_t *dest, struct route_entry *re,
+				    char *in_buf, size_t in_buf_len,
+				    fpm_msg_type_e *msg_type)
+{
+	size_t len;
+#ifdef HAVE_NETLINK
+	int cmd;
+#endif
+	len = 0;
+
+	*msg_type = FPM_MSG_TYPE_NONE;
+
+	switch (zfpm_g->message_format) {
+
+	case ZFPM_MSG_FORMAT_PROTOBUF:
+#ifdef HAVE_PROTOBUF
+		len = zfpm_protobuf_encode_route(dest, re, (uint8_t *)in_buf,
+						 in_buf_len);
+		*msg_type = FPM_MSG_TYPE_PROTOBUF;
+#endif
+		break;
+
+	case ZFPM_MSG_FORMAT_NETLINK:
+#ifdef HAVE_NETLINK
+		/* NULL re means a route withdrawal. */
+		*msg_type = FPM_MSG_TYPE_NETLINK;
+		cmd = re ? RTM_NEWROUTE : RTM_DELROUTE;
+		len = zfpm_netlink_encode_route(cmd, dest, re, in_buf,
+						in_buf_len);
+		assert(fpm_msg_align(len) == len);
+		/* NOTE(review): msg_type is assigned twice in this case;
+		 * redundant but harmless.
+		 */
+		*msg_type = FPM_MSG_TYPE_NETLINK;
+#endif /* HAVE_NETLINK */
+		break;
+
+	default:
+		break;
+	}
+
+	return len;
+}
+
+/*
+ * zfpm_route_for_update
+ *
+ * Returns the re that is to be sent to the FPM for a given dest.
+ */
+struct route_entry *zfpm_route_for_update(rib_dest_t *dest)
+{
+	return dest->selected_fib;
+}
+
+/*
+ * Define an enum for return codes for queue processing functions
+ *
+ * FPM_WRITE_STOP: This return code indicates that the write buffer is full.
+ * Stop processing all the queues and empty the buffer by writing its content
+ * to the socket.
+ *
+ * FPM_GOTO_NEXT_Q: This return code indicates that either this queue is
+ * empty or we have processed enough updates from this queue.
+ * So, move on to the next queue.
+ */
+enum {
+	FPM_WRITE_STOP = 0,
+	FPM_GOTO_NEXT_Q = 1
+};
+
+#define FPM_QUEUE_PROCESS_LIMIT 10000
+
+/*
+ * zfpm_build_route_updates
+ *
+ * Process the dest_q queue and write FPM messages to the outbound buffer.
+ */
+static int zfpm_build_route_updates(void)
+{
+	struct stream *s;
+	rib_dest_t *dest;
+	unsigned char *buf, *data, *buf_end;
+	size_t msg_len;
+	size_t data_len;
+	fpm_msg_hdr_t *hdr;
+	struct route_entry *re;
+	int is_add, write_msg;
+	fpm_msg_type_e msg_type;
+	uint16_t q_limit;
+
+	if (TAILQ_EMPTY(&zfpm_g->dest_q))
+		return FPM_GOTO_NEXT_Q;
+
+	s = zfpm_g->obuf;
+	q_limit = FPM_QUEUE_PROCESS_LIMIT;
+
+	do {
+		/*
+		 * Make sure there is enough space to write another message.
+		 */
+		if (STREAM_WRITEABLE(s) < FPM_MAX_MSG_LEN)
+			return FPM_WRITE_STOP;
+
+		buf = STREAM_DATA(s) + stream_get_endp(s);
+		buf_end = buf + STREAM_WRITEABLE(s);
+
+		dest = TAILQ_FIRST(&zfpm_g->dest_q);
+		if (!dest)
+			return FPM_GOTO_NEXT_Q;
+
+		assert(CHECK_FLAG(dest->flags, RIB_DEST_UPDATE_FPM));
+
+		hdr = (fpm_msg_hdr_t *)buf;
+		hdr->version = FPM_PROTO_VERSION;
+
+		data = fpm_msg_data(hdr);
+
+		/* A NULL route entry means this dest is a deletion. */
+		re = zfpm_route_for_update(dest);
+		is_add = re ? 1 : 0;
+
+		write_msg = 1;
+
+		/*
+		 * If this is a route deletion, and we have not sent the route
+		 * to
+		 * the FPM previously, skip it.
+		 */
+		if (!is_add && !CHECK_FLAG(dest->flags, RIB_DEST_SENT_TO_FPM)) {
+			write_msg = 0;
+			zfpm_g->stats.nop_deletes_skipped++;
+		}
+
+		if (write_msg) {
+			data_len = zfpm_encode_route(dest, re, (char *)data,
+						     buf_end - data, &msg_type);
+
+			if (data_len) {
+				hdr->msg_type = msg_type;
+				msg_len = fpm_data_len_to_msg_len(data_len);
+				hdr->msg_len = htons(msg_len);
+				stream_forward_endp(s, msg_len);
+
+				if (is_add)
+					zfpm_g->stats.route_adds++;
+				else
+					zfpm_g->stats.route_dels++;
+			} else {
+				zlog_err("%s: Encoding Prefix: %pRN No valid nexthops",
+					 __func__, dest->rnode);
+			}
+		}
+
+		/*
+		 * Remove the dest from the queue, and reset the flag.
+		 */
+		UNSET_FLAG(dest->flags, RIB_DEST_UPDATE_FPM);
+		TAILQ_REMOVE(&zfpm_g->dest_q, dest, fpm_q_entries);
+
+		if (is_add) {
+			SET_FLAG(dest->flags, RIB_DEST_SENT_TO_FPM);
+		} else {
+			UNSET_FLAG(dest->flags, RIB_DEST_SENT_TO_FPM);
+		}
+
+		/*
+		 * Delete the destination if necessary.
+		 */
+		if (rib_gc_dest(dest->rnode))
+			zfpm_g->stats.dests_del_after_update++;
+
+		q_limit--;
+		if (q_limit == 0) {
+			/*
+			 * We have processed enough updates in this queue.
+			 * Now yield for other queues.
+			 */
+			return FPM_GOTO_NEXT_Q;
+		}
+	} while (true);
+}
+
+/*
+ * zfpm_encode_mac
+ *
+ * Encode a message to FPM with information about the given MAC.
+ *
+ * Returns the number of bytes written to the buffer.
+ */
+static inline int zfpm_encode_mac(struct fpm_mac_info_t *mac, char *in_buf,
+				  size_t in_buf_len, fpm_msg_type_e *msg_type)
+{
+	size_t len = 0;
+
+	*msg_type = FPM_MSG_TYPE_NONE;
+
+	switch (zfpm_g->message_format) {
+
+	case ZFPM_MSG_FORMAT_NONE:
+		break;
+	case ZFPM_MSG_FORMAT_NETLINK:
+#ifdef HAVE_NETLINK
+		len = zfpm_netlink_encode_mac(mac, in_buf, in_buf_len);
+		assert(fpm_msg_align(len) == len);
+		*msg_type = FPM_MSG_TYPE_NETLINK;
+#endif /* HAVE_NETLINK */
+		break;
+	case ZFPM_MSG_FORMAT_PROTOBUF:
+		/* MAC updates are not supported over protobuf here. */
+		break;
+	}
+	return len;
+}
+
+/* Process the mac_q queue and write FPM MAC messages to the outbound
+ * buffer. Mirrors zfpm_build_route_updates() above.
+ */
+static int zfpm_build_mac_updates(void)
+{
+	struct stream *s;
+	struct fpm_mac_info_t *mac;
+	unsigned char *buf, *data, *buf_end;
+	fpm_msg_hdr_t *hdr;
+	size_t data_len, msg_len;
+	fpm_msg_type_e msg_type;
+	uint16_t q_limit;
+
+	if (TAILQ_EMPTY(&zfpm_g->mac_q))
+		return FPM_GOTO_NEXT_Q;
+
+	s = zfpm_g->obuf;
+	q_limit = FPM_QUEUE_PROCESS_LIMIT;
+
+	do {
+		/* Make sure there is enough space to write another message.
+		 */
+		if (STREAM_WRITEABLE(s) < FPM_MAX_MAC_MSG_LEN)
+			return FPM_WRITE_STOP;
+
+		buf = STREAM_DATA(s) + stream_get_endp(s);
+		buf_end = buf + STREAM_WRITEABLE(s);
+
+		mac = TAILQ_FIRST(&zfpm_g->mac_q);
+		if (!mac)
+			return FPM_GOTO_NEXT_Q;
+
+		/* Check for no-op */
+		if (!CHECK_FLAG(mac->fpm_flags, ZEBRA_MAC_UPDATE_FPM)) {
+			zfpm_g->stats.nop_deletes_skipped++;
+			zfpm_mac_info_del(mac);
+			continue;
+		}
+
+		hdr = (fpm_msg_hdr_t *)buf;
+		hdr->version = FPM_PROTO_VERSION;
+
+		data = fpm_msg_data(hdr);
+		data_len = zfpm_encode_mac(mac, (char *)data, buf_end - data,
+					   &msg_type);
+		/* NOTE(review): asserts that a queued MAC always encodes;
+		 * with ZFPM_MSG_FORMAT_NONE/PROTOBUF this would trip — verify
+		 * MACs are only queued in netlink mode.
+		 */
+		assert(data_len);
+
+		hdr->msg_type = msg_type;
+		msg_len = fpm_data_len_to_msg_len(data_len);
+		hdr->msg_len = htons(msg_len);
+		stream_forward_endp(s, msg_len);
+
+		/* Remove the MAC from the queue, and delete it. */
+		zfpm_mac_info_del(mac);
+
+		q_limit--;
+		if (q_limit == 0) {
+			/*
+			 * We have processed enough updates in this queue.
+			 * Now yield for other queues.
+			 */
+			return FPM_GOTO_NEXT_Q;
+		}
+	} while (1);
+}
+
+/*
+ * zfpm_build_updates
+ *
+ * Process the outgoing queues and write messages to the outbound
+ * buffer.
+ */
+static void zfpm_build_updates(void)
+{
+	struct stream *s;
+
+	s = zfpm_g->obuf;
+	assert(stream_empty(s));
+
+	do {
+		/*
+		 * Stop processing the queues if zfpm_g->obuf is full
+		 * or we do not have more updates to process
+		 */
+		if (zfpm_build_mac_updates() == FPM_WRITE_STOP)
+			break;
+		if (zfpm_build_route_updates() == FPM_WRITE_STOP)
+			break;
+	} while (zfpm_updates_pending());
+}
+
+/*
+ * zfpm_write_cb
+ */
+static void zfpm_write_cb(struct thread *thread)
+{
+	struct stream *s;
+	int num_writes;
+
+	zfpm_g->stats.write_cb_calls++;
+
+	/*
+	 * Check if async connect is now done.
+ */ + if (zfpm_g->state == ZFPM_STATE_CONNECTING) { + zfpm_connect_check(); + return; + } + + assert(zfpm_g->state == ZFPM_STATE_ESTABLISHED); + assert(zfpm_g->sock >= 0); + + num_writes = 0; + + do { + int bytes_to_write, bytes_written; + + s = zfpm_g->obuf; + + /* + * If the stream is empty, try fill it up with data. + */ + if (stream_empty(s)) { + zfpm_build_updates(); + } + + bytes_to_write = stream_get_endp(s) - stream_get_getp(s); + if (!bytes_to_write) + break; + + bytes_written = + write(zfpm_g->sock, stream_pnt(s), bytes_to_write); + zfpm_g->stats.write_calls++; + num_writes++; + + if (bytes_written < 0) { + if (ERRNO_IO_RETRY(errno)) + break; + + zfpm_connection_down("failed to write to socket"); + return; + } + + if (bytes_written != bytes_to_write) { + + /* + * Partial write. + */ + stream_forward_getp(s, bytes_written); + zfpm_g->stats.partial_writes++; + break; + } + + /* + * We've written out the entire contents of the stream. + */ + stream_reset(s); + + if (num_writes >= ZFPM_MAX_WRITES_PER_RUN) { + zfpm_g->stats.max_writes_hit++; + break; + } + + if (zfpm_thread_should_yield(thread)) { + zfpm_g->stats.t_write_yields++; + break; + } + } while (1); + + if (zfpm_writes_pending()) + zfpm_write_on(); +} + +/* + * zfpm_connect_cb + */ +static void zfpm_connect_cb(struct thread *t) +{ + int sock, ret; + struct sockaddr_in serv; + + assert(zfpm_g->state == ZFPM_STATE_ACTIVE); + + sock = socket(AF_INET, SOCK_STREAM, 0); + if (sock < 0) { + zlog_err("Failed to create socket for connect(): %s", + strerror(errno)); + zfpm_g->stats.connect_no_sock++; + return; + } + + set_nonblocking(sock); + + /* Make server socket. 
*/ + memset(&serv, 0, sizeof(serv)); + serv.sin_family = AF_INET; + serv.sin_port = htons(zfpm_g->fpm_port); +#ifdef HAVE_STRUCT_SOCKADDR_IN_SIN_LEN + serv.sin_len = sizeof(struct sockaddr_in); +#endif /* HAVE_STRUCT_SOCKADDR_IN_SIN_LEN */ + if (!zfpm_g->fpm_server) + serv.sin_addr.s_addr = htonl(INADDR_LOOPBACK); + else + serv.sin_addr.s_addr = (zfpm_g->fpm_server); + + /* + * Connect to the FPM. + */ + zfpm_g->connect_calls++; + zfpm_g->stats.connect_calls++; + zfpm_g->last_connect_call_time = monotime(NULL); + + ret = connect(sock, (struct sockaddr *)&serv, sizeof(serv)); + if (ret >= 0) { + zfpm_g->sock = sock; + zfpm_connection_up("connect succeeded"); + return; + } + + if (errno == EINPROGRESS) { + zfpm_g->sock = sock; + zfpm_read_on(); + zfpm_write_on(); + zfpm_set_state(ZFPM_STATE_CONNECTING, + "async connect in progress"); + return; + } + + zlog_info("can't connect to FPM %d: %s", sock, safe_strerror(errno)); + close(sock); + + /* + * Restart timer for retrying connection. + */ + zfpm_start_connect_timer("connect() failed"); +} + +/* + * zfpm_set_state + * + * Move state machine into the given state. + */ +static void zfpm_set_state(enum zfpm_state state, const char *reason) +{ + enum zfpm_state cur_state = zfpm_g->state; + + if (!reason) + reason = "Unknown"; + + if (state == cur_state) + return; + + zfpm_debug("beginning state transition %s -> %s. 
Reason: %s", + zfpm_state_to_str(cur_state), zfpm_state_to_str(state), + reason); + + switch (state) { + + case ZFPM_STATE_IDLE: + assert(cur_state == ZFPM_STATE_ESTABLISHED); + break; + + case ZFPM_STATE_ACTIVE: + assert(cur_state == ZFPM_STATE_IDLE + || cur_state == ZFPM_STATE_CONNECTING); + assert(zfpm_g->t_connect); + break; + + case ZFPM_STATE_CONNECTING: + assert(zfpm_g->sock); + assert(cur_state == ZFPM_STATE_ACTIVE); + assert(zfpm_g->t_read); + assert(zfpm_g->t_write); + break; + + case ZFPM_STATE_ESTABLISHED: + assert(cur_state == ZFPM_STATE_ACTIVE + || cur_state == ZFPM_STATE_CONNECTING); + assert(zfpm_g->sock); + assert(zfpm_g->t_read); + assert(zfpm_g->t_write); + break; + } + + zfpm_g->state = state; +} + +/* + * zfpm_calc_connect_delay + * + * Returns the number of seconds after which we should attempt to + * reconnect to the FPM. + */ +static long zfpm_calc_connect_delay(void) +{ + time_t elapsed; + + /* + * Return 0 if this is our first attempt to connect. + */ + if (zfpm_g->connect_calls == 0) { + return 0; + } + + elapsed = zfpm_get_elapsed_time(zfpm_g->last_connect_call_time); + + if (elapsed > ZFPM_CONNECT_RETRY_IVL) { + return 0; + } + + return ZFPM_CONNECT_RETRY_IVL - elapsed; +} + +/* + * zfpm_start_connect_timer + */ +static void zfpm_start_connect_timer(const char *reason) +{ + long delay_secs; + + assert(!zfpm_g->t_connect); + assert(zfpm_g->sock < 0); + + assert(zfpm_g->state == ZFPM_STATE_IDLE + || zfpm_g->state == ZFPM_STATE_ACTIVE + || zfpm_g->state == ZFPM_STATE_CONNECTING); + + delay_secs = zfpm_calc_connect_delay(); + zfpm_debug("scheduling connect in %ld seconds", delay_secs); + + thread_add_timer(zfpm_g->master, zfpm_connect_cb, 0, delay_secs, + &zfpm_g->t_connect); + zfpm_set_state(ZFPM_STATE_ACTIVE, reason); +} + +/* + * zfpm_is_enabled + * + * Returns true if the zebra FPM module has been enabled. 
+ */ +static inline int zfpm_is_enabled(void) +{ + return zfpm_g->enabled; +} + +/* + * zfpm_conn_is_up + * + * Returns true if the connection to the FPM is up. + */ +static inline int zfpm_conn_is_up(void) +{ + if (zfpm_g->state != ZFPM_STATE_ESTABLISHED) + return 0; + + assert(zfpm_g->sock >= 0); + + return 1; +} + +/* + * zfpm_trigger_update + * + * The zebra code invokes this function to indicate that we should + * send an update to the FPM about the given route_node. + */ +static int zfpm_trigger_update(struct route_node *rn, const char *reason) +{ + rib_dest_t *dest; + + /* + * Ignore if the connection is down. We will update the FPM about + * all destinations once the connection comes up. + */ + if (!zfpm_conn_is_up()) + return 0; + + dest = rib_dest_from_rnode(rn); + + if (CHECK_FLAG(dest->flags, RIB_DEST_UPDATE_FPM)) { + zfpm_g->stats.redundant_triggers++; + return 0; + } + + if (reason) { + zfpm_debug("%pFX triggering update to FPM - Reason: %s", &rn->p, + reason); + } + + SET_FLAG(dest->flags, RIB_DEST_UPDATE_FPM); + TAILQ_INSERT_TAIL(&zfpm_g->dest_q, dest, fpm_q_entries); + zfpm_g->stats.updates_triggered++; + + /* + * Make sure that writes are enabled. + */ + if (zfpm_g->t_write) + return 0; + + zfpm_write_on(); + return 0; +} + +/* + * zfpm_trigger_remove + * + * The zebra code invokes this function to indicate that we should + * send an remove to the FPM about the given route_node. 
+ */ + +static int zfpm_trigger_remove(struct route_node *rn) +{ + rib_dest_t *dest; + + if (!zfpm_conn_is_up()) + return 0; + + dest = rib_dest_from_rnode(rn); + if (!CHECK_FLAG(dest->flags, RIB_DEST_UPDATE_FPM)) + return 0; + + zfpm_debug("%pRN Removing from update queue shutting down", rn); + + UNSET_FLAG(dest->flags, RIB_DEST_UPDATE_FPM); + TAILQ_REMOVE(&zfpm_g->dest_q, dest, fpm_q_entries); + + return 0; +} + +/* + * Generate Key for FPM MAC info hash entry + */ +static unsigned int zfpm_mac_info_hash_keymake(const void *p) +{ + struct fpm_mac_info_t *fpm_mac = (struct fpm_mac_info_t *)p; + uint32_t mac_key; + + mac_key = jhash(fpm_mac->macaddr.octet, ETH_ALEN, 0xa5a5a55a); + + return jhash_2words(mac_key, fpm_mac->vni, 0); +} + +/* + * Compare function for FPM MAC info hash lookup + */ +static bool zfpm_mac_info_cmp(const void *p1, const void *p2) +{ + const struct fpm_mac_info_t *fpm_mac1 = p1; + const struct fpm_mac_info_t *fpm_mac2 = p2; + + if (memcmp(fpm_mac1->macaddr.octet, fpm_mac2->macaddr.octet, ETH_ALEN) + != 0) + return false; + if (fpm_mac1->vni != fpm_mac2->vni) + return false; + + return true; +} + +/* + * Lookup FPM MAC info hash entry. + */ +static struct fpm_mac_info_t *zfpm_mac_info_lookup(struct fpm_mac_info_t *key) +{ + return hash_lookup(zfpm_g->fpm_mac_info_table, key); +} + +/* + * Callback to allocate fpm_mac_info_t structure. + */ +static void *zfpm_mac_info_alloc(void *p) +{ + const struct fpm_mac_info_t *key = p; + struct fpm_mac_info_t *fpm_mac; + + fpm_mac = XCALLOC(MTYPE_FPM_MAC_INFO, sizeof(struct fpm_mac_info_t)); + + memcpy(&fpm_mac->macaddr, &key->macaddr, ETH_ALEN); + fpm_mac->vni = key->vni; + + return (void *)fpm_mac; +} + +/* + * Delink and free fpm_mac_info_t. 
+ */ +static void zfpm_mac_info_del(struct fpm_mac_info_t *fpm_mac) +{ + hash_release(zfpm_g->fpm_mac_info_table, fpm_mac); + TAILQ_REMOVE(&zfpm_g->mac_q, fpm_mac, fpm_mac_q_entries); + XFREE(MTYPE_FPM_MAC_INFO, fpm_mac); +} + +/* + * zfpm_trigger_rmac_update + * + * Zebra code invokes this function to indicate that we should + * send an update to FPM for given MAC entry. + * + * This function checks if we already have enqueued an update for this RMAC, + * If yes, update the same fpm_mac_info_t. Else, create and enqueue an update. + */ +static int zfpm_trigger_rmac_update(struct zebra_mac *rmac, + struct zebra_l3vni *zl3vni, bool delete, + const char *reason) +{ + struct fpm_mac_info_t *fpm_mac, key; + struct interface *vxlan_if, *svi_if; + bool mac_found = false; + + /* + * Ignore if the connection is down. We will update the FPM about + * all destinations once the connection comes up. + */ + if (!zfpm_conn_is_up()) + return 0; + + if (reason) { + zfpm_debug("triggering update to FPM - Reason: %s - %pEA", + reason, &rmac->macaddr); + } + + vxlan_if = zl3vni_map_to_vxlan_if(zl3vni); + svi_if = zl3vni_map_to_svi_if(zl3vni); + + memset(&key, 0, sizeof(key)); + + memcpy(&key.macaddr, &rmac->macaddr, ETH_ALEN); + key.vni = zl3vni->vni; + + /* Check if this MAC is already present in the queue. */ + fpm_mac = zfpm_mac_info_lookup(&key); + + if (fpm_mac) { + mac_found = true; + + /* + * If the enqueued op is "add" and current op is "delete", + * this is a noop. So, Unset ZEBRA_MAC_UPDATE_FPM flag. + * While processing FPM queue, we will silently delete this + * MAC entry without sending any update for this MAC. 
+ */ + if (!CHECK_FLAG(fpm_mac->fpm_flags, ZEBRA_MAC_DELETE_FPM) && + delete == 1) { + SET_FLAG(fpm_mac->fpm_flags, ZEBRA_MAC_DELETE_FPM); + UNSET_FLAG(fpm_mac->fpm_flags, ZEBRA_MAC_UPDATE_FPM); + return 0; + } + } else + fpm_mac = hash_get(zfpm_g->fpm_mac_info_table, &key, + zfpm_mac_info_alloc); + + fpm_mac->r_vtep_ip.s_addr = rmac->fwd_info.r_vtep_ip.s_addr; + fpm_mac->zebra_flags = rmac->flags; + fpm_mac->vxlan_if = vxlan_if ? vxlan_if->ifindex : 0; + fpm_mac->svi_if = svi_if ? svi_if->ifindex : 0; + + SET_FLAG(fpm_mac->fpm_flags, ZEBRA_MAC_UPDATE_FPM); + if (delete) + SET_FLAG(fpm_mac->fpm_flags, ZEBRA_MAC_DELETE_FPM); + else + UNSET_FLAG(fpm_mac->fpm_flags, ZEBRA_MAC_DELETE_FPM); + + if (!mac_found) + TAILQ_INSERT_TAIL(&zfpm_g->mac_q, fpm_mac, fpm_mac_q_entries); + + zfpm_g->stats.updates_triggered++; + + /* If writes are already enabled, return. */ + if (zfpm_g->t_write) + return 0; + + zfpm_write_on(); + return 0; +} + +/* + * This function is called when the FPM connections is established. + * Iterate over all the RMAC entries for the given L3VNI + * and enqueue the RMAC for FPM processing. + */ +static void zfpm_trigger_rmac_update_wrapper(struct hash_bucket *bucket, + void *args) +{ + struct zebra_mac *zrmac = (struct zebra_mac *)bucket->data; + struct zebra_l3vni *zl3vni = (struct zebra_l3vni *)args; + + zfpm_trigger_rmac_update(zrmac, zl3vni, false, "RMAC added"); +} + +/* + * This function is called when the FPM connections is established. + * This function iterates over all the L3VNIs to trigger + * FPM updates for RMACs currently available. 
+ */ +static void zfpm_iterate_rmac_table(struct hash_bucket *bucket, void *args) +{ + struct zebra_l3vni *zl3vni = (struct zebra_l3vni *)bucket->data; + + hash_iterate(zl3vni->rmac_table, zfpm_trigger_rmac_update_wrapper, + (void *)zl3vni); +} + +/* + * struct zfpm_statsimer_cb + */ +static void zfpm_stats_timer_cb(struct thread *t) +{ + zfpm_g->t_stats = NULL; + + /* + * Remember the stats collected in the last interval for display + * purposes. + */ + zfpm_stats_copy(&zfpm_g->stats, &zfpm_g->last_ivl_stats); + + /* + * Add the current set of stats into the cumulative statistics. + */ + zfpm_stats_compose(&zfpm_g->cumulative_stats, &zfpm_g->stats, + &zfpm_g->cumulative_stats); + + /* + * Start collecting stats afresh over the next interval. + */ + zfpm_stats_reset(&zfpm_g->stats); + + zfpm_start_stats_timer(); +} + +/* + * zfpm_stop_stats_timer + */ +static void zfpm_stop_stats_timer(void) +{ + if (!zfpm_g->t_stats) + return; + + zfpm_debug("Stopping existing stats timer"); + THREAD_OFF(zfpm_g->t_stats); +} + +/* + * zfpm_start_stats_timer + */ +void zfpm_start_stats_timer(void) +{ + assert(!zfpm_g->t_stats); + + thread_add_timer(zfpm_g->master, zfpm_stats_timer_cb, 0, + ZFPM_STATS_IVL_SECS, &zfpm_g->t_stats); +} + +/* + * Helper macro for zfpm_show_stats() below. + */ +#define ZFPM_SHOW_STAT(counter) \ + do { \ + vty_out(vty, "%-40s %10lu %16lu\n", #counter, \ + total_stats.counter, zfpm_g->last_ivl_stats.counter); \ + } while (0) + +/* + * zfpm_show_stats + */ +static void zfpm_show_stats(struct vty *vty) +{ + struct zfpm_stats total_stats; + time_t elapsed; + + vty_out(vty, "\n%-40s %10s Last %2d secs\n\n", "Counter", "Total", + ZFPM_STATS_IVL_SECS); + + /* + * Compute the total stats up to this instant. 
+ */ + zfpm_stats_compose(&zfpm_g->cumulative_stats, &zfpm_g->stats, + &total_stats); + + ZFPM_SHOW_STAT(connect_calls); + ZFPM_SHOW_STAT(connect_no_sock); + ZFPM_SHOW_STAT(read_cb_calls); + ZFPM_SHOW_STAT(write_cb_calls); + ZFPM_SHOW_STAT(write_calls); + ZFPM_SHOW_STAT(partial_writes); + ZFPM_SHOW_STAT(max_writes_hit); + ZFPM_SHOW_STAT(t_write_yields); + ZFPM_SHOW_STAT(nop_deletes_skipped); + ZFPM_SHOW_STAT(route_adds); + ZFPM_SHOW_STAT(route_dels); + ZFPM_SHOW_STAT(updates_triggered); + ZFPM_SHOW_STAT(redundant_triggers); + ZFPM_SHOW_STAT(dests_del_after_update); + ZFPM_SHOW_STAT(t_conn_down_starts); + ZFPM_SHOW_STAT(t_conn_down_dests_processed); + ZFPM_SHOW_STAT(t_conn_down_yields); + ZFPM_SHOW_STAT(t_conn_down_finishes); + ZFPM_SHOW_STAT(t_conn_up_starts); + ZFPM_SHOW_STAT(t_conn_up_dests_processed); + ZFPM_SHOW_STAT(t_conn_up_yields); + ZFPM_SHOW_STAT(t_conn_up_aborts); + ZFPM_SHOW_STAT(t_conn_up_finishes); + + if (!zfpm_g->last_stats_clear_time) + return; + + elapsed = zfpm_get_elapsed_time(zfpm_g->last_stats_clear_time); + + vty_out(vty, "\nStats were cleared %lu seconds ago\n", + (unsigned long)elapsed); +} + +/* + * zfpm_clear_stats + */ +static void zfpm_clear_stats(struct vty *vty) +{ + if (!zfpm_is_enabled()) { + vty_out(vty, "The FPM module is not enabled...\n"); + return; + } + + zfpm_stats_reset(&zfpm_g->stats); + zfpm_stats_reset(&zfpm_g->last_ivl_stats); + zfpm_stats_reset(&zfpm_g->cumulative_stats); + + zfpm_stop_stats_timer(); + zfpm_start_stats_timer(); + + zfpm_g->last_stats_clear_time = monotime(NULL); + + vty_out(vty, "Cleared FPM stats\n"); +} + +/* + * show_zebra_fpm_stats + */ +DEFUN (show_zebra_fpm_stats, + show_zebra_fpm_stats_cmd, + "show zebra fpm stats", + SHOW_STR + ZEBRA_STR + "Forwarding Path Manager information\n" + "Statistics\n") +{ + zfpm_show_stats(vty); + return CMD_SUCCESS; +} + +/* + * clear_zebra_fpm_stats + */ +DEFUN (clear_zebra_fpm_stats, + clear_zebra_fpm_stats_cmd, + "clear zebra fpm stats", + CLEAR_STR + ZEBRA_STR + 
"Clear Forwarding Path Manager information\n" + "Statistics\n") +{ + zfpm_clear_stats(vty); + return CMD_SUCCESS; +} + +/* + * update fpm connection information + */ +DEFUN ( fpm_remote_ip, + fpm_remote_ip_cmd, + "fpm connection ip A.B.C.D port (1-65535)", + "fpm connection remote ip and port\n" + "Remote fpm server ip A.B.C.D\n" + "Enter ip ") +{ + + in_addr_t fpm_server; + uint32_t port_no; + + fpm_server = inet_addr(argv[3]->arg); + if (fpm_server == INADDR_NONE) + return CMD_ERR_INCOMPLETE; + + port_no = atoi(argv[5]->arg); + if (port_no < TCP_MIN_PORT || port_no > TCP_MAX_PORT) + return CMD_ERR_INCOMPLETE; + + zfpm_g->fpm_server = fpm_server; + zfpm_g->fpm_port = port_no; + + + return CMD_SUCCESS; +} + +DEFUN ( no_fpm_remote_ip, + no_fpm_remote_ip_cmd, + "no fpm connection ip A.B.C.D port (1-65535)", + "fpm connection remote ip and port\n" + "Connection\n" + "Remote fpm server ip A.B.C.D\n" + "Enter ip ") +{ + if (zfpm_g->fpm_server != inet_addr(argv[4]->arg) + || zfpm_g->fpm_port != atoi(argv[6]->arg)) + return CMD_ERR_NO_MATCH; + + zfpm_g->fpm_server = FPM_DEFAULT_IP; + zfpm_g->fpm_port = FPM_DEFAULT_PORT; + + return CMD_SUCCESS; +} + +/* + * zfpm_init_message_format + */ +static inline void zfpm_init_message_format(const char *format) +{ + int have_netlink, have_protobuf; + +#ifdef HAVE_NETLINK + have_netlink = 1; +#else + have_netlink = 0; +#endif + +#ifdef HAVE_PROTOBUF + have_protobuf = 1; +#else + have_protobuf = 0; +#endif + + zfpm_g->message_format = ZFPM_MSG_FORMAT_NONE; + + if (!format) { + if (have_netlink) { + zfpm_g->message_format = ZFPM_MSG_FORMAT_NETLINK; + } else if (have_protobuf) { + zfpm_g->message_format = ZFPM_MSG_FORMAT_PROTOBUF; + } + return; + } + + if (!strcmp("netlink", format)) { + if (!have_netlink) { + flog_err(EC_ZEBRA_NETLINK_NOT_AVAILABLE, + "FPM netlink message format is not available"); + return; + } + zfpm_g->message_format = ZFPM_MSG_FORMAT_NETLINK; + return; + } + + if (!strcmp("protobuf", format)) { + if (!have_protobuf) 
{ + flog_err( + EC_ZEBRA_PROTOBUF_NOT_AVAILABLE, + "FPM protobuf message format is not available"); + return; + } + flog_warn(EC_ZEBRA_PROTOBUF_NOT_AVAILABLE, + "FPM protobuf message format is deprecated and scheduled to be removed. Please convert to using netlink format or contact dev@lists.frrouting.org with your use case."); + zfpm_g->message_format = ZFPM_MSG_FORMAT_PROTOBUF; + return; + } + + flog_warn(EC_ZEBRA_FPM_FORMAT_UNKNOWN, "Unknown fpm format '%s'", + format); +} + +/** + * fpm_remote_srv_write + * + * Module to write remote fpm connection + * + * Returns ZERO on success. + */ + +static int fpm_remote_srv_write(struct vty *vty) +{ + struct in_addr in; + + in.s_addr = zfpm_g->fpm_server; + + if ((zfpm_g->fpm_server != FPM_DEFAULT_IP + && zfpm_g->fpm_server != INADDR_ANY) + || (zfpm_g->fpm_port != FPM_DEFAULT_PORT && zfpm_g->fpm_port != 0)) + vty_out(vty, "fpm connection ip %pI4 port %d\n", &in, + zfpm_g->fpm_port); + + return 0; +} + + +static int fpm_remote_srv_write(struct vty *vty); +/* Zebra node */ +static struct cmd_node zebra_node = { + .name = "zebra", + .node = ZEBRA_NODE, + .parent_node = CONFIG_NODE, + .prompt = "", + .config_write = fpm_remote_srv_write, +}; + + +/** + * zfpm_init + * + * One-time initialization of the Zebra FPM module. + * + * @param[in] port port at which FPM is running. + * @param[in] enable true if the zebra FPM module should be enabled + * @param[in] format to use to talk to the FPM. Can be 'netink' or 'protobuf'. + * + * Returns true on success. 
+ */ +static int zfpm_init(struct thread_master *master) +{ + int enable = 1; + uint16_t port = 0; + const char *format = THIS_MODULE->load_args; + + memset(zfpm_g, 0, sizeof(*zfpm_g)); + zfpm_g->master = master; + TAILQ_INIT(&zfpm_g->dest_q); + TAILQ_INIT(&zfpm_g->mac_q); + + /* Create hash table for fpm_mac_info_t enties */ + zfpm_g->fpm_mac_info_table = hash_create(zfpm_mac_info_hash_keymake, + zfpm_mac_info_cmp, + "FPM MAC info hash table"); + + zfpm_g->sock = -1; + zfpm_g->state = ZFPM_STATE_IDLE; + + zfpm_stats_init(&zfpm_g->stats); + zfpm_stats_init(&zfpm_g->last_ivl_stats); + zfpm_stats_init(&zfpm_g->cumulative_stats); + + memset(&ipv4ll_gateway, 0, sizeof(ipv4ll_gateway)); + if (inet_pton(AF_INET, ipv4_ll_buf, &ipv4ll_gateway.ipv4) != 1) + zlog_warn("inet_pton failed for %s", ipv4_ll_buf); + + install_node(&zebra_node); + install_element(ENABLE_NODE, &show_zebra_fpm_stats_cmd); + install_element(ENABLE_NODE, &clear_zebra_fpm_stats_cmd); + install_element(CONFIG_NODE, &fpm_remote_ip_cmd); + install_element(CONFIG_NODE, &no_fpm_remote_ip_cmd); + + zfpm_init_message_format(format); + + /* + * Disable FPM interface if no suitable format is available. 
+ */ + if (zfpm_g->message_format == ZFPM_MSG_FORMAT_NONE) + enable = 0; + + zfpm_g->enabled = enable; + + if (!zfpm_g->fpm_server) + zfpm_g->fpm_server = FPM_DEFAULT_IP; + + if (!port) + port = FPM_DEFAULT_PORT; + + zfpm_g->fpm_port = port; + + zfpm_g->obuf = stream_new(ZFPM_OBUF_SIZE); + zfpm_g->ibuf = stream_new(ZFPM_IBUF_SIZE); + + zfpm_start_stats_timer(); + zfpm_start_connect_timer("initialized"); + return 0; +} + +static int zfpm_fini(void) +{ + zfpm_write_off(); + zfpm_read_off(); + zfpm_connect_off(); + + zfpm_stop_stats_timer(); + + hook_unregister(rib_update, zfpm_trigger_update); + return 0; +} + +static int zebra_fpm_module_init(void) +{ + hook_register(rib_update, zfpm_trigger_update); + hook_register(rib_shutdown, zfpm_trigger_remove); + hook_register(zebra_rmac_update, zfpm_trigger_rmac_update); + hook_register(frr_late_init, zfpm_init); + hook_register(frr_early_fini, zfpm_fini); + return 0; +} + +FRR_MODULE_SETUP(.name = "zebra_fpm", .version = FRR_VERSION, + .description = "zebra FPM (Forwarding Plane Manager) module", + .init = zebra_fpm_module_init, +); diff --git a/zebra/zebra_fpm_dt.c b/zebra/zebra_fpm_dt.c new file mode 100644 index 0000000..e392722 --- /dev/null +++ b/zebra/zebra_fpm_dt.c @@ -0,0 +1,274 @@ +/* + * zebra_fpm_dt.c + * + * @copyright Copyright (C) 2016 Sproute Networks, Inc. + * + * @author Avneesh Sachdev <avneesh@sproute.com> + * + * This file is part of GNU Zebra. + * + * GNU Zebra is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License as published by the + * Free Software Foundation; either version 2, or (at your option) any + * later version. + * + * GNU Zebra is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. 
+ * + * You should have received a copy of the GNU General Public License along + * with this program; see the file COPYING; if not, write to the Free Software + * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA + */ + +/* + * Developer tests for the zebra code that interfaces with the + * forwarding plane manager. + * + * The functions here are built into developer builds of zebra (when + * DEV_BUILD is defined), and can be called via the 'invoke' cli + * command. + * + * For example: + * + * # invoke zebra function zfpm_dt_benchmark_protobuf_encode 100000 + * + */ + +#include <zebra.h> +#include "log.h" +#include "vrf.h" + +#include "zebra/rib.h" +#include "zebra/zserv.h" +#include "zebra/zebra_vrf.h" + +#include "zebra_fpm_private.h" + +#include "qpb/qpb_allocator.h" +#include "qpb/linear_allocator.h" + +#ifdef HAVE_PROTOBUF +#include "qpb/qpb.h" +#include "fpm/fpm.pb-c.h" +#endif + +/* + * Externs. + */ +extern int zfpm_dt_benchmark_netlink_encode(int argc, const char **argv); +extern int zfpm_dt_benchmark_protobuf_encode(int argc, const char **argv); +extern int zfpm_dt_benchmark_protobuf_decode(int argc, const char **argv); + +/* + * zfpm_dt_find_route + * + * Selects a suitable rib destination for fpm interface tests. 
+ */ +static int zfpm_dt_find_route(rib_dest_t **dest_p, struct route_entry **re_p) +{ + struct route_node *rnode; + route_table_iter_t iter; + struct route_table *table; + rib_dest_t *dest; + struct route_entry *re; + int ret; + + table = zebra_vrf_table(AFI_IP, SAFI_UNICAST, VRF_DEFAULT); + if (!table) + return 0; + + route_table_iter_init(&iter, table); + while ((rnode = route_table_iter_next(&iter))) { + dest = rib_dest_from_rnode(rnode); + + if (!dest) + continue; + + re = zfpm_route_for_update(dest); + if (!re) + continue; + + if (nexthop_group_active_nexthop_num(&(re->nhe->nhg)) == 0) + continue; + + *dest_p = dest; + *re_p = re; + ret = 1; + goto done; + } + + ret = 0; + +done: + route_table_iter_cleanup(&iter); + return ret; +} +#ifdef HAVE_NETLINK + +/* + * zfpm_dt_benchmark_netlink_encode + */ +int zfpm_dt_benchmark_netlink_encode(int argc, const char **argv) +{ + int times, i, len; + rib_dest_t *dest; + struct route_entry *re; + char buf[4096]; + + times = 100000; + if (argc > 0) { + times = atoi(argv[0]); + } + + if (!zfpm_dt_find_route(&dest, &re)) { + return 1; + } + + for (i = 0; i < times; i++) { + len = zfpm_netlink_encode_route(RTM_NEWROUTE, dest, re, buf, + sizeof(buf)); + if (len <= 0) { + return 2; + } + } + return 0; +} + +#endif /* HAVE_NETLINK */ + +#ifdef HAVE_PROTOBUF + +/* + * zfpm_dt_benchmark_protobuf_encode + */ +int zfpm_dt_benchmark_protobuf_encode(int argc, const char **argv) +{ + int times, i, len; + rib_dest_t *dest; + struct route_entry *re; + uint8_t buf[4096]; + + times = 100000; + if (argc > 0) { + times = atoi(argv[0]); + } + + if (!zfpm_dt_find_route(&dest, &re)) { + return 1; + } + + for (i = 0; i < times; i++) { + len = zfpm_protobuf_encode_route(dest, re, buf, sizeof(buf)); + if (len <= 0) { + return 2; + } + } + return 0; +} + +/* + * zfpm_dt_log_fpm_message + */ +static void zfpm_dt_log_fpm_message(Fpm__Message *msg) +{ + Fpm__AddRoute *add_route; + Fpm__Nexthop *nexthop; + struct prefix prefix; + uint8_t family, 
nh_family; + uint if_index; + char *if_name; + size_t i; + char buf[INET6_ADDRSTRLEN]; + char addr_buf[PREFIX_STRLEN]; + union g_addr nh_addr; + + if (msg->type != FPM__MESSAGE__TYPE__ADD_ROUTE) + return; + + zfpm_debug("Add route message"); + add_route = msg->add_route; + + if (!qpb_address_family_get(add_route->address_family, &family)) + return; + + if (!qpb_l3_prefix_get(add_route->key->prefix, family, &prefix)) + return; + + zfpm_debug("Vrf id: %d, Prefix: %s/%d, Metric: %d", add_route->vrf_id, + inet_ntop(family, &prefix.u.prefix, buf, sizeof(buf)), + prefix.prefixlen, add_route->metric); + + /* + * Go over nexthops. + */ + for (i = 0; i < add_route->n_nexthops; i++) { + nexthop = add_route->nexthops[i]; + if (!qpb_if_identifier_get(nexthop->if_id, &if_index, &if_name)) + continue; + + if (nexthop->address) + qpb_l3_address_get(nexthop->address, &nh_family, + &nh_addr); + + zfpm_debug("Nexthop - if_index: %d (%s), gateway: %s, ", + if_index, if_name ? if_name : "name not specified", + nexthop->address ? + inet_ntop(AF_INET, &nh_addr.ipv4, + addr_buf, sizeof(addr_buf)) : "None"); + } +} + +/* + * zfpm_dt_benchmark_protobuf_decode + */ +int zfpm_dt_benchmark_protobuf_decode(int argc, const char **argv) +{ + int times, i, len; + rib_dest_t *dest; + struct route_entry *re; + uint8_t msg_buf[4096]; + QPB_DECLARE_STACK_ALLOCATOR(allocator, 8192); + Fpm__Message *fpm_msg; + + QPB_INIT_STACK_ALLOCATOR(allocator); + + times = 100000; + if (argc > 0) + times = atoi(argv[0]); + + if (!zfpm_dt_find_route(&dest, &re)) + return 1; + + /* + * Encode the route into the message buffer once only. + */ + len = zfpm_protobuf_encode_route(dest, re, msg_buf, sizeof(msg_buf)); + if (len <= 0) + return 2; + + // Decode once, and display the decoded message + fpm_msg = fpm__message__unpack(&allocator, len, msg_buf); + + if (fpm_msg) { + zfpm_dt_log_fpm_message(fpm_msg); + QPB_RESET_STACK_ALLOCATOR(allocator); + } + + /* + * Decode encoded message the specified number of times. 
+ */ + for (i = 0; i < times; i++) { + fpm_msg = fpm__message__unpack(&allocator, len, msg_buf); + + if (!fpm_msg) + return 3; + + // fpm__message__free_unpacked(msg, NULL); + QPB_RESET_STACK_ALLOCATOR(allocator); + } + return 0; +} + +#endif /* HAVE_PROTOBUF */ diff --git a/zebra/zebra_fpm_netlink.c b/zebra/zebra_fpm_netlink.c new file mode 100644 index 0000000..ca89725 --- /dev/null +++ b/zebra/zebra_fpm_netlink.c @@ -0,0 +1,640 @@ +/* + * Code for encoding/decoding FPM messages that are in netlink format. + * + * Copyright (C) 1997, 98, 99 Kunihiro Ishiguro + * Copyright (C) 2012 by Open Source Routing. + * Copyright (C) 2012 by Internet Systems Consortium, Inc. ("ISC") + * + * This file is part of GNU Zebra. + * + * GNU Zebra is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License as published by the + * Free Software Foundation; either version 2, or (at your option) any + * later version. + * + * GNU Zebra is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. 
+ * + * You should have received a copy of the GNU General Public License along + * with this program; see the file COPYING; if not, write to the Free Software + * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA + */ + +#include <zebra.h> + +#ifdef HAVE_NETLINK + +#include "log.h" +#include "rib.h" +#include "vty.h" +#include "prefix.h" + +#include "zebra/zserv.h" +#include "zebra/zebra_router.h" +#include "zebra/zebra_dplane.h" +#include "zebra/zebra_ns.h" +#include "zebra/zebra_vrf.h" +#include "zebra/kernel_netlink.h" +#include "zebra/rt_netlink.h" +#include "nexthop.h" + +#include "zebra/zebra_fpm_private.h" +#include "zebra/zebra_vxlan_private.h" +#include "zebra/interface.h" + +/* + * af_addr_size + * + * The size of an address in a given address family. + */ +static size_t af_addr_size(uint8_t af) +{ + switch (af) { + + case AF_INET: + return 4; + case AF_INET6: + return 16; + default: + assert(0); + return 16; + } +} + +/* + * We plan to use RTA_ENCAP_TYPE attribute for VxLAN encap as well. + * Currently, values 0 to 8 for this attribute are used by lwtunnel_encap_types + * So, we cannot use these values for VxLAN encap. + */ +enum fpm_nh_encap_type_t { + FPM_NH_ENCAP_NONE = 0, + FPM_NH_ENCAP_VXLAN = 100, + FPM_NH_ENCAP_MAX, +}; + +/* + * fpm_nh_encap_type_to_str + */ +static const char *fpm_nh_encap_type_to_str(enum fpm_nh_encap_type_t encap_type) +{ + switch (encap_type) { + case FPM_NH_ENCAP_NONE: + return "none"; + + case FPM_NH_ENCAP_VXLAN: + return "VxLAN"; + + case FPM_NH_ENCAP_MAX: + return "invalid"; + } + + return "invalid"; +} + +struct vxlan_encap_info_t { + vni_t vni; +}; + +enum vxlan_encap_info_type_t { + VXLAN_VNI = 0, +}; + +struct fpm_nh_encap_info_t { + enum fpm_nh_encap_type_t encap_type; + union { + struct vxlan_encap_info_t vxlan_encap; + }; +}; + +/* + * netlink_nh_info + * + * Holds information about a single nexthop for netlink. 
These info + * structures are transient and may contain pointers into rib + * data structures for convenience. + */ +struct netlink_nh_info { + /* Weight of the nexthop ( for unequal cost ECMP ) */ + uint8_t weight; + uint32_t if_index; + union g_addr *gateway; + + /* + * Information from the struct nexthop from which this nh was + * derived. For debug purposes only. + */ + int recursive; + enum nexthop_types_t type; + struct fpm_nh_encap_info_t encap_info; +}; + +/* + * netlink_route_info + * + * A structure for holding information for a netlink route message. + */ +struct netlink_route_info { + uint32_t nlmsg_pid; + uint16_t nlmsg_type; + uint8_t rtm_type; + uint32_t rtm_table; + uint8_t rtm_protocol; + uint8_t af; + struct prefix *prefix; + uint32_t *metric; + unsigned int num_nhs; + + /* + * Nexthop structures + */ + struct netlink_nh_info nhs[MULTIPATH_NUM]; + union g_addr *pref_src; +}; + +/* + * netlink_route_info_add_nh + * + * Add information about the given nexthop to the given route info + * structure. + * + * Returns true if a nexthop was added, false otherwise. + */ +static int netlink_route_info_add_nh(struct netlink_route_info *ri, + struct nexthop *nexthop, + struct route_entry *re) +{ + struct netlink_nh_info nhi; + union g_addr *src; + struct zebra_vrf *zvrf = NULL; + struct interface *ifp = NULL, *link_if = NULL; + struct zebra_if *zif = NULL; + vni_t vni = 0; + + memset(&nhi, 0, sizeof(nhi)); + src = NULL; + + if (ri->num_nhs >= (int)array_size(ri->nhs)) + return 0; + + nhi.recursive = nexthop->rparent ? 
1 : 0; + nhi.type = nexthop->type; + nhi.if_index = nexthop->ifindex; + nhi.weight = nexthop->weight; + + if (nexthop->type == NEXTHOP_TYPE_IPV4 + || nexthop->type == NEXTHOP_TYPE_IPV4_IFINDEX) { + nhi.gateway = &nexthop->gate; + if (nexthop->src.ipv4.s_addr != INADDR_ANY) + src = &nexthop->src; + } + + if (nexthop->type == NEXTHOP_TYPE_IPV6 + || nexthop->type == NEXTHOP_TYPE_IPV6_IFINDEX) { + /* Special handling for IPv4 route with IPv6 Link Local next hop + */ + if (ri->af == AF_INET) + nhi.gateway = &ipv4ll_gateway; + else + nhi.gateway = &nexthop->gate; + } + + if (nexthop->type == NEXTHOP_TYPE_IFINDEX) { + if (nexthop->src.ipv4.s_addr != INADDR_ANY) + src = &nexthop->src; + } + + if (!nhi.gateway && nhi.if_index == 0) + return 0; + + if (CHECK_FLAG(nexthop->flags, NEXTHOP_FLAG_EVPN)) { + nhi.encap_info.encap_type = FPM_NH_ENCAP_VXLAN; + + /* Extract VNI id for the nexthop SVI interface */ + zvrf = zebra_vrf_lookup_by_id(nexthop->vrf_id); + if (zvrf) { + ifp = if_lookup_by_index_per_ns(zvrf->zns, + nexthop->ifindex); + if (ifp) { + zif = (struct zebra_if *)ifp->info; + if (zif) { + if (IS_ZEBRA_IF_BRIDGE(ifp)) + link_if = ifp; + else if (IS_ZEBRA_IF_VLAN(ifp)) + link_if = + if_lookup_by_index_per_ns( + zvrf->zns, + zif->link_ifindex); + if (link_if) + vni = vni_id_from_svi(ifp, + link_if); + } + } + } + + nhi.encap_info.vxlan_encap.vni = vni; + } + + /* + * We have a valid nhi. Copy the structure over to the route_info. + */ + ri->nhs[ri->num_nhs] = nhi; + ri->num_nhs++; + + if (src && !ri->pref_src) + ri->pref_src = src; + + return 1; +} + +/* + * netlink_proto_from_route_type + */ +static uint8_t netlink_proto_from_route_type(int type) +{ + switch (type) { + case ZEBRA_ROUTE_KERNEL: + case ZEBRA_ROUTE_CONNECT: + return RTPROT_KERNEL; + + default: + return RTPROT_ZEBRA; + } +} + +/* + * netlink_route_info_fill + * + * Fill out the route information object from the given route. + * + * Returns true on success and false on failure. 
+ */ +static int netlink_route_info_fill(struct netlink_route_info *ri, int cmd, + rib_dest_t *dest, struct route_entry *re) +{ + struct nexthop *nexthop; + struct rib_table_info *table_info = + rib_table_info(rib_dest_table(dest)); + struct zebra_vrf *zvrf = table_info->zvrf; + + memset(ri, 0, sizeof(*ri)); + + ri->prefix = rib_dest_prefix(dest); + ri->af = rib_dest_af(dest); + + if (zvrf && zvrf->zns) + ri->nlmsg_pid = zvrf->zns->netlink_dplane_out.snl.nl_pid; + + ri->nlmsg_type = cmd; + ri->rtm_table = table_info->table_id; + ri->rtm_protocol = RTPROT_UNSPEC; + + /* + * An RTM_DELROUTE need not be accompanied by any nexthops, + * particularly in our communication with the FPM. + */ + if (cmd == RTM_DELROUTE && !re) + return 1; + + if (!re) { + zfpm_debug("%s: Expected non-NULL re pointer", __func__); + return 0; + } + + ri->rtm_protocol = netlink_proto_from_route_type(re->type); + ri->rtm_type = RTN_UNICAST; + ri->metric = &re->metric; + + for (ALL_NEXTHOPS(re->nhe->nhg, nexthop)) { + if (ri->num_nhs >= zrouter.multipath_num) + break; + + if (CHECK_FLAG(nexthop->flags, NEXTHOP_FLAG_RECURSIVE)) + continue; + + if (nexthop->type == NEXTHOP_TYPE_BLACKHOLE) { + switch (nexthop->bh_type) { + case BLACKHOLE_ADMINPROHIB: + ri->rtm_type = RTN_PROHIBIT; + break; + case BLACKHOLE_REJECT: + ri->rtm_type = RTN_UNREACHABLE; + break; + case BLACKHOLE_NULL: + default: + ri->rtm_type = RTN_BLACKHOLE; + break; + } + } + + if ((cmd == RTM_NEWROUTE + && CHECK_FLAG(nexthop->flags, NEXTHOP_FLAG_ACTIVE)) + || (cmd == RTM_DELROUTE + && CHECK_FLAG(re->status, ROUTE_ENTRY_INSTALLED))) { + netlink_route_info_add_nh(ri, nexthop, re); + } + } + + if (ri->num_nhs == 0) { + switch (ri->rtm_type) { + case RTN_PROHIBIT: + case RTN_UNREACHABLE: + case RTN_BLACKHOLE: + break; + default: + /* If there is no useful nexthop then return. 
*/ + zfpm_debug( + "netlink_encode_route(): No useful nexthop."); + return 0; + } + } + + return 1; +} + +/* + * netlink_route_info_encode + * + * Returns the number of bytes written to the buffer. 0 or a negative + * value indicates an error. + */ +static int netlink_route_info_encode(struct netlink_route_info *ri, + char *in_buf, size_t in_buf_len) +{ + size_t bytelen; + unsigned int nexthop_num = 0; + size_t buf_offset; + struct netlink_nh_info *nhi; + enum fpm_nh_encap_type_t encap; + struct rtattr *nest, *inner_nest; + struct rtnexthop *rtnh; + struct vxlan_encap_info_t *vxlan; + struct in6_addr ipv6; + + struct { + struct nlmsghdr n; + struct rtmsg r; + char buf[1]; + } * req; + + req = (void *)in_buf; + + buf_offset = ((char *)req->buf) - ((char *)req); + + if (in_buf_len < buf_offset) { + assert(0); + return 0; + } + + memset(req, 0, buf_offset); + + bytelen = af_addr_size(ri->af); + + req->n.nlmsg_len = NLMSG_LENGTH(sizeof(struct rtmsg)); + req->n.nlmsg_flags = NLM_F_CREATE | NLM_F_REQUEST; + req->n.nlmsg_pid = ri->nlmsg_pid; + req->n.nlmsg_type = ri->nlmsg_type; + req->r.rtm_family = ri->af; + + /* + * rtm_table field is a uchar field which can accommodate table_id less + * than 256. + * To support table id greater than 255, if the table_id is greater than + * 255, set rtm_table to RT_TABLE_UNSPEC and add RTA_TABLE attribute + * with 32 bit value as the table_id. + */ + if (ri->rtm_table < 256) + req->r.rtm_table = ri->rtm_table; + else { + req->r.rtm_table = RT_TABLE_UNSPEC; + nl_attr_put32(&req->n, in_buf_len, RTA_TABLE, ri->rtm_table); + } + + req->r.rtm_dst_len = ri->prefix->prefixlen; + req->r.rtm_protocol = ri->rtm_protocol; + req->r.rtm_scope = RT_SCOPE_UNIVERSE; + + nl_attr_put(&req->n, in_buf_len, RTA_DST, &ri->prefix->u.prefix, + bytelen); + + req->r.rtm_type = ri->rtm_type; + + /* Metric. 
*/ + if (ri->metric) + nl_attr_put32(&req->n, in_buf_len, RTA_PRIORITY, *ri->metric); + + if (ri->num_nhs == 0) + goto done; + + if (ri->num_nhs == 1) { + nhi = &ri->nhs[0]; + + if (nhi->gateway) { + if (nhi->type == NEXTHOP_TYPE_IPV4_IFINDEX + && ri->af == AF_INET6) { + ipv4_to_ipv4_mapped_ipv6(&ipv6, + nhi->gateway->ipv4); + nl_attr_put(&req->n, in_buf_len, RTA_GATEWAY, + &ipv6, bytelen); + } else + nl_attr_put(&req->n, in_buf_len, RTA_GATEWAY, + nhi->gateway, bytelen); + } + + if (nhi->if_index) { + nl_attr_put32(&req->n, in_buf_len, RTA_OIF, + nhi->if_index); + } + + encap = nhi->encap_info.encap_type; + switch (encap) { + case FPM_NH_ENCAP_NONE: + case FPM_NH_ENCAP_MAX: + break; + case FPM_NH_ENCAP_VXLAN: + nl_attr_put16(&req->n, in_buf_len, RTA_ENCAP_TYPE, + encap); + vxlan = &nhi->encap_info.vxlan_encap; + nest = nl_attr_nest(&req->n, in_buf_len, RTA_ENCAP); + nl_attr_put32(&req->n, in_buf_len, VXLAN_VNI, + vxlan->vni); + nl_attr_nest_end(&req->n, nest); + break; + } + + goto done; + } + + /* + * Multipath case. 
+ */
+	nest = nl_attr_nest(&req->n, in_buf_len, RTA_MULTIPATH);
+
+	for (nexthop_num = 0; nexthop_num < ri->num_nhs; nexthop_num++) {
+		rtnh = nl_attr_rtnh(&req->n, in_buf_len);
+		nhi = &ri->nhs[nexthop_num];
+
+		if (nhi->gateway)
+			nl_attr_put(&req->n, in_buf_len, RTA_GATEWAY,
+				    nhi->gateway, bytelen);
+
+		if (nhi->if_index) {
+			rtnh->rtnh_ifindex = nhi->if_index;
+		}
+
+		/* Nexthop weight (for unequal-cost ECMP) rides in rtnh_hops. */
+		rtnh->rtnh_hops = nhi->weight;
+
+		encap = nhi->encap_info.encap_type;
+		switch (encap) {
+		case FPM_NH_ENCAP_NONE:
+		case FPM_NH_ENCAP_MAX:
+			break;
+		case FPM_NH_ENCAP_VXLAN:
+			nl_attr_put16(&req->n, in_buf_len, RTA_ENCAP_TYPE,
+				      encap);
+			vxlan = &nhi->encap_info.vxlan_encap;
+			inner_nest =
+				nl_attr_nest(&req->n, in_buf_len, RTA_ENCAP);
+			nl_attr_put32(&req->n, in_buf_len, VXLAN_VNI,
+				      vxlan->vni);
+			nl_attr_nest_end(&req->n, inner_nest);
+			break;
+		}
+
+		nl_attr_rtnh_end(&req->n, rtnh);
+	}
+
+	nl_attr_nest_end(&req->n, nest);
+	assert(nest->rta_len > RTA_LENGTH(0));
+
+done:
+
+	if (ri->pref_src) {
+		/*
+		 * Bug fix: pref_src is already a 'union g_addr *'; passing
+		 * '&ri->pref_src' would encode the bytes of the pointer
+		 * variable itself rather than the IPv4/IPv6 address.
+		 */
+		nl_attr_put(&req->n, in_buf_len, RTA_PREFSRC, ri->pref_src,
+			    bytelen);
+	}
+
+	assert(req->n.nlmsg_len < in_buf_len);
+	return req->n.nlmsg_len;
+}
+
+/*
+ * zfpm_log_route_info
+ *
+ * Helper function to log the information in a route_info structure.
+ */
+static void zfpm_log_route_info(struct netlink_route_info *ri,
+				const char *label)
+{
+	struct netlink_nh_info *nhi;
+	unsigned int i;
+	char buf[PREFIX_STRLEN];
+
+	zfpm_debug("%s : %s %pFX, Proto: %s, Metric: %u", label,
+		   nl_msg_type_to_str(ri->nlmsg_type), ri->prefix,
+		   nl_rtproto_to_str(ri->rtm_protocol),
+		   ri->metric ? *ri->metric : 0);
+
+	for (i = 0; i < ri->num_nhs; i++) {
+		nhi = &ri->nhs[i];
+
+		/*
+		 * Bug fix: 'gateway' is already a pointer, so the original
+		 * '&nhi->gateway' formatted the pointer's own storage, not
+		 * the address. It may also legitimately be NULL for
+		 * interface-only nexthops, so guard before dereferencing.
+		 */
+		if (!nhi->gateway)
+			snprintf(buf, sizeof(buf), "(none)");
+		else if (ri->af == AF_INET)
+			inet_ntop(AF_INET, nhi->gateway, buf, sizeof(buf));
+		else
+			inet_ntop(AF_INET6, nhi->gateway, buf, sizeof(buf));
+
+		zfpm_debug("	Intf: %u, Gateway: %s, Recursive: %s, Type: %s, Encap type: %s",
+			   nhi->if_index, buf, nhi->recursive ?
"yes" : "no", + nexthop_type_to_str(nhi->type), + fpm_nh_encap_type_to_str(nhi->encap_info.encap_type) + ); + } +} + +/* + * zfpm_netlink_encode_route + * + * Create a netlink message corresponding to the given route in the + * given buffer space. + * + * Returns the number of bytes written to the buffer. 0 or a negative + * value indicates an error. + */ +int zfpm_netlink_encode_route(int cmd, rib_dest_t *dest, struct route_entry *re, + char *in_buf, size_t in_buf_len) +{ + struct netlink_route_info ri_space, *ri; + + ri = &ri_space; + + if (!netlink_route_info_fill(ri, cmd, dest, re)) + return 0; + + zfpm_log_route_info(ri, __func__); + + return netlink_route_info_encode(ri, in_buf, in_buf_len); +} + +/* + * zfpm_netlink_encode_mac + * + * Create a netlink message corresponding to the given MAC. + * + * Returns the number of bytes written to the buffer. 0 or a negative + * value indicates an error. + */ +int zfpm_netlink_encode_mac(struct fpm_mac_info_t *mac, char *in_buf, + size_t in_buf_len) +{ + size_t buf_offset; + + struct macmsg { + struct nlmsghdr hdr; + struct ndmsg ndm; + char buf[0]; + } *req; + req = (void *)in_buf; + + buf_offset = offsetof(struct macmsg, buf); + if (in_buf_len < buf_offset) + return 0; + memset(req, 0, buf_offset); + + /* Construct nlmsg header */ + req->hdr.nlmsg_len = NLMSG_LENGTH(sizeof(struct ndmsg)); + req->hdr.nlmsg_type = CHECK_FLAG(mac->fpm_flags, ZEBRA_MAC_DELETE_FPM) ? 
+ RTM_DELNEIGH : RTM_NEWNEIGH; + req->hdr.nlmsg_flags = NLM_F_REQUEST; + if (req->hdr.nlmsg_type == RTM_NEWNEIGH) + req->hdr.nlmsg_flags |= (NLM_F_CREATE | NLM_F_REPLACE); + + /* Construct ndmsg */ + req->ndm.ndm_family = AF_BRIDGE; + req->ndm.ndm_ifindex = mac->vxlan_if; + + req->ndm.ndm_state = NUD_REACHABLE; + req->ndm.ndm_flags |= NTF_SELF | NTF_MASTER; + if (CHECK_FLAG(mac->zebra_flags, + (ZEBRA_MAC_STICKY | ZEBRA_MAC_REMOTE_DEF_GW))) + req->ndm.ndm_state |= NUD_NOARP; + else + req->ndm.ndm_flags |= NTF_EXT_LEARNED; + + /* Add attributes */ + nl_attr_put(&req->hdr, in_buf_len, NDA_LLADDR, &mac->macaddr, 6); + nl_attr_put(&req->hdr, in_buf_len, NDA_DST, &mac->r_vtep_ip, 4); + nl_attr_put32(&req->hdr, in_buf_len, NDA_MASTER, mac->svi_if); + nl_attr_put32(&req->hdr, in_buf_len, NDA_VNI, mac->vni); + + assert(req->hdr.nlmsg_len < in_buf_len); + + zfpm_debug("Tx %s family %s ifindex %u MAC %pEA DEST %pI4", + nl_msg_type_to_str(req->hdr.nlmsg_type), + nl_family_to_str(req->ndm.ndm_family), req->ndm.ndm_ifindex, + &mac->macaddr, &mac->r_vtep_ip); + + return req->hdr.nlmsg_len; +} + +#endif /* HAVE_NETLINK */ diff --git a/zebra/zebra_fpm_private.h b/zebra/zebra_fpm_private.h new file mode 100644 index 0000000..13415c7 --- /dev/null +++ b/zebra/zebra_fpm_private.h @@ -0,0 +1,106 @@ +/* + * Private header file for the zebra FPM module. + * + * Copyright (C) 2012 by Open Source Routing. + * Copyright (C) 2012 by Internet Systems Consortium, Inc. ("ISC") + * + * This file is part of GNU Zebra. + * + * GNU Zebra is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2, or (at your option) + * any later version. + * + * GNU Zebra is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. 
See the GNU + * General Public License for more details. + * + * You should have received a copy of the GNU General Public License along + * with this program; see the file COPYING; if not, write to the Free Software + * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA + */ + +#ifndef _ZEBRA_FPM_PRIVATE_H +#define _ZEBRA_FPM_PRIVATE_H + +#include "zebra/debug.h" + +#ifdef __cplusplus +extern "C" { +#endif + +#if defined __STDC_VERSION__ && __STDC_VERSION__ >= 199901L + +#define zfpm_debug(...) \ + do { \ + if (IS_ZEBRA_DEBUG_FPM) \ + zlog_debug("FPM: " __VA_ARGS__); \ + } while (0) + +#elif defined __GNUC__ + +#define zfpm_debug(_args...) \ + do { \ + if (IS_ZEBRA_DEBUG_FPM) \ + zlog_debug("FPM: " _args); \ + } while (0) + +#else +static inline void zfpm_debug(const char *format, ...) +{ + return; +} +#endif + +/* This structure contains the MAC addresses enqueued for FPM processing. */ +struct fpm_mac_info_t { + struct ethaddr macaddr; + uint32_t zebra_flags; /* Could be used to build FPM messages */ + vni_t vni; + ifindex_t vxlan_if; + ifindex_t svi_if; /* L2 or L3 Bridge interface */ + struct in_addr r_vtep_ip; /* Remote VTEP IP */ + + /* Linkage to put MAC on the FPM processing queue. */ + TAILQ_ENTRY(fpm_mac_info_t) fpm_mac_q_entries; + + uint8_t fpm_flags; + +#define ZEBRA_MAC_UPDATE_FPM 0x1 /* This flag indicates if we want to upadte + * data plane for this MAC. If a MAC is added + * and then deleted immediately, we do not want + * to update data plane for such operation. + * Unset the ZEBRA_MAC_UPDATE_FPM flag in this + * case. FPM thread while processing the queue + * node will check this flag and dequeue the + * node silently without sending any update to + * the data plane. + */ +#define ZEBRA_MAC_DELETE_FPM 0x2 /* This flag is set if it is a delete operation + * for the MAC. 
+ */ +}; + +/* + * Externs + */ +extern int zfpm_netlink_encode_route(int cmd, rib_dest_t *dest, + struct route_entry *re, char *in_buf, + size_t in_buf_len); + +extern int zfpm_protobuf_encode_route(rib_dest_t *dest, struct route_entry *re, + uint8_t *in_buf, size_t in_buf_len); + +extern int zfpm_netlink_encode_mac(struct fpm_mac_info_t *mac, char *in_buf, + size_t in_buf_len); + +extern struct route_entry *zfpm_route_for_update(rib_dest_t *dest); + +extern union g_addr ipv4ll_gateway; + +#ifdef __cplusplus +} +#endif + +#endif /* _ZEBRA_FPM_PRIVATE_H */ diff --git a/zebra/zebra_fpm_protobuf.c b/zebra/zebra_fpm_protobuf.c new file mode 100644 index 0000000..4b31cc0 --- /dev/null +++ b/zebra/zebra_fpm_protobuf.c @@ -0,0 +1,302 @@ +/* + * zebra_fpm_protobuf.c + * + * @copyright Copyright (C) 2016 Sproute Networks, Inc. + * + * @author Avneesh Sachdev <avneesh@sproute.com> + * + * This file is part of Quagga. + * + * Quagga is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License as published by the + * Free Software Foundation; either version 2, or (at your option) any + * later version. + * + * Quagga is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. 
+ * + * You should have received a copy of the GNU General Public License along + * with this program; see the file COPYING; if not, write to the Free Software + * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA + */ +#include <zebra.h> + +#include "log.h" +#include "rib.h" +#include "zserv.h" +#include "zebra_vrf.h" + +#include "qpb/qpb.pb-c.h" +#include "qpb/qpb.h" +#include "qpb/qpb_allocator.h" +#include "qpb/linear_allocator.h" +#include "fpm/fpm_pb.h" + +#include "zebra_router.h" +#include "zebra_fpm_private.h" + +/* + * create_delete_route_message + */ +static Fpm__DeleteRoute *create_delete_route_message(qpb_allocator_t *allocator, + rib_dest_t *dest, + struct route_entry *re) +{ + Fpm__DeleteRoute *msg; + + msg = QPB_ALLOC(allocator, typeof(*msg)); + if (!msg) { + assert(0); + return NULL; + } + + fpm__delete_route__init(msg); + msg->vrf_id = zvrf_id(rib_dest_vrf(dest)); + + qpb_address_family_set(&msg->address_family, rib_dest_af(dest)); + + /* + * XXX Hardcode subaddress family for now. 
+ */ + msg->sub_address_family = QPB__SUB_ADDRESS_FAMILY__UNICAST; + msg->key = fpm_route_key_create(allocator, rib_dest_prefix(dest)); + if (!msg->key) { + assert(0); + return NULL; + } + + return msg; +} + +/* + * add_nexthop + */ +static inline int add_nexthop(qpb_allocator_t *allocator, Fpm__AddRoute *msg, + rib_dest_t *dest, struct nexthop *nexthop) +{ + uint32_t if_index; + union g_addr *gateway, *src; + + gateway = src = NULL; + + if_index = nexthop->ifindex; + + if (nexthop->type == NEXTHOP_TYPE_IPV4 + || nexthop->type == NEXTHOP_TYPE_IPV4_IFINDEX) { + gateway = &nexthop->gate; + if (nexthop->src.ipv4.s_addr != INADDR_ANY) + src = &nexthop->src; + } + + if (nexthop->type == NEXTHOP_TYPE_IPV6 + || nexthop->type == NEXTHOP_TYPE_IPV6_IFINDEX) { + gateway = &nexthop->gate; + } + + if (nexthop->type == NEXTHOP_TYPE_IFINDEX) { + if (nexthop->src.ipv4.s_addr != INADDR_ANY) + src = &nexthop->src; + } + + if (!gateway && if_index == 0) + return 0; + + /* + * We have a valid nexthop. + */ + { + Fpm__Nexthop *pb_nh; + pb_nh = QPB_ALLOC(allocator, typeof(*pb_nh)); + if (!pb_nh) { + assert(0); + return 0; + } + + fpm__nexthop__init(pb_nh); + + if (if_index != 0) { + pb_nh->if_id = + qpb_if_identifier_create(allocator, if_index); + } + + if (gateway) { + pb_nh->address = qpb_l3_address_create( + allocator, gateway, rib_dest_af(dest)); + } + + msg->nexthops[msg->n_nexthops++] = pb_nh; + } + + // TODO: Use src. 
+ (void)src; + + return 1; +} + +/* + * create_add_route_message + */ +static Fpm__AddRoute *create_add_route_message(qpb_allocator_t *allocator, + rib_dest_t *dest, + struct route_entry *re) +{ + Fpm__AddRoute *msg; + struct nexthop *nexthop; + uint num_nhs, u; + struct nexthop *nexthops[MULTIPATH_NUM]; + + msg = QPB_ALLOC(allocator, typeof(*msg)); + if (!msg) { + assert(0); + return NULL; + } + + fpm__add_route__init(msg); + + msg->vrf_id = zvrf_id(rib_dest_vrf(dest)); + + qpb_address_family_set(&msg->address_family, rib_dest_af(dest)); + + /* + * XXX Hardcode subaddress family for now. + */ + msg->sub_address_family = QPB__SUB_ADDRESS_FAMILY__UNICAST; + msg->key = fpm_route_key_create(allocator, rib_dest_prefix(dest)); + qpb_protocol_set(&msg->protocol, re->type); + msg->has_route_type = 1; + msg->route_type = FPM__ROUTE_TYPE__NORMAL; + msg->metric = re->metric; + + /* + * Figure out the set of nexthops to be added to the message. + */ + num_nhs = 0; + for (ALL_NEXTHOPS(re->nhe->nhg, nexthop)) { + if (num_nhs >= zrouter.multipath_num) + break; + + if (num_nhs >= array_size(nexthops)) + break; + + if (nexthop->type == NEXTHOP_TYPE_BLACKHOLE) { + switch (nexthop->bh_type) { + case BLACKHOLE_REJECT: + msg->route_type = FPM__ROUTE_TYPE__UNREACHABLE; + break; + case BLACKHOLE_NULL: + default: + msg->route_type = FPM__ROUTE_TYPE__BLACKHOLE; + break; + } + return msg; + } + + if (CHECK_FLAG(nexthop->flags, NEXTHOP_FLAG_RECURSIVE)) + continue; + + if (!CHECK_FLAG(nexthop->flags, NEXTHOP_FLAG_ACTIVE)) + continue; + + nexthops[num_nhs] = nexthop; + num_nhs++; + } + + if (!num_nhs) { + zfpm_debug("netlink_encode_route(): No useful nexthop."); + assert(0); + return NULL; + } + + /* + * And add them to the message. 
+ */ + if (!(msg->nexthops = qpb_alloc_ptr_array(allocator, num_nhs))) { + assert(0); + return NULL; + } + + msg->n_nexthops = 0; + for (u = 0; u < num_nhs; u++) { + if (!add_nexthop(allocator, msg, dest, nexthops[u])) { + assert(0); + return NULL; + } + } + + assert(msg->n_nexthops == num_nhs); + + return msg; +} + +/* + * create_route_message + */ +static Fpm__Message *create_route_message(qpb_allocator_t *allocator, + rib_dest_t *dest, + struct route_entry *re) +{ + Fpm__Message *msg; + + msg = QPB_ALLOC(allocator, typeof(*msg)); + if (!msg) { + assert(0); + return NULL; + } + + fpm__message__init(msg); + + if (!re) { + msg->has_type = 1; + msg->type = FPM__MESSAGE__TYPE__DELETE_ROUTE; + msg->delete_route = + create_delete_route_message(allocator, dest, re); + if (!msg->delete_route) { + assert(0); + return NULL; + } + return msg; + } + + msg->has_type = 1; + msg->type = FPM__MESSAGE__TYPE__ADD_ROUTE; + msg->add_route = create_add_route_message(allocator, dest, re); + if (!msg->add_route) { + assert(0); + return NULL; + } + + return msg; +} + +/* + * zfpm_protobuf_encode_route + * + * Create a protobuf message corresponding to the given route in the + * given buffer space. + * + * Returns the number of bytes written to the buffer. 0 or a negative + * value indicates an error. + */ +int zfpm_protobuf_encode_route(rib_dest_t *dest, struct route_entry *re, + uint8_t *in_buf, size_t in_buf_len) +{ + Fpm__Message *msg; + QPB_DECLARE_STACK_ALLOCATOR(allocator, 4096); + size_t len; + + QPB_INIT_STACK_ALLOCATOR(allocator); + + msg = create_route_message(&allocator, dest, re); + if (!msg) { + assert(0); + return 0; + } + + len = fpm__message__pack(msg, in_buf); + assert(len <= in_buf_len); + + QPB_RESET_STACK_ALLOCATOR(allocator); + return len; +} diff --git a/zebra/zebra_gr.c b/zebra/zebra_gr.c new file mode 100644 index 0000000..56d0df5 --- /dev/null +++ b/zebra/zebra_gr.c @@ -0,0 +1,676 @@ +/* + * Zebra GR related helper functions. 
+ * + * Portions: + * Copyright (C) 2019 VMware, Inc. + * et al. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License as published by the Free + * Software Foundation; either version 2 of the License, or (at your option) + * any later version. + * + * This program is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for + * more details. + * + * You should have received a copy of the GNU General Public License along + * with this program; see the file COPYING; if not, write to the Free Software + * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA + */ + +#include <zebra.h> +#include <libgen.h> + +#include "lib/prefix.h" +#include "lib/command.h" +#include "lib/if.h" +#include "lib/thread.h" +#include "lib/stream.h" +#include "lib/memory.h" +#include "lib/table.h" +#include "lib/network.h" +#include "lib/sockunion.h" +#include "lib/log.h" +#include "lib/zclient.h" +#include "lib/privs.h" +#include "lib/network.h" +#include "lib/buffer.h" +#include "lib/nexthop.h" +#include "lib/vrf.h" +#include "lib/libfrr.h" +#include "lib/sockopt.h" + +#include "zebra/zebra_router.h" +#include "zebra/debug.h" +#include "zebra/zapi_msg.h" + + +/* + * Forward declaration. + */ +static struct zserv *zebra_gr_find_stale_client(struct zserv *client); +static void zebra_gr_route_stale_delete_timer_expiry(struct thread *thread); +static int32_t zebra_gr_delete_stale_routes(struct client_gr_info *info); +static void zebra_gr_process_client_stale_routes(struct zserv *client, + vrf_id_t vrf_id); + +/* + * Debug macros. + */ +#define LOG_GR(msg, ...) 
\ + do { \ + if (IS_ZEBRA_DEBUG_EVENT) \ + zlog_debug(msg, ##__VA_ARGS__); \ + } while (0) + + +/* + * Client connection functions + */ + +/* + * Function to clean all the stale clients, + * function will also clean up all per instance + * capabilities that are exchanged. + */ +void zebra_gr_stale_client_cleanup(struct list *client_list) +{ + struct listnode *node, *nnode; + struct zserv *s_client = NULL; + struct client_gr_info *info, *ninfo; + + /* Find the stale client */ + for (ALL_LIST_ELEMENTS(client_list, node, nnode, s_client)) { + + LOG_GR("%s: Stale client %s is being deleted", __func__, + zebra_route_string(s_client->proto)); + + TAILQ_FOREACH_SAFE (info, &s_client->gr_info_queue, gr_info, + ninfo) { + + /* Cancel the stale timer */ + if (info->t_stale_removal != NULL) { + THREAD_OFF(info->t_stale_removal); + info->t_stale_removal = NULL; + /* Process the stale routes */ + thread_execute( + zrouter.master, + zebra_gr_route_stale_delete_timer_expiry, + info, 1); + } + } + } +} + +/* + * A helper function to create client info. + */ +static struct client_gr_info *zebra_gr_client_info_create(struct zserv *client) +{ + struct client_gr_info *info; + + info = XCALLOC(MTYPE_TMP, sizeof(struct client_gr_info)); + + TAILQ_INSERT_TAIL(&(client->gr_info_queue), info, gr_info); + return info; +} + +/* + * A helper function to delete and destroy client info. + */ +static void zebra_gr_client_info_delte(struct zserv *client, + struct client_gr_info *info) +{ + TAILQ_REMOVE(&(client->gr_info_queue), info, gr_info); + + THREAD_OFF(info->t_stale_removal); + + XFREE(MTYPE_TMP, info->current_prefix); + + LOG_GR("%s: Instance info is being deleted for client %s", __func__, + zebra_route_string(client->proto)); + + /* Delete all the stale routes. */ + info->do_delete = true; + zebra_gr_delete_stale_routes(info); + + XFREE(MTYPE_TMP, info); +} + +/* + * Function to handle client when it disconnect. 
+ */ +int32_t zebra_gr_client_disconnect(struct zserv *client) +{ + struct zserv *stale_client; + struct timeval tv; + struct client_gr_info *info = NULL; + + /* Find the stale client */ + stale_client = zebra_gr_find_stale_client(client); + + /* + * We should never be here. + */ + if (stale_client) { + LOG_GR("%s: Stale client %s exist, we should not be here!", + __func__, zebra_route_string(client->proto)); + assert(0); + } + + client->restart_time = monotime(&tv); + + /* For all the GR instance start the stale removal timer. */ + TAILQ_FOREACH (info, &client->gr_info_queue, gr_info) { + if (ZEBRA_CLIENT_GR_ENABLED(info->capabilities) + && (info->t_stale_removal == NULL)) { + thread_add_timer( + zrouter.master, + zebra_gr_route_stale_delete_timer_expiry, info, + info->stale_removal_time, + &info->t_stale_removal); + info->current_afi = AFI_IP; + info->stale_client_ptr = client; + info->stale_client = true; + LOG_GR("%s: Client %s Stale timer update to %d", + __func__, zebra_route_string(client->proto), + info->stale_removal_time); + } + } + + listnode_add(zrouter.stale_client_list, client); + + return 0; +} + +/* + * Function to delete stale client + */ +static void zebra_gr_delete_stale_client(struct client_gr_info *info) +{ + struct client_gr_info *bgp_info; + struct zserv *s_client = NULL; + + s_client = info->stale_client_ptr; + + if (!s_client || !info->stale_client) + return; + + /* + * If there are bgp instances with the stale delete timer pending + * then stale client is not deleted + */ + if ((s_client->gr_instance_count > 0) && info->gr_enable) + s_client->gr_instance_count--; + + TAILQ_REMOVE(&(s_client->gr_info_queue), info, gr_info); + + LOG_GR("%s: Client %s gr count %d", __func__, + zebra_route_string(s_client->proto), + s_client->gr_instance_count); + + TAILQ_FOREACH (bgp_info, &s_client->gr_info_queue, gr_info) { + if (bgp_info->t_stale_removal != NULL) + return; + } + + LOG_GR("%s: Client %s is being deleted", __func__, + 
zebra_route_string(s_client->proto)); + + TAILQ_INIT(&(s_client->gr_info_queue)); + listnode_delete(zrouter.stale_client_list, s_client); + if (info->stale_client) + XFREE(MTYPE_TMP, s_client); + XFREE(MTYPE_TMP, info); +} + +/* + * Function to find stale client. + */ +static struct zserv *zebra_gr_find_stale_client(struct zserv *client) +{ + struct listnode *node, *nnode; + struct zserv *stale_client; + + /* Find the stale client */ + for (ALL_LIST_ELEMENTS(zrouter.stale_client_list, node, nnode, + stale_client)) { + if (client->proto == stale_client->proto + && client->instance == stale_client->instance) { + return stale_client; + } + } + + return NULL; +} + +/* + * Function to handle reconnect of client post restart. + */ +void zebra_gr_client_reconnect(struct zserv *client) +{ + struct listnode *node, *nnode; + struct zserv *old_client = NULL; + struct client_gr_info *info = NULL; + + /* Find the stale client */ + for (ALL_LIST_ELEMENTS(zrouter.stale_client_list, node, nnode, + old_client)) { + if (client->proto == old_client->proto + && client->instance == old_client->instance) + break; + } + + /* Copy the timers */ + if (!old_client) + return; + + client->gr_instance_count = old_client->gr_instance_count; + client->restart_time = old_client->restart_time; + + LOG_GR("%s : old client %s, gr_instance_count %d", __func__, + zebra_route_string(old_client->proto), + old_client->gr_instance_count); + + if (TAILQ_FIRST(&old_client->gr_info_queue)) { + TAILQ_CONCAT(&client->gr_info_queue, &old_client->gr_info_queue, + gr_info); + TAILQ_INIT(&old_client->gr_info_queue); + } + + TAILQ_FOREACH (info, &client->gr_info_queue, gr_info) { + info->stale_client_ptr = client; + info->stale_client = false; + } + + /* Delete the stale client */ + listnode_delete(zrouter.stale_client_list, old_client); + /* Delete old client */ + XFREE(MTYPE_TMP, old_client); +} + +/* + * Functions to deal with capabilities + */ + +/* + * Update the graceful restart information + * for the client 
instance. + * This function handles all the capabilities that are received. + */ +static void zebra_client_update_info(struct zserv *client, struct zapi_cap *api) +{ + struct client_gr_info *info = NULL; + + /* Find the bgp information for the specified vrf id */ + TAILQ_FOREACH (info, &client->gr_info_queue, gr_info) { + if (info->vrf_id == api->vrf_id) + break; + } + + + /* + * If the command is delete, then cancel the stale timer and + * delete the bgp info + */ + switch (api->cap) { + case ZEBRA_CLIENT_GR_DISABLE: + if (!info) + return; + + LOG_GR("%s: Client %s instance GR disabled count %d", __func__, + zebra_route_string(client->proto), + client->gr_instance_count); + + if ((info->gr_enable) && (client->gr_instance_count > 0)) + client->gr_instance_count--; + + zebra_gr_client_info_delte(client, info); + break; + case ZEBRA_CLIENT_GR_CAPABILITIES: + /* Allocate bgp info */ + if (!info) + info = zebra_gr_client_info_create(client); + + /* Update other parameters */ + if (!info->gr_enable) { + client->gr_instance_count++; + + LOG_GR("%s: Cient %s GR enabled count %d", __func__, + zebra_route_string(client->proto), + client->gr_instance_count); + + info->capabilities = api->cap; + info->stale_removal_time = api->stale_removal_time; + info->vrf_id = api->vrf_id; + info->gr_enable = true; + } + break; + case ZEBRA_CLIENT_RIB_STALE_TIME: + LOG_GR("%s: Client %s stale time update event", __func__, + zebra_route_string(client->proto)); + + /* Update the stale removal timer */ + if (info && info->t_stale_removal == NULL) { + + LOG_GR("%s: Stale time: %d is now update to: %d", + __func__, info->stale_removal_time, + api->stale_removal_time); + + info->stale_removal_time = api->stale_removal_time; + } + + break; + case ZEBRA_CLIENT_ROUTE_UPDATE_COMPLETE: + LOG_GR( + "%s: Client %s route update complete for AFI %d, SAFI %d", + __func__, zebra_route_string(client->proto), api->afi, + api->safi); + if (info) + info->route_sync[api->afi][api->safi] = true; + break; + case 
ZEBRA_CLIENT_ROUTE_UPDATE_PENDING: + LOG_GR("%s: Client %s route update pending for AFI %d, SAFI %d", + __func__, zebra_route_string(client->proto), api->afi, + api->safi); + if (info) + info->af_enabled[api->afi][api->safi] = true; + break; + } +} + +/* + * Handler for capabilities that are received from client. + */ +static void zebra_client_capabilities_handler(struct zserv *client, + struct zapi_cap *api) +{ + switch (api->cap) { + case ZEBRA_CLIENT_GR_CAPABILITIES: + case ZEBRA_CLIENT_ROUTE_UPDATE_PENDING: + case ZEBRA_CLIENT_GR_DISABLE: + case ZEBRA_CLIENT_RIB_STALE_TIME: + /* + * For all the cases we need to update the client info. + */ + zebra_client_update_info(client, api); + break; + case ZEBRA_CLIENT_ROUTE_UPDATE_COMPLETE: + /* + * After client info has been updated delete all + * stale routes + */ + zebra_client_update_info(client, api); + zebra_gr_process_client_stale_routes(client, api->vrf_id); + break; + } +} + +/* + * Function to decode and call appropriate functions + * to handle client capabilities. + */ +void zread_client_capabilities(ZAPI_HANDLER_ARGS) +{ + struct zapi_cap api; + struct stream *s; + + s = msg; + + if (zapi_capabilities_decode(s, &api)) { + LOG_GR("%s: Error in reading capabilities for client %s", + __func__, zebra_route_string(client->proto)); + return; + } + + /* GR only for dynamic clients */ + if (client->proto <= ZEBRA_ROUTE_CONNECT) { + LOG_GR("%s: GR capabilities for client %s not supported", + __func__, zebra_route_string(client->proto)); + return; + } + /* Call the capabilities handler */ + zebra_client_capabilities_handler(client, &api); +} + + +/* + * Stale route handling + */ + +/* + * Delete all the stale routes that have not been refreshed + * post restart. 
 */
static void zebra_gr_route_stale_delete_timer_expiry(struct thread *thread)
{
	struct client_gr_info *info;
	int32_t cnt = 0;
	struct zserv *client;

	info = THREAD_ARG(thread);
	/* Timer has fired; clear the handle before possibly re-arming. */
	info->t_stale_removal = NULL;
	client = (struct zserv *)info->stale_client_ptr;

	/* Set the flag to indicate all stale route deletion */
	/* NOTE(review): u.val == 1 appears to mean "delete everything in one
	 * pass" (set by thread_execute callers) — confirm against callers.
	 */
	if (thread->u.val == 1)
		info->do_delete = true;

	cnt = zebra_gr_delete_stale_routes(info);

	/* Restart the timer: a positive count means the walk was paused
	 * after a batch and more routes remain to be processed.
	 */
	if (cnt > 0) {
		LOG_GR("%s: Client %s processed %d routes. Start timer again",
		       __func__, zebra_route_string(client->proto), cnt);

		thread_add_timer(zrouter.master,
				 zebra_gr_route_stale_delete_timer_expiry, info,
				 ZEBRA_DEFAULT_STALE_UPDATE_DELAY,
				 &info->t_stale_removal);
	} else {
		/* No routes to delete for the VRF */
		LOG_GR("%s: Client %s all stale routes processed", __func__,
		       zebra_route_string(client->proto));

		/* Release the saved walk-resume state and the GR client. */
		XFREE(MTYPE_TMP, info->current_prefix);
		info->current_afi = 0;
		zebra_gr_delete_stale_client(info);
	}
}


/*
 * Function to process to check if route entry is stale
 * or has been updated.
 */
static void zebra_gr_process_route_entry(struct zserv *client,
					 struct route_node *rn,
					 struct route_entry *re)
{
	if ((client == NULL) || (rn == NULL) || (re == NULL))
		return;

	/* If the route is not refreshed after restart, delete the entry */
	if (re->uptime < client->restart_time) {
		if (IS_ZEBRA_DEBUG_RIB)
			zlog_debug("%s: Client %s stale route %pFX is deleted",
				   __func__, zebra_route_string(client->proto),
				   &rn->p);
		rib_delnode(rn, re);
	}
}

/*
 * This function walks through the route table for all vrf and deletes
 * the stale routes for the restarted client specified by the protocol
 * type
 */
static int32_t zebra_gr_delete_stale_route(struct client_gr_info *info,
					   struct zebra_vrf *zvrf)
{
	struct route_node *rn, *curr;
	struct route_entry *re;
	struct route_entry *next;
	struct route_table *table;
	/* Count of entries handed to zebra_gr_process_route_entry in this
	 * batch; used to pause the walk at ZEBRA_MAX_STALE_ROUTE_COUNT.
	 */
	int32_t n = 0;
	afi_t afi, curr_afi;
	uint8_t proto;
	uint16_t instance;
	struct zserv *s_client;

	if ((info == NULL) || (zvrf == NULL))
		return -1;

	s_client = info->stale_client_ptr;
	if (s_client == NULL) {
		LOG_GR("%s: Stale client not present", __func__);
		return -1;
	}

	proto = s_client->proto;
	instance = s_client->instance;
	/* Resume from the AFI where a previous batch was paused. */
	curr_afi = info->current_afi;

	LOG_GR("%s: Client %s stale routes are being deleted", __func__,
	       zebra_route_string(proto));

	/* Process routes for all AFI */
	for (afi = curr_afi; afi < AFI_MAX; afi++) {
		table = zvrf->table[afi][SAFI_UNICAST];

		if (table) {
			/*
			 * If the current prefix is NULL then get the first
			 * route entry in the table
			 */
			if (info->current_prefix == NULL) {
				rn = route_top(table);
				if (rn == NULL)
					continue;
				curr = rn;
			} else
				/* Get the next route entry */
				curr = route_table_get_next(
					table, info->current_prefix);

			for (rn = curr; rn; rn = srcdest_route_next(rn)) {
				RNODE_FOREACH_RE_SAFE (rn, re, next) {
					if (CHECK_FLAG(re->status,
						       ROUTE_ENTRY_REMOVED))
						continue;
					/* If the route refresh is received
					 * after restart then do not delete
					 * the route
					 */
					if (re->type == proto
					    && re->instance == instance) {
						zebra_gr_process_route_entry(
							s_client, rn, re);
						n++;
					}

					/* If the max route count is reached
					 * then timer thread will be restarted
					 * Store the current prefix and afi
					 */
					if ((n >= ZEBRA_MAX_STALE_ROUTE_COUNT)
					    && (info->do_delete == false)) {
						info->current_afi = afi;
						info->current_prefix = XCALLOC(
							MTYPE_TMP,
							sizeof(struct prefix));
						prefix_copy(
							info->current_prefix,
							&rn->p);
						return n;
					}
				}
			}
		}
		/*
		 * Reset the current prefix to indicate processing completion
		 * of the current AFI
		 */
		XFREE(MTYPE_TMP, info->current_prefix);
	}
	/* Walk finished: 0 tells the caller not to re-arm the timer. */
	return 0;
}

/*
 * Delete the stale routes when client is restarted and routes are not
 * refreshed within the stale timeout
 */
static int32_t zebra_gr_delete_stale_routes(struct client_gr_info *info)
{
	struct vrf *vrf;
	struct zebra_vrf *zvrf;
	/* NOTE(review): cnt is uint64_t but receives an int32_t that may be
	 * -1 and is returned as int32_t — works by truncation, but the
	 * signed/unsigned mix is fragile; consider int32_t.
	 */
	uint64_t cnt = 0;

	if (info == NULL)
		return -1;

	/* Get the current VRF */
	vrf = vrf_lookup_by_id(info->vrf_id);
	if (vrf == NULL) {
		LOG_GR("%s: Invalid VRF %d", __func__, info->vrf_id);
		return -1;
	}

	zvrf = vrf->info;
	if (zvrf == NULL) {
		LOG_GR("%s: Invalid VRF entry %d", __func__, info->vrf_id);
		return -1;
	}

	cnt = zebra_gr_delete_stale_route(info, zvrf);
	return cnt;
}

/*
 * This function checks if route update for all AFI, SAFI is completed
 * and cancels the stale timer
 */
static void zebra_gr_process_client_stale_routes(struct zserv *client,
						 vrf_id_t vrf_id)
{
	struct client_gr_info *info = NULL;
	afi_t afi;
	safi_t safi;

	/* Locate the GR info for this VRF. */
	TAILQ_FOREACH (info, &client->gr_info_queue, gr_info) {
		if (info->vrf_id == vrf_id)
			break;
	}

	if (info == NULL)
		return;

	/* Check if route update completed for all AFI, SAFI */
	FOREACH_AFI_SAFI_NSF (afi, safi) {
		if (info->af_enabled[afi][safi]) {
			if (!info->route_sync[afi][safi]) {
				LOG_GR("%s: Client %s route update not completed for AFI %d, 
SAFI %d", + __func__, + zebra_route_string(client->proto), afi, + safi); + return; + } + } + } + + /* + * Route update completed for all AFI, SAFI + * Cancel the stale timer and process the routes + */ + if (info->t_stale_removal) { + LOG_GR("%s: Client %s canceled stale delete timer vrf %d", + __func__, zebra_route_string(client->proto), + info->vrf_id); + THREAD_OFF(info->t_stale_removal); + thread_execute(zrouter.master, + zebra_gr_route_stale_delete_timer_expiry, info, + 0); + } +} diff --git a/zebra/zebra_l2.c b/zebra/zebra_l2.c new file mode 100644 index 0000000..8a9f3df --- /dev/null +++ b/zebra/zebra_l2.c @@ -0,0 +1,551 @@ +/* + * Zebra Layer-2 interface handling code + * Copyright (C) 2016, 2017 Cumulus Networks, Inc. + * + * This file is part of FRR. + * + * FRR is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License as published by the + * Free Software Foundation; either version 2, or (at your option) any + * later version. + * + * FRR is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with FRR; see the file COPYING. If not, write to the Free + * Software Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA + * 02111-1307, USA. 
+ */ + +#include <zebra.h> + +#include "if.h" +#include "prefix.h" +#include "table.h" +#include "memory.h" +#include "log.h" +#include "linklist.h" +#include "stream.h" +#include "hash.h" +#include "jhash.h" + +#include "zebra/rib.h" +#include "zebra/rt.h" +#include "zebra/zebra_ns.h" +#include "zebra/zserv.h" +#include "zebra/debug.h" +#include "zebra/interface.h" +#include "zebra/zebra_vrf.h" +#include "zebra/rt_netlink.h" +#include "zebra/interface.h" +#include "zebra/zebra_l2.h" +#include "zebra/zebra_vxlan.h" +#include "zebra/zebra_evpn_mh.h" + +/* definitions */ + +/* static function declarations */ + +/* Private functions */ +static void map_slaves_to_bridge(struct interface *br_if, int link, + bool update_slave, uint8_t chgflags) +{ + struct vrf *vrf; + struct interface *ifp; + struct zebra_vrf *zvrf; + struct zebra_ns *zns; + + zvrf = br_if->vrf->info; + assert(zvrf); + zns = zvrf->zns; + assert(zns); + RB_FOREACH (vrf, vrf_name_head, &vrfs_by_name) { + FOR_ALL_INTERFACES (vrf, ifp) { + struct zebra_if *zif; + struct zebra_l2info_brslave *br_slave; + + if (ifp->ifindex == IFINDEX_INTERNAL || !ifp->info) + continue; + if (!IS_ZEBRA_IF_BRIDGE_SLAVE(ifp)) + continue; + + /* NOTE: This assumes 'zebra_l2info_brslave' is the + * first field + * for any L2 interface. 
+ */ + zif = (struct zebra_if *)ifp->info; + br_slave = &zif->brslave_info; + + if (link) { + if (br_slave->bridge_ifindex == br_if->ifindex + && br_slave->ns_id == zns->ns_id) { + br_slave->br_if = br_if; + if (update_slave) { + zebra_l2if_update_bridge_slave( + ifp, + br_slave->bridge_ifindex, + br_slave->ns_id, + chgflags); + } + } + } else { + if (br_slave->br_if == br_if) + br_slave->br_if = NULL; + } + } + } +} + +/* Public functions */ +void zebra_l2_map_slave_to_bridge(struct zebra_l2info_brslave *br_slave, + struct zebra_ns *zns) +{ + struct interface *br_if; + + /* TODO: Handle change of master */ + assert(zns); + br_if = if_lookup_by_index_per_ns(zebra_ns_lookup(zns->ns_id), + br_slave->bridge_ifindex); + if (br_if) + br_slave->br_if = br_if; +} + +void zebra_l2_unmap_slave_from_bridge(struct zebra_l2info_brslave *br_slave) +{ + br_slave->br_if = NULL; +} + +/* If any of the bond members are in bypass state the bond is placed + * in bypass state + */ +static void zebra_l2_bond_lacp_bypass_eval(struct zebra_if *bond_zif) +{ + struct listnode *node; + struct zebra_if *bond_mbr; + bool old_bypass = !!(bond_zif->flags & ZIF_FLAG_LACP_BYPASS); + bool new_bypass = false; + + if (bond_zif->bond_info.mbr_zifs) { + for (ALL_LIST_ELEMENTS_RO(bond_zif->bond_info.mbr_zifs, node, + bond_mbr)) { + if (bond_mbr->flags & ZIF_FLAG_LACP_BYPASS) { + new_bypass = true; + break; + } + } + } + + if (old_bypass == new_bypass) + return; + + if (IS_ZEBRA_DEBUG_EVPN_MH_ES || IS_ZEBRA_DEBUG_EVENT) + zlog_debug("bond %s lacp bypass changed to %s", + bond_zif->ifp->name, new_bypass ? 
"on" : "off"); + + if (new_bypass) + bond_zif->flags |= ZIF_FLAG_LACP_BYPASS; + else + bond_zif->flags &= ~ZIF_FLAG_LACP_BYPASS; + + if (bond_zif->es_info.es) + zebra_evpn_es_bypass_update(bond_zif->es_info.es, bond_zif->ifp, + new_bypass); +} + +/* Returns true if member was newly linked to bond */ +void zebra_l2_map_slave_to_bond(struct zebra_if *zif, vrf_id_t vrf_id) +{ + struct interface *bond_if; + struct zebra_if *bond_zif; + struct zebra_l2info_bondslave *bond_slave = &zif->bondslave_info; + + bond_if = if_lookup_by_index(bond_slave->bond_ifindex, vrf_id); + if (bond_if == bond_slave->bond_if) + return; + + /* unlink the slave from the old master */ + zebra_l2_unmap_slave_from_bond(zif); + + /* If the bond is present and ready link the bond-member + * to it + */ + if (bond_if && (bond_zif = bond_if->info)) { + if (bond_zif->bond_info.mbr_zifs) { + if (IS_ZEBRA_DEBUG_EVPN_MH_ES || IS_ZEBRA_DEBUG_EVENT) + zlog_debug("bond mbr %s linked to %s", + zif->ifp->name, bond_if->name); + bond_slave->bond_if = bond_if; + /* link the slave to the new bond master */ + listnode_add(bond_zif->bond_info.mbr_zifs, zif); + /* inherit protodown flags from the es-bond */ + if (zebra_evpn_is_es_bond(bond_if)) + zebra_evpn_mh_update_protodown_bond_mbr( + zif, false /*clear*/, __func__); + zebra_l2_bond_lacp_bypass_eval(bond_zif); + } + } else { + if (IS_ZEBRA_DEBUG_EVPN_MH_ES || IS_ZEBRA_DEBUG_EVENT) + zlog_debug("bond mbr %s link to bond skipped", + zif->ifp->name); + } +} + +void zebra_l2_unmap_slave_from_bond(struct zebra_if *zif) +{ + struct zebra_l2info_bondslave *bond_slave = &zif->bondslave_info; + struct zebra_if *bond_zif; + + if (!bond_slave->bond_if) { + if (IS_ZEBRA_DEBUG_EVPN_MH_ES || IS_ZEBRA_DEBUG_EVENT) + zlog_debug("bond mbr %s unlink from bond skipped", + zif->ifp->name); + return; + } + + if (IS_ZEBRA_DEBUG_EVPN_MH_ES || IS_ZEBRA_DEBUG_EVENT) + zlog_debug("bond mbr %s un-linked from %s", zif->ifp->name, + bond_slave->bond_if->name); + + /* unlink the slave from 
the bond master */ + bond_zif = bond_slave->bond_if->info; + /* clear protodown flags */ + if (zebra_evpn_is_es_bond(bond_zif->ifp)) + zebra_evpn_mh_update_protodown_bond_mbr(zif, true /*clear*/, + __func__); + listnode_delete(bond_zif->bond_info.mbr_zifs, zif); + bond_slave->bond_if = NULL; + zebra_l2_bond_lacp_bypass_eval(bond_zif); +} + +void zebra_l2if_update_bond(struct interface *ifp, bool add) +{ + struct zebra_if *zif; + struct zebra_l2info_bond *bond; + + zif = ifp->info; + assert(zif); + bond = &zif->bond_info; + + if (add) { + if (!bond->mbr_zifs) { + if (IS_ZEBRA_DEBUG_EVPN_MH_ES || IS_ZEBRA_DEBUG_EVENT) + zlog_debug("bond %s mbr list create", + ifp->name); + bond->mbr_zifs = list_new(); + } + } else { + struct listnode *node; + struct listnode *nnode; + struct zebra_if *bond_mbr; + + if (!bond->mbr_zifs) + return; + + if (IS_ZEBRA_DEBUG_EVPN_MH_ES || IS_ZEBRA_DEBUG_EVENT) + zlog_debug("bond %s mbr list delete", ifp->name); + for (ALL_LIST_ELEMENTS(bond->mbr_zifs, node, nnode, bond_mbr)) + zebra_l2_unmap_slave_from_bond(bond_mbr); + + list_delete(&bond->mbr_zifs); + } +} + +/* + * Handle Bridge interface add or update. Update relevant info, + * map slaves (if any) to the bridge. + */ +void zebra_l2_bridge_add_update(struct interface *ifp, + struct zebra_l2info_bridge *bridge_info, + int add) +{ + struct zebra_if *zif; + + zif = ifp->info; + assert(zif); + + /* Copy over the L2 information. */ + memcpy(&zif->l2info.br, bridge_info, sizeof(*bridge_info)); + + /* Link all slaves to this bridge */ + map_slaves_to_bridge(ifp, 1, false, ZEBRA_BRIDGE_NO_ACTION); +} + +/* + * Handle Bridge interface delete. 
 */
void zebra_l2_bridge_del(struct interface *ifp)
{
	/* Unlink all slaves to this bridge */
	map_slaves_to_bridge(ifp, 0, false, ZEBRA_BRIDGE_NO_ACTION);
}

/* Re-run the slave update handler for all slaves of this bridge when
 * something changed on the bridge (chgflags != 0).
 */
void zebra_l2if_update_bridge(struct interface *ifp, uint8_t chgflags)
{
	if (!chgflags)
		return;
	map_slaves_to_bridge(ifp, 1, true, chgflags);
}

/*
 * Update L2 info for a VLAN interface. Only relevant parameter is the
 * VLAN Id and this cannot change.
 */
void zebra_l2_vlanif_update(struct interface *ifp,
			    struct zebra_l2info_vlan *vlan_info)
{
	struct zebra_if *zif;

	zif = ifp->info;
	assert(zif);

	/* Copy over the L2 information. */
	memcpy(&zif->l2info.vl, vlan_info, sizeof(*vlan_info));
}

/*
 * Update L2 info for a GRE interface. This is called upon interface
 * addition as well as update.
 * NOTE(review): the original comment said clients are informed about
 * GRE information, but no client notification happens in this body —
 * confirm whether that is done elsewhere.
 */
void zebra_l2_greif_add_update(struct interface *ifp,
			       struct zebra_l2info_gre *gre_info, int add)
{
	struct zebra_if *zif;
	struct in_addr old_vtep_ip;

	zif = ifp->info;
	assert(zif);

	if (add) {
		/* Initial add: record the full GRE info. */
		memcpy(&zif->l2info.gre, gre_info, sizeof(*gre_info));
		return;
	}

	/* Update: only the local tunnel IP is refreshed. */
	old_vtep_ip = zif->l2info.gre.vtep_ip;
	if (IPV4_ADDR_SAME(&old_vtep_ip, &gre_info->vtep_ip))
		return;

	zif->l2info.gre.vtep_ip = gre_info->vtep_ip;
}

/*
 * Update L2 info for a VxLAN interface. This is called upon interface
 * addition as well as update. Upon add, need to invoke the VNI create
 * function. Upon update, the params of interest are the local tunnel
 * IP and VLAN mapping, but the latter is handled separately.
 */
void zebra_l2_vxlanif_add_update(struct interface *ifp,
				 struct zebra_l2info_vxlan *vxlan_info, int add)
{
	struct zebra_if *zif;
	struct in_addr old_vtep_ip;
	uint16_t chgflags = 0;

	zif = ifp->info;
	assert(zif);

	if (add) {
		/* Initial add: store info, take a VLAN->VxLAN reference and
		 * create the VNI.
		 */
		memcpy(&zif->l2info.vxl, vxlan_info, sizeof(*vxlan_info));
		zebra_evpn_vl_vxl_ref(zif->l2info.vxl.access_vlan, zif);
		zebra_vxlan_if_add(ifp);
		return;
	}

	old_vtep_ip = zif->l2info.vxl.vtep_ip;

	/* Detect local tunnel-IP change. */
	if (!IPV4_ADDR_SAME(&old_vtep_ip, &vxlan_info->vtep_ip)) {
		chgflags |= ZEBRA_VXLIF_LOCAL_IP_CHANGE;
		zif->l2info.vxl.vtep_ip = vxlan_info->vtep_ip;
	}

	/* Detect multicast-group change. */
	if (!IPV4_ADDR_SAME(&zif->l2info.vxl.mcast_grp,
			    &vxlan_info->mcast_grp)) {
		chgflags |= ZEBRA_VXLIF_MCAST_GRP_CHANGE;
		zif->l2info.vxl.mcast_grp = vxlan_info->mcast_grp;
	}

	/* Notify the VxLAN/EVPN code only if something changed. */
	if (chgflags)
		zebra_vxlan_if_update(ifp, chgflags);
}

/*
 * Handle change to VLAN to VNI mapping.
 */
void zebra_l2_vxlanif_update_access_vlan(struct interface *ifp,
					 vlanid_t access_vlan)
{
	struct zebra_if *zif;
	vlanid_t old_access_vlan;

	zif = ifp->info;
	assert(zif);

	old_access_vlan = zif->l2info.vxl.access_vlan;
	if (old_access_vlan == access_vlan)
		return;

	zif->l2info.vxl.access_vlan = access_vlan;

	/* Move the VLAN reference from the old to the new access VLAN. */
	zebra_evpn_vl_vxl_deref(old_access_vlan, zif);
	zebra_evpn_vl_vxl_ref(zif->l2info.vxl.access_vlan, zif);
	zebra_vxlan_if_update(ifp, ZEBRA_VXLIF_VLAN_CHANGE);
}

/*
 * Handle VxLAN interface delete.
 */
void zebra_l2_vxlanif_del(struct interface *ifp)
{
	struct zebra_if *zif;

	zif = ifp->info;
	assert(zif);

	/* Release the VLAN reference before deleting the VNI. */
	zebra_evpn_vl_vxl_deref(zif->l2info.vxl.access_vlan, zif);
	zebra_vxlan_if_del(ifp);
}

/*
 * Map or unmap interface from bridge.
 * NOTE: It is currently assumed that an interface has to be unmapped
 * from a bridge before it can be mapped to another bridge.
+ */ +void zebra_l2if_update_bridge_slave(struct interface *ifp, + ifindex_t bridge_ifindex, ns_id_t ns_id, + uint8_t chgflags) +{ + struct zebra_if *zif; + ifindex_t old_bridge_ifindex; + ns_id_t old_ns_id; + struct zebra_vrf *zvrf; + + zif = ifp->info; + assert(zif); + + zvrf = ifp->vrf->info; + if (!zvrf) + return; + + if (zif->zif_type == ZEBRA_IF_VXLAN + && chgflags != ZEBRA_BRIDGE_NO_ACTION) { + if (chgflags & ZEBRA_BRIDGE_MASTER_MAC_CHANGE) + zebra_vxlan_if_update(ifp, + ZEBRA_VXLIF_MASTER_MAC_CHANGE); + if (chgflags & ZEBRA_BRIDGE_MASTER_UP) + zebra_vxlan_if_update(ifp, ZEBRA_VXLIF_MASTER_CHANGE); + } + old_bridge_ifindex = zif->brslave_info.bridge_ifindex; + old_ns_id = zif->brslave_info.ns_id; + if (old_bridge_ifindex == bridge_ifindex && + old_ns_id == zif->brslave_info.ns_id) + return; + + zif->brslave_info.ns_id = ns_id; + zif->brslave_info.bridge_ifindex = bridge_ifindex; + /* Set up or remove link with master */ + if (bridge_ifindex != IFINDEX_INTERNAL) { + zebra_l2_map_slave_to_bridge(&zif->brslave_info, zvrf->zns); + /* In the case of VxLAN, invoke the handler for EVPN. */ + if (zif->zif_type == ZEBRA_IF_VXLAN) + zebra_vxlan_if_update(ifp, ZEBRA_VXLIF_MASTER_CHANGE); + if (zif->es_info.es) + zebra_evpn_es_local_br_port_update(zif); + } else if (old_bridge_ifindex != IFINDEX_INTERNAL) { + /* + * In the case of VxLAN, invoke the handler for EVPN. + * Note that this should be done *prior* + * to unmapping the interface from the bridge. 
+ */ + if (zif->zif_type == ZEBRA_IF_VXLAN) + zebra_vxlan_if_update(ifp, ZEBRA_VXLIF_MASTER_CHANGE); + if (zif->es_info.es) + zebra_evpn_es_local_br_port_update(zif); + zebra_l2_unmap_slave_from_bridge(&zif->brslave_info); + } +} + +void zebra_l2if_update_bond_slave(struct interface *ifp, ifindex_t bond_ifindex, + bool new_bypass) +{ + struct zebra_if *zif; + ifindex_t old_bond_ifindex; + bool old_bypass; + struct zebra_l2info_bondslave *bond_mbr; + + zif = ifp->info; + assert(zif); + + old_bypass = !!(zif->flags & ZIF_FLAG_LACP_BYPASS); + if (old_bypass != new_bypass) { + if (IS_ZEBRA_DEBUG_EVPN_MH_ES || IS_ZEBRA_DEBUG_EVENT) + zlog_debug("bond-mbr %s lacp bypass changed to %s", + zif->ifp->name, new_bypass ? "on" : "off"); + + if (new_bypass) + zif->flags |= ZIF_FLAG_LACP_BYPASS; + else + zif->flags &= ~ZIF_FLAG_LACP_BYPASS; + + bond_mbr = &zif->bondslave_info; + if (bond_mbr->bond_if) { + struct zebra_if *bond_zif = bond_mbr->bond_if->info; + + zebra_l2_bond_lacp_bypass_eval(bond_zif); + } + } + + old_bond_ifindex = zif->bondslave_info.bond_ifindex; + if (old_bond_ifindex == bond_ifindex) + return; + + zif->bondslave_info.bond_ifindex = bond_ifindex; + + /* Set up or remove link with master */ + if (bond_ifindex != IFINDEX_INTERNAL) + zebra_l2_map_slave_to_bond(zif, ifp->vrf->vrf_id); + else if (old_bond_ifindex != IFINDEX_INTERNAL) + zebra_l2_unmap_slave_from_bond(zif); +} + +void zebra_vlan_bitmap_compute(struct interface *ifp, + uint32_t vid_start, uint16_t vid_end) +{ + uint32_t vid; + struct zebra_if *zif; + + zif = (struct zebra_if *)ifp->info; + assert(zif); + + for (vid = vid_start; vid <= vid_end; ++vid) + bf_set_bit(zif->vlan_bitmap, vid); +} + +void zebra_vlan_mbr_re_eval(struct interface *ifp, bitfield_t old_vlan_bitmap) +{ + uint32_t vid; + struct zebra_if *zif; + + zif = (struct zebra_if *)ifp->info; + assert(zif); + + if (!bf_cmp(zif->vlan_bitmap, old_vlan_bitmap)) + /* no change */ + return; + + bf_for_each_set_bit(zif->vlan_bitmap, vid, 
IF_VLAN_BITMAP_MAX) { + /* if not already set create new reference */ + if (!bf_test_index(old_vlan_bitmap, vid)) + zebra_evpn_vl_mbr_ref(vid, zif); + + /* also clear from the old vlan bitmap */ + bf_release_index(old_vlan_bitmap, vid); + } + + /* any bits remaining in the old vlan bitmap are stale references */ + bf_for_each_set_bit(old_vlan_bitmap, vid, IF_VLAN_BITMAP_MAX) { + zebra_evpn_vl_mbr_deref(vid, zif); + } +} diff --git a/zebra/zebra_l2.h b/zebra/zebra_l2.h new file mode 100644 index 0000000..1c3e981 --- /dev/null +++ b/zebra/zebra_l2.h @@ -0,0 +1,144 @@ +/* + * Zebra Layer-2 interface Data structures and definitions + * Copyright (C) 2016, 2017 Cumulus Networks, Inc. + * + * This file is part of FRR. + * + * FRR is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License as published by the + * Free Software Foundation; either version 2, or (at your option) any + * later version. + * + * FRR is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with FRR; see the file COPYING. If not, write to the Free + * Software Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA + * 02111-1307, USA. 
 */

#ifndef _ZEBRA_L2_H
#define _ZEBRA_L2_H

#include <zebra.h>

#include "if.h"
#include "vlan.h"
#include "vxlan.h"
#include "zebra/zebra_vrf.h"

#ifdef __cplusplus
extern "C" {
#endif

#define ZEBRA_BRIDGE_NO_ACTION (0)
#define ZEBRA_BRIDGE_MASTER_MAC_CHANGE (1 << 1)
#define ZEBRA_BRIDGE_MASTER_UP (1 << 2)

/* zebra L2 interface information - bridge slave (linkage to bridge) */
struct zebra_l2info_brslave {
	ifindex_t bridge_ifindex; /* Bridge Master */
	struct interface *br_if;  /* Pointer to master */
	ns_id_t ns_id; /* network namespace where bridge is */
};

struct zebra_l2info_bond {
	struct list *mbr_zifs; /* slaves using this bond as a master */
};

/* zebra L2 interface information - bridge interface */
struct zebra_l2info_bridge {
	uint8_t vlan_aware; /* VLAN-aware bridge? */
};

/* zebra L2 interface information - VLAN interface */
struct zebra_l2info_vlan {
	vlanid_t vid; /* VLAN id */
};

/* zebra L2 interface information - GRE interface */
struct zebra_l2info_gre {
	struct in_addr vtep_ip; /* IFLA_GRE_LOCAL */
	struct in_addr vtep_ip_remote; /* IFLA_GRE_REMOTE */
	uint32_t ikey;
	uint32_t okey;
	ifindex_t ifindex_link; /* Interface index of interface
				 * linked with GRE
				 */
	ns_id_t link_nsid;
};

/* zebra L2 interface information - VXLAN interface */
struct zebra_l2info_vxlan {
	vni_t vni;		/* VNI */
	struct in_addr vtep_ip; /* Local tunnel IP */
	vlanid_t access_vlan;	/* Access VLAN - for VLAN-aware bridge. */
	struct in_addr mcast_grp;
	ifindex_t ifindex_link; /* Interface index of interface
				 * linked with VXLAN
				 */
	ns_id_t link_nsid;
};

/* zebra L2 interface information - bond slave (linkage to bond) */
struct zebra_l2info_bondslave {
	ifindex_t bond_ifindex; /* Bond master ifindex (was mislabelled
				 * "Bridge Master" by copy-paste)
				 */
	struct interface *bond_if; /* Pointer to bond master */
};

union zebra_l2if_info {
	struct zebra_l2info_bridge br;
	struct zebra_l2info_vlan vl;
	struct zebra_l2info_vxlan vxl;
	struct zebra_l2info_gre gre;
};

/* NOTE: These macros are to be invoked only in the "correct" context.
 * IOW, the macro VNI_FROM_ZEBRA_IF() will assume the interface is
 * of type ZEBRA_IF_VXLAN.
 */
#define VNI_FROM_ZEBRA_IF(zif) (zif)->l2info.vxl.vni
#define VLAN_ID_FROM_ZEBRA_IF(zif) (zif)->l2info.vl.vid

#define IS_ZEBRA_IF_BRIDGE_VLAN_AWARE(zif) ((zif)->l2info.br.vlan_aware == 1)

extern void zebra_l2_map_slave_to_bridge(struct zebra_l2info_brslave *br_slave,
					 struct zebra_ns *zns);
extern void
zebra_l2_unmap_slave_from_bridge(struct zebra_l2info_brslave *br_slave);
extern void zebra_l2_bridge_add_update(struct interface *ifp,
				       struct zebra_l2info_bridge *bridge_info,
				       int add);
extern void zebra_l2_bridge_del(struct interface *ifp);
extern void zebra_l2_vlanif_update(struct interface *ifp,
				   struct zebra_l2info_vlan *vlan_info);
/* NOTE(review): parameter is GRE info despite the 'vxlan_info' name. */
extern void zebra_l2_greif_add_update(struct interface *ifp,
				      struct zebra_l2info_gre *vxlan_info,
				      int add);
extern void zebra_l2_vxlanif_add_update(struct interface *ifp,
					struct zebra_l2info_vxlan *vxlan_info,
					int add);
extern void zebra_l2_vxlanif_update_access_vlan(struct interface *ifp,
						vlanid_t access_vlan);
extern void zebra_l2_greif_del(struct interface *ifp);
extern void zebra_l2_vxlanif_del(struct interface *ifp);
extern void zebra_l2if_update_bridge_slave(struct interface *ifp,
					   ifindex_t bridge_ifindex,
					   ns_id_t ns_id, uint8_t chgflags);

extern void zebra_l2if_update_bond_slave(struct interface *ifp,
					 ifindex_t bond_ifindex, bool bypass);
extern void zebra_vlan_bitmap_compute(struct 
interface *ifp, + uint32_t vid_start, uint16_t vid_end); +extern void zebra_vlan_mbr_re_eval(struct interface *ifp, + bitfield_t vlan_bitmap); +extern void zebra_l2if_update_bond(struct interface *ifp, bool add); +extern void zebra_l2if_update_bridge(struct interface *ifp, uint8_t chgflags); + +#ifdef __cplusplus +} +#endif + +#endif /* _ZEBRA_L2_H */ diff --git a/zebra/zebra_mlag.c b/zebra/zebra_mlag.c new file mode 100644 index 0000000..2042f94 --- /dev/null +++ b/zebra/zebra_mlag.c @@ -0,0 +1,1213 @@ +/* Zebra Mlag Code. + * Copyright (C) 2018 Cumulus Networks, Inc. + * Donald Sharp + * + * This file is part of FRR. + * + * FRR is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License as published by the + * Free Software Foundation; either version 2, or (at your option) any + * later version. + * + * FRR is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with FRR; see the file COPYING. If not, write to the Free + * Software Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA + * 02111-1307, USA. 
 */
#include "zebra.h"

#include "command.h"
#include "hook.h"
#include "frr_pthread.h"
#include "mlag.h"

#include "zebra/zebra_mlag.h"
#include "zebra/zebra_mlag_vty.h"
#include "zebra/zebra_router.h"
#include "zebra/zapi_msg.h"
#include "zebra/debug.h"

#ifdef HAVE_PROTOBUF_VERSION_3
#include "mlag/mlag.pb-c.h"
#endif

DEFINE_HOOK(zebra_mlag_private_write_data,
	    (uint8_t *data, uint32_t len), (data, len));
DEFINE_HOOK(zebra_mlag_private_monitor_state, (), ());
DEFINE_HOOK(zebra_mlag_private_open_channel, (), ());
DEFINE_HOOK(zebra_mlag_private_close_channel, (), ());
DEFINE_HOOK(zebra_mlag_private_cleanup_data, (), ());

/* Bytes prepended to broadcast messages to carry the message type. */
#define ZEBRA_MLAG_METADATA_LEN 4
#define ZEBRA_MLAG_MSG_BCAST 0xFFFFFFFF

uint8_t mlag_wr_buffer[ZEBRA_MLAG_BUF_LIMIT];
uint8_t mlag_rd_buffer[ZEBRA_MLAG_BUF_LIMIT];

static bool test_mlag_in_progress;

static int zebra_mlag_signal_write_thread(void);
static void zebra_mlag_terminate_pthread(struct thread *event);
static void zebra_mlag_post_data_from_main_thread(struct thread *thread);
static void zebra_mlag_publish_process_state(struct zserv *client,
					     zebra_message_types_t msg_type);

/**********************MLAG Interaction***************************************/

/*
 * API to post the Registration to MLAGD
 * MLAG will not process any messages without the registration
 */
void zebra_mlag_send_register(void)
{
	struct stream *s = NULL;

	s = stream_new(sizeof(struct mlag_msg));

	stream_putl(s, MLAG_REGISTER);
	stream_putw(s, MLAG_MSG_NULL_PAYLOAD);
	stream_putw(s, MLAG_MSG_NO_BATCH);
	/* Hand the message to the MLAG write pthread via the shared FIFO. */
	stream_fifo_push_safe(zrouter.mlag_info.mlag_fifo, s);
	zebra_mlag_signal_write_thread();

	if (IS_ZEBRA_DEBUG_MLAG)
		zlog_debug("%s: Enqueued MLAG Register to MLAG Thread ",
			   __func__);
}

/*
 * API to post the De-Registration to MLAGD
 * MLAG will not process any messages after the de-registration
 */
void zebra_mlag_send_deregister(void)
{
	struct stream *s = NULL;

	s = stream_new(sizeof(struct mlag_msg));

	stream_putl(s, MLAG_DEREGISTER);
	stream_putw(s, MLAG_MSG_NULL_PAYLOAD);
	stream_putw(s, MLAG_MSG_NO_BATCH);
	stream_fifo_push_safe(zrouter.mlag_info.mlag_fifo, s);
	zebra_mlag_signal_write_thread();

	if (IS_ZEBRA_DEBUG_MLAG)
		zlog_debug("%s: Enqueued MLAG De-Register to MLAG Thread ",
			   __func__);
}

/*
 * API To handle MLAG Received data
 * Decodes the data using protobuf and enqueue to main thread
 * main thread publish this to clients based on client subscription
 */
void zebra_mlag_process_mlag_data(uint8_t *data, uint32_t len)
{
	struct stream *s = NULL;
	int msg_type = 0;

	s = stream_new(ZEBRA_MLAG_BUF_LIMIT);
	/*
	 * Place holder we need the message type first; the real type is
	 * patched in below with stream_putl_at once decoding succeeds.
	 */
	stream_putl(s, msg_type);
	msg_type = zebra_mlag_protobuf_decode_message(s, data, len);

	if (msg_type <= 0) {
		/* Something went wrong in decoding */
		stream_free(s);
		zlog_err("%s: failed to process mlag data-%d, %u", __func__,
			 msg_type, len);
		return;
	}

	/*
	 * additional four bytes are for message type
	 */
	stream_putl_at(s, 0, msg_type);
	/* Ownership of 's' passes to the main-thread event handler. */
	thread_add_event(zrouter.master, zebra_mlag_post_data_from_main_thread,
			 s, 0, NULL);
}

/**********************End of MLAG Interaction********************************/

/************************MLAG Thread Processing*******************************/

/*
 * after posting every 'ZEBRA_MLAG_POST_LIMIT' packets, MLAG Thread will be
 * yielded to give CPU for other threads
 */
#define ZEBRA_MLAG_POST_LIMIT 100

/*
 * This thread reads the clients data from the Global queue and encodes with
 * protobuf and pass on to the MLAG socket.
+ */ +static void zebra_mlag_client_msg_handler(struct thread *event) +{ + struct stream *s; + uint32_t wr_count = 0; + uint32_t msg_type = 0; + uint32_t max_count = 0; + int len = 0; + + wr_count = stream_fifo_count_safe(zrouter.mlag_info.mlag_fifo); + if (IS_ZEBRA_DEBUG_MLAG) + zlog_debug(":%s: Processing MLAG write, %u messages in queue", + __func__, wr_count); + + max_count = MIN(wr_count, ZEBRA_MLAG_POST_LIMIT); + + for (wr_count = 0; wr_count < max_count; wr_count++) { + s = stream_fifo_pop_safe(zrouter.mlag_info.mlag_fifo); + if (!s) { + zlog_debug(":%s: Got a NULL Messages, some thing wrong", + __func__); + break; + } + + /* + * Encode the data now + */ + len = zebra_mlag_protobuf_encode_client_data(s, &msg_type); + + /* + * write to MCLAGD + */ + if (len > 0) { + hook_call(zebra_mlag_private_write_data, + mlag_wr_buffer, len); + + /* + * If message type is De-register, send a signal to main + * thread, so that necessary cleanup will be done by + * main thread. + */ + if (msg_type == MLAG_DEREGISTER) { + thread_add_event(zrouter.master, + zebra_mlag_terminate_pthread, + NULL, 0, NULL); + } + } + + stream_free(s); + } + + if (IS_ZEBRA_DEBUG_MLAG) + zlog_debug(":%s: Posted %d messages to MLAGD", __func__, + wr_count); + /* + * Currently there is only message write task is enqueued to this + * thread, yielding was added for future purpose, so that this thread + * can server other tasks also and in case FIFO is empty, this task will + * be schedule when main thread adds some messages + */ + if (wr_count >= ZEBRA_MLAG_POST_LIMIT) + zebra_mlag_signal_write_thread(); +} + +/* + * API to handle the process state. + * In case of Down, Zebra keep monitoring the MLAG state. 
+ * all the state Notifications will be published to clients + */ +void zebra_mlag_handle_process_state(enum zebra_mlag_state state) +{ + if (state == MLAG_UP) { + zrouter.mlag_info.connected = true; + zebra_mlag_publish_process_state(NULL, ZEBRA_MLAG_PROCESS_UP); + zebra_mlag_send_register(); + } else if (state == MLAG_DOWN) { + zrouter.mlag_info.connected = false; + zebra_mlag_publish_process_state(NULL, ZEBRA_MLAG_PROCESS_DOWN); + hook_call(zebra_mlag_private_monitor_state); + } +} + +/***********************End of MLAG Thread processing*************************/ + +/*************************Multi-entratnt Api's********************************/ + +/* + * Provider api to signal that work/events are available + * for the Zebra MLAG Write pthread. + * This API is called from 2 pthreads.. + * 1) by main thread when client posts a MLAG Message + * 2) by MLAG Thread, in case of yield + * though this api, is called from two threads we don't need any locking + * because Thread task enqueue is thread safe means internally it had + * necessary protection + */ +static int zebra_mlag_signal_write_thread(void) +{ + if (IS_ZEBRA_DEBUG_MLAG) + zlog_debug(":%s: Scheduling MLAG write", __func__); + /* + * This api will be called from Both main & MLAG Threads. + * main thread writes, "zrouter.mlag_info.th_master" only + * during Zebra Init/after MLAG thread is destroyed. + * so it is safe to use without any locking + */ + thread_add_event(zrouter.mlag_info.th_master, + zebra_mlag_client_msg_handler, NULL, 0, + &zrouter.mlag_info.t_write); + return 0; +} + +/* + * API will be used to publish the MLAG state to interested clients + * In case client is passed, state is posted only for that client, + * otherwise to all interested clients + * this api can be called from two threads. 
+ * 1) from main thread: when client is passed + * 2) from MLAG Thread: when client is NULL + * + * In second case, to avoid global data access data will be post to Main + * thread, so that actual posting to clients will happen from Main thread. + */ +static void zebra_mlag_publish_process_state(struct zserv *client, + zebra_message_types_t msg_type) +{ + struct stream *s; + + if (IS_ZEBRA_DEBUG_MLAG) + zlog_debug("%s: Publishing MLAG process state:%s to %s Client", + __func__, + (msg_type == ZEBRA_MLAG_PROCESS_UP) ? "UP" : "DOWN", + (client) ? "one" : "all"); + + if (client) { + s = stream_new(ZEBRA_HEADER_SIZE); + zclient_create_header(s, msg_type, VRF_DEFAULT); + zserv_send_message(client, s); + return; + } + + + /* + * additional four bytes are for mesasge type + */ + s = stream_new(ZEBRA_HEADER_SIZE + ZEBRA_MLAG_METADATA_LEN); + stream_putl(s, ZEBRA_MLAG_MSG_BCAST); + zclient_create_header(s, msg_type, VRF_DEFAULT); + thread_add_event(zrouter.master, zebra_mlag_post_data_from_main_thread, + s, 0, NULL); +} + +/**************************End of Multi-entrant Apis**************************/ + +/***********************Zebra Main thread processing**************************/ + +/* + * To avoid data corruption, messages will be post to clients only from + * main thread, because for that access was needed for clients list. + * so instead of forcing the locks, messages will be posted from main thread. 
 */
/*
 * Main-thread task: consume a stream enqueued by the MLAG pthread (or by
 * zebra_mlag_publish_process_state()), read the leading 4-byte metadata
 * word (internal message type) and relay the remaining payload to every
 * zserv client that registered interest in that message type.
 */
static void zebra_mlag_post_data_from_main_thread(struct thread *thread)
{
	struct stream *s = THREAD_ARG(thread);
	struct stream *zebra_s = NULL;
	struct listnode *node;
	struct zserv *client;
	uint32_t msg_type = 0;
	uint32_t msg_len = 0;

	if (!s)
		return;

	/* First word of the stream is the internal MLAG message type */
	STREAM_GETL(s, msg_type);
	if (IS_ZEBRA_DEBUG_MLAG)
		zlog_debug(
			"%s: Posting MLAG data for msg_type:0x%x to interested clients",
			__func__, msg_type);

	/* Payload length = total stream length minus the metadata word */
	msg_len = s->endp - ZEBRA_MLAG_METADATA_LEN;
	for (ALL_LIST_ELEMENTS_RO(zrouter.client_list, node, client)) {
		if (client->mlag_updates_interested == true) {
			/*
			 * Broadcasts go to every registered client; other
			 * message types only to clients whose registration
			 * mask has the corresponding bit set.
			 */
			if (msg_type != ZEBRA_MLAG_MSG_BCAST
			    && !CHECK_FLAG(client->mlag_reg_mask1,
					   (1 << msg_type))) {
				continue;
			}

			if (IS_ZEBRA_DEBUG_MLAG)
				zlog_debug(
					"%s: Posting MLAG data of length-%d to client:%d ",
					__func__, msg_len, client->proto);

			/*
			 * NOTE(review): STREAM_GET advances s->getp, so a
			 * second interested client would take the
			 * stream_failure path instead of receiving a copy;
			 * this appears to assume at most one consumer per
			 * non-broadcast message type -- confirm.
			 */
			zebra_s = stream_new(msg_len);
			STREAM_GET(zebra_s->data, s, msg_len);
			zebra_s->endp = msg_len;
			stream_putw_at(zebra_s, 0, msg_len);

			/*
			 * This stream will be enqueued to client_obuf, it will
			 * be freed after posting to client socket.
			 */
			zserv_send_message(client, zebra_s);
			zebra_s = NULL;
		}
	}

	stream_free(s);
	return;
stream_failure:
	/* A truncated read above lands here; free everything we still own */
	stream_free(s);
	if (zebra_s)
		stream_free(zebra_s);
}

/*
 * Start the MLAG Thread, this will be used to write client data on to
 * MLAG Process and to read the data from MLAG and post to clients.
 * when all clients are un-registered, this Thread will be
 * suspended.
+ */ +static void zebra_mlag_spawn_pthread(void) +{ + /* Start MLAG write pthread */ + + struct frr_pthread_attr pattr = {.start = + frr_pthread_attr_default.start, + .stop = frr_pthread_attr_default.stop}; + + zrouter.mlag_info.zebra_pth_mlag = + frr_pthread_new(&pattr, "Zebra MLAG thread", "Zebra MLAG"); + + zrouter.mlag_info.th_master = zrouter.mlag_info.zebra_pth_mlag->master; + + + /* Enqueue an initial event to the Newly spawn MLAG pthread */ + zebra_mlag_signal_write_thread(); + + frr_pthread_run(zrouter.mlag_info.zebra_pth_mlag, NULL); +} + +/* + * all clients are un-registered for MLAG Updates, terminate the + * MLAG write thread + */ +static void zebra_mlag_terminate_pthread(struct thread *event) +{ + if (IS_ZEBRA_DEBUG_MLAG) + zlog_debug("Zebra MLAG write thread terminate called"); + + if (zrouter.mlag_info.clients_interested_cnt) { + if (IS_ZEBRA_DEBUG_MLAG) + zlog_debug( + "Zebra MLAG: still some clients are interested"); + return; + } + + frr_pthread_stop(zrouter.mlag_info.zebra_pth_mlag, NULL); + + /* Destroy pthread */ + frr_pthread_destroy(zrouter.mlag_info.zebra_pth_mlag); + zrouter.mlag_info.zebra_pth_mlag = NULL; + zrouter.mlag_info.th_master = NULL; + zrouter.mlag_info.t_read = NULL; + zrouter.mlag_info.t_write = NULL; + + /* + * Send Notification to clean private data + */ + hook_call(zebra_mlag_private_cleanup_data); +} + +/* + * API to register zebra client for MLAG Updates + */ +void zebra_mlag_client_register(ZAPI_HANDLER_ARGS) +{ + struct stream *s; + uint32_t reg_mask = 0; + int rc = 0; + + if (IS_ZEBRA_DEBUG_MLAG) + zlog_debug("Received MLAG Registration from client-proto:%d", + client->proto); + + + /* Get input stream. */ + s = msg; + + /* Get data. 
*/ + STREAM_GETL(s, reg_mask); + + if (client->mlag_updates_interested == true) { + + if (IS_ZEBRA_DEBUG_MLAG) + zlog_debug( + "Client is registered, existing mask: 0x%x, new mask: 0x%x", + client->mlag_reg_mask1, reg_mask); + if (client->mlag_reg_mask1 != reg_mask) + client->mlag_reg_mask1 = reg_mask; + /* + * Client might missed MLAG-UP Notification, post-it again + */ + zebra_mlag_publish_process_state(client, ZEBRA_MLAG_PROCESS_UP); + return; + } + + + client->mlag_updates_interested = true; + client->mlag_reg_mask1 = reg_mask; + if (IS_ZEBRA_DEBUG_MLAG) + zlog_debug("Registering for MLAG Updates with mask: 0x%x, ", + client->mlag_reg_mask1); + + zrouter.mlag_info.clients_interested_cnt++; + + if (zrouter.mlag_info.clients_interested_cnt == 1) { + /* + * First-client for MLAG Updates,open the communication channel + * with MLAG + */ + if (IS_ZEBRA_DEBUG_MLAG) + zlog_debug( + "First client, opening the channel with MLAG"); + + zebra_mlag_spawn_pthread(); + rc = hook_call(zebra_mlag_private_open_channel); + if (rc < 0) { + /* + * For some reason, zebra not able to open the + * comm-channel with MLAG, so post MLAG-DOWN to client. 
+ * later when the channel is open, zebra will send + * MLAG-UP + */ + if (IS_ZEBRA_DEBUG_MLAG) + zlog_debug( + "Fail to open channel with MLAG,rc:%d, post Proto-down", + rc); + zebra_mlag_publish_process_state( + client, ZEBRA_MLAG_PROCESS_DOWN); + } + } + + if (IS_ZEBRA_DEBUG_MLAG) + zlog_debug("Client Registered successfully for MLAG Updates"); + + if (zrouter.mlag_info.connected == true) + zebra_mlag_publish_process_state(client, ZEBRA_MLAG_PROCESS_UP); +stream_failure: + return; +} + +/* + * API to un-register for MLAG Updates + */ +void zebra_mlag_client_unregister(ZAPI_HANDLER_ARGS) +{ + if (IS_ZEBRA_DEBUG_MLAG) + zlog_debug("Received MLAG De-Registration from client-proto:%d", + client->proto); + + if (client->mlag_updates_interested == false) + /* Unexpected */ + return; + + client->mlag_updates_interested = false; + client->mlag_reg_mask1 = 0; + zrouter.mlag_info.clients_interested_cnt--; + + if (zrouter.mlag_info.clients_interested_cnt == 0) { + /* + * No-client is interested for MLAG Updates,close the + * communication channel with MLAG + */ + if (IS_ZEBRA_DEBUG_MLAG) + zlog_debug("Last client for MLAG, close the channel "); + + /* + * Clean up flow: + * ============= + * 1) main thread calls socket close which posts De-register + * to MLAG write thread + * 2) after MLAG write thread posts De-register it sends a + * signal back to main thread to do the thread cleanup + * this was mainly to make sure De-register is posted to MCLAGD. + */ + hook_call(zebra_mlag_private_close_channel); + } + + if (IS_ZEBRA_DEBUG_MLAG) + zlog_debug( + "Client De-Registered successfully for MLAG Updates"); +} + +/* + * Does following things. 
+ * 1) allocated new local stream, and copies the client data and enqueue + * to MLAG Thread + * 2) MLAG Thread after dequeing, encode the client data using protobuf + * and write on to MLAG + */ +void zebra_mlag_forward_client_msg(ZAPI_HANDLER_ARGS) +{ + struct stream *zebra_s; + struct stream *mlag_s; + + if (IS_ZEBRA_DEBUG_MLAG) + zlog_debug("Received Client MLAG Data from client-proto:%d", + client->proto); + + /* Get input stream. */ + zebra_s = msg; + mlag_s = stream_new(zebra_s->endp); + + /* + * Client data is | Zebra Header + MLAG Data | + * we need to enqueue only the MLAG data, skipping Zebra Header + */ + stream_put(mlag_s, zebra_s->data + zebra_s->getp, + STREAM_READABLE(zebra_s)); + stream_fifo_push_safe(zrouter.mlag_info.mlag_fifo, mlag_s); + zebra_mlag_signal_write_thread(); + + if (IS_ZEBRA_DEBUG_MLAG) + zlog_debug("%s: Enqueued Client:%d data to MLAG Thread ", + __func__, client->proto); +} + +/***********************End of Zebra Main thread processing*************/ + +enum mlag_role zebra_mlag_get_role(void) +{ + return zrouter.mlag_info.role; +} + +int32_t zebra_mlag_test_mlag_internal(const char *none, const char *primary, + const char *secondary) +{ + enum mlag_role orig = zrouter.mlag_info.role; + char buf1[MLAG_ROLE_STRSIZE], buf2[MLAG_ROLE_STRSIZE]; + + if (none) + zrouter.mlag_info.role = MLAG_ROLE_NONE; + if (primary) + zrouter.mlag_info.role = MLAG_ROLE_PRIMARY; + if (secondary) + zrouter.mlag_info.role = MLAG_ROLE_SECONDARY; + + if (IS_ZEBRA_DEBUG_MLAG) + zlog_debug("Test: Changing role from %s to %s", + mlag_role2str(orig, buf1, sizeof(buf1)), + mlag_role2str(orig, buf2, sizeof(buf2))); + + if (orig != zrouter.mlag_info.role) { + zsend_capabilities_all_clients(); + if (zrouter.mlag_info.role != MLAG_ROLE_NONE) { + if (zrouter.mlag_info.clients_interested_cnt == 0 + && !test_mlag_in_progress) { + if (zrouter.mlag_info.zebra_pth_mlag == NULL) + zebra_mlag_spawn_pthread(); + zrouter.mlag_info.clients_interested_cnt++; + 
test_mlag_in_progress = true; + hook_call(zebra_mlag_private_open_channel); + } + } else { + if (test_mlag_in_progress) { + test_mlag_in_progress = false; + zrouter.mlag_info.clients_interested_cnt--; + hook_call(zebra_mlag_private_close_channel); + } + } + } + + return CMD_SUCCESS; +} + +void zebra_mlag_init(void) +{ + zebra_mlag_vty_init(); + + /* + * Intialiaze the MLAG Global variables + * write thread will be created during actual registration with MCLAG + */ + zrouter.mlag_info.clients_interested_cnt = 0; + zrouter.mlag_info.connected = false; + zrouter.mlag_info.timer_running = false; + zrouter.mlag_info.mlag_fifo = stream_fifo_new(); + zrouter.mlag_info.zebra_pth_mlag = NULL; + zrouter.mlag_info.th_master = NULL; + zrouter.mlag_info.t_read = NULL; + zrouter.mlag_info.t_write = NULL; + test_mlag_in_progress = false; + zebra_mlag_reset_read_buffer(); +} + +void zebra_mlag_terminate(void) +{ +} + + +/* + * + * ProtoBuf Encoding APIs + */ + +#ifdef HAVE_PROTOBUF_VERSION_3 + +DEFINE_MTYPE_STATIC(ZEBRA, MLAG_PBUF, "ZEBRA MLAG PROTOBUF"); + +int zebra_mlag_protobuf_encode_client_data(struct stream *s, uint32_t *msg_type) +{ + ZebraMlagHeader hdr = ZEBRA_MLAG__HEADER__INIT; + struct mlag_msg mlag_msg; + uint8_t tmp_buf[ZEBRA_MLAG_BUF_LIMIT]; + int len = 0; + int n_len = 0; + int rc = 0; + char buf[ZLOG_FILTER_LENGTH_MAX]; + size_t length; + + if (IS_ZEBRA_DEBUG_MLAG) + zlog_debug("%s: Entering..", __func__); + + rc = mlag_lib_decode_mlag_hdr(s, &mlag_msg, &length); + if (rc) + return rc; + + memset(tmp_buf, 0, ZEBRA_MLAG_BUF_LIMIT); + + if (IS_ZEBRA_DEBUG_MLAG) + zlog_debug("%s: Mlag ProtoBuf encoding of message:%s, len:%d", + __func__, + mlag_lib_msgid_to_str(mlag_msg.msg_type, buf, + sizeof(buf)), + mlag_msg.data_len); + *msg_type = mlag_msg.msg_type; + switch (mlag_msg.msg_type) { + case MLAG_MROUTE_ADD: { + struct mlag_mroute_add msg; + ZebraMlagMrouteAdd pay_load = ZEBRA_MLAG_MROUTE_ADD__INIT; + uint32_t vrf_name_len = 0; + + rc = mlag_lib_decode_mroute_add(s, 
&msg, &length); + if (rc) + return rc; + + vrf_name_len = strlen(msg.vrf_name) + 1; + pay_load.vrf_name = XMALLOC(MTYPE_MLAG_PBUF, vrf_name_len); + strlcpy(pay_load.vrf_name, msg.vrf_name, vrf_name_len); + pay_load.source_ip = msg.source_ip; + pay_load.group_ip = msg.group_ip; + pay_load.cost_to_rp = msg.cost_to_rp; + pay_load.owner_id = msg.owner_id; + pay_load.am_i_dr = msg.am_i_dr; + pay_load.am_i_dual_active = msg.am_i_dual_active; + pay_load.vrf_id = msg.vrf_id; + + if (msg.owner_id == MLAG_OWNER_INTERFACE) { + vrf_name_len = strlen(msg.intf_name) + 1; + pay_load.intf_name = + XMALLOC(MTYPE_MLAG_PBUF, vrf_name_len); + strlcpy(pay_load.intf_name, msg.intf_name, + vrf_name_len); + } + + len = zebra_mlag_mroute_add__pack(&pay_load, tmp_buf); + XFREE(MTYPE_MLAG_PBUF, pay_load.vrf_name); + if (msg.owner_id == MLAG_OWNER_INTERFACE) + XFREE(MTYPE_MLAG_PBUF, pay_load.intf_name); + } break; + case MLAG_MROUTE_DEL: { + struct mlag_mroute_del msg; + ZebraMlagMrouteDel pay_load = ZEBRA_MLAG_MROUTE_DEL__INIT; + uint32_t vrf_name_len = 0; + + rc = mlag_lib_decode_mroute_del(s, &msg, &length); + if (rc) + return rc; + vrf_name_len = strlen(msg.vrf_name) + 1; + pay_load.vrf_name = XMALLOC(MTYPE_MLAG_PBUF, vrf_name_len); + strlcpy(pay_load.vrf_name, msg.vrf_name, vrf_name_len); + pay_load.source_ip = msg.source_ip; + pay_load.group_ip = msg.group_ip; + pay_load.owner_id = msg.owner_id; + pay_load.vrf_id = msg.vrf_id; + + if (msg.owner_id == MLAG_OWNER_INTERFACE) { + vrf_name_len = strlen(msg.intf_name) + 1; + pay_load.intf_name = + XMALLOC(MTYPE_MLAG_PBUF, vrf_name_len); + strlcpy(pay_load.intf_name, msg.intf_name, + vrf_name_len); + } + + len = zebra_mlag_mroute_del__pack(&pay_load, tmp_buf); + XFREE(MTYPE_MLAG_PBUF, pay_load.vrf_name); + if (msg.owner_id == MLAG_OWNER_INTERFACE) + XFREE(MTYPE_MLAG_PBUF, pay_load.intf_name); + } break; + case MLAG_MROUTE_ADD_BULK: { + struct mlag_mroute_add msg; + ZebraMlagMrouteAddBulk Bulk_msg = + ZEBRA_MLAG_MROUTE_ADD_BULK__INIT; + 
ZebraMlagMrouteAdd **pay_load = NULL; + bool cleanup = false; + uint32_t i, actual; + + Bulk_msg.n_mroute_add = mlag_msg.msg_cnt; + pay_load = XMALLOC(MTYPE_MLAG_PBUF, sizeof(ZebraMlagMrouteAdd *) + * mlag_msg.msg_cnt); + + for (i = 0, actual = 0; i < mlag_msg.msg_cnt; i++, actual++) { + + uint32_t vrf_name_len = 0; + + rc = mlag_lib_decode_mroute_add(s, &msg, &length); + if (rc) { + cleanup = true; + break; + } + pay_load[i] = XMALLOC(MTYPE_MLAG_PBUF, + sizeof(ZebraMlagMrouteAdd)); + zebra_mlag_mroute_add__init(pay_load[i]); + + vrf_name_len = strlen(msg.vrf_name) + 1; + pay_load[i]->vrf_name = + XMALLOC(MTYPE_MLAG_PBUF, vrf_name_len); + strlcpy(pay_load[i]->vrf_name, msg.vrf_name, + vrf_name_len); + pay_load[i]->source_ip = msg.source_ip; + pay_load[i]->group_ip = msg.group_ip; + pay_load[i]->cost_to_rp = msg.cost_to_rp; + pay_load[i]->owner_id = msg.owner_id; + pay_load[i]->am_i_dr = msg.am_i_dr; + pay_load[i]->am_i_dual_active = msg.am_i_dual_active; + pay_load[i]->vrf_id = msg.vrf_id; + if (msg.owner_id == MLAG_OWNER_INTERFACE) { + vrf_name_len = strlen(msg.intf_name) + 1; + pay_load[i]->intf_name = + XMALLOC(MTYPE_MLAG_PBUF, vrf_name_len); + + strlcpy(pay_load[i]->intf_name, msg.intf_name, + vrf_name_len); + } + } + if (!cleanup) { + Bulk_msg.mroute_add = pay_load; + len = zebra_mlag_mroute_add_bulk__pack(&Bulk_msg, + tmp_buf); + } + + for (i = 0; i < actual; i++) { + /* + * The mlag_lib_decode_mroute_add can + * fail to properly decode and cause nothing + * to be allocated. 
Prevent a crash + */ + if (!pay_load[i]) + continue; + + XFREE(MTYPE_MLAG_PBUF, pay_load[i]->vrf_name); + if (pay_load[i]->owner_id == MLAG_OWNER_INTERFACE + && pay_load[i]->intf_name) + XFREE(MTYPE_MLAG_PBUF, pay_load[i]->intf_name); + XFREE(MTYPE_MLAG_PBUF, pay_load[i]); + } + XFREE(MTYPE_MLAG_PBUF, pay_load); + if (cleanup) + return -1; + } break; + case MLAG_MROUTE_DEL_BULK: { + struct mlag_mroute_del msg; + ZebraMlagMrouteDelBulk Bulk_msg = + ZEBRA_MLAG_MROUTE_DEL_BULK__INIT; + ZebraMlagMrouteDel **pay_load = NULL; + bool cleanup = false; + uint32_t i, actual; + + Bulk_msg.n_mroute_del = mlag_msg.msg_cnt; + pay_load = XMALLOC(MTYPE_MLAG_PBUF, sizeof(ZebraMlagMrouteDel *) + * mlag_msg.msg_cnt); + + for (i = 0, actual = 0; i < mlag_msg.msg_cnt; i++, actual++) { + + uint32_t vrf_name_len = 0; + + rc = mlag_lib_decode_mroute_del(s, &msg, &length); + if (rc) { + cleanup = true; + break; + } + + pay_load[i] = XMALLOC(MTYPE_MLAG_PBUF, + sizeof(ZebraMlagMrouteDel)); + zebra_mlag_mroute_del__init(pay_load[i]); + + vrf_name_len = strlen(msg.vrf_name) + 1; + pay_load[i]->vrf_name = + XMALLOC(MTYPE_MLAG_PBUF, vrf_name_len); + + strlcpy(pay_load[i]->vrf_name, msg.vrf_name, + vrf_name_len); + pay_load[i]->source_ip = msg.source_ip; + pay_load[i]->group_ip = msg.group_ip; + pay_load[i]->owner_id = msg.owner_id; + pay_load[i]->vrf_id = msg.vrf_id; + if (msg.owner_id == MLAG_OWNER_INTERFACE) { + vrf_name_len = strlen(msg.intf_name) + 1; + pay_load[i]->intf_name = + XMALLOC(MTYPE_MLAG_PBUF, vrf_name_len); + + strlcpy(pay_load[i]->intf_name, msg.intf_name, + vrf_name_len); + } + } + if (!cleanup) { + Bulk_msg.mroute_del = pay_load; + len = zebra_mlag_mroute_del_bulk__pack(&Bulk_msg, + tmp_buf); + } + + for (i = 0; i < actual; i++) { + /* + * The mlag_lib_decode_mroute_add can + * fail to properly decode and cause nothing + * to be allocated. 
Prevent a crash + */ + if (!pay_load[i]) + continue; + + XFREE(MTYPE_MLAG_PBUF, pay_load[i]->vrf_name); + if (pay_load[i]->owner_id == MLAG_OWNER_INTERFACE + && pay_load[i]->intf_name) + XFREE(MTYPE_MLAG_PBUF, pay_load[i]->intf_name); + XFREE(MTYPE_MLAG_PBUF, pay_load[i]); + } + XFREE(MTYPE_MLAG_PBUF, pay_load); + if (cleanup) + return -1; + } break; + default: + break; + } + + if (IS_ZEBRA_DEBUG_MLAG) + zlog_debug("%s: length of Mlag ProtoBuf encoded message:%s, %d", + __func__, + mlag_lib_msgid_to_str(mlag_msg.msg_type, buf, + sizeof(buf)), + len); + hdr.type = (ZebraMlagHeader__MessageType)mlag_msg.msg_type; + if (len != 0) { + hdr.data.len = len; + hdr.data.data = XMALLOC(MTYPE_MLAG_PBUF, len); + memcpy(hdr.data.data, tmp_buf, len); + } + + /* + * ProtoBuf Infra will not support to demarc the pointers whem multiple + * messages are posted inside a single Buffer. + * 2 -solutions exist to solve this + * 1. add Unenoced length at the beginning of every message, this will + * be used to point to next message in the buffer + * 2. another solution is defining all messages insides another message + * But this will permit only 32 messages. this can be extended with + * multiple levels. + * for simplicity we are going with solution-1. 
+ */ + len = zebra_mlag__header__pack(&hdr, + (mlag_wr_buffer + ZEBRA_MLAG_LEN_SIZE)); + n_len = htonl(len); + memcpy(mlag_wr_buffer, &n_len, ZEBRA_MLAG_LEN_SIZE); + len += ZEBRA_MLAG_LEN_SIZE; + + if (IS_ZEBRA_DEBUG_MLAG) + zlog_debug( + "%s: length of Mlag ProtoBuf message:%s with Header %d", + __func__, + mlag_lib_msgid_to_str(mlag_msg.msg_type, buf, + sizeof(buf)), + len); + XFREE(MTYPE_MLAG_PBUF, hdr.data.data); + + return len; +} + +static void zebra_fill_protobuf_msg(struct stream *s, char *name, int len) +{ + int str_len = strlen(name) + 1; + + stream_put(s, name, str_len); + /* Fill the rest with Null Character for aligning */ + stream_put(s, NULL, len - str_len); +} + +int zebra_mlag_protobuf_decode_message(struct stream *s, uint8_t *data, + uint32_t len) +{ + uint32_t msg_type; + ZebraMlagHeader *hdr; + char buf[80]; + + hdr = zebra_mlag__header__unpack(NULL, len, data); + if (hdr == NULL) + return -1; + + /* + * ADD The MLAG Header + */ + zclient_create_header(s, ZEBRA_MLAG_FORWARD_MSG, VRF_DEFAULT); + + msg_type = hdr->type; + + if (IS_ZEBRA_DEBUG_MLAG) + zlog_debug("%s: Mlag ProtoBuf decoding of message:%s", __func__, + mlag_lib_msgid_to_str(msg_type, buf, 80)); + + /* + * Internal MLAG Message-types & MLAG.proto message types should + * always match, otherwise there can be decoding errors + * To avoid exposing clients with Protobuf flags, using internal + * message-types + */ + stream_putl(s, hdr->type); + + if (hdr->data.len == 0) { + /* NULL Payload */ + stream_putw(s, MLAG_MSG_NULL_PAYLOAD); + /* No Batching */ + stream_putw(s, MLAG_MSG_NO_BATCH); + } else { + switch (msg_type) { + case ZEBRA_MLAG__HEADER__MESSAGE_TYPE__ZEBRA_MLAG_STATUS_UPDATE: { + ZebraMlagStatusUpdate *msg = NULL; + + msg = zebra_mlag_status_update__unpack( + NULL, hdr->data.len, hdr->data.data); + if (msg == NULL) { + zebra_mlag__header__free_unpacked(hdr, NULL); + return -1; + } + /* Payload len */ + stream_putw(s, sizeof(struct mlag_status)); + /* No Batching */ + 
stream_putw(s, MLAG_MSG_NO_BATCH); + /* Actual Data */ + zebra_fill_protobuf_msg(s, msg->peerlink, + INTERFACE_NAMSIZ); + stream_putl(s, msg->my_role); + stream_putl(s, msg->peer_state); + zebra_mlag_status_update__free_unpacked(msg, NULL); + } break; + case ZEBRA_MLAG__HEADER__MESSAGE_TYPE__ZEBRA_MLAG_VXLAN_UPDATE: { + ZebraMlagVxlanUpdate *msg = NULL; + + msg = zebra_mlag_vxlan_update__unpack( + NULL, hdr->data.len, hdr->data.data); + if (msg == NULL) { + zebra_mlag__header__free_unpacked(hdr, NULL); + return -1; + } + /* Payload len */ + stream_putw(s, sizeof(struct mlag_vxlan)); + /* No Batching */ + stream_putw(s, MLAG_MSG_NO_BATCH); + /* Actual Data */ + stream_putl(s, msg->anycast_ip); + stream_putl(s, msg->local_ip); + zebra_mlag_vxlan_update__free_unpacked(msg, NULL); + } break; + case ZEBRA_MLAG__HEADER__MESSAGE_TYPE__ZEBRA_MLAG_MROUTE_ADD: { + ZebraMlagMrouteAdd *msg = NULL; + + msg = zebra_mlag_mroute_add__unpack(NULL, hdr->data.len, + hdr->data.data); + if (msg == NULL) { + zebra_mlag__header__free_unpacked(hdr, NULL); + return -1; + } + /* Payload len */ + stream_putw(s, sizeof(struct mlag_mroute_add)); + /* No Batching */ + stream_putw(s, MLAG_MSG_NO_BATCH); + /* Actual Data */ + zebra_fill_protobuf_msg(s, msg->vrf_name, VRF_NAMSIZ); + + stream_putl(s, msg->source_ip); + stream_putl(s, msg->group_ip); + stream_putl(s, msg->cost_to_rp); + stream_putl(s, msg->owner_id); + stream_putc(s, msg->am_i_dr); + stream_putc(s, msg->am_i_dual_active); + stream_putl(s, msg->vrf_id); + if (msg->owner_id == MLAG_OWNER_INTERFACE) + zebra_fill_protobuf_msg(s, msg->intf_name, + INTERFACE_NAMSIZ); + else + stream_put(s, NULL, INTERFACE_NAMSIZ); + zebra_mlag_mroute_add__free_unpacked(msg, NULL); + } break; + case ZEBRA_MLAG__HEADER__MESSAGE_TYPE__ZEBRA_MLAG_MROUTE_DEL: { + ZebraMlagMrouteDel *msg = NULL; + + msg = zebra_mlag_mroute_del__unpack(NULL, hdr->data.len, + hdr->data.data); + if (msg == NULL) { + zebra_mlag__header__free_unpacked(hdr, NULL); + return -1; + } + 
/* Payload len */ + stream_putw(s, sizeof(struct mlag_mroute_del)); + /* No Batching */ + stream_putw(s, MLAG_MSG_NO_BATCH); + /* Actual Data */ + zebra_fill_protobuf_msg(s, msg->vrf_name, VRF_NAMSIZ); + + stream_putl(s, msg->source_ip); + stream_putl(s, msg->group_ip); + stream_putl(s, msg->owner_id); + stream_putl(s, msg->vrf_id); + if (msg->owner_id == MLAG_OWNER_INTERFACE) + zebra_fill_protobuf_msg(s, msg->intf_name, + INTERFACE_NAMSIZ); + else + stream_put(s, NULL, INTERFACE_NAMSIZ); + zebra_mlag_mroute_del__free_unpacked(msg, NULL); + } break; + case ZEBRA_MLAG__HEADER__MESSAGE_TYPE__ZEBRA_MLAG_MROUTE_ADD_BULK: { + ZebraMlagMrouteAddBulk *Bulk_msg = NULL; + ZebraMlagMrouteAdd *msg = NULL; + size_t i, length_spot; + + Bulk_msg = zebra_mlag_mroute_add_bulk__unpack( + NULL, hdr->data.len, hdr->data.data); + if (Bulk_msg == NULL) { + zebra_mlag__header__free_unpacked(hdr, NULL); + return -1; + } + /* Payload len */ + stream_putw(s, (Bulk_msg->n_mroute_add + * sizeof(struct mlag_mroute_add))); + /* No. 
of msgs in Batch */ + length_spot = stream_putw(s, Bulk_msg->n_mroute_add); + + /* Actual Data */ + for (i = 0; i < Bulk_msg->n_mroute_add; i++) { + if (STREAM_SIZE(s) + < VRF_NAMSIZ + 22 + INTERFACE_NAMSIZ) { + zlog_warn( + "We have received more messages than we can parse at this point in time: %zu", + Bulk_msg->n_mroute_add); + break; + } + + msg = Bulk_msg->mroute_add[i]; + + zebra_fill_protobuf_msg(s, msg->vrf_name, + VRF_NAMSIZ); + stream_putl(s, msg->source_ip); + stream_putl(s, msg->group_ip); + stream_putl(s, msg->cost_to_rp); + stream_putl(s, msg->owner_id); + stream_putc(s, msg->am_i_dr); + stream_putc(s, msg->am_i_dual_active); + stream_putl(s, msg->vrf_id); + if (msg->owner_id == MLAG_OWNER_INTERFACE) + zebra_fill_protobuf_msg( + s, msg->intf_name, + INTERFACE_NAMSIZ); + else + stream_put(s, NULL, INTERFACE_NAMSIZ); + } + + stream_putw_at(s, length_spot, i + 1); + + zebra_mlag_mroute_add_bulk__free_unpacked(Bulk_msg, + NULL); + } break; + case ZEBRA_MLAG__HEADER__MESSAGE_TYPE__ZEBRA_MLAG_MROUTE_DEL_BULK: { + ZebraMlagMrouteDelBulk *Bulk_msg = NULL; + ZebraMlagMrouteDel *msg = NULL; + size_t i, length_spot; + + Bulk_msg = zebra_mlag_mroute_del_bulk__unpack( + NULL, hdr->data.len, hdr->data.data); + if (Bulk_msg == NULL) { + zebra_mlag__header__free_unpacked(hdr, NULL); + return -1; + } + /* Payload len */ + stream_putw(s, (Bulk_msg->n_mroute_del + * sizeof(struct mlag_mroute_del))); + /* No. 
of msgs in Batch */ + length_spot = stream_putw(s, Bulk_msg->n_mroute_del); + + /* Actual Data */ + for (i = 0; i < Bulk_msg->n_mroute_del; i++) { + if (STREAM_SIZE(s) + < VRF_NAMSIZ + 16 + INTERFACE_NAMSIZ) { + zlog_warn( + "We have received more messages than we can parse at this time"); + break; + } + + msg = Bulk_msg->mroute_del[i]; + + zebra_fill_protobuf_msg(s, msg->vrf_name, + VRF_NAMSIZ); + stream_putl(s, msg->source_ip); + stream_putl(s, msg->group_ip); + stream_putl(s, msg->owner_id); + stream_putl(s, msg->vrf_id); + if (msg->owner_id == MLAG_OWNER_INTERFACE) + zebra_fill_protobuf_msg( + s, msg->intf_name, + INTERFACE_NAMSIZ); + else + stream_put(s, NULL, INTERFACE_NAMSIZ); + } + + stream_putw_at(s, length_spot, i + 1); + + zebra_mlag_mroute_del_bulk__free_unpacked(Bulk_msg, + NULL); + } break; + case ZEBRA_MLAG__HEADER__MESSAGE_TYPE__ZEBRA_MLAG_ZEBRA_STATUS_UPDATE: { + ZebraMlagZebraStatusUpdate *msg = NULL; + + msg = zebra_mlag_zebra_status_update__unpack( + NULL, hdr->data.len, hdr->data.data); + if (msg == NULL) { + zebra_mlag__header__free_unpacked(hdr, NULL); + return -1; + } + /* Payload len */ + stream_putw(s, sizeof(struct mlag_frr_status)); + /* No Batching */ + stream_putw(s, MLAG_MSG_NO_BATCH); + /* Actual Data */ + stream_putl(s, msg->peer_frrstate); + zebra_mlag_zebra_status_update__free_unpacked(msg, + NULL); + } break; + default: + break; + } + } + zebra_mlag__header__free_unpacked(hdr, NULL); + return msg_type; +} + +#else +int zebra_mlag_protobuf_encode_client_data(struct stream *s, uint32_t *msg_type) +{ + return 0; +} + +int zebra_mlag_protobuf_decode_message(struct stream *s, uint8_t *data, + uint32_t len) +{ + return 0; +} +#endif diff --git a/zebra/zebra_mlag.h b/zebra/zebra_mlag.h new file mode 100644 index 0000000..eb96a57 --- /dev/null +++ b/zebra/zebra_mlag.h @@ -0,0 +1,78 @@ +/* Zebra mlag header. + * Copyright (C) 2018 Cumulus Networks, Inc. + * Donald Sharp + * + * This file is part of FRR. 
+ * + * FRR is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License as published by the + * Free Software Foundation; either version 2, or (at your option) any + * later version. + * + * FRR is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with FRR; see the file COPYING. If not, write to the Free + * Software Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA + * 02111-1307, USA. + */ +#ifndef __ZEBRA_MLAG_H__ +#define __ZEBRA_MLAG_H__ + +#include "mlag.h" +#include "zclient.h" +#include "zebra/zserv.h" + +#ifdef __cplusplus +extern "C" { +#endif + +#define ZEBRA_MLAG_BUF_LIMIT 32768 +#define ZEBRA_MLAG_LEN_SIZE 4 + +DECLARE_HOOK(zebra_mlag_private_write_data, + (uint8_t *data, uint32_t len), (data, len)); +DECLARE_HOOK(zebra_mlag_private_monitor_state, (), ()); +DECLARE_HOOK(zebra_mlag_private_open_channel, (), ()); +DECLARE_HOOK(zebra_mlag_private_close_channel, (), ()); +DECLARE_HOOK(zebra_mlag_private_cleanup_data, (), ()); + +extern uint8_t mlag_wr_buffer[ZEBRA_MLAG_BUF_LIMIT]; +extern uint8_t mlag_rd_buffer[ZEBRA_MLAG_BUF_LIMIT]; + +static inline void zebra_mlag_reset_read_buffer(void) +{ + memset(mlag_wr_buffer, 0, ZEBRA_MLAG_BUF_LIMIT); +} + +enum zebra_mlag_state { + MLAG_UP = 1, + MLAG_DOWN = 2, +}; + +void zebra_mlag_init(void); +void zebra_mlag_terminate(void); +enum mlag_role zebra_mlag_get_role(void); +void zebra_mlag_client_register(ZAPI_HANDLER_ARGS); +void zebra_mlag_client_unregister(ZAPI_HANDLER_ARGS); +void zebra_mlag_forward_client_msg(ZAPI_HANDLER_ARGS); +void zebra_mlag_send_register(void); +void zebra_mlag_send_deregister(void); +void zebra_mlag_handle_process_state(enum zebra_mlag_state state); +void 
zebra_mlag_process_mlag_data(uint8_t *data, uint32_t len); + +/* + * ProtoBuffer Api's + */ +int zebra_mlag_protobuf_encode_client_data(struct stream *s, + uint32_t *msg_type); +int zebra_mlag_protobuf_decode_message(struct stream *s, uint8_t *data, + uint32_t len); +#ifdef __cplusplus +} +#endif + +#endif diff --git a/zebra/zebra_mlag_private.c b/zebra/zebra_mlag_private.c new file mode 100644 index 0000000..50a290b --- /dev/null +++ b/zebra/zebra_mlag_private.c @@ -0,0 +1,298 @@ +/* + * This is an implementation of MLAG Functionality + * + * Module name: Zebra MLAG + * + * Author: sathesh Kumar karra <sathk@cumulusnetworks.com> + * + * Copyright (C) 2019 Cumulus Networks http://www.cumulusnetworks.com + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License as published by the Free + * Software Foundation; either version 2 of the License, or (at your option) + * any later version. + * + * This program is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for + * more details. + * + * You should have received a copy of the GNU General Public License along + * with this program; see the file COPYING; if not, write to the Free Software + * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA + */ +#include "zebra.h" + +#include "hook.h" +#include "module.h" +#include "thread.h" +#include "frr_pthread.h" +#include "libfrr.h" +#include "lib/version.h" +#include "network.h" + +#include "lib/stream.h" + +#include "zebra/debug.h" +#include "zebra/zebra_router.h" +#include "zebra/zebra_mlag.h" + +#include <sys/un.h> + + +/* + * This file will have platform specific apis to communicate with MCLAG. 
+ * + */ + +static struct thread_master *zmlag_master; +static int mlag_socket; + +static void zebra_mlag_connect(struct thread *thread); +static void zebra_mlag_read(struct thread *thread); + +/* + * Write the data to MLAGD + */ +static int zebra_mlag_private_write_data(uint8_t *data, uint32_t len) +{ + int rc = 0; + + if (IS_ZEBRA_DEBUG_MLAG) { + zlog_debug("%s: Writing %d length Data to clag", __func__, len); + zlog_hexdump(data, len); + } + rc = write(mlag_socket, data, len); + return rc; +} + +static void zebra_mlag_sched_read(void) +{ + thread_add_read(zmlag_master, zebra_mlag_read, NULL, mlag_socket, + &zrouter.mlag_info.t_read); +} + +static void zebra_mlag_read(struct thread *thread) +{ + static uint32_t mlag_rd_buf_offset; + uint32_t *msglen; + uint32_t h_msglen; + uint32_t tot_len, curr_len = mlag_rd_buf_offset; + + /* + * Received message in sock_stream looks like below + * | len-1 (4 Bytes) | payload-1 (len-1) | + * len-2 (4 Bytes) | payload-2 (len-2) | .. + * + * Idea is read one message completely, then process, until message is + * read completely, keep on reading from the socket + */ + if (curr_len < ZEBRA_MLAG_LEN_SIZE) { + ssize_t data_len; + + data_len = read(mlag_socket, mlag_rd_buffer + curr_len, + ZEBRA_MLAG_LEN_SIZE - curr_len); + if (data_len == 0 || data_len == -1) { + if (IS_ZEBRA_DEBUG_MLAG) + zlog_debug("MLAG connection closed socket : %d", + mlag_socket); + close(mlag_socket); + zebra_mlag_handle_process_state(MLAG_DOWN); + return; + } + mlag_rd_buf_offset += data_len; + if (data_len != (ssize_t)(ZEBRA_MLAG_LEN_SIZE - curr_len)) { + /* Try again later */ + zebra_mlag_sched_read(); + return; + } + curr_len = ZEBRA_MLAG_LEN_SIZE; + } + + /* Get the actual packet length */ + msglen = (uint32_t *)mlag_rd_buffer; + h_msglen = ntohl(*msglen); + + /* This will be the actual length of the packet */ + tot_len = h_msglen + ZEBRA_MLAG_LEN_SIZE; + + /* + * If the buffer read we are about to do is too large + * we are really really really not 
double plus good + * + * I'm not sure what to do here other than to bail + * We'll need to revisit this in the future. + */ + assert(tot_len < ZEBRA_MLAG_BUF_LIMIT); + + if (curr_len < tot_len) { + ssize_t data_len; + + data_len = read(mlag_socket, mlag_rd_buffer + curr_len, + tot_len - curr_len); + if (data_len == 0 || data_len == -1) { + if (IS_ZEBRA_DEBUG_MLAG) + zlog_debug("MLAG connection closed socket : %d", + mlag_socket); + close(mlag_socket); + zebra_mlag_handle_process_state(MLAG_DOWN); + return; + } + mlag_rd_buf_offset += data_len; + if (data_len != (ssize_t)(tot_len - curr_len)) { + /* Try again later */ + zebra_mlag_sched_read(); + return; + } + } + + if (IS_ZEBRA_DEBUG_MLAG) { + zlog_debug("Received a MLAG Message from socket: %d, len:%u ", + mlag_socket, tot_len); + zlog_hexdump(mlag_rd_buffer, tot_len); + } + + tot_len -= ZEBRA_MLAG_LEN_SIZE; + + /* Process the packet */ + zebra_mlag_process_mlag_data(mlag_rd_buffer + ZEBRA_MLAG_LEN_SIZE, + tot_len); + + /* Register read thread. 
*/ + zebra_mlag_reset_read_buffer(); + mlag_rd_buf_offset = 0; + zebra_mlag_sched_read(); +} + +static void zebra_mlag_connect(struct thread *thread) +{ + struct sockaddr_un svr = {0}; + + /* Reset the Timer-running flag */ + zrouter.mlag_info.timer_running = false; + + svr.sun_family = AF_UNIX; +#define MLAG_SOCK_NAME "/var/run/clag-zebra.socket" + strlcpy(svr.sun_path, MLAG_SOCK_NAME, sizeof(MLAG_SOCK_NAME) + 1); + + mlag_socket = socket(svr.sun_family, SOCK_STREAM, 0); + if (mlag_socket < 0) + return; + + if (connect(mlag_socket, (struct sockaddr *)&svr, sizeof(svr)) == -1) { + if (IS_ZEBRA_DEBUG_MLAG) + zlog_debug( + "Unable to connect to %s try again in 10 secs", + svr.sun_path); + close(mlag_socket); + zrouter.mlag_info.timer_running = true; + thread_add_timer(zmlag_master, zebra_mlag_connect, NULL, 10, + &zrouter.mlag_info.t_read); + return; + } + + set_nonblocking(mlag_socket); + + if (IS_ZEBRA_DEBUG_MLAG) + zlog_debug("%s: Connection with MLAG is established ", + __func__); + + thread_add_read(zmlag_master, zebra_mlag_read, NULL, mlag_socket, + &zrouter.mlag_info.t_read); + /* + * Connection is established with MLAGD, post to clients + */ + zebra_mlag_handle_process_state(MLAG_UP); +} + +/* + * Currently we are doing polling later we will look for better options + */ +static int zebra_mlag_private_monitor_state(void) +{ + thread_add_event(zmlag_master, zebra_mlag_connect, NULL, 0, + &zrouter.mlag_info.t_read); + return 0; +} + +static int zebra_mlag_private_open_channel(void) +{ + zmlag_master = zrouter.mlag_info.th_master; + + if (zrouter.mlag_info.connected == true) { + if (IS_ZEBRA_DEBUG_MLAG) + zlog_debug("%s: Zebra already connected to MLAGD", + __func__); + return 0; + } + + if (zrouter.mlag_info.timer_running == true) { + if (IS_ZEBRA_DEBUG_MLAG) + zlog_debug( + "%s: Connection retry is in progress for MLAGD", + __func__); + return 0; + } + + if (zrouter.mlag_info.clients_interested_cnt) { + /* + * Connect only if any clients are showing interest + 
*/ + thread_add_event(zmlag_master, zebra_mlag_connect, NULL, 0, + &zrouter.mlag_info.t_read); + } + return 0; +} + +static int zebra_mlag_private_close_channel(void) +{ + if (zmlag_master == NULL) + return -1; + + if (zrouter.mlag_info.clients_interested_cnt) { + if (IS_ZEBRA_DEBUG_MLAG) + zlog_debug("%s: still %d clients are connected, skip", + __func__, + zrouter.mlag_info.clients_interested_cnt); + return -1; + } + + /* + * Post the De-register to MLAG, so that it can do necesasry cleanup + */ + zebra_mlag_send_deregister(); + + return 0; +} + +static int zebra_mlag_private_cleanup_data(void) +{ + zmlag_master = NULL; + zrouter.mlag_info.connected = false; + zrouter.mlag_info.timer_running = false; + + close(mlag_socket); + return 0; +} + +static int zebra_mlag_module_init(void) +{ + hook_register(zebra_mlag_private_write_data, + zebra_mlag_private_write_data); + hook_register(zebra_mlag_private_monitor_state, + zebra_mlag_private_monitor_state); + hook_register(zebra_mlag_private_open_channel, + zebra_mlag_private_open_channel); + hook_register(zebra_mlag_private_close_channel, + zebra_mlag_private_close_channel); + hook_register(zebra_mlag_private_cleanup_data, + zebra_mlag_private_cleanup_data); + return 0; +} + +FRR_MODULE_SETUP( + .name = "zebra_cumulus_mlag", + .version = FRR_VERSION, + .description = "zebra Cumulus MLAG interface", + .init = zebra_mlag_module_init, +); diff --git a/zebra/zebra_mlag_vty.c b/zebra/zebra_mlag_vty.c new file mode 100644 index 0000000..ebaaf03 --- /dev/null +++ b/zebra/zebra_mlag_vty.c @@ -0,0 +1,67 @@ +/* Zebra Mlag vty Code. + * Copyright (C) 2019 Cumulus Networks, Inc. + * Donald Sharp + * + * This file is part of FRR. + * + * FRR is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License as published by the + * Free Software Foundation; either version 2, or (at your option) any + * later version. 
 *
 * FRR is distributed in the hope that it will be useful, but
 * WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
 * General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with FRR; see the file COPYING. If not, write to the Free
 * Software Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA
 * 02111-1307, USA.
 */
#include <zebra.h>

#include "vty.h"
#include "command.h"

#include "zebra_router.h"
#include "zebra_mlag_vty.h"
#include "debug.h"
#include "zapi_msg.h"

/* Generated DEFPY argument-parsing glue; skipped during vtysh extraction. */
#ifndef VTYSH_EXTRACT_PL
#include "zebra/zebra_mlag_vty_clippy.c"
#endif

/* "show zebra mlag": display the configured MLAG role of this box. */
DEFUN_HIDDEN (show_mlag,
	      show_mlag_cmd,
	      "show zebra mlag",
	      SHOW_STR
	      ZEBRA_STR
	      "The mlag role on this machine\n")
{
	char buf[MLAG_ROLE_STRSIZE];

	vty_out(vty, "MLag is configured to: %s\n",
		mlag_role2str(zrouter.mlag_info.role, buf, sizeof(buf)));

	return CMD_SUCCESS;
}

/* Hidden test knob: force the MLAG role to none/primary/secondary.
 * Exactly one of the three string arguments is non-NULL (the chosen
 * keyword); the others are NULL.
 */
DEFPY_HIDDEN(test_mlag, test_mlag_cmd,
	     "test zebra mlag <none$none|primary$primary|secondary$secondary>",
	     "Test code\n"
	     ZEBRA_STR
	     "Modify the Mlag state\n"
	     "Mlag is not setup on the machine\n"
	     "Mlag is setup to be primary\n"
	     "Mlag is setup to be the secondary\n")
{
	return zebra_mlag_test_mlag_internal(none, primary, secondary);
}

/* Register the MLAG vty commands; called once at zebra startup. */
void zebra_mlag_vty_init(void)
{
	install_element(VIEW_NODE, &show_mlag_cmd);
	install_element(ENABLE_NODE, &test_mlag_cmd);
}
diff --git a/zebra/zebra_mlag_vty.h b/zebra/zebra_mlag_vty.h
new file mode 100644
index 0000000..789154d
--- /dev/null
+++ b/zebra/zebra_mlag_vty.h
@@ -0,0 +1,39 @@
/* Zebra Mlag vty Code.
 * Copyright (C) 2019 Cumulus Networks, Inc.
 * Donald Sharp
 *
 * This file is part of FRR.
+ * + * FRR is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License as published by the + * Free Software Foundation; either version 2, or (at your option) any + * later version. + * + * FRR is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with FRR; see the file COPYING. If not, write to the Free + * Software Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA + * 02111-1307, USA. + */ +#ifndef __ZEBRA_MLAG_VTY_CODE__ +#define __ZEBRA_MLAG_VTY_CODE__ + +#ifdef __cplusplus +extern "C" { +#endif + +extern int32_t zebra_mlag_test_mlag_internal(const char *none, + const char *primary, + const char *secondary); + +extern void zebra_mlag_vty_init(void); + +#ifdef __cplusplus +} +#endif + +#endif diff --git a/zebra/zebra_mpls.c b/zebra/zebra_mpls.c new file mode 100644 index 0000000..9756d9b --- /dev/null +++ b/zebra/zebra_mpls.c @@ -0,0 +1,4079 @@ +/* Zebra MPLS code + * Copyright (C) 2013 Cumulus Networks, Inc. + * + * This file is part of GNU Zebra. + * + * GNU Zebra is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License as published by the + * Free Software Foundation; either version 2, or (at your option) any + * later version. + * + * GNU Zebra is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. 
+ * + * You should have received a copy of the GNU General Public License along + * with this program; see the file COPYING; if not, write to the Free Software + * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA + */ + +#include <zebra.h> + +#include "prefix.h" +#include "table.h" +#include "memory.h" +#include "command.h" +#include "if.h" +#include "log.h" +#include "sockunion.h" +#include "linklist.h" +#include "thread.h" +#include "workqueue.h" +#include "prefix.h" +#include "routemap.h" +#include "stream.h" +#include "nexthop.h" +#include "termtable.h" +#include "lib/json.h" + +#include "zebra/rib.h" +#include "zebra/rt.h" +#include "zebra/interface.h" +#include "zebra/zserv.h" +#include "zebra/zebra_router.h" +#include "zebra/redistribute.h" +#include "zebra/debug.h" +#include "zebra/zebra_vrf.h" +#include "zebra/zebra_mpls.h" +#include "zebra/zebra_srte.h" +#include "zebra/zebra_errors.h" + +DEFINE_MTYPE_STATIC(ZEBRA, LSP, "MPLS LSP object"); +DEFINE_MTYPE_STATIC(ZEBRA, FEC, "MPLS FEC object"); +DEFINE_MTYPE_STATIC(ZEBRA, NHLFE, "MPLS nexthop object"); + +bool mpls_enabled; +bool mpls_pw_reach_strict; /* Strict reachability checking */ + +/* static function declarations */ + +static void fec_evaluate(struct zebra_vrf *zvrf); +static uint32_t fec_derive_label_from_index(struct zebra_vrf *vrf, + struct zebra_fec *fec); +static int lsp_install(struct zebra_vrf *zvrf, mpls_label_t label, + struct route_node *rn, struct route_entry *re); +static int lsp_uninstall(struct zebra_vrf *zvrf, mpls_label_t label); +static int fec_change_update_lsp(struct zebra_vrf *zvrf, struct zebra_fec *fec, + mpls_label_t old_label); +static int fec_send(struct zebra_fec *fec, struct zserv *client); +static void fec_update_clients(struct zebra_fec *fec); +static void fec_print(struct zebra_fec *fec, struct vty *vty); +static struct zebra_fec *fec_find(struct route_table *table, struct prefix *p); +static struct zebra_fec *fec_add(struct route_table *table, 
struct prefix *p, + mpls_label_t label, uint32_t flags, + uint32_t label_index); +static int fec_del(struct zebra_fec *fec); + +static unsigned int label_hash(const void *p); +static bool label_cmp(const void *p1, const void *p2); +static int nhlfe_nexthop_active_ipv4(struct zebra_nhlfe *nhlfe, + struct nexthop *nexthop); +static int nhlfe_nexthop_active_ipv6(struct zebra_nhlfe *nhlfe, + struct nexthop *nexthop); +static int nhlfe_nexthop_active(struct zebra_nhlfe *nhlfe); + +static void lsp_select_best_nhlfe(struct zebra_lsp *lsp); +static void lsp_uninstall_from_kernel(struct hash_bucket *bucket, void *ctxt); +static void lsp_schedule(struct hash_bucket *bucket, void *ctxt); +static wq_item_status lsp_process(struct work_queue *wq, void *data); +static void lsp_processq_del(struct work_queue *wq, void *data); +static void lsp_processq_complete(struct work_queue *wq); +static int lsp_processq_add(struct zebra_lsp *lsp); +static void *lsp_alloc(void *p); + +/* Check whether lsp can be freed - no nhlfes, e.g., and call free api */ +static void lsp_check_free(struct hash *lsp_table, struct zebra_lsp **plsp); + +/* Free lsp; sets caller's pointer to NULL */ +static void lsp_free(struct hash *lsp_table, struct zebra_lsp **plsp); + +static char *nhlfe2str(const struct zebra_nhlfe *nhlfe, char *buf, int size); +static char *nhlfe_config_str(const struct zebra_nhlfe *nhlfe, char *buf, + int size); +static int nhlfe_nhop_match(struct zebra_nhlfe *nhlfe, + enum nexthop_types_t gtype, + const union g_addr *gate, ifindex_t ifindex); +static struct zebra_nhlfe *nhlfe_find(struct nhlfe_list_head *list, + enum lsp_types_t lsp_type, + enum nexthop_types_t gtype, + const union g_addr *gate, + ifindex_t ifindex); +static struct zebra_nhlfe * +nhlfe_add(struct zebra_lsp *lsp, enum lsp_types_t lsp_type, + enum nexthop_types_t gtype, const union g_addr *gate, + ifindex_t ifindex, uint8_t num_labels, const mpls_label_t *labels, + bool is_backup); +static int nhlfe_del(struct 
zebra_nhlfe *nhlfe); +static void nhlfe_free(struct zebra_nhlfe *nhlfe); +static void nhlfe_out_label_update(struct zebra_nhlfe *nhlfe, + struct mpls_label_stack *nh_label); +static int mpls_lsp_uninstall_all(struct hash *lsp_table, struct zebra_lsp *lsp, + enum lsp_types_t type); +static int mpls_static_lsp_uninstall_all(struct zebra_vrf *zvrf, + mpls_label_t in_label); +static void nhlfe_print(struct zebra_nhlfe *nhlfe, struct vty *vty, + const char *indent); +static void lsp_print(struct vty *vty, struct zebra_lsp *lsp); +static void mpls_lsp_uninstall_all_type(struct hash_bucket *bucket, void *ctxt); +static void mpls_ftn_uninstall_all(struct zebra_vrf *zvrf, + int afi, enum lsp_types_t lsp_type); +static int lsp_znh_install(struct zebra_lsp *lsp, enum lsp_types_t type, + const struct zapi_nexthop *znh); +static int lsp_backup_znh_install(struct zebra_lsp *lsp, enum lsp_types_t type, + const struct zapi_nexthop *znh); + +/* Static functions */ + +/* + * Handle failure in LSP install, clear flags for NHLFE. + */ +static void clear_nhlfe_installed(struct zebra_lsp *lsp) +{ + struct zebra_nhlfe *nhlfe; + struct nexthop *nexthop; + + frr_each_safe(nhlfe_list, &lsp->nhlfe_list, nhlfe) { + nexthop = nhlfe->nexthop; + if (!nexthop) + continue; + + UNSET_FLAG(nhlfe->flags, NHLFE_FLAG_INSTALLED); + UNSET_FLAG(nexthop->flags, NEXTHOP_FLAG_FIB); + } + + frr_each_safe(nhlfe_list, &lsp->backup_nhlfe_list, nhlfe) { + nexthop = nhlfe->nexthop; + if (!nexthop) + continue; + + UNSET_FLAG(nhlfe->flags, NHLFE_FLAG_INSTALLED); + UNSET_FLAG(nexthop->flags, NEXTHOP_FLAG_FIB); + } +} + +/* + * Install label forwarding entry based on labeled-route entry. 
+ */ +static int lsp_install(struct zebra_vrf *zvrf, mpls_label_t label, + struct route_node *rn, struct route_entry *re) +{ + struct hash *lsp_table; + struct zebra_ile tmp_ile; + struct zebra_lsp *lsp; + struct zebra_nhlfe *nhlfe; + struct nexthop *nexthop; + enum lsp_types_t lsp_type; + char buf[BUFSIZ]; + int added, changed; + + /* Lookup table. */ + lsp_table = zvrf->lsp_table; + if (!lsp_table) + return -1; + + lsp_type = lsp_type_from_re_type(re->type); + added = changed = 0; + + /* Locate or allocate LSP entry. */ + tmp_ile.in_label = label; + lsp = hash_get(lsp_table, &tmp_ile, lsp_alloc); + + /* For each active nexthop, create NHLFE. Note that we deliberately skip + * recursive nexthops right now, because intermediate hops won't + * understand + * the label advertised by the recursive nexthop (plus we don't have the + * logic yet to push multiple labels). + */ + for (nexthop = re->nhe->nhg.nexthop; + nexthop; nexthop = nexthop->next) { + /* Skip inactive and recursive entries. */ + if (!CHECK_FLAG(nexthop->flags, NEXTHOP_FLAG_ACTIVE)) + continue; + if (CHECK_FLAG(nexthop->flags, NEXTHOP_FLAG_RECURSIVE)) + continue; + + nhlfe = nhlfe_find(&lsp->nhlfe_list, lsp_type, + nexthop->type, &nexthop->gate, + nexthop->ifindex); + if (nhlfe) { + /* Clear deleted flag (in case it was set) */ + UNSET_FLAG(nhlfe->flags, NHLFE_FLAG_DELETED); + if (nexthop_labels_match(nhlfe->nexthop, nexthop)) + /* No change */ + continue; + + + if (IS_ZEBRA_DEBUG_MPLS) { + nhlfe2str(nhlfe, buf, BUFSIZ); + zlog_debug( + "LSP in-label %u type %d nexthop %s out-label changed", + lsp->ile.in_label, lsp_type, buf); + } + + /* Update out label, trigger processing. 
*/ + nhlfe_out_label_update(nhlfe, nexthop->nh_label); + SET_FLAG(nhlfe->flags, NHLFE_FLAG_CHANGED); + changed++; + } else { + /* Add LSP entry to this nexthop */ + nhlfe = nhlfe_add(lsp, lsp_type, nexthop->type, + &nexthop->gate, nexthop->ifindex, + nexthop->nh_label->num_labels, + nexthop->nh_label->label, + false /*backup*/); + if (!nhlfe) + return -1; + + if (IS_ZEBRA_DEBUG_MPLS) { + nhlfe2str(nhlfe, buf, BUFSIZ); + zlog_debug( + "Add LSP in-label %u type %d nexthop %s out-label %u", + lsp->ile.in_label, lsp_type, buf, + nexthop->nh_label->label[0]); + } + + lsp->addr_family = NHLFE_FAMILY(nhlfe); + + /* Mark NHLFE as changed. */ + SET_FLAG(nhlfe->flags, NHLFE_FLAG_CHANGED); + added++; + } + } + + /* Queue LSP for processing if necessary. If no NHLFE got added (special + * case), delete the LSP entry; this case results in somewhat ugly + * logging. + */ + if (added || changed) { + if (lsp_processq_add(lsp)) + return -1; + } else { + lsp_check_free(lsp_table, &lsp); + } + + return 0; +} + +/* + * Uninstall all non-static NHLFEs of a label forwarding entry. If all + * NHLFEs are removed, the entire entry is deleted. + */ +static int lsp_uninstall(struct zebra_vrf *zvrf, mpls_label_t label) +{ + struct hash *lsp_table; + struct zebra_ile tmp_ile; + struct zebra_lsp *lsp; + struct zebra_nhlfe *nhlfe; + char buf[BUFSIZ]; + + /* Lookup table. */ + lsp_table = zvrf->lsp_table; + if (!lsp_table) + return -1; + + /* If entry is not present, exit. */ + tmp_ile.in_label = label; + lsp = hash_lookup(lsp_table, &tmp_ile); + if (!lsp || (nhlfe_list_first(&lsp->nhlfe_list) == NULL)) + return 0; + + /* Mark NHLFEs for delete or directly delete, as appropriate. 
*/ + frr_each_safe(nhlfe_list, &lsp->nhlfe_list, nhlfe) { + + /* Skip static NHLFEs */ + if (nhlfe->type == ZEBRA_LSP_STATIC) + continue; + + if (IS_ZEBRA_DEBUG_MPLS) { + nhlfe2str(nhlfe, buf, BUFSIZ); + zlog_debug( + "Del LSP in-label %u type %d nexthop %s flags 0x%x", + label, nhlfe->type, buf, nhlfe->flags); + } + + if (CHECK_FLAG(nhlfe->flags, NHLFE_FLAG_SELECTED)) { + UNSET_FLAG(nhlfe->flags, NHLFE_FLAG_CHANGED); + SET_FLAG(nhlfe->flags, NHLFE_FLAG_DELETED); + } else { + nhlfe_del(nhlfe); + } + } + + /* Queue LSP for processing, if needed, else delete. */ + if (CHECK_FLAG(lsp->flags, LSP_FLAG_INSTALLED)) { + if (lsp_processq_add(lsp)) + return -1; + } else { + lsp_check_free(lsp_table, &lsp); + } + + return 0; +} + +/* + * This function is invoked upon change to label block configuration; it + * will walk all registered FECs with label-index and appropriately update + * their local labels and trigger client updates. + */ +static void fec_evaluate(struct zebra_vrf *zvrf) +{ + struct route_node *rn; + struct zebra_fec *fec; + uint32_t old_label, new_label; + int af; + + for (af = AFI_IP; af < AFI_MAX; af++) { + if (zvrf->fec_table[af] == NULL) + continue; + + for (rn = route_top(zvrf->fec_table[af]); rn; + rn = route_next(rn)) { + if ((fec = rn->info) == NULL) + continue; + + /* Skip configured FECs and those without a label index. + */ + if (fec->flags & FEC_FLAG_CONFIGURED + || fec->label_index == MPLS_INVALID_LABEL_INDEX) + continue; + + /* Save old label, determine new label. */ + old_label = fec->label; + new_label = + zvrf->mpls_srgb.start_label + fec->label_index; + if (new_label >= zvrf->mpls_srgb.end_label) + new_label = MPLS_INVALID_LABEL; + + /* If label has changed, update FEC and clients. 
*/ + if (new_label == old_label) + continue; + + if (IS_ZEBRA_DEBUG_MPLS) + zlog_debug( + "Update fec %pRN new label %u upon label block", + rn, new_label); + + fec->label = new_label; + fec_update_clients(fec); + + /* Update label forwarding entries appropriately */ + fec_change_update_lsp(zvrf, fec, old_label); + } + } +} + +/* + * Derive (if possible) and update the local label for the FEC based on + * its label index. The index is "acceptable" if it falls within the + * globally configured label block (SRGB). + */ +static uint32_t fec_derive_label_from_index(struct zebra_vrf *zvrf, + struct zebra_fec *fec) +{ + uint32_t label; + + if (fec->label_index != MPLS_INVALID_LABEL_INDEX + && zvrf->mpls_srgb.start_label + && ((label = zvrf->mpls_srgb.start_label + fec->label_index) + < zvrf->mpls_srgb.end_label)) + fec->label = label; + else + fec->label = MPLS_INVALID_LABEL; + + return fec->label; +} + +/* + * There is a change for this FEC. Install or uninstall label forwarding + * entries, as appropriate. + */ +static int fec_change_update_lsp(struct zebra_vrf *zvrf, struct zebra_fec *fec, + mpls_label_t old_label) +{ + struct route_table *table; + struct route_node *rn; + struct route_entry *re; + afi_t afi; + + /* Uninstall label forwarding entry, if previously installed. */ + if (old_label != MPLS_INVALID_LABEL + && old_label != MPLS_LABEL_IMPLICIT_NULL) + lsp_uninstall(zvrf, old_label); + + /* Install label forwarding entry corr. to new label, if needed. */ + if (fec->label == MPLS_INVALID_LABEL + || fec->label == MPLS_LABEL_IMPLICIT_NULL) + return 0; + + afi = family2afi(PREFIX_FAMILY(&fec->rn->p)); + table = zebra_vrf_table(afi, SAFI_UNICAST, zvrf_id(zvrf)); + if (!table) + return 0; + + /* See if labeled route exists. 
*/ + rn = route_node_lookup(table, &fec->rn->p); + if (!rn) + return 0; + + RNODE_FOREACH_RE (rn, re) { + if (CHECK_FLAG(re->flags, ZEBRA_FLAG_SELECTED)) + break; + } + + if (!re || !zebra_rib_labeled_unicast(re)) + return 0; + + if (lsp_install(zvrf, fec->label, rn, re)) + return -1; + + return 0; +} + +/* + * Inform about FEC to a registered client. + */ +static int fec_send(struct zebra_fec *fec, struct zserv *client) +{ + struct stream *s; + struct route_node *rn; + + rn = fec->rn; + + /* Get output stream. */ + s = stream_new(ZEBRA_MAX_PACKET_SIZ); + + zclient_create_header(s, ZEBRA_FEC_UPDATE, VRF_DEFAULT); + + stream_putw(s, rn->p.family); + stream_put_prefix(s, &rn->p); + stream_putl(s, fec->label); + stream_putw_at(s, 0, stream_get_endp(s)); + return zserv_send_message(client, s); +} + +/* + * Update all registered clients about this FEC. Caller should've updated + * FEC and ensure no duplicate updates. + */ +static void fec_update_clients(struct zebra_fec *fec) +{ + struct listnode *node; + struct zserv *client; + + for (ALL_LIST_ELEMENTS_RO(fec->client_list, node, client)) { + if (IS_ZEBRA_DEBUG_MPLS) + zlog_debug("Update client %s", + zebra_route_string(client->proto)); + fec_send(fec, client); + } +} + + +/* + * Print a FEC-label binding entry. + */ +static void fec_print(struct zebra_fec *fec, struct vty *vty) +{ + struct route_node *rn; + struct listnode *node; + struct zserv *client; + char buf[BUFSIZ]; + + rn = fec->rn; + vty_out(vty, "%pRN\n", rn); + vty_out(vty, " Label: %s", label2str(fec->label, buf, BUFSIZ)); + if (fec->label_index != MPLS_INVALID_LABEL_INDEX) + vty_out(vty, ", Label Index: %u", fec->label_index); + vty_out(vty, "\n"); + if (!list_isempty(fec->client_list)) { + vty_out(vty, " Client list:"); + for (ALL_LIST_ELEMENTS_RO(fec->client_list, node, client)) + vty_out(vty, " %s(fd %d)", + zebra_route_string(client->proto), + client->sock); + vty_out(vty, "\n"); + } +} + +/* + * Locate FEC-label binding that matches with passed info. 
 */
static struct zebra_fec *fec_find(struct route_table *table, struct prefix *p)
{
	struct route_node *rn;

	apply_mask(p);
	rn = route_node_lookup(table, p);
	if (!rn)
		return NULL;

	/* Drop the lookup reference; the node remains held by the table
	 * (and by the FEC stored in rn->info, if any).
	 */
	route_unlock_node(rn);
	return (rn->info);
}

/*
 * Add a FEC. This may be upon a client registering for a binding
 * or when a binding is configured.
 *
 * Returns the existing or newly created FEC (with label_index/flags
 * refreshed either way), or NULL if the route node could not be
 * obtained.  A newly created FEC keeps the route_node_get() reference
 * until fec_del() releases it.
 */
static struct zebra_fec *fec_add(struct route_table *table, struct prefix *p,
				 mpls_label_t label, uint32_t flags,
				 uint32_t label_index)
{
	struct route_node *rn;
	struct zebra_fec *fec;

	apply_mask(p);

	/* Lookup (or add) route node.*/
	rn = route_node_get(table, p);
	if (!rn)
		return NULL;

	fec = rn->info;

	if (!fec) {
		fec = XCALLOC(MTYPE_FEC, sizeof(struct zebra_fec));

		rn->info = fec;
		fec->rn = rn;
		fec->label = label;
		fec->client_list = list_new();
	} else
		route_unlock_node(rn); /* for the route_node_get */

	fec->label_index = label_index;
	fec->flags = flags;

	return fec;
}

/*
 * Delete a FEC. This may be upon the last client deregistering for
 * a FEC and no binding exists or when the binding is deleted and there
 * are no registered clients.
 *
 * Releases the node reference taken in fec_add() and frees the FEC.
 * Always returns 0.
 */
static int fec_del(struct zebra_fec *fec)
{
	list_delete(&fec->client_list);
	fec->rn->info = NULL;
	route_unlock_node(fec->rn);
	XFREE(MTYPE_FEC, fec);
	return 0;
}

/*
 * Hash function for label.  Key is a struct zebra_ile; hashes only the
 * incoming label, matching label_cmp() below.
 */
static unsigned int label_hash(const void *p)
{
	const struct zebra_ile *ile = p;

	return (jhash_1word(ile->in_label, 0));
}

/*
 * Compare 2 LSP hash entries based on in-label.
 */
static bool label_cmp(const void *p1, const void *p2)
{
	const struct zebra_ile *ile1 = p1;
	const struct zebra_ile *ile2 = p2;

	return (ile1->in_label == ile2->in_label);
}

/*
 * Check if an IPv4 nexthop for a NHLFE is active. Update nexthop based on
 * the passed flag.
 * NOTE: Looking only for connected routes right now.
+ */ +static int nhlfe_nexthop_active_ipv4(struct zebra_nhlfe *nhlfe, + struct nexthop *nexthop) +{ + struct route_table *table; + struct prefix_ipv4 p; + struct route_node *rn; + struct route_entry *match; + struct nexthop *match_nh; + + table = zebra_vrf_table(AFI_IP, SAFI_UNICAST, nexthop->vrf_id); + if (!table) + return 0; + + /* Lookup nexthop in IPv4 routing table. */ + memset(&p, 0, sizeof(p)); + p.family = AF_INET; + p.prefixlen = IPV4_MAX_BITLEN; + p.prefix = nexthop->gate.ipv4; + + rn = route_node_match(table, (struct prefix *)&p); + if (!rn) + return 0; + + route_unlock_node(rn); + + /* Locate a valid connected route. */ + RNODE_FOREACH_RE (rn, match) { + if (CHECK_FLAG(match->status, ROUTE_ENTRY_REMOVED) + || !CHECK_FLAG(match->flags, ZEBRA_FLAG_SELECTED)) + continue; + + for (match_nh = match->nhe->nhg.nexthop; match_nh; + match_nh = match_nh->next) { + if (match->type == ZEBRA_ROUTE_CONNECT + || nexthop->ifindex == match_nh->ifindex) { + nexthop->ifindex = match_nh->ifindex; + return 1; + } + } + } + + return 0; +} + + +/* + * Check if an IPv6 nexthop for a NHLFE is active. Update nexthop based on + * the passed flag. + * NOTE: Looking only for connected routes right now. + */ +static int nhlfe_nexthop_active_ipv6(struct zebra_nhlfe *nhlfe, + struct nexthop *nexthop) +{ + struct route_table *table; + struct prefix_ipv6 p; + struct route_node *rn; + struct route_entry *match; + + table = zebra_vrf_table(AFI_IP6, SAFI_UNICAST, nexthop->vrf_id); + if (!table) + return 0; + + /* Lookup nexthop in IPv6 routing table. */ + memset(&p, 0, sizeof(p)); + p.family = AF_INET6; + p.prefixlen = IPV6_MAX_BITLEN; + p.prefix = nexthop->gate.ipv6; + + rn = route_node_match(table, (struct prefix *)&p); + if (!rn) + return 0; + + route_unlock_node(rn); + + /* Locate a valid connected route. 
*/ + RNODE_FOREACH_RE (rn, match) { + if ((match->type == ZEBRA_ROUTE_CONNECT) + && !CHECK_FLAG(match->status, ROUTE_ENTRY_REMOVED) + && CHECK_FLAG(match->flags, ZEBRA_FLAG_SELECTED)) + break; + } + + if (!match || !match->nhe->nhg.nexthop) + return 0; + + nexthop->ifindex = match->nhe->nhg.nexthop->ifindex; + return 1; +} + + +/* + * Check the nexthop reachability for a NHLFE and return if valid (reachable) + * or not. + * NOTE: Each NHLFE points to only 1 nexthop. + */ +static int nhlfe_nexthop_active(struct zebra_nhlfe *nhlfe) +{ + struct nexthop *nexthop; + struct interface *ifp; + struct zebra_ns *zns; + + nexthop = nhlfe->nexthop; + if (!nexthop) // unexpected + return 0; + + /* Check on nexthop based on type. */ + switch (nexthop->type) { + case NEXTHOP_TYPE_IFINDEX: + /* + * Lookup if this type is special. The + * NEXTHOP_TYPE_IFINDEX is a pop and + * forward into a different table for + * processing. As such this ifindex + * passed to us may be a VRF device + * which will not be in the default + * VRF. 
So let's look in all of them + */ + zns = zebra_ns_lookup(NS_DEFAULT); + ifp = if_lookup_by_index_per_ns(zns, nexthop->ifindex); + if (ifp && if_is_operative(ifp)) + SET_FLAG(nexthop->flags, NEXTHOP_FLAG_ACTIVE); + else + UNSET_FLAG(nexthop->flags, NEXTHOP_FLAG_ACTIVE); + break; + case NEXTHOP_TYPE_IPV4: + case NEXTHOP_TYPE_IPV4_IFINDEX: + if (nhlfe_nexthop_active_ipv4(nhlfe, nexthop)) + SET_FLAG(nexthop->flags, NEXTHOP_FLAG_ACTIVE); + else + UNSET_FLAG(nexthop->flags, NEXTHOP_FLAG_ACTIVE); + break; + + case NEXTHOP_TYPE_IPV6: + if (nhlfe_nexthop_active_ipv6(nhlfe, nexthop)) + SET_FLAG(nexthop->flags, NEXTHOP_FLAG_ACTIVE); + else + UNSET_FLAG(nexthop->flags, NEXTHOP_FLAG_ACTIVE); + break; + + case NEXTHOP_TYPE_IPV6_IFINDEX: + if (IN6_IS_ADDR_LINKLOCAL(&nexthop->gate.ipv6)) { + ifp = if_lookup_by_index(nexthop->ifindex, + nexthop->vrf_id); + if (ifp && if_is_operative(ifp)) + SET_FLAG(nexthop->flags, NEXTHOP_FLAG_ACTIVE); + else + UNSET_FLAG(nexthop->flags, NEXTHOP_FLAG_ACTIVE); + } else { + if (nhlfe_nexthop_active_ipv6(nhlfe, nexthop)) + SET_FLAG(nexthop->flags, NEXTHOP_FLAG_ACTIVE); + else + UNSET_FLAG(nexthop->flags, NEXTHOP_FLAG_ACTIVE); + } + break; + + case NEXTHOP_TYPE_BLACKHOLE: + break; + } + + return CHECK_FLAG(nexthop->flags, NEXTHOP_FLAG_ACTIVE); +} + +/* + * Walk through NHLFEs for a LSP forwarding entry, verify nexthop + * reachability and select the best. Multipath entries are also + * marked. This is invoked when an LSP scheduled for processing (due + * to some change) is examined. + */ +static void lsp_select_best_nhlfe(struct zebra_lsp *lsp) +{ + struct zebra_nhlfe *nhlfe; + struct zebra_nhlfe *best; + struct nexthop *nexthop; + int changed = 0; + + if (!lsp) + return; + + best = NULL; + lsp->num_ecmp = 0; + UNSET_FLAG(lsp->flags, LSP_FLAG_CHANGED); + + /* + * First compute the best path, after checking nexthop status. We are + * only concerned with non-deleted NHLFEs. 
+ */ + frr_each_safe(nhlfe_list, &lsp->nhlfe_list, nhlfe) { + /* Clear selection flags. */ + UNSET_FLAG(nhlfe->flags, + (NHLFE_FLAG_SELECTED | NHLFE_FLAG_MULTIPATH)); + + if (!CHECK_FLAG(nhlfe->flags, NHLFE_FLAG_DELETED) + && nhlfe_nexthop_active(nhlfe)) { + if (!best || (nhlfe->distance < best->distance)) + best = nhlfe; + } + } + + lsp->best_nhlfe = best; + if (!lsp->best_nhlfe) + return; + + /* + * Check the active status of backup nhlfes also + */ + frr_each_safe(nhlfe_list, &lsp->backup_nhlfe_list, nhlfe) { + if (!CHECK_FLAG(nhlfe->flags, NHLFE_FLAG_DELETED)) + (void)nhlfe_nexthop_active(nhlfe); + } + + /* Mark best NHLFE as selected. */ + SET_FLAG(lsp->best_nhlfe->flags, NHLFE_FLAG_SELECTED); + + /* + * If best path exists, see if there is ECMP. While doing this, note if + * a + * new (uninstalled) NHLFE has been selected, an installed entry that is + * still selected has a change or an installed entry is to be removed. + */ + frr_each(nhlfe_list, &lsp->nhlfe_list, nhlfe) { + int nh_chg, nh_sel, nh_inst; + + nexthop = nhlfe->nexthop; + if (!nexthop) // unexpected + continue; + + if (!CHECK_FLAG(nhlfe->flags, NHLFE_FLAG_DELETED) + && CHECK_FLAG(nexthop->flags, NEXTHOP_FLAG_ACTIVE) + && (nhlfe->distance == lsp->best_nhlfe->distance)) { + SET_FLAG(nhlfe->flags, NHLFE_FLAG_SELECTED); + SET_FLAG(nhlfe->flags, NHLFE_FLAG_MULTIPATH); + lsp->num_ecmp++; + } + + if (CHECK_FLAG(lsp->flags, LSP_FLAG_INSTALLED) && !changed) { + nh_chg = CHECK_FLAG(nhlfe->flags, NHLFE_FLAG_CHANGED); + nh_sel = CHECK_FLAG(nhlfe->flags, NHLFE_FLAG_SELECTED); + nh_inst = + CHECK_FLAG(nhlfe->flags, NHLFE_FLAG_INSTALLED); + + if ((nh_sel && !nh_inst) + || (nh_sel && nh_inst && nh_chg) + || (nh_inst && !nh_sel)) + changed = 1; + } + + /* We have finished examining, clear changed flag. */ + UNSET_FLAG(nhlfe->flags, NHLFE_FLAG_CHANGED); + } + + if (changed) + SET_FLAG(lsp->flags, LSP_FLAG_CHANGED); +} + +/* + * Delete LSP forwarding entry from kernel, if installed. Called upon + * process exit. 
 */
static void lsp_uninstall_from_kernel(struct hash_bucket *bucket, void *ctxt)
{
	struct zebra_lsp *lsp;

	lsp = (struct zebra_lsp *)bucket->data;
	/* Only issue a dataplane delete for entries we believe are in the
	 * kernel; uninstalled entries have nothing to remove.
	 */
	if (CHECK_FLAG(lsp->flags, LSP_FLAG_INSTALLED))
		(void)dplane_lsp_delete(lsp);
}

/*
 * Schedule LSP forwarding entry for processing. Called upon changes
 * that may impact LSPs such as nexthop / connected route changes.
 * Used as a hash-walk callback; 'ctxt' distinguishes the caller:
 * NULL means an external event (see below), non-NULL forces scheduling.
 */
static void lsp_schedule(struct hash_bucket *bucket, void *ctxt)
{
	struct zebra_lsp *lsp;

	lsp = (struct zebra_lsp *)bucket->data;

	/* In the common flow, this is used when external events occur. For
	 * LSPs with backup nhlfes, we'll assume that the forwarding
	 * plane will use the backups to handle these events, until the
	 * owning protocol can react.
	 */
	if (ctxt == NULL) {
		/* Skip LSPs with backups */
		if (nhlfe_list_first(&lsp->backup_nhlfe_list) != NULL) {
			if (IS_ZEBRA_DEBUG_MPLS_DETAIL)
				zlog_debug("%s: skip LSP in-label %u",
					   __func__, lsp->ile.in_label);
			return;
		}
	}

	(void)lsp_processq_add(lsp);
}

/*
 * Process a LSP entry that is in the queue. Recalculate best NHLFE and
 * any multipaths and update or delete from the kernel, as needed.
 * Work-queue work function; always returns WQ_SUCCESS (errors are logged
 * and reflected in LSP flags rather than retried).
 */
static wq_item_status lsp_process(struct work_queue *wq, void *data)
{
	struct zebra_lsp *lsp;
	struct zebra_nhlfe *oldbest, *newbest;
	char buf[BUFSIZ], buf2[BUFSIZ];
	/* NOTE(review): zvrf is dereferenced below without a NULL check;
	 * presumably the default VRF always exists here -- TODO confirm.
	 */
	struct zebra_vrf *zvrf = vrf_info_lookup(VRF_DEFAULT);
	enum zebra_dplane_result res;

	lsp = (struct zebra_lsp *)data;
	if (!lsp) // unexpected
		return WQ_SUCCESS;

	oldbest = lsp->best_nhlfe;

	/* Select best NHLFE(s) */
	lsp_select_best_nhlfe(lsp);

	newbest = lsp->best_nhlfe;

	if (IS_ZEBRA_DEBUG_MPLS) {
		/* buf/buf2 are only read when the corresponding pointer is
		 * non-NULL, so they are never used uninitialized.
		 */
		if (oldbest)
			nhlfe2str(oldbest, buf, sizeof(buf));
		if (newbest)
			nhlfe2str(newbest, buf2, sizeof(buf2));
		zlog_debug(
			"Process LSP in-label %u oldbest %s newbest %s flags 0x%x ecmp# %d",
			lsp->ile.in_label, oldbest ? buf : "NULL",
			newbest ? buf2 : "NULL", lsp->flags, lsp->num_ecmp);
	}

	if (!CHECK_FLAG(lsp->flags, LSP_FLAG_INSTALLED)) {
		/* Not already installed */
		if (newbest) {

			UNSET_FLAG(lsp->flags, LSP_FLAG_CHANGED);

			switch (dplane_lsp_add(lsp)) {
			case ZEBRA_DPLANE_REQUEST_QUEUED:
				/* Set 'installed' flag so we will know
				 * that an install is in-flight.
				 */
				SET_FLAG(lsp->flags, LSP_FLAG_INSTALLED);

				zvrf->lsp_installs_queued++;
				break;
			case ZEBRA_DPLANE_REQUEST_FAILURE:
				flog_warn(EC_ZEBRA_LSP_INSTALL_FAILURE,
					  "LSP Install Failure: %u",
					  lsp->ile.in_label);
				break;
			case ZEBRA_DPLANE_REQUEST_SUCCESS:
				zvrf->lsp_installs++;
				break;
			}
		}
	} else {
		/* Installed, may need an update and/or delete. */
		if (!newbest) {
			res = dplane_lsp_delete(lsp);

			/* We do some of the lsp cleanup immediately for
			 * deletes.
			 */
			UNSET_FLAG(lsp->flags, LSP_FLAG_INSTALLED);
			clear_nhlfe_installed(lsp);

			switch (res) {
			case ZEBRA_DPLANE_REQUEST_QUEUED:
				zvrf->lsp_removals_queued++;
				break;
			case ZEBRA_DPLANE_REQUEST_FAILURE:
				flog_warn(EC_ZEBRA_LSP_DELETE_FAILURE,
					  "LSP Deletion Failure: %u",
					  lsp->ile.in_label);
				break;
			case ZEBRA_DPLANE_REQUEST_SUCCESS:
				zvrf->lsp_removals++;
				break;
			}
		} else if (CHECK_FLAG(lsp->flags, LSP_FLAG_CHANGED)) {
			struct zebra_nhlfe *nhlfe;
			struct nexthop *nexthop;

			UNSET_FLAG(lsp->flags, LSP_FLAG_CHANGED);

			/* We leave the INSTALLED flag set here
			 * so we know an update is in-flight.
			 */

			/*
			 * Any NHLFE that was installed but is not
			 * selected now needs to have its flags updated.
			 */
			frr_each_safe(nhlfe_list, &lsp->nhlfe_list, nhlfe) {
				nexthop = nhlfe->nexthop;
				if (!nexthop)
					continue;

				if (CHECK_FLAG(nhlfe->flags,
					       NHLFE_FLAG_INSTALLED)
				    && !CHECK_FLAG(nhlfe->flags,
						   NHLFE_FLAG_SELECTED)) {
					UNSET_FLAG(nhlfe->flags,
						   NHLFE_FLAG_INSTALLED);
					UNSET_FLAG(nexthop->flags,
						   NEXTHOP_FLAG_FIB);
				}
			}

			switch (dplane_lsp_update(lsp)) {
			case ZEBRA_DPLANE_REQUEST_QUEUED:
				zvrf->lsp_installs_queued++;
				break;
			case ZEBRA_DPLANE_REQUEST_FAILURE:
				flog_warn(EC_ZEBRA_LSP_INSTALL_FAILURE,
					  "LSP Update Failure: %u",
					  lsp->ile.in_label);
				break;
			case ZEBRA_DPLANE_REQUEST_SUCCESS:
				zvrf->lsp_installs++;
				break;
			}
		}
	}

	return WQ_SUCCESS;
}


/*
 * Callback upon processing completion of a LSP forwarding entry.
 * Clears the SCHEDULED flag, reaps NHLFEs marked DELETED, and frees the
 * LSP itself if it has become empty.
 */
static void lsp_processq_del(struct work_queue *wq, void *data)
{
	struct zebra_vrf *zvrf;
	struct zebra_lsp *lsp;
	struct hash *lsp_table;
	struct zebra_nhlfe *nhlfe;

	/* If zebra is shutting down, don't delete any structs,
	 * just ignore this callback. The LSPs will be cleaned up
	 * during the shutdown processing.
	 */
	if (zebra_router_in_shutdown())
		return;

	zvrf = vrf_info_lookup(VRF_DEFAULT);
	assert(zvrf);

	lsp_table = zvrf->lsp_table;
	if (!lsp_table) // unexpected
		return;

	lsp = (struct zebra_lsp *)data;
	if (!lsp) // unexpected
		return;

	/* Clear flag, remove any NHLFEs marked for deletion. If no NHLFEs
	 * exist,
	 * delete LSP entry also.
	 */
	UNSET_FLAG(lsp->flags, LSP_FLAG_SCHEDULED);

	/* Safe iteration required: nhlfe_del() unlinks entries in place. */
	frr_each_safe(nhlfe_list, &lsp->nhlfe_list, nhlfe) {
		if (CHECK_FLAG(nhlfe->flags, NHLFE_FLAG_DELETED))
			nhlfe_del(nhlfe);
	}

	frr_each_safe(nhlfe_list, &lsp->backup_nhlfe_list, nhlfe) {
		if (CHECK_FLAG(nhlfe->flags, NHLFE_FLAG_DELETED))
			nhlfe_del(nhlfe);
	}

	lsp_check_free(lsp_table, &lsp);
}

/*
 * Callback upon finishing the processing of all scheduled
 * LSP forwarding entries.
+ */ +static void lsp_processq_complete(struct work_queue *wq) +{ + /* Nothing to do for now. */ +} + +/* + * Add LSP forwarding entry to queue for subsequent processing. + */ +static int lsp_processq_add(struct zebra_lsp *lsp) +{ + /* If already scheduled, exit. */ + if (CHECK_FLAG(lsp->flags, LSP_FLAG_SCHEDULED)) + return 0; + + if (zrouter.lsp_process_q == NULL) { + flog_err(EC_ZEBRA_WQ_NONEXISTENT, + "%s: work_queue does not exist!", __func__); + return -1; + } + + work_queue_add(zrouter.lsp_process_q, lsp); + SET_FLAG(lsp->flags, LSP_FLAG_SCHEDULED); + return 0; +} + +/* + * Callback to allocate LSP forwarding table entry. + */ +static void *lsp_alloc(void *p) +{ + const struct zebra_ile *ile = p; + struct zebra_lsp *lsp; + + lsp = XCALLOC(MTYPE_LSP, sizeof(struct zebra_lsp)); + lsp->ile = *ile; + nhlfe_list_init(&lsp->nhlfe_list); + nhlfe_list_init(&lsp->backup_nhlfe_list); + + if (IS_ZEBRA_DEBUG_MPLS) + zlog_debug("Alloc LSP in-label %u", lsp->ile.in_label); + + return ((void *)lsp); +} + +/* + * Check whether lsp can be freed - no nhlfes, e.g., and call free api + */ +static void lsp_check_free(struct hash *lsp_table, struct zebra_lsp **plsp) +{ + struct zebra_lsp *lsp; + + if (plsp == NULL || *plsp == NULL) + return; + + lsp = *plsp; + + if ((nhlfe_list_first(&lsp->nhlfe_list) == NULL) && + (nhlfe_list_first(&lsp->backup_nhlfe_list) == NULL) && + !CHECK_FLAG(lsp->flags, LSP_FLAG_SCHEDULED)) + lsp_free(lsp_table, plsp); +} + +static void lsp_free_nhlfe(struct zebra_lsp *lsp) +{ + struct zebra_nhlfe *nhlfe; + + while ((nhlfe = nhlfe_list_first(&lsp->nhlfe_list))) { + nhlfe_list_del(&lsp->nhlfe_list, nhlfe); + nhlfe_free(nhlfe); + } + + while ((nhlfe = nhlfe_list_first(&lsp->backup_nhlfe_list))) { + nhlfe_list_del(&lsp->backup_nhlfe_list, nhlfe); + nhlfe_free(nhlfe); + } +} + +/* + * Dtor for an LSP: remove from ile hash, release any internal allocations, + * free LSP object. 
+ */ +static void lsp_free(struct hash *lsp_table, struct zebra_lsp **plsp) +{ + struct zebra_lsp *lsp; + + if (plsp == NULL || *plsp == NULL) + return; + + lsp = *plsp; + + if (IS_ZEBRA_DEBUG_MPLS) + zlog_debug("Free LSP in-label %u flags 0x%x", + lsp->ile.in_label, lsp->flags); + + lsp_free_nhlfe(lsp); + + hash_release(lsp_table, &lsp->ile); + XFREE(MTYPE_LSP, lsp); + + *plsp = NULL; +} + +/* + * Create printable string for NHLFE entry. + */ +static char *nhlfe2str(const struct zebra_nhlfe *nhlfe, char *buf, int size) +{ + const struct nexthop *nexthop; + + buf[0] = '\0'; + nexthop = nhlfe->nexthop; + switch (nexthop->type) { + case NEXTHOP_TYPE_IPV4: + case NEXTHOP_TYPE_IPV4_IFINDEX: + inet_ntop(AF_INET, &nexthop->gate.ipv4, buf, size); + break; + case NEXTHOP_TYPE_IPV6: + case NEXTHOP_TYPE_IPV6_IFINDEX: + inet_ntop(AF_INET6, &nexthop->gate.ipv6, buf, size); + break; + case NEXTHOP_TYPE_IFINDEX: + snprintf(buf, size, "Ifindex: %u", nexthop->ifindex); + case NEXTHOP_TYPE_BLACKHOLE: + break; + } + + return buf; +} + +/* + * Check if NHLFE matches with search info passed. 
+ */ +static int nhlfe_nhop_match(struct zebra_nhlfe *nhlfe, + enum nexthop_types_t gtype, + const union g_addr *gate, ifindex_t ifindex) +{ + struct nexthop *nhop; + int cmp = 1; + + nhop = nhlfe->nexthop; + if (!nhop) + return 1; + + if (nhop->type != gtype) + return 1; + + switch (nhop->type) { + case NEXTHOP_TYPE_IPV4: + case NEXTHOP_TYPE_IPV4_IFINDEX: + cmp = memcmp(&(nhop->gate.ipv4), &(gate->ipv4), + sizeof(struct in_addr)); + if (!cmp && nhop->type == NEXTHOP_TYPE_IPV4_IFINDEX) + cmp = !(nhop->ifindex == ifindex); + break; + case NEXTHOP_TYPE_IPV6: + case NEXTHOP_TYPE_IPV6_IFINDEX: + cmp = memcmp(&(nhop->gate.ipv6), &(gate->ipv6), + sizeof(struct in6_addr)); + if (!cmp && nhop->type == NEXTHOP_TYPE_IPV6_IFINDEX) + cmp = !(nhop->ifindex == ifindex); + break; + case NEXTHOP_TYPE_IFINDEX: + cmp = !(nhop->ifindex == ifindex); + break; + case NEXTHOP_TYPE_BLACKHOLE: + break; + } + + return cmp; +} + + +/* + * Locate NHLFE that matches with passed info. + */ +static struct zebra_nhlfe *nhlfe_find(struct nhlfe_list_head *list, + enum lsp_types_t lsp_type, + enum nexthop_types_t gtype, + const union g_addr *gate, + ifindex_t ifindex) +{ + struct zebra_nhlfe *nhlfe; + + frr_each_safe(nhlfe_list, list, nhlfe) { + if (nhlfe->type != lsp_type) + continue; + if (!nhlfe_nhop_match(nhlfe, gtype, gate, ifindex)) + break; + } + + return nhlfe; +} + +/* + * Allocate and init new NHLFE. 
+ */ +static struct zebra_nhlfe * +nhlfe_alloc(struct zebra_lsp *lsp, enum lsp_types_t lsp_type, + enum nexthop_types_t gtype, const union g_addr *gate, + ifindex_t ifindex, uint8_t num_labels, const mpls_label_t *labels) +{ + struct zebra_nhlfe *nhlfe; + struct nexthop *nexthop; + + assert(lsp); + + nhlfe = XCALLOC(MTYPE_NHLFE, sizeof(struct zebra_nhlfe)); + + nhlfe->lsp = lsp; + nhlfe->type = lsp_type; + nhlfe->distance = lsp_distance(lsp_type); + + nexthop = nexthop_new(); + + nexthop_add_labels(nexthop, lsp_type, num_labels, labels); + + nexthop->vrf_id = VRF_DEFAULT; + nexthop->type = gtype; + switch (nexthop->type) { + case NEXTHOP_TYPE_IPV4: + case NEXTHOP_TYPE_IPV4_IFINDEX: + nexthop->gate.ipv4 = gate->ipv4; + if (ifindex) + nexthop->ifindex = ifindex; + break; + case NEXTHOP_TYPE_IPV6: + case NEXTHOP_TYPE_IPV6_IFINDEX: + nexthop->gate.ipv6 = gate->ipv6; + if (ifindex) + nexthop->ifindex = ifindex; + break; + case NEXTHOP_TYPE_IFINDEX: + nexthop->ifindex = ifindex; + break; + case NEXTHOP_TYPE_BLACKHOLE: + if (IS_ZEBRA_DEBUG_MPLS) + zlog_debug("%s: invalid: blackhole nexthop", __func__); + + nexthop_free(nexthop); + XFREE(MTYPE_NHLFE, nhlfe); + return NULL; + } + nhlfe->nexthop = nexthop; + + return nhlfe; +} + +/* + * Add primary or backup NHLFE. Base entry must have been created and + * duplicate check done. 
+ */ +static struct zebra_nhlfe *nhlfe_add(struct zebra_lsp *lsp, + enum lsp_types_t lsp_type, + enum nexthop_types_t gtype, + const union g_addr *gate, + ifindex_t ifindex, uint8_t num_labels, + const mpls_label_t *labels, bool is_backup) +{ + struct zebra_nhlfe *nhlfe; + + if (!lsp) + return NULL; + + /* Must have labels */ + if (num_labels == 0 || labels == NULL) { + if (IS_ZEBRA_DEBUG_MPLS) + zlog_debug("%s: invalid nexthop: no labels", __func__); + + return NULL; + } + + /* Allocate new object */ + nhlfe = nhlfe_alloc(lsp, lsp_type, gtype, gate, ifindex, num_labels, + labels); + + if (!nhlfe) + return NULL; + + /* Enqueue to LSP: primaries at head of list, backups at tail */ + if (is_backup) { + SET_FLAG(nhlfe->flags, NHLFE_FLAG_IS_BACKUP); + nhlfe_list_add_tail(&lsp->backup_nhlfe_list, nhlfe); + } else + nhlfe_list_add_head(&lsp->nhlfe_list, nhlfe); + + return nhlfe; +} + +/* + * Common delete for NHLFEs. + */ +static void nhlfe_free(struct zebra_nhlfe *nhlfe) +{ + if (!nhlfe) + return; + + /* Free nexthop. */ + if (nhlfe->nexthop) + nexthop_free(nhlfe->nexthop); + + nhlfe->nexthop = NULL; + + XFREE(MTYPE_NHLFE, nhlfe); +} + + +/* + * Disconnect NHLFE from LSP, and free. Entry must be present on LSP's list. + */ +static int nhlfe_del(struct zebra_nhlfe *nhlfe) +{ + struct zebra_lsp *lsp; + + if (!nhlfe) + return -1; + + lsp = nhlfe->lsp; + if (!lsp) + return -1; + + if (nhlfe == lsp->best_nhlfe) + lsp->best_nhlfe = NULL; + + /* Unlink from LSP */ + if (CHECK_FLAG(nhlfe->flags, NHLFE_FLAG_IS_BACKUP)) + nhlfe_list_del(&lsp->backup_nhlfe_list, nhlfe); + else + nhlfe_list_del(&lsp->nhlfe_list, nhlfe); + + nhlfe->lsp = NULL; + + nhlfe_free(nhlfe); + + return 0; +} + +/* + * Update label for NHLFE entry. 
+ */ +static void nhlfe_out_label_update(struct zebra_nhlfe *nhlfe, + struct mpls_label_stack *nh_label) +{ + nhlfe->nexthop->nh_label->label[0] = nh_label->label[0]; +} + +static int mpls_lsp_uninstall_all(struct hash *lsp_table, struct zebra_lsp *lsp, + enum lsp_types_t type) +{ + struct zebra_nhlfe *nhlfe; + int schedule_lsp = 0; + char buf[BUFSIZ]; + + if (CHECK_FLAG(lsp->flags, LSP_FLAG_INSTALLED)) + schedule_lsp = 1; + + /* Mark NHLFEs for delete or directly delete, as appropriate. */ + frr_each_safe(nhlfe_list, &lsp->nhlfe_list, nhlfe) { + /* Skip non-static NHLFEs */ + if (nhlfe->type != type) + continue; + + if (IS_ZEBRA_DEBUG_MPLS) { + nhlfe2str(nhlfe, buf, sizeof(buf)); + zlog_debug( + "Del LSP in-label %u type %d nexthop %s flags 0x%x", + lsp->ile.in_label, type, buf, nhlfe->flags); + } + + if (CHECK_FLAG(nhlfe->flags, NHLFE_FLAG_INSTALLED)) { + UNSET_FLAG(nhlfe->flags, NHLFE_FLAG_CHANGED); + SET_FLAG(nhlfe->flags, NHLFE_FLAG_DELETED); + schedule_lsp = 1; + } else { + nhlfe_del(nhlfe); + } + } + + frr_each_safe(nhlfe_list, &lsp->backup_nhlfe_list, nhlfe) { + /* Skip non-static NHLFEs */ + if (nhlfe->type != type) + continue; + + if (IS_ZEBRA_DEBUG_MPLS) { + nhlfe2str(nhlfe, buf, sizeof(buf)); + zlog_debug( + "Del backup LSP in-label %u type %d nexthop %s flags 0x%x", + lsp->ile.in_label, type, buf, nhlfe->flags); + } + + if (CHECK_FLAG(nhlfe->flags, NHLFE_FLAG_INSTALLED)) { + UNSET_FLAG(nhlfe->flags, NHLFE_FLAG_CHANGED); + SET_FLAG(nhlfe->flags, NHLFE_FLAG_DELETED); + schedule_lsp = 1; + } else { + nhlfe_del(nhlfe); + } + } + + /* Queue LSP for processing, if needed, else delete. */ + if (schedule_lsp) { + if (IS_ZEBRA_DEBUG_MPLS) { + zlog_debug("Schedule LSP in-label %u flags 0x%x", + lsp->ile.in_label, lsp->flags); + } + if (lsp_processq_add(lsp)) + return -1; + } else { + lsp_check_free(lsp_table, &lsp); + } + + return 0; +} + +/* + * Uninstall all static NHLFEs for a particular LSP forwarding entry. 
 * If no other NHLFEs exist, the entry would be deleted.
 * Returns 0 on success or if nothing to do, -1 on failure.
 */
static int mpls_static_lsp_uninstall_all(struct zebra_vrf *zvrf,
					 mpls_label_t in_label)
{
	struct hash *lsp_table;
	struct zebra_ile tmp_ile;
	struct zebra_lsp *lsp;

	/* Lookup table. */
	lsp_table = zvrf->lsp_table;
	if (!lsp_table)
		return -1;

	/* If entry is not present, exit. */
	tmp_ile.in_label = in_label;
	lsp = hash_lookup(lsp_table, &tmp_ile);
	if (!lsp || (nhlfe_list_first(&lsp->nhlfe_list) == NULL))
		return 0;

	return mpls_lsp_uninstall_all(lsp_table, lsp, ZEBRA_LSP_STATIC);
}

/*
 * Build the JSON representation of one NHLFE: type, out-label stack,
 * distance, install state, nexthop/interface, and any backup indexes.
 * Caller owns the returned json_object.
 */
static json_object *nhlfe_json(struct zebra_nhlfe *nhlfe)
{
	json_object *json_nhlfe = NULL;
	json_object *json_backups = NULL;
	json_object *json_label_stack;
	struct nexthop *nexthop = nhlfe->nexthop;
	int i;

	json_nhlfe = json_object_new_object();
	json_object_string_add(json_nhlfe, "type", nhlfe_type2str(nhlfe->type));
	/* NOTE(review): nexthop->nh_label is dereferenced without a NULL
	 * check here, while nhlfe_print() below guards against it --
	 * presumably JSON output is only reached for labeled entries;
	 * TODO confirm.
	 */
	json_object_int_add(json_nhlfe, "outLabel",
			    nexthop->nh_label->label[0]);

	json_label_stack = json_object_new_array();
	json_object_object_add(json_nhlfe, "outLabelStack", json_label_stack);
	for (i = 0; i < nexthop->nh_label->num_labels; i++)
		json_object_array_add(
			json_label_stack,
			json_object_new_int(nexthop->nh_label->label[i]));

	json_object_int_add(json_nhlfe, "distance", nhlfe->distance);

	if (CHECK_FLAG(nhlfe->flags, NHLFE_FLAG_INSTALLED))
		json_object_boolean_true_add(json_nhlfe, "installed");

	switch (nexthop->type) {
	case NEXTHOP_TYPE_IPV4:
	case NEXTHOP_TYPE_IPV4_IFINDEX:
		json_object_string_addf(json_nhlfe, "nexthop", "%pI4",
					&nexthop->gate.ipv4);
		break;
	case NEXTHOP_TYPE_IPV6:
	case NEXTHOP_TYPE_IPV6_IFINDEX:
		json_object_string_addf(json_nhlfe, "nexthop", "%pI6",
					&nexthop->gate.ipv6);

		if (nexthop->ifindex)
			json_object_string_add(json_nhlfe, "interface",
					       ifindex2ifname(nexthop->ifindex,
							      nexthop->vrf_id));
		break;
	case NEXTHOP_TYPE_IFINDEX:
		if (nexthop->ifindex)
			json_object_string_add(json_nhlfe, "interface",
					       ifindex2ifname(nexthop->ifindex,
							      nexthop->vrf_id));
		break;
	case NEXTHOP_TYPE_BLACKHOLE:
		break;
	}

	if (CHECK_FLAG(nexthop->flags, NEXTHOP_FLAG_HAS_BACKUP)) {
		json_backups = json_object_new_array();
		for (i = 0; i < nexthop->backup_num; i++) {
			json_object_array_add(
				json_backups,
				json_object_new_int(nexthop->backup_idx[i]));
		}

		json_object_object_add(json_nhlfe, "backupIndex",
				       json_backups);
	}

	return json_nhlfe;
}

/*
 * Print the NHLFE for a LSP forwarding entry.
 * 'indent' (may be NULL) is emitted before the nexthop line, used when
 * printing backup entries beneath their primary.
 */
static void nhlfe_print(struct zebra_nhlfe *nhlfe, struct vty *vty,
			const char *indent)
{
	struct nexthop *nexthop;
	char buf[MPLS_LABEL_STRLEN];

	nexthop = nhlfe->nexthop;
	if (!nexthop || !nexthop->nh_label) // unexpected
		return;

	vty_out(vty, " type: %s remote label: %s distance: %d\n",
		nhlfe_type2str(nhlfe->type),
		mpls_label2str(nexthop->nh_label->num_labels,
			       nexthop->nh_label->label,
			       buf, sizeof(buf), 0),
		nhlfe->distance);

	if (indent)
		vty_out(vty, "%s", indent);

	switch (nexthop->type) {
	case NEXTHOP_TYPE_IPV4:
	case NEXTHOP_TYPE_IPV4_IFINDEX:
		vty_out(vty, "  via %pI4", &nexthop->gate.ipv4);
		if (nexthop->ifindex)
			vty_out(vty, " dev %s",
				ifindex2ifname(nexthop->ifindex,
					       nexthop->vrf_id));
		break;
	case NEXTHOP_TYPE_IPV6:
	case NEXTHOP_TYPE_IPV6_IFINDEX:
		vty_out(vty, "  via %s",
			inet_ntop(AF_INET6, &nexthop->gate.ipv6, buf,
				  sizeof(buf)));
		if (nexthop->ifindex)
			vty_out(vty, " dev %s",
				ifindex2ifname(nexthop->ifindex,
					       nexthop->vrf_id));
		break;
	case NEXTHOP_TYPE_IFINDEX:
		if (nexthop->ifindex)
			vty_out(vty, "  dev %s",
				ifindex2ifname(nexthop->ifindex,
					       nexthop->vrf_id));
		break;
	case NEXTHOP_TYPE_BLACKHOLE:
		break;
	}
	vty_out(vty, "%s",
		CHECK_FLAG(nhlfe->flags, NHLFE_FLAG_IS_BACKUP) ? " (backup)"
							       : "");
	vty_out(vty, "%s",
		CHECK_FLAG(nhlfe->flags, NHLFE_FLAG_INSTALLED) ? " (installed)"
							       : "");
	vty_out(vty, "\n");
}

/*
 * Print an LSP forwarding entry.
 */
static void lsp_print(struct vty *vty, struct zebra_lsp *lsp)
{
	struct zebra_nhlfe *nhlfe, *backup;
	int i, j;

	vty_out(vty, "Local label: %u%s\n", lsp->ile.in_label,
		CHECK_FLAG(lsp->flags, LSP_FLAG_INSTALLED) ? " (installed)"
							   : "");

	frr_each(nhlfe_list, &lsp->nhlfe_list, nhlfe) {
		nhlfe_print(nhlfe, vty, NULL);

		if (nhlfe->nexthop == NULL ||
		    !CHECK_FLAG(nhlfe->nexthop->flags,
				NEXTHOP_FLAG_HAS_BACKUP))
			continue;

		/* Backup nhlfes: find backups in backup list */

		for (j = 0; j < nhlfe->nexthop->backup_num; j++) {
			/* Walk the backup list counting positions until we
			 * reach backup_idx[j]; 'backup' is NULL if the index
			 * is past the end of the list.
			 */
			i = 0;
			backup = NULL;
			frr_each(nhlfe_list, &lsp->backup_nhlfe_list, backup) {
				if (i == nhlfe->nexthop->backup_idx[j])
					break;
				i++;
			}

			if (backup) {
				vty_out(vty, " [backup %d]", i);
				nhlfe_print(backup, vty, " ");
			}
		}
	}
}

/*
 * JSON objects for an LSP forwarding entry.
 * Caller owns the returned object; "backupNexthops" is present only when
 * the backup list is non-empty.
 */
static json_object *lsp_json(struct zebra_lsp *lsp)
{
	struct zebra_nhlfe *nhlfe = NULL;
	json_object *json = json_object_new_object();
	json_object *json_nhlfe_list = json_object_new_array();

	json_object_int_add(json, "inLabel", lsp->ile.in_label);

	if (CHECK_FLAG(lsp->flags, LSP_FLAG_INSTALLED))
		json_object_boolean_true_add(json, "installed");

	frr_each(nhlfe_list, &lsp->nhlfe_list, nhlfe)
		json_object_array_add(json_nhlfe_list, nhlfe_json(nhlfe));

	json_object_object_add(json, "nexthops", json_nhlfe_list);
	/* Reuse the variable for the (lazily-created) backup array. */
	json_nhlfe_list = NULL;


	frr_each(nhlfe_list, &lsp->backup_nhlfe_list, nhlfe) {
		if (json_nhlfe_list == NULL)
			json_nhlfe_list = json_object_new_array();

		json_object_array_add(json_nhlfe_list, nhlfe_json(nhlfe));
	}

	if (json_nhlfe_list)
		json_object_object_add(json, "backupNexthops", json_nhlfe_list);

	return json;
}


/* Return a sorted linked list of the hash contents.
 * 'cmp' is a two-argument comparator cast onto the list's cmp slot;
 * caller owns (and must free) the returned list.
 */
static struct list *hash_get_sorted_list(struct hash *hash, void *cmp)
{
	unsigned int i;
	struct hash_bucket *hb;
	struct list *sorted_list = list_new();

	sorted_list->cmp = (int (*)(void *, void *))cmp;

	for (i = 0; i < hash->size; i++)
		for (hb = hash->index[i]; hb; hb = hb->next)
			listnode_add_sort(sorted_list, hb->data);

	return sorted_list;
}

/*
 * Compare two LSPs based on their label values.
 * Returns <0 / 0 / >0 in qsort style, keyed on in-label.
 */
static int lsp_cmp(const struct zebra_lsp *lsp1, const struct zebra_lsp *lsp2)
{
	if (lsp1->ile.in_label < lsp2->ile.in_label)
		return -1;

	if (lsp1->ile.in_label > lsp2->ile.in_label)
		return 1;

	return 0;
}

/*
 * Initialize work queue for processing changed LSPs.
 * Wires the lsp_process* callbacks into zrouter.lsp_process_q; the 10ms
 * hold coalesces bursts of LSP changes before processing begins.
 */
static void mpls_processq_init(void)
{
	zrouter.lsp_process_q = work_queue_new(zrouter.master, "LSP processing");

	zrouter.lsp_process_q->spec.workfunc = &lsp_process;
	zrouter.lsp_process_q->spec.del_item_data = &lsp_processq_del;
	zrouter.lsp_process_q->spec.errorfunc = NULL;
	zrouter.lsp_process_q->spec.completion_func = &lsp_processq_complete;
	zrouter.lsp_process_q->spec.max_retries = 0;
	zrouter.lsp_process_q->spec.hold = 10;
}


/*
 * Process LSP update results from zebra dataplane.
 */
void zebra_mpls_lsp_dplane_result(struct zebra_dplane_ctx *ctx)
{
	struct zebra_vrf *zvrf;
	mpls_label_t label;
	struct zebra_ile tmp_ile;
	struct hash *lsp_table;
	struct zebra_lsp *lsp;
	struct zebra_nhlfe *nhlfe;
	struct nexthop *nexthop;
	enum dplane_op_e op;
	enum zebra_dplane_result status;
	enum zebra_sr_policy_update_label_mode update_mode;

	op = dplane_ctx_get_op(ctx);
	status = dplane_ctx_get_status(ctx);

	if (IS_ZEBRA_DEBUG_DPLANE_DETAIL)
		zlog_debug("LSP dplane ctx %p, op %s, in-label %u, result %s",
			   ctx, dplane_op2str(op),
			   dplane_ctx_get_in_label(ctx),
			   dplane_res2str(status));

	label = dplane_ctx_get_in_label(ctx);

	switch (op) {
	case DPLANE_OP_LSP_INSTALL:
	case DPLANE_OP_LSP_UPDATE:
		/* Look for zebra LSP object */
		zvrf = vrf_info_lookup(VRF_DEFAULT);
		if (zvrf == NULL)
			break;

		lsp_table = zvrf->lsp_table;

		tmp_ile.in_label = label;
		lsp = hash_lookup(lsp_table, &tmp_ile);
		if (lsp == NULL) {
			if (IS_ZEBRA_DEBUG_DPLANE)
				zlog_debug("LSP ctx %p: in-label %u not found",
					   ctx, dplane_ctx_get_in_label(ctx));
			break;
		}

		/* TODO -- Confirm that this result is still 'current' */

		if (status != ZEBRA_DPLANE_REQUEST_SUCCESS) {
			/* Install/update failed: clear installed state. */
			UNSET_FLAG(lsp->flags, LSP_FLAG_INSTALLED);
			clear_nhlfe_installed(lsp);
			flog_warn(EC_ZEBRA_LSP_INSTALL_FAILURE,
				  "LSP Install Failure: in-label %u",
				  lsp->ile.in_label);
			break;
		}

		/* Update zebra object */
		SET_FLAG(lsp->flags, LSP_FLAG_INSTALLED);
		frr_each(nhlfe_list, &lsp->nhlfe_list, nhlfe) {
			nexthop = nhlfe->nexthop;
			if (!nexthop)
				continue;

			if (CHECK_FLAG(nhlfe->flags, NHLFE_FLAG_SELECTED) &&
			    CHECK_FLAG(nexthop->flags, NEXTHOP_FLAG_ACTIVE)) {
				SET_FLAG(nhlfe->flags, NHLFE_FLAG_INSTALLED);
				SET_FLAG(nexthop->flags, NEXTHOP_FLAG_FIB);
			}
		}

		/* Tell SR-TE whether the label was created or updated. */
		update_mode = (op == DPLANE_OP_LSP_INSTALL)
				      ? ZEBRA_SR_POLICY_LABEL_CREATED
				      : ZEBRA_SR_POLICY_LABEL_UPDATED;
		zebra_sr_policy_label_update(label, update_mode);
		break;

	case DPLANE_OP_LSP_DELETE:
		if (status != ZEBRA_DPLANE_REQUEST_SUCCESS) {
			flog_warn(EC_ZEBRA_LSP_DELETE_FAILURE,
				  "LSP Deletion Failure: in-label %u",
				  dplane_ctx_get_in_label(ctx));
			break;
		}
		zebra_sr_policy_label_update(label,
					     ZEBRA_SR_POLICY_LABEL_REMOVED);
		break;

	default:
		break;

	} /* Switch */
}

/*
 * Process LSP installation info from two sets of nhlfes: a set from
 * a dplane notification, and a set from the zebra LSP object. Update
 * counters of installed nexthops, and return whether the LSP has changed.
 * Read-only pre-scan: it does not modify any flags.
 */
static bool compare_notif_nhlfes(const struct nhlfe_list_head *ctx_head,
				 struct nhlfe_list_head *nhlfe_head,
				 int *start_counter, int *end_counter)
{
	struct zebra_nhlfe *nhlfe;
	const struct zebra_nhlfe *ctx_nhlfe;
	struct nexthop *nexthop;
	const struct nexthop *ctx_nexthop;
	int start_count = 0, end_count = 0;
	bool changed_p = false;
	/* Bitwise-or of the debug macros: true if either is enabled. */
	bool is_debug = (IS_ZEBRA_DEBUG_DPLANE | IS_ZEBRA_DEBUG_MPLS);

	frr_each_safe(nhlfe_list, nhlfe_head, nhlfe) {
		char buf[NEXTHOP_STRLEN];

		nexthop = nhlfe->nexthop;
		if (!nexthop)
			continue;

		if (CHECK_FLAG(nexthop->flags, NEXTHOP_FLAG_FIB))
			start_count++;

		/* Look for this nexthop in the notification's set. */
		ctx_nhlfe = NULL;
		ctx_nexthop = NULL;
		frr_each(nhlfe_list_const, ctx_head, ctx_nhlfe) {
			ctx_nexthop = ctx_nhlfe->nexthop;
			if (!ctx_nexthop)
				continue;

			if ((ctx_nexthop->type == nexthop->type) &&
			    nexthop_same(ctx_nexthop, nexthop)) {
				/* Matched */
				break;
			}
		}

		if (is_debug)
			nexthop2str(nexthop, buf, sizeof(buf));

		if (ctx_nhlfe && ctx_nexthop) {
			if (is_debug) {
				const char *tstr = "";

				if (!CHECK_FLAG(ctx_nhlfe->flags,
						NHLFE_FLAG_INSTALLED))
					tstr = "not ";

				zlog_debug("LSP dplane notif: matched nh %s (%sinstalled)",
					   buf, tstr);
			}

			/* Test zebra nhlfe install state */
			if (CHECK_FLAG(ctx_nhlfe->flags,
				       NHLFE_FLAG_INSTALLED)) {

				if (!CHECK_FLAG(nhlfe->flags,
						NHLFE_FLAG_INSTALLED))
					changed_p = true;

				/* Update counter */
				end_count++;
			} else {

				if (CHECK_FLAG(nhlfe->flags,
					       NHLFE_FLAG_INSTALLED))
					changed_p = true;
			}

		} else {
			/* Not mentioned in lfib set -> uninstalled */
			if (CHECK_FLAG(nhlfe->flags, NHLFE_FLAG_INSTALLED) ||
			    CHECK_FLAG(nexthop->flags, NEXTHOP_FLAG_ACTIVE) ||
			    CHECK_FLAG(nexthop->flags, NEXTHOP_FLAG_FIB)) {
				changed_p = true;
			}

			if (is_debug)
				zlog_debug("LSP dplane notif: no match, nh %s",
					   buf);
		}
	}

	if (start_counter)
		*start_counter += start_count;
	if (end_counter)
		*end_counter += end_count;

	return changed_p;
}

/*
 * Update an lsp nhlfe list from a dplane context, typically an async
 * notification context. Update the LSP list to match the installed
 * status from the context's list. Always returns 0.
 */
static int update_nhlfes_from_ctx(struct nhlfe_list_head *nhlfe_head,
				  const struct nhlfe_list_head *ctx_head)
{
	int ret = 0;
	struct zebra_nhlfe *nhlfe;
	const struct zebra_nhlfe *ctx_nhlfe;
	struct nexthop *nexthop;
	const struct nexthop *ctx_nexthop;
	bool is_debug = (IS_ZEBRA_DEBUG_DPLANE | IS_ZEBRA_DEBUG_MPLS);

	frr_each_safe(nhlfe_list, nhlfe_head, nhlfe) {
		char buf[NEXTHOP_STRLEN];

		nexthop = nhlfe->nexthop;
		if (!nexthop)
			continue;

		ctx_nhlfe = NULL;
		ctx_nexthop = NULL;
		frr_each(nhlfe_list_const, ctx_head, ctx_nhlfe) {
			ctx_nexthop = ctx_nhlfe->nexthop;
			if (!ctx_nexthop)
				continue;

			if ((ctx_nexthop->type == nexthop->type) &&
			    nexthop_same(ctx_nexthop, nexthop)) {
				/* Matched */
				break;
			}
		}

		if (is_debug)
			nexthop2str(nexthop, buf, sizeof(buf));

		if (ctx_nhlfe && ctx_nexthop) {

			/* Bring zebra nhlfe install state into sync */
			if (CHECK_FLAG(ctx_nhlfe->flags,
				       NHLFE_FLAG_INSTALLED)) {
				if (is_debug)
					zlog_debug("%s: matched lsp nhlfe %s (installed)",
						   __func__, buf);

				SET_FLAG(nhlfe->flags, NHLFE_FLAG_INSTALLED);
				SET_FLAG(nhlfe->flags, NHLFE_FLAG_SELECTED);

			} else {
				if (is_debug)
					zlog_debug("%s: matched lsp nhlfe %s (not installed)",
						   __func__, buf);

				UNSET_FLAG(nhlfe->flags, NHLFE_FLAG_INSTALLED);
				UNSET_FLAG(nhlfe->flags, NHLFE_FLAG_SELECTED);
			}

			/* Mirror the notification's FIB state onto the
			 * zebra nexthop flags.
			 */
			if (CHECK_FLAG(ctx_nhlfe->nexthop->flags,
				       NEXTHOP_FLAG_FIB)) {
				SET_FLAG(nhlfe->nexthop->flags,
					 NEXTHOP_FLAG_ACTIVE);
				SET_FLAG(nhlfe->nexthop->flags,
					 NEXTHOP_FLAG_FIB);
			} else {
				UNSET_FLAG(nhlfe->nexthop->flags,
					   NEXTHOP_FLAG_ACTIVE);
				UNSET_FLAG(nhlfe->nexthop->flags,
					   NEXTHOP_FLAG_FIB);
			}

		} else {
			/* Not mentioned in lfib set -> uninstalled */
			if (is_debug)
				zlog_debug("%s: no match for lsp nhlfe %s",
					   __func__, buf);
			UNSET_FLAG(nhlfe->flags, NHLFE_FLAG_INSTALLED);
			UNSET_FLAG(nhlfe->flags, NHLFE_FLAG_SELECTED);
			UNSET_FLAG(nexthop->flags, NEXTHOP_FLAG_FIB);
			UNSET_FLAG(nexthop->flags, NEXTHOP_FLAG_ACTIVE);
		}
	}

	return ret;
}

/*
 * Process async dplane notifications.
 * Two-pass algorithm: a read-only compare pass first (so the pre-change
 * state is available for any delete we must propagate), then a sync pass
 * that rewrites the zebra flags from the notification.
 */
void zebra_mpls_process_dplane_notify(struct zebra_dplane_ctx *ctx)
{
	struct zebra_vrf *zvrf;
	struct zebra_ile tmp_ile;
	struct hash *lsp_table;
	struct zebra_lsp *lsp;
	const struct nhlfe_list_head *ctx_list;
	int start_count = 0, end_count = 0; /* Installed counts */
	bool changed_p = false;
	bool is_debug = (IS_ZEBRA_DEBUG_DPLANE | IS_ZEBRA_DEBUG_MPLS);
	enum zebra_sr_policy_update_label_mode update_mode;

	if (is_debug)
		zlog_debug("LSP dplane notif, in-label %u",
			   dplane_ctx_get_in_label(ctx));

	/* Look for zebra LSP object */
	zvrf = vrf_info_lookup(VRF_DEFAULT);
	if (zvrf == NULL)
		return;

	lsp_table = zvrf->lsp_table;

	tmp_ile.in_label = dplane_ctx_get_in_label(ctx);
	lsp = hash_lookup(lsp_table, &tmp_ile);
	if (lsp == NULL) {
		if (is_debug)
			zlog_debug("dplane LSP notif: in-label %u not found",
				   dplane_ctx_get_in_label(ctx));
		return;
	}

	/*
	 * The dataplane/forwarding plane is notifying zebra about the state
	 * of the nexthops associated with this LSP. First, we take a
	 * pre-scan pass to determine whether the LSP has transitioned
	 * from installed -> uninstalled. In that case, we need to have
	 * the existing state of the LSP objects available before making
	 * any changes.
	 */
	ctx_list = dplane_ctx_get_nhlfe_list(ctx);

	changed_p = compare_notif_nhlfes(ctx_list, &lsp->nhlfe_list,
					 &start_count, &end_count);

	if (is_debug)
		zlog_debug("LSP dplane notif: lfib start_count %d, end_count %d%s",
			   start_count, end_count,
			   changed_p ? ", changed" : "");

	ctx_list = dplane_ctx_get_backup_nhlfe_list(ctx);

	if (compare_notif_nhlfes(ctx_list, &lsp->backup_nhlfe_list,
				 &start_count, &end_count))
		/* Avoid accidentally setting back to 'false' */
		changed_p = true;

	if (is_debug)
		zlog_debug("LSP dplane notif: lfib backups, start_count %d, end_count %d%s",
			   start_count, end_count,
			   changed_p ? ", changed" : "");

	/*
	 * Has the LSP become uninstalled? We need the existing state of the
	 * nexthops/nhlfes at this point so we know what to delete.
	 */
	if (start_count > 0 && end_count == 0) {
		/* Inform other lfibs */
		dplane_lsp_notif_update(lsp, DPLANE_OP_LSP_DELETE, ctx);
	}

	/*
	 * Now we take a second pass and bring the zebra
	 * nexthop state into sync with the forwarding-plane state.
	 */
	ctx_list = dplane_ctx_get_nhlfe_list(ctx);
	update_nhlfes_from_ctx(&lsp->nhlfe_list, ctx_list);

	ctx_list = dplane_ctx_get_backup_nhlfe_list(ctx);
	update_nhlfes_from_ctx(&lsp->backup_nhlfe_list, ctx_list);

	if (end_count > 0) {
		SET_FLAG(lsp->flags, LSP_FLAG_INSTALLED);

		/* SR-TE update too */
		if (start_count == 0)
			update_mode = ZEBRA_SR_POLICY_LABEL_CREATED;
		else
			update_mode = ZEBRA_SR_POLICY_LABEL_UPDATED;
		zebra_sr_policy_label_update(lsp->ile.in_label, update_mode);

		if (changed_p)
			dplane_lsp_notif_update(lsp, DPLANE_OP_LSP_UPDATE, ctx);

	} else {
		/* SR-TE update too */
		zebra_sr_policy_label_update(lsp->ile.in_label,
					     ZEBRA_SR_POLICY_LABEL_REMOVED);

		UNSET_FLAG(lsp->flags, LSP_FLAG_INSTALLED);
		clear_nhlfe_installed(lsp);
	}
}

/*
 * Install dynamic LSP entry.
 * Returns 0 if nothing needed doing (no FEC binding, or implicit-null),
 * 0 on successful install, -1 on error.
 */
int zebra_mpls_lsp_install(struct zebra_vrf *zvrf, struct route_node *rn,
			   struct route_entry *re)
{
	struct route_table *table;
	struct zebra_fec *fec;

	table = zvrf->fec_table[family2afi(PREFIX_FAMILY(&rn->p))];
	if (!table)
		return -1;

	/* See if there is a configured label binding for this FEC. */
	fec = fec_find(table, &rn->p);
	if (!fec || fec->label == MPLS_INVALID_LABEL)
		return 0;

	/* We cannot install a label forwarding entry if local label is the
	 * implicit-null label.
	 */
	if (fec->label == MPLS_LABEL_IMPLICIT_NULL)
		return 0;

	if (lsp_install(zvrf, fec->label, rn, re))
		return -1;

	return 0;
}

/*
 * Uninstall dynamic LSP entry, if any.
 * Returns 0 on success or nothing-to-do, -1 on error.
 */
int zebra_mpls_lsp_uninstall(struct zebra_vrf *zvrf, struct route_node *rn,
			     struct route_entry *re)
{
	struct route_table *table;
	struct zebra_fec *fec;

	table = zvrf->fec_table[family2afi(PREFIX_FAMILY(&rn->p))];
	if (!table)
		return -1;

	/* See if there is a configured label binding for this FEC. */
	fec = fec_find(table, &rn->p);
	if (!fec || fec->label == MPLS_INVALID_LABEL)
		return 0;

	/* Uninstall always removes all dynamic NHLFEs. */
	return lsp_uninstall(zvrf, fec->label);
}

/*
 * Add an NHLFE to an LSP, return the newly-added object. This path only changes
 * the LSP object - nothing is scheduled for processing, for example.
 */
struct zebra_nhlfe *
zebra_mpls_lsp_add_nhlfe(struct zebra_lsp *lsp, enum lsp_types_t lsp_type,
			 enum nexthop_types_t gtype, const union g_addr *gate,
			 ifindex_t ifindex, uint8_t num_labels,
			 const mpls_label_t *out_labels)
{
	/* Just a public pass-through to the internal implementation */
	return nhlfe_add(lsp, lsp_type, gtype, gate, ifindex, num_labels,
			 out_labels, false /*backup*/);
}

/*
 * Add a backup NHLFE to an LSP, return the newly-added object.
 * This path only changes the LSP object - nothing is scheduled for
 * processing, for example.
 */
struct zebra_nhlfe *zebra_mpls_lsp_add_backup_nhlfe(
	struct zebra_lsp *lsp, enum lsp_types_t lsp_type,
	enum nexthop_types_t gtype, const union g_addr *gate, ifindex_t ifindex,
	uint8_t num_labels, const mpls_label_t *out_labels)
{
	/* Just a public pass-through to the internal implementation */
	return nhlfe_add(lsp, lsp_type, gtype, gate, ifindex, num_labels,
			 out_labels, true);
}

/*
 * Add an NHLFE to an LSP based on a nexthop; return the newly-added object.
 * Returns NULL if the nexthop carries no label stack.
 */
struct zebra_nhlfe *zebra_mpls_lsp_add_nh(struct zebra_lsp *lsp,
					  enum lsp_types_t lsp_type,
					  const struct nexthop *nh)
{
	struct zebra_nhlfe *nhlfe;

	if (nh->nh_label == NULL || nh->nh_label->num_labels == 0)
		return NULL;

	nhlfe = nhlfe_add(lsp, lsp_type, nh->type, &nh->gate, nh->ifindex,
			  nh->nh_label->num_labels, nh->nh_label->label,
			  false /*backup*/);

	return nhlfe;
}

/*
 * Add a backup NHLFE to an LSP based on a nexthop;
 * return the newly-added object.
+ */ +struct zebra_nhlfe *zebra_mpls_lsp_add_backup_nh(struct zebra_lsp *lsp, + enum lsp_types_t lsp_type, + const struct nexthop *nh) +{ + struct zebra_nhlfe *nhlfe; + + if (nh->nh_label == NULL || nh->nh_label->num_labels == 0) + return NULL; + + nhlfe = nhlfe_add(lsp, lsp_type, nh->type, &nh->gate, + nh->ifindex, nh->nh_label->num_labels, + nh->nh_label->label, true); + + return nhlfe; +} + +/* + * Free an allocated NHLFE + */ +void zebra_mpls_nhlfe_free(struct zebra_nhlfe *nhlfe) +{ + /* Just a pass-through to the internal implementation */ + nhlfe_free(nhlfe); +} + +/* + * Registration from a client for the label binding for a FEC. If a binding + * already exists, it is informed to the client. + * NOTE: If there is a manually configured label binding, that is used. + * Otherwise, if a label index is specified, it means we have to allocate the + * label from a locally configured label block (SRGB), if one exists and index + * is acceptable. If no label index then just register the specified label. + * NOTE2: Either label or label_index is expected to be set to MPLS_INVALID_* + * by the calling function. Register requests with both will be rejected. 
+ */ +int zebra_mpls_fec_register(struct zebra_vrf *zvrf, struct prefix *p, + uint32_t label, uint32_t label_index, + struct zserv *client) +{ + struct route_table *table; + struct zebra_fec *fec; + bool new_client; + bool label_change = false; + uint32_t old_label; + bool have_label_index = (label_index != MPLS_INVALID_LABEL_INDEX); + bool is_configured_fec = false; /* indicate statically configured FEC */ + + table = zvrf->fec_table[family2afi(PREFIX_FAMILY(p))]; + if (!table) + return -1; + + if (label != MPLS_INVALID_LABEL && have_label_index) { + flog_err( + EC_ZEBRA_FEC_LABEL_INDEX_LABEL_CONFLICT, + "Rejecting FEC register for %pFX with both label %u and Label Index %u specified, client %s", + p, label, label_index, + zebra_route_string(client->proto)); + return -1; + } + + /* Locate FEC */ + fec = fec_find(table, p); + if (!fec) { + fec = fec_add(table, p, label, 0, label_index); + if (!fec) { + flog_err( + EC_ZEBRA_FEC_ADD_FAILED, + "Failed to add FEC %pFX upon register, client %s", + p, zebra_route_string(client->proto)); + return -1; + } + + old_label = MPLS_INVALID_LABEL; + new_client = true; + } else { + /* Check if the FEC has been statically defined in the config */ + is_configured_fec = fec->flags & FEC_FLAG_CONFIGURED; + /* Client may register same FEC with different label index. */ + new_client = + (listnode_lookup(fec->client_list, client) == NULL); + if (!new_client && fec->label_index == label_index + && fec->label == label) + /* Duplicate register */ + return 0; + + /* Save current label, update the FEC */ + old_label = fec->label; + fec->label_index = label_index; + } + + if (new_client) + listnode_add(fec->client_list, client); + + if (IS_ZEBRA_DEBUG_MPLS) + zlog_debug("FEC %pFX label%s %u %s by client %s%s", p, + have_label_index ? " index" : "", + have_label_index ? label_index : label, + new_client ? "registered" : "updated", + zebra_route_string(client->proto), + is_configured_fec + ? 
", but using statically configured label" + : ""); + + /* If not a statically configured FEC, derive the local label + * from label index or use the provided label + */ + if (!is_configured_fec) { + if (have_label_index) + fec_derive_label_from_index(zvrf, fec); + else + fec->label = label; + + /* If no label change, exit. */ + if (fec->label == old_label) + return 0; + + label_change = true; + } + + /* If new client or label change, update client and install or uninstall + * label forwarding entry as needed. + */ + /* Inform client of label, if needed. */ + if ((new_client && fec->label != MPLS_INVALID_LABEL) || label_change) { + if (IS_ZEBRA_DEBUG_MPLS) + zlog_debug("Update client label %u", fec->label); + fec_send(fec, client); + } + + if (new_client || label_change) + return fec_change_update_lsp(zvrf, fec, old_label); + + return 0; +} + +/* + * Deregistration from a client for the label binding for a FEC. The FEC + * itself is deleted if no other registered clients exist and there is no + * label bound to the FEC. + */ +int zebra_mpls_fec_unregister(struct zebra_vrf *zvrf, struct prefix *p, + struct zserv *client) +{ + struct route_table *table; + struct zebra_fec *fec; + + table = zvrf->fec_table[family2afi(PREFIX_FAMILY(p))]; + if (!table) + return -1; + + fec = fec_find(table, p); + if (!fec) { + flog_err(EC_ZEBRA_FEC_RM_FAILED, + "Failed to find FEC %pFX upon unregister, client %s", + p, zebra_route_string(client->proto)); + return -1; + } + + listnode_delete(fec->client_list, client); + + if (IS_ZEBRA_DEBUG_MPLS) + zlog_debug("FEC %pFX unregistered by client %s", p, + zebra_route_string(client->proto)); + + /* If not a configured entry, delete the FEC if no other clients. Before + * deleting, see if any LSP needs to be uninstalled. 
 */
	if (!(fec->flags & FEC_FLAG_CONFIGURED)
	    && list_isempty(fec->client_list)) {
		mpls_label_t old_label = fec->label;
		fec->label = MPLS_INVALID_LABEL; /* reset */
		/* Tear down any LSP bound to the old label before deleting */
		fec_change_update_lsp(zvrf, fec, old_label);
		fec_del(fec);
	}

	return 0;
}

/*
 * Cleanup any FECs registered by this client.
 *
 * Walks all FEC tables (kept on the default VRF) and removes 'client'
 * from each FEC's client list; a FEC that is neither statically
 * configured nor referenced by any other client is deleted outright.
 * Always returns 0.
 */
static int zebra_mpls_cleanup_fecs_for_client(struct zserv *client)
{
	struct zebra_vrf *zvrf = vrf_info_lookup(VRF_DEFAULT);
	struct route_node *rn;
	struct zebra_fec *fec;
	struct listnode *node;
	struct zserv *fec_client;
	int af;

	for (af = AFI_IP; af < AFI_MAX; af++) {
		if (zvrf->fec_table[af] == NULL)
			continue;

		for (rn = route_top(zvrf->fec_table[af]); rn;
		     rn = route_next(rn)) {
			fec = rn->info;
			if (!fec || list_isempty(fec->client_list))
				continue;

			for (ALL_LIST_ELEMENTS_RO(fec->client_list, node,
						  fec_client)) {
				if (fec_client == client) {
					listnode_delete(fec->client_list,
							fec_client);
					if (!(fec->flags & FEC_FLAG_CONFIGURED)
					    && list_isempty(fec->client_list))
						fec_del(fec);
					/* A client appears at most once per
					 * FEC, so stop scanning this list.
					 */
					break;
				}
			}
		}
	}

	return 0;
}

/* Context passed through hash_iterate() when uninstalling LSPs by type. */
struct lsp_uninstall_args {
	struct hash *lsp_table;
	enum lsp_types_t type;
};

/*
 * Cleanup MPLS labels registered by this client.
 *
 * For every VRF, removes the client's dynamic LSP entries (keyed by the
 * LSP type derived from the client's protocol) and strips its
 * FEC-to-NHLFE bindings for both IPv4 and IPv6. Always returns 0.
 */
static int zebra_mpls_cleanup_zclient_labels(struct zserv *client)
{
	struct vrf *vrf;
	struct zebra_vrf *zvrf;

	RB_FOREACH (vrf, vrf_id_head, &vrfs_by_id) {
		struct lsp_uninstall_args args;

		zvrf = vrf->info;
		if (!zvrf)
			continue;

		/* Cleanup LSPs. */
		args.lsp_table = zvrf->lsp_table;
		args.type = lsp_type_from_re_type(client->proto);
		hash_iterate(zvrf->lsp_table, mpls_lsp_uninstall_all_type,
			     &args);

		/* Cleanup FTNs. */
		mpls_ftn_uninstall_all(zvrf, AFI_IP,
				       lsp_type_from_re_type(client->proto));
		mpls_ftn_uninstall_all(zvrf, AFI_IP6,
				       lsp_type_from_re_type(client->proto));
	}

	return 0;
}

/*
 * Return FEC (if any) to which this label is bound.
+ * Note: Only works for per-prefix binding and when the label is not + * implicit-null. + * TODO: Currently walks entire table, can optimize later with another + * hash.. + */ +struct zebra_fec *zebra_mpls_fec_for_label(struct zebra_vrf *zvrf, + mpls_label_t label) +{ + struct route_node *rn; + struct zebra_fec *fec; + int af; + + for (af = AFI_IP; af < AFI_MAX; af++) { + if (zvrf->fec_table[af] == NULL) + continue; + + for (rn = route_top(zvrf->fec_table[af]); rn; + rn = route_next(rn)) { + if (!rn->info) + continue; + fec = rn->info; + if (fec->label == label) + return fec; + } + } + + return NULL; +} + +/* + * Inform if specified label is currently bound to a FEC or not. + */ +int zebra_mpls_label_already_bound(struct zebra_vrf *zvrf, mpls_label_t label) +{ + return (zebra_mpls_fec_for_label(zvrf, label) ? 1 : 0); +} + +/* + * Add static FEC to label binding. If there are clients registered for this + * FEC, notify them. If there are labeled routes for this FEC, install the + * label forwarding entry. +*/ +int zebra_mpls_static_fec_add(struct zebra_vrf *zvrf, struct prefix *p, + mpls_label_t in_label) +{ + struct route_table *table; + struct zebra_fec *fec; + mpls_label_t old_label; + int ret = 0; + + table = zvrf->fec_table[family2afi(PREFIX_FAMILY(p))]; + if (!table) + return -1; + + /* Update existing FEC or create a new one. */ + fec = fec_find(table, p); + if (!fec) { + fec = fec_add(table, p, in_label, FEC_FLAG_CONFIGURED, + MPLS_INVALID_LABEL_INDEX); + if (!fec) { + flog_err(EC_ZEBRA_FEC_ADD_FAILED, + "Failed to add FEC %pFX upon config", p); + return -1; + } + + if (IS_ZEBRA_DEBUG_MPLS) + zlog_debug("Add fec %pFX label %u", p, in_label); + } else { + fec->flags |= FEC_FLAG_CONFIGURED; + if (fec->label == in_label) + /* Duplicate config */ + return 0; + + /* Label change, update clients. 
*/ + old_label = fec->label; + if (IS_ZEBRA_DEBUG_MPLS) + zlog_debug("Update fec %pFX new label %u", p, in_label); + + fec->label = in_label; + fec_update_clients(fec); + + /* Update label forwarding entries appropriately */ + ret = fec_change_update_lsp(zvrf, fec, old_label); + } + + return ret; +} + +/* + * Remove static FEC to label binding. If there are no clients registered + * for this FEC, delete the FEC; else notify clients + * Note: Upon delete of static binding, if label index exists for this FEC, + * client may need to be updated with derived label. + */ +int zebra_mpls_static_fec_del(struct zebra_vrf *zvrf, struct prefix *p) +{ + struct route_table *table; + struct zebra_fec *fec; + mpls_label_t old_label; + + table = zvrf->fec_table[family2afi(PREFIX_FAMILY(p))]; + if (!table) + return -1; + + fec = fec_find(table, p); + if (!fec) { + flog_err(EC_ZEBRA_FEC_RM_FAILED, + "Failed to find FEC %pFX upon delete", p); + return -1; + } + + if (IS_ZEBRA_DEBUG_MPLS) { + zlog_debug("Delete fec %pFX label %u label index %u", p, + fec->label, fec->label_index); + } + + old_label = fec->label; + fec->flags &= ~FEC_FLAG_CONFIGURED; + fec->label = MPLS_INVALID_LABEL; + + /* If no client exists, just delete the FEC. */ + if (list_isempty(fec->client_list)) { + fec_del(fec); + return 0; + } + + /* Derive the local label (from label index) or reset it. */ + fec_derive_label_from_index(zvrf, fec); + + /* If there is a label change, update clients. */ + if (fec->label == old_label) + return 0; + fec_update_clients(fec); + + /* Update label forwarding entries appropriately */ + return fec_change_update_lsp(zvrf, fec, old_label); +} + +/* + * Display MPLS FEC to label binding configuration (VTY command handler). 
 */
int zebra_mpls_write_fec_config(struct vty *vty, struct zebra_vrf *zvrf)
{
	struct route_node *rn;
	int af;
	struct zebra_fec *fec;
	int write = 0;

	/* Walk both the IPv4 and IPv6 FEC tables. */
	for (af = AFI_IP; af < AFI_MAX; af++) {
		if (zvrf->fec_table[af] == NULL)
			continue;

		for (rn = route_top(zvrf->fec_table[af]); rn;
		     rn = route_next(rn)) {
			if (!rn->info)
				continue;

			char lstr[BUFSIZ];
			fec = rn->info;

			/* Only statically configured bindings belong in
			 * the running config; dynamic ones are skipped.
			 */
			if (!(fec->flags & FEC_FLAG_CONFIGURED))
				continue;

			write = 1;
			vty_out(vty, "mpls label bind %pFX %s\n", &rn->p,
				label2str(fec->label, lstr, BUFSIZ));
		}
	}

	/* Returns 1 if anything was written, 0 otherwise. */
	return write;
}

/*
 * Display MPLS FEC to label binding (VTY command handler).
 * Prints every FEC entry (configured or dynamic) in both AFI tables.
 */
void zebra_mpls_print_fec_table(struct vty *vty, struct zebra_vrf *zvrf)
{
	struct route_node *rn;
	int af;

	for (af = AFI_IP; af < AFI_MAX; af++) {
		if (zvrf->fec_table[af] == NULL)
			continue;

		for (rn = route_top(zvrf->fec_table[af]); rn;
		     rn = route_next(rn)) {
			if (!rn->info)
				continue;
			fec_print(rn->info, vty);
		}
	}
}

/*
 * Display MPLS FEC to label binding for a specific FEC (VTY command handler).
+ */ +void zebra_mpls_print_fec(struct vty *vty, struct zebra_vrf *zvrf, + struct prefix *p) +{ + struct route_table *table; + struct route_node *rn; + + table = zvrf->fec_table[family2afi(PREFIX_FAMILY(p))]; + if (!table) + return; + + apply_mask(p); + rn = route_node_lookup(table, p); + if (!rn) + return; + + route_unlock_node(rn); + if (!rn->info) + return; + + fec_print(rn->info, vty); +} + +static void mpls_zebra_nhe_update(struct route_entry *re, afi_t afi, + struct nhg_hash_entry *new_nhe) +{ + struct nhg_hash_entry *nhe; + + nhe = zebra_nhg_rib_find_nhe(new_nhe, afi); + + route_entry_update_nhe(re, nhe); +} + +static bool ftn_update_nexthop(bool add_p, struct nexthop *nexthop, + enum lsp_types_t type, + const struct zapi_nexthop *znh) +{ + if (add_p && nexthop->nh_label_type == ZEBRA_LSP_NONE) + nexthop_add_labels(nexthop, type, znh->label_num, znh->labels); + else if (!add_p && nexthop->nh_label_type == type) + nexthop_del_labels(nexthop); + else + return false; + + return true; +} + +void zebra_mpls_ftn_uninstall(struct zebra_vrf *zvrf, enum lsp_types_t type, + struct prefix *prefix, uint8_t route_type, + uint8_t route_instance) +{ + struct route_table *table; + struct route_node *rn; + struct route_entry *re; + struct nexthop *nexthop; + struct nhg_hash_entry *new_nhe; + afi_t afi = family2afi(prefix->family); + + /* Lookup table. */ + table = zebra_vrf_table(afi, SAFI_UNICAST, zvrf_id(zvrf)); + if (!table) + return; + + /* Lookup existing route */ + rn = route_node_get(table, prefix); + RNODE_FOREACH_RE (rn, re) { + if (CHECK_FLAG(re->status, ROUTE_ENTRY_REMOVED)) + continue; + if (re->type == route_type && re->instance == route_instance) + break; + } + if (re == NULL) + return; + + /* + * Nexthops are now shared by multiple routes, so we have to make + * a local copy, modify the copy, then update the route. 
+ */ + new_nhe = zebra_nhe_copy(re->nhe, 0); + + for (nexthop = new_nhe->nhg.nexthop; nexthop; nexthop = nexthop->next) + nexthop_del_labels(nexthop); + + /* Update backup routes/nexthops also, if present. */ + if (zebra_nhg_get_backup_nhg(new_nhe) != NULL) { + for (nexthop = new_nhe->backup_info->nhe->nhg.nexthop; nexthop; + nexthop = nexthop->next) + nexthop_del_labels(nexthop); + } + + SET_FLAG(re->status, ROUTE_ENTRY_CHANGED); + SET_FLAG(re->status, ROUTE_ENTRY_LABELS_CHANGED); + + /* This will create (or ref) a new nhe, so we will discard the local + * temporary nhe + */ + mpls_zebra_nhe_update(re, afi, new_nhe); + + zebra_nhg_free(new_nhe); + + rib_queue_add(rn); +} + +/* + * Iterate through a list of nexthops, for a match for 'znh'. If found, + * update its labels according to 'add_p', and return 'true' if successful. + */ +static bool ftn_update_znh(bool add_p, enum lsp_types_t type, + struct nexthop *head, const struct zapi_nexthop *znh) +{ + bool found = false, success = false; + struct nexthop *nexthop; + + for (nexthop = head; nexthop; nexthop = nexthop->next) { + switch (nexthop->type) { + case NEXTHOP_TYPE_IPV4: + case NEXTHOP_TYPE_IPV4_IFINDEX: + if (znh->type != NEXTHOP_TYPE_IPV4 + && znh->type != NEXTHOP_TYPE_IPV4_IFINDEX) + continue; + if (!IPV4_ADDR_SAME(&nexthop->gate.ipv4, + &znh->gate.ipv4)) + continue; + if (nexthop->type == NEXTHOP_TYPE_IPV4_IFINDEX + && nexthop->ifindex != znh->ifindex) + continue; + + found = true; + + if (!ftn_update_nexthop(add_p, nexthop, type, znh)) + break; + + success = true; + break; + case NEXTHOP_TYPE_IPV6: + case NEXTHOP_TYPE_IPV6_IFINDEX: + if (znh->type != NEXTHOP_TYPE_IPV6 + && znh->type != NEXTHOP_TYPE_IPV6_IFINDEX) + continue; + if (!IPV6_ADDR_SAME(&nexthop->gate.ipv6, + &znh->gate.ipv6)) + continue; + if (nexthop->type == NEXTHOP_TYPE_IPV6_IFINDEX + && nexthop->ifindex != znh->ifindex) + continue; + + found = true; + + if (!ftn_update_nexthop(add_p, nexthop, type, znh)) + break; + success = true; + break; + 
case NEXTHOP_TYPE_IFINDEX: + if (znh->type != NEXTHOP_TYPE_IFINDEX) + continue; + if (nexthop->ifindex != znh->ifindex) + continue; + + found = true; + + if (!ftn_update_nexthop(add_p, nexthop, type, znh)) + break; + success = true; + break; + case NEXTHOP_TYPE_BLACKHOLE: + /* Not valid */ + continue; + } + + if (found) + break; + } + + return success; +} + +/* + * Install/uninstall LSP and (optionally) FEC-To-NHLFE (FTN) bindings, + * using zapi message info. + * There are several changes that need to be made, in several zebra + * data structures, so we want to do all the work required at once. + */ +void zebra_mpls_zapi_labels_process(bool add_p, struct zebra_vrf *zvrf, + const struct zapi_labels *zl) +{ + int i, counter, ret = 0; + char buf[NEXTHOP_STRLEN]; + const struct zapi_nexthop *znh; + struct route_table *table; + struct route_node *rn = NULL; + struct route_entry *re = NULL; + struct nhg_hash_entry *new_nhe = NULL; + bool found; + afi_t afi = AFI_IP; + const struct prefix *prefix = NULL; + struct hash *lsp_table; + struct zebra_ile tmp_ile; + struct zebra_lsp *lsp = NULL; + + /* Prep LSP for add case */ + if (add_p) { + /* Lookup table. */ + lsp_table = zvrf->lsp_table; + if (!lsp_table) + return; + + /* Find or create LSP object */ + tmp_ile.in_label = zl->local_label; + lsp = hash_get(lsp_table, &tmp_ile, lsp_alloc); + } + + /* Prep for route/FEC update if requested */ + if (CHECK_FLAG(zl->message, ZAPI_LABELS_FTN)) { + prefix = &zl->route.prefix; + + afi = family2afi(prefix->family); + + /* Lookup table. */ + table = zebra_vrf_table(afi, SAFI_UNICAST, zvrf_id(zvrf)); + if (table) { + /* Lookup existing route */ + rn = route_node_get(table, prefix); + RNODE_FOREACH_RE(rn, re) { + if (CHECK_FLAG(re->status, ROUTE_ENTRY_REMOVED)) + continue; + if (re->type == zl->route.type && + re->instance == zl->route.instance) + break; + } + } + + if (re) { + /* + * Copy over current nexthops into a temporary group. 
+ * We can't just change the values here since the nhgs + * are shared and if the labels change, we'll need + * to find or create a new nhg. We need to create + * a whole temporary group, make changes to it, + * then attach that to the route. + */ + new_nhe = zebra_nhe_copy(re->nhe, 0); + + } else { + /* + * The old version of the zapi code + * attempted to manage LSPs before trying to + * find a route/FEC, so we'll continue that way. + */ + if (IS_ZEBRA_DEBUG_RECV || IS_ZEBRA_DEBUG_MPLS) + zlog_debug( + "%s: FTN update requested: no route for prefix %pFX", + __func__, prefix); + } + } + + /* + * Use info from the zapi nexthops to add/replace/remove LSP/FECs + */ + + counter = 0; + for (i = 0; i < zl->nexthop_num; i++) { + + znh = &zl->nexthops[i]; + + /* Attempt LSP update */ + if (add_p) + ret = lsp_znh_install(lsp, zl->type, znh); + else + ret = mpls_lsp_uninstall(zvrf, zl->type, + zl->local_label, znh->type, + &znh->gate, znh->ifindex, + false); + if (ret < 0) { + if (IS_ZEBRA_DEBUG_RECV || IS_ZEBRA_DEBUG_MPLS) { + zapi_nexthop2str(znh, buf, sizeof(buf)); + zlog_debug("%s: Unable to %sinstall LSP: label %u, znh %s", + __func__, (add_p ? "" : "un"), + zl->local_label, buf); + } + continue; + } + + /* Attempt route/FEC update if requested */ + if (re == NULL) + continue; + + /* Search the route's nexthops for a match, and update it. */ + found = ftn_update_znh(add_p, zl->type, new_nhe->nhg.nexthop, + znh); + if (found) { + counter++; + } else if (IS_ZEBRA_DEBUG_RECV | IS_ZEBRA_DEBUG_MPLS) { + zapi_nexthop2str(znh, buf, sizeof(buf)); + zlog_debug( + "%s: Unable to update FEC: prefix %pFX, label %u, znh %s", + __func__, prefix, zl->local_label, buf); + } + } + + /* + * Process backup LSPs/nexthop entries also. We associate backup + * LSP info with backup nexthops. 
+ */ + if (!CHECK_FLAG(zl->message, ZAPI_LABELS_HAS_BACKUPS)) + goto znh_done; + + for (i = 0; i < zl->backup_nexthop_num; i++) { + + znh = &zl->backup_nexthops[i]; + + if (add_p) + ret = lsp_backup_znh_install(lsp, zl->type, znh); + else + ret = mpls_lsp_uninstall(zvrf, zl->type, + zl->local_label, + znh->type, &znh->gate, + znh->ifindex, true); + + if (ret < 0) { + if (IS_ZEBRA_DEBUG_RECV || + IS_ZEBRA_DEBUG_MPLS) { + zapi_nexthop2str(znh, buf, sizeof(buf)); + zlog_debug("%s: Unable to %sinstall backup LSP: label %u, znh %s", + __func__, (add_p ? "" : "un"), + zl->local_label, buf); + } + continue; + } + + /* Attempt backup nexthop/FEC update if requested */ + if (re == NULL || zebra_nhg_get_backup_nhg(new_nhe) == NULL) + continue; + + /* Search the route's backup nexthops for a match + * and update it. + */ + found = ftn_update_znh(add_p, zl->type, + new_nhe->backup_info->nhe->nhg.nexthop, + znh); + if (found) { + counter++; + } else if (IS_ZEBRA_DEBUG_RECV | IS_ZEBRA_DEBUG_MPLS) { + zapi_nexthop2str(znh, buf, sizeof(buf)); + zlog_debug( + "%s: Unable to update backup FEC: prefix %pFX, label %u, znh %s", + __func__, prefix, zl->local_label, buf); + } + } + +znh_done: + + /* + * If we made changes, update the route, and schedule it + * for rib processing + */ + if (re != NULL && counter > 0) { + assert(rn != NULL); + + SET_FLAG(re->status, ROUTE_ENTRY_CHANGED); + SET_FLAG(re->status, ROUTE_ENTRY_LABELS_CHANGED); + + mpls_zebra_nhe_update(re, afi, new_nhe); + + rib_queue_add(rn); + } + + if (new_nhe) + zebra_nhg_free(new_nhe); +} + +/* + * Install/update a NHLFE for an LSP in the forwarding table. This may be + * a new LSP entry or a new NHLFE for an existing in-label or an update of + * the out-label for an existing NHLFE (update case). 
+ */ +static struct zebra_nhlfe * +lsp_add_nhlfe(struct zebra_lsp *lsp, enum lsp_types_t type, + uint8_t num_out_labels, const mpls_label_t *out_labels, + enum nexthop_types_t gtype, const union g_addr *gate, + ifindex_t ifindex, bool is_backup) +{ + struct zebra_nhlfe *nhlfe; + char buf[MPLS_LABEL_STRLEN]; + const char *backup_str; + + if (is_backup) { + nhlfe = nhlfe_find(&lsp->backup_nhlfe_list, type, gtype, + gate, ifindex); + backup_str = "backup "; + } else { + nhlfe = nhlfe_find(&lsp->nhlfe_list, type, gtype, gate, + ifindex); + backup_str = ""; + } + + if (nhlfe) { + struct nexthop *nh = nhlfe->nexthop; + + assert(nh); + assert(nh->nh_label); + + /* Clear deleted flag (in case it was set) */ + UNSET_FLAG(nhlfe->flags, NHLFE_FLAG_DELETED); + if (nh->nh_label->num_labels == num_out_labels + && !memcmp(nh->nh_label->label, out_labels, + sizeof(mpls_label_t) * num_out_labels)) + /* No change */ + return nhlfe; + + if (IS_ZEBRA_DEBUG_MPLS) { + char buf2[MPLS_LABEL_STRLEN]; + char buf3[MPLS_LABEL_STRLEN]; + + nhlfe2str(nhlfe, buf, sizeof(buf)); + mpls_label2str(num_out_labels, out_labels, buf2, + sizeof(buf2), 0); + mpls_label2str(nh->nh_label->num_labels, + nh->nh_label->label, buf3, sizeof(buf3), + 0); + + zlog_debug("LSP in-label %u type %d %snexthop %s out-label(s) changed to %s (old %s)", + lsp->ile.in_label, type, backup_str, buf, + buf2, buf3); + } + + /* Update out label(s), trigger processing. 
*/ + if (nh->nh_label->num_labels == num_out_labels) + memcpy(nh->nh_label->label, out_labels, + sizeof(mpls_label_t) * num_out_labels); + else { + nexthop_del_labels(nh); + nexthop_add_labels(nh, type, num_out_labels, + out_labels); + } + } else { + /* Add LSP entry to this nexthop */ + nhlfe = nhlfe_add(lsp, type, gtype, gate, ifindex, + num_out_labels, out_labels, is_backup); + if (!nhlfe) + return NULL; + + if (IS_ZEBRA_DEBUG_MPLS) { + char buf2[MPLS_LABEL_STRLEN]; + + nhlfe2str(nhlfe, buf, sizeof(buf)); + mpls_label2str(num_out_labels, out_labels, buf2, + sizeof(buf2), 0); + + zlog_debug("Add LSP in-label %u type %d %snexthop %s out-label(s) %s", + lsp->ile.in_label, type, backup_str, buf, + buf2); + } + + lsp->addr_family = NHLFE_FAMILY(nhlfe); + } + + /* Mark NHLFE, queue LSP for processing. */ + SET_FLAG(nhlfe->flags, NHLFE_FLAG_CHANGED); + + return nhlfe; +} + +/* + * Install an LSP and forwarding entry; used primarily + * from vrf zapi message processing. + */ +int mpls_lsp_install(struct zebra_vrf *zvrf, enum lsp_types_t type, + mpls_label_t in_label, uint8_t num_out_labels, + const mpls_label_t *out_labels, enum nexthop_types_t gtype, + const union g_addr *gate, ifindex_t ifindex) +{ + struct hash *lsp_table; + struct zebra_ile tmp_ile; + struct zebra_lsp *lsp; + struct zebra_nhlfe *nhlfe; + + /* Lookup table. */ + lsp_table = zvrf->lsp_table; + if (!lsp_table) + return -1; + + /* Find or create LSP object */ + tmp_ile.in_label = in_label; + lsp = hash_get(lsp_table, &tmp_ile, lsp_alloc); + + nhlfe = lsp_add_nhlfe(lsp, type, num_out_labels, out_labels, gtype, + gate, ifindex, false /*backup*/); + if (nhlfe == NULL) + return -1; + + /* Queue LSP for processing. 
 */
	if (lsp_processq_add(lsp))
		return -1;

	return 0;
}

/*
 * Install or replace NHLFE, using info from zapi nexthop.
 * Returns 0 on success, -1 on failure (NHLFE add failed, too many
 * backup indexes, or the LSP could not be queued for processing).
 */
static int lsp_znh_install(struct zebra_lsp *lsp, enum lsp_types_t type,
			   const struct zapi_nexthop *znh)
{
	struct zebra_nhlfe *nhlfe;

	nhlfe = lsp_add_nhlfe(lsp, type, znh->label_num, znh->labels,
			      znh->type, &znh->gate, znh->ifindex,
			      false /*backup*/);
	if (nhlfe == NULL)
		return -1;

	/* Update backup info if present */
	if (CHECK_FLAG(znh->flags, ZAPI_NEXTHOP_FLAG_HAS_BACKUP)) {
		if (znh->backup_num > NEXTHOP_MAX_BACKUPS) {
			/* Reject and undo the add rather than truncate. */
			nhlfe_del(nhlfe);
			return -1;
		}

		nhlfe->nexthop->backup_num = znh->backup_num;
		/* NOTE(review): copies backup_num bytes — assumes backup_idx
		 * entries are single bytes; confirm against zapi definition.
		 */
		memcpy(nhlfe->nexthop->backup_idx, znh->backup_idx,
		       znh->backup_num);
		SET_FLAG(nhlfe->nexthop->flags, NEXTHOP_FLAG_HAS_BACKUP);
	} else {
		/* Ensure there's no stale backup info */
		UNSET_FLAG(nhlfe->nexthop->flags, NEXTHOP_FLAG_HAS_BACKUP);
		nhlfe->nexthop->backup_num = 0;
	}

	/* Queue LSP for processing. */
	if (lsp_processq_add(lsp))
		return -1;

	return 0;
}

/*
 * Install/update backup NHLFE for an LSP, using info from a zapi message.
 * Returns 0 on success, -1 on failure.
 */
static int lsp_backup_znh_install(struct zebra_lsp *lsp, enum lsp_types_t type,
				  const struct zapi_nexthop *znh)
{
	struct zebra_nhlfe *nhlfe;

	nhlfe = lsp_add_nhlfe(lsp, type, znh->label_num,
			      znh->labels, znh->type, &znh->gate,
			      znh->ifindex, true /*backup*/);
	if (nhlfe == NULL) {
		if (IS_ZEBRA_DEBUG_MPLS)
			zlog_debug("%s: unable to add backup nhlfe, label: %u",
				   __func__, lsp->ile.in_label);
		return -1;
	}

	/* Queue LSP for processing. */
	if (lsp_processq_add(lsp))
		return -1;

	return 0;
}

/* Look up the LSP for 'in_label' in the VRF's dynamic LSP table;
 * returns NULL if there is no table or no matching entry.
 */
struct zebra_lsp *mpls_lsp_find(struct zebra_vrf *zvrf, mpls_label_t in_label)
{
	struct hash *lsp_table;
	struct zebra_ile tmp_ile;

	/* Lookup table. */
	lsp_table = zvrf->lsp_table;
	if (!lsp_table)
		return NULL;

	/* If entry is not present, exit.
*/ + tmp_ile.in_label = in_label; + return hash_lookup(lsp_table, &tmp_ile); +} + +/* + * Uninstall a particular NHLFE in the forwarding table. If this is + * the only NHLFE, the entire LSP forwarding entry has to be deleted. + */ +int mpls_lsp_uninstall(struct zebra_vrf *zvrf, enum lsp_types_t type, + mpls_label_t in_label, enum nexthop_types_t gtype, + const union g_addr *gate, ifindex_t ifindex, + bool backup_p) +{ + struct hash *lsp_table; + struct zebra_ile tmp_ile; + struct zebra_lsp *lsp; + struct zebra_nhlfe *nhlfe; + char buf[NEXTHOP_STRLEN]; + bool schedule_lsp = false; + + /* Lookup table. */ + lsp_table = zvrf->lsp_table; + if (!lsp_table) + return -1; + + /* If entry is not present, exit. */ + tmp_ile.in_label = in_label; + lsp = hash_lookup(lsp_table, &tmp_ile); + if (!lsp) + return 0; + + if (backup_p) + nhlfe = nhlfe_find(&lsp->backup_nhlfe_list, type, gtype, + gate, ifindex); + else + nhlfe = nhlfe_find(&lsp->nhlfe_list, type, gtype, gate, + ifindex); + if (!nhlfe) + return 0; + + if (IS_ZEBRA_DEBUG_MPLS) { + nhlfe2str(nhlfe, buf, sizeof(buf)); + zlog_debug("Del LSP in-label %u type %d nexthop %s flags 0x%x", + in_label, type, buf, nhlfe->flags); + } + + if (CHECK_FLAG(lsp->flags, LSP_FLAG_INSTALLED) || + CHECK_FLAG(nhlfe->flags, NHLFE_FLAG_INSTALLED)) + schedule_lsp = true; + + /* Mark NHLFE for delete or directly delete, as appropriate. */ + if (schedule_lsp) { + SET_FLAG(nhlfe->flags, NHLFE_FLAG_DELETED); + UNSET_FLAG(nhlfe->flags, NHLFE_FLAG_CHANGED); + + if (IS_ZEBRA_DEBUG_MPLS) + zlog_debug("Schedule LSP in-label %u flags 0x%x", + lsp->ile.in_label, lsp->flags); + if (lsp_processq_add(lsp)) + return -1; + } else { + nhlfe_del(nhlfe); + + /* Free LSP entry if no other NHLFEs and not scheduled. 
*/ + lsp_check_free(lsp_table, &lsp); + } + return 0; +} + +int mpls_lsp_uninstall_all_vrf(struct zebra_vrf *zvrf, enum lsp_types_t type, + mpls_label_t in_label) +{ + struct hash *lsp_table; + struct zebra_ile tmp_ile; + struct zebra_lsp *lsp; + + /* Lookup table. */ + lsp_table = zvrf->lsp_table; + if (!lsp_table) + return -1; + + /* If entry is not present, exit. */ + tmp_ile.in_label = in_label; + lsp = hash_lookup(lsp_table, &tmp_ile); + if (!lsp) + return 0; + + return mpls_lsp_uninstall_all(lsp_table, lsp, type); +} + +/* + * Uninstall all NHLFEs for a particular LSP forwarding entry. + * If no other NHLFEs exist, the entry would be deleted. + */ +static void mpls_lsp_uninstall_all_type(struct hash_bucket *bucket, void *ctxt) +{ + struct lsp_uninstall_args *args = ctxt; + struct zebra_lsp *lsp; + struct hash *lsp_table; + + lsp = (struct zebra_lsp *)bucket->data; + if (nhlfe_list_first(&lsp->nhlfe_list) == NULL) + return; + + lsp_table = args->lsp_table; + if (!lsp_table) + return; + + mpls_lsp_uninstall_all(lsp_table, lsp, args->type); +} + +/* + * Uninstall all FEC-To-NHLFE (FTN) bindings of the given address-family and + * LSP type. + */ +static void mpls_ftn_uninstall_all(struct zebra_vrf *zvrf, + int afi, enum lsp_types_t lsp_type) +{ + struct route_table *table; + struct route_node *rn; + struct route_entry *re; + struct nexthop *nexthop; + struct nexthop_group *nhg; + bool update; + + /* Process routes of interested address-families. 
*/ + table = zebra_vrf_table(afi, SAFI_UNICAST, zvrf_id(zvrf)); + if (!table) + return; + + for (rn = route_top(table); rn; rn = route_next(rn)) { + update = false; + + RNODE_FOREACH_RE (rn, re) { + struct nhg_hash_entry *new_nhe; + + new_nhe = zebra_nhe_copy(re->nhe, 0); + + nhg = &new_nhe->nhg; + for (nexthop = nhg->nexthop; nexthop; + nexthop = nexthop->next) { + if (nexthop->nh_label_type != lsp_type) + continue; + + nexthop_del_labels(nexthop); + SET_FLAG(re->status, ROUTE_ENTRY_CHANGED); + SET_FLAG(re->status, + ROUTE_ENTRY_LABELS_CHANGED); + update = true; + } + + /* Check for backup info and update that also */ + nhg = zebra_nhg_get_backup_nhg(new_nhe); + if (nhg != NULL) { + for (nexthop = nhg->nexthop; nexthop; + nexthop = nexthop->next) { + if (nexthop->nh_label_type != lsp_type) + continue; + + nexthop_del_labels(nexthop); + SET_FLAG(re->status, + ROUTE_ENTRY_CHANGED); + SET_FLAG(re->status, + ROUTE_ENTRY_LABELS_CHANGED); + update = true; + } + } + + if (CHECK_FLAG(re->status, ROUTE_ENTRY_LABELS_CHANGED)) + mpls_zebra_nhe_update(re, afi, new_nhe); + + zebra_nhg_free(new_nhe); + } + + if (update) + rib_queue_add(rn); + } +} + +#if defined(HAVE_CUMULUS) +/* + * Check that the label values used in LSP creation are consistent. The + * main criteria is that if there is ECMP, the label operation must still + * be consistent - i.e., all paths either do a swap or do PHP. This is due + * to current HW restrictions. + */ +int zebra_mpls_lsp_label_consistent(struct zebra_vrf *zvrf, + mpls_label_t in_label, + mpls_label_t out_label, + enum nexthop_types_t gtype, + union g_addr *gate, ifindex_t ifindex) +{ + struct hash *slsp_table; + struct zebra_ile tmp_ile; + struct zebra_lsp *lsp; + struct zebra_nhlfe *nhlfe; + const struct nexthop *nh; + + /* Lookup table. */ + slsp_table = zvrf->slsp_table; + if (!slsp_table) + return 0; + + /* If entry is not present, exit. 
*/ + tmp_ile.in_label = in_label; + lsp = hash_lookup(slsp_table, &tmp_ile); + if (!lsp) + return 1; + + nhlfe = nhlfe_find(&lsp->nhlfe_list, ZEBRA_LSP_STATIC, + gtype, gate, ifindex); + if (nhlfe) { + nh = nhlfe->nexthop; + + if (nh == NULL || nh->nh_label == NULL) + return 0; + + if (nh->nh_label->label[0] == out_label) + return 1; + + /* If not only NHLFE, cannot allow label change. */ + if (nhlfe != nhlfe_list_first(&lsp->nhlfe_list) || + nhlfe_list_next(&lsp->nhlfe_list, nhlfe) != NULL) + return 0; + } else { + /* If other NHLFEs exist, label operation must match. */ + nhlfe = nhlfe_list_first(&lsp->nhlfe_list); + if (nhlfe != NULL) { + int cur_op, new_op; + + nh = nhlfe->nexthop; + + if (nh == NULL || nh->nh_label == NULL) + return 0; + + cur_op = (nh->nh_label->label[0] == + MPLS_LABEL_IMPLICIT_NULL); + new_op = (out_label == MPLS_LABEL_IMPLICIT_NULL); + if (cur_op != new_op) + return 0; + } + } + + /* Label values are good. */ + return 1; +} +#endif /* HAVE_CUMULUS */ + +/* + * Add static LSP entry. This may be the first entry for this incoming label + * or an additional nexthop; an existing entry may also have outgoing label + * changed. + * Note: The label operation (swap or PHP) is common for the LSP entry (all + * NHLFEs). + */ +int zebra_mpls_static_lsp_add(struct zebra_vrf *zvrf, mpls_label_t in_label, + mpls_label_t out_label, + enum nexthop_types_t gtype, union g_addr *gate, + ifindex_t ifindex) +{ + struct hash *slsp_table; + struct zebra_ile tmp_ile; + struct zebra_lsp *lsp; + struct zebra_nhlfe *nhlfe; + char buf[BUFSIZ]; + + /* Lookup table. */ + slsp_table = zvrf->slsp_table; + if (!slsp_table) + return -1; + + /* Find or create LSP. 
 */
	tmp_ile.in_label = in_label;
	/* hash_get with lsp_alloc creates the LSP entry if not present. */
	lsp = hash_get(slsp_table, &tmp_ile, lsp_alloc);

	nhlfe = nhlfe_find(&lsp->nhlfe_list, ZEBRA_LSP_STATIC, gtype, gate,
			   ifindex);
	if (nhlfe) {
		struct nexthop *nh = nhlfe->nexthop;

		/* Static NHLFEs are always created with a nexthop and
		 * an outgoing label, so both must be present here.
		 */
		assert(nh);
		assert(nh->nh_label);

		/* Compare existing nexthop */
		if (nh->nh_label->num_labels == 1 &&
		    nh->nh_label->label[0] == out_label)
			/* No change */
			return 0;

		if (IS_ZEBRA_DEBUG_MPLS) {
			nhlfe2str(nhlfe, buf, sizeof(buf));
			zlog_debug(
				"Upd static LSP in-label %u nexthop %s out-label %u (old %u)",
				in_label, buf, out_label,
				nh->nh_label->label[0]);
		}
		/* Single label: update in place; otherwise replace the
		 * whole label stack with the one new out-label.
		 */
		if (nh->nh_label->num_labels == 1)
			nh->nh_label->label[0] = out_label;
		else {
			nexthop_del_labels(nh);
			nexthop_add_labels(nh, ZEBRA_LSP_STATIC, 1, &out_label);
		}

	} else {
		/* Add static LSP entry to this nexthop */
		nhlfe = nhlfe_add(lsp, ZEBRA_LSP_STATIC, gtype, gate,
				  ifindex, 1, &out_label, false /*backup*/);
		if (!nhlfe)
			return -1;

		if (IS_ZEBRA_DEBUG_MPLS) {
			nhlfe2str(nhlfe, buf, sizeof(buf));
			zlog_debug(
				"Add static LSP in-label %u nexthop %s out-label %u",
				in_label, buf, out_label);
		}
	}

	/* (Re)Install LSP in the main table. */
	if (mpls_lsp_install(zvrf, ZEBRA_LSP_STATIC, in_label, 1, &out_label,
			     gtype, gate, ifindex))
		return -1;

	return 0;
}

/*
 * Delete static LSP entry. This may be the delete of one particular
 * NHLFE for this incoming label or the delete of the entire entry (i.e.,
 * all NHLFEs).
 * NOTE: Delete of the only NHLFE will also end up deleting the entire
 * LSP configuration.
 *
 * Returns 0 on success (including "nothing to delete"), -1 on failure.
 */
int zebra_mpls_static_lsp_del(struct zebra_vrf *zvrf, mpls_label_t in_label,
			      enum nexthop_types_t gtype, union g_addr *gate,
			      ifindex_t ifindex)
{
	struct hash *slsp_table;
	struct zebra_ile tmp_ile;
	struct zebra_lsp *lsp;
	struct zebra_nhlfe *nhlfe;

	/* Lookup table. */
	slsp_table = zvrf->slsp_table;
	if (!slsp_table)
		return -1;

	/* If entry is not present, exit.
 */
	tmp_ile.in_label = in_label;
	lsp = hash_lookup(slsp_table, &tmp_ile);
	if (!lsp)
		return 0;

	/* Is it delete of entire LSP or a specific NHLFE? */
	/* NOTE(review): gtype == NEXTHOP_TYPE_BLACKHOLE is used as the
	 * "delete everything" sentinel here — confirm callers never pass a
	 * genuine blackhole nexthop for a single-NHLFE delete.
	 */
	if (gtype == NEXTHOP_TYPE_BLACKHOLE) {
		if (IS_ZEBRA_DEBUG_MPLS)
			zlog_debug("Del static LSP in-label %u", in_label);

		/* Uninstall entire LSP from the main table. */
		mpls_static_lsp_uninstall_all(zvrf, in_label);

		/* Delete all static NHLFEs */
		frr_each_safe(nhlfe_list, &lsp->nhlfe_list, nhlfe) {
			nhlfe_del(nhlfe);
		}
	} else {
		/* Find specific NHLFE, exit if not found. */
		nhlfe = nhlfe_find(&lsp->nhlfe_list, ZEBRA_LSP_STATIC,
				   gtype, gate, ifindex);
		if (!nhlfe)
			return 0;

		if (IS_ZEBRA_DEBUG_MPLS) {
			char buf[BUFSIZ];
			nhlfe2str(nhlfe, buf, sizeof(buf));
			zlog_debug("Del static LSP in-label %u nexthop %s",
				   in_label, buf);
		}

		/* Uninstall LSP from the main table. */
		mpls_lsp_uninstall(zvrf, ZEBRA_LSP_STATIC, in_label, gtype,
				   gate, ifindex, false);

		/* Delete static LSP NHLFE */
		nhlfe_del(nhlfe);
	}

	/* Remove entire static LSP entry if no NHLFE - valid in either case
	 * above.
	 */
	if (nhlfe_list_first(&lsp->nhlfe_list) == NULL) {
		/* Remove from the hash before freeing. */
		lsp = hash_release(slsp_table, &tmp_ile);
		lsp_free_nhlfe(lsp);
		XFREE(MTYPE_LSP, lsp);
	}

	return 0;
}

/*
 * Schedule all MPLS label forwarding entries for processing.
 * Called upon changes that may affect one or more of them such as
 * interface or nexthop state changes.
 */
void zebra_mpls_lsp_schedule(struct zebra_vrf *zvrf)
{
	if (!zvrf)
		return;
	hash_iterate(zvrf->lsp_table, lsp_schedule, NULL);
}

/*
 * Display MPLS label forwarding table for a specific LSP
 * (VTY command handler).
 */
void zebra_mpls_print_lsp(struct vty *vty, struct zebra_vrf *zvrf,
			  mpls_label_t label, bool use_json)
{
	struct hash *lsp_table;
	struct zebra_lsp *lsp;
	struct zebra_ile tmp_ile;
	json_object *json = NULL;

	/* Lookup table.
 */
	lsp_table = zvrf->lsp_table;
	if (!lsp_table)
		return;

	/* If entry is not present, exit. */
	tmp_ile.in_label = label;
	lsp = hash_lookup(lsp_table, &tmp_ile);
	if (!lsp)
		return;

	/* vty_json() takes ownership of and frees the json object. */
	if (use_json) {
		json = lsp_json(lsp);
		vty_json(vty, json);
	} else
		lsp_print(vty, lsp);
}

/*
 * Display MPLS label forwarding table (VTY command handler).
 * Walks all LSPs sorted by in-label and renders either JSON or a
 * formatted text table.
 */
void zebra_mpls_print_lsp_table(struct vty *vty, struct zebra_vrf *zvrf,
				bool use_json)
{
	char buf[BUFSIZ];
	json_object *json = NULL;
	struct zebra_lsp *lsp = NULL;
	struct zebra_nhlfe *nhlfe = NULL;
	struct listnode *node = NULL;
	/* Sorted snapshot of the hash; must be freed before returning. */
	struct list *lsp_list = hash_get_sorted_list(zvrf->lsp_table, lsp_cmp);

	if (use_json) {
		json = json_object_new_object();

		for (ALL_LIST_ELEMENTS_RO(lsp_list, node, lsp))
			json_object_object_add(
				json, label2str(lsp->ile.in_label, buf,
						sizeof(buf)),
				lsp_json(lsp));

		vty_json(vty, json);
	} else {
		struct ttable *tt;

		/* Prepare table. */
		tt = ttable_new(&ttable_styles[TTSTYLE_BLANK]);
		ttable_add_row(tt, "Inbound Label|Type|Nexthop|Outbound Label");
		tt->style.cell.rpad = 2;
		tt->style.corner = '+';
		ttable_restyle(tt);
		ttable_rowseps(tt, 0, BOTTOM, true, '-');

		for (ALL_LIST_ELEMENTS_RO(lsp_list, node, lsp)) {
			frr_each_safe(nhlfe_list, &lsp->nhlfe_list, nhlfe) {
				struct nexthop *nexthop;
				const char *out_label_str;
				char nh_buf[NEXTHOP_STRLEN];

				nexthop = nhlfe->nexthop;

				/* Format the nexthop column. */
				switch (nexthop->type) {
				case NEXTHOP_TYPE_IFINDEX: {
					struct zebra_ns *zns;
					struct interface *ifp;

					zns = zebra_ns_lookup(NS_DEFAULT);
					ifp = if_lookup_by_index_per_ns(
						zns, nexthop->ifindex);
					snprintf(nh_buf, sizeof(nh_buf), "%s",
						 ifp ? ifp->name : "Null");
					break;
				}
				case NEXTHOP_TYPE_IPV4:
				case NEXTHOP_TYPE_IPV4_IFINDEX:
					inet_ntop(AF_INET, &nexthop->gate.ipv4,
						  nh_buf, sizeof(nh_buf));
					break;
				case NEXTHOP_TYPE_IPV6:
				case NEXTHOP_TYPE_IPV6_IFINDEX:
					inet_ntop(AF_INET6, &nexthop->gate.ipv6,
						  nh_buf, sizeof(nh_buf));
					break;
				case NEXTHOP_TYPE_BLACKHOLE:
					/* NOTE(review): nh_buf is left
					 * uninitialized here yet is printed
					 * below — confirm blackhole NHLFEs
					 * cannot occur in an LSP, or
					 * zero-initialize nh_buf.
					 */
					break;
				}

				/* NOTE(review): assumes nh_label is non-NULL
				 * for all non-IFINDEX nexthops — verify.
				 */
				if (nexthop->type != NEXTHOP_TYPE_IFINDEX)
					out_label_str = mpls_label2str(
						nexthop->nh_label->num_labels,
						&nexthop->nh_label->label[0],
						buf, sizeof(buf), 1);
				else
					out_label_str = "-";

				ttable_add_row(tt, "%u|%s|%s|%s",
					       lsp->ile.in_label,
					       nhlfe_type2str(nhlfe->type),
					       nh_buf, out_label_str);
			}
		}

		/* Dump the generated table. */
		if (tt->nrows > 1) {
			char *table = ttable_dump(tt, "\n");
			vty_out(vty, "%s\n", table);
			XFREE(MTYPE_TMP, table);
		}
		ttable_del(tt);
	}

	list_delete(&lsp_list);
}

/*
 * Create printable string for static LSP configuration.
 * Writes "<gateway>[<ifname>]" (or just the ifname, or nothing for
 * blackhole) into caller-supplied buf and returns buf.
 */
static char *nhlfe_config_str(const struct zebra_nhlfe *nhlfe, char *buf,
			      int size)
{
	const struct nexthop *nh;

	nh = nhlfe->nexthop;

	buf[0] = '\0';
	switch (nh->type) {
	case NEXTHOP_TYPE_IPV4:
	case NEXTHOP_TYPE_IPV4_IFINDEX:
		inet_ntop(AF_INET, &nh->gate.ipv4, buf, size);
		if (nh->ifindex)
			strlcat(buf, ifindex2ifname(nh->ifindex, VRF_DEFAULT),
				size);
		break;
	case NEXTHOP_TYPE_IPV6:
	case NEXTHOP_TYPE_IPV6_IFINDEX:
		inet_ntop(AF_INET6, &nh->gate.ipv6, buf, size);
		if (nh->ifindex)
			strlcat(buf,
				ifindex2ifname(nh->ifindex, VRF_DEFAULT),
				size);
		break;
	case NEXTHOP_TYPE_IFINDEX:
		if (nh->ifindex)
			strlcat(buf,
				ifindex2ifname(nh->ifindex, VRF_DEFAULT),
				size);
		break;
	case NEXTHOP_TYPE_BLACKHOLE:
		break;
	}

	return buf;
}

/*
 * Display MPLS LSP configuration of all static LSPs (VTY command handler).
 */
int zebra_mpls_write_lsp_config(struct vty *vty, struct zebra_vrf *zvrf)
{
	struct zebra_lsp *lsp;
	struct zebra_nhlfe *nhlfe;
	struct nexthop *nh;
	struct listnode *node;
	/* Sorted snapshot of the static-LSP hash; freed before returning. */
	struct list *slsp_list =
		hash_get_sorted_list(zvrf->slsp_table, lsp_cmp);

	for (ALL_LIST_ELEMENTS_RO(slsp_list, node, lsp)) {
		frr_each(nhlfe_list, &lsp->nhlfe_list, nhlfe) {
			char buf[BUFSIZ];
			char lstr[30];

			nh = nhlfe->nexthop;
			if (nh == NULL || nh->nh_label == NULL)
				continue;

			nhlfe_config_str(nhlfe, buf, sizeof(buf));

			/* Render the out-label as the config keyword for the
			 * reserved values, or the numeric label otherwise.
			 */
			switch (nh->nh_label->label[0]) {
			case MPLS_LABEL_IPV4_EXPLICIT_NULL:
			case MPLS_LABEL_IPV6_EXPLICIT_NULL:
				strlcpy(lstr, "explicit-null", sizeof(lstr));
				break;
			case MPLS_LABEL_IMPLICIT_NULL:
				strlcpy(lstr, "implicit-null", sizeof(lstr));
				break;
			default:
				snprintf(lstr, sizeof(lstr), "%u",
					 nh->nh_label->label[0]);
				break;
			}

			vty_out(vty, "mpls lsp %u %s %s\n", lsp->ile.in_label,
				buf, lstr);
		}
	}

	list_delete(&slsp_list);
	/* Non-zero tells the caller that some config was written. */
	return (zvrf->slsp_table->count ? 1 : 0);
}

/*
 * Add/update global label block.
 */
int zebra_mpls_label_block_add(struct zebra_vrf *zvrf, uint32_t start_label,
			       uint32_t end_label)
{
	zvrf->mpls_srgb.start_label = start_label;
	zvrf->mpls_srgb.end_label = end_label;

	/* Evaluate registered FECs to see if any get a label or not. */
	fec_evaluate(zvrf);
	return 0;
}

/*
 * Delete global label block: reset the SRGB to its defaults.
 */
int zebra_mpls_label_block_del(struct zebra_vrf *zvrf)
{
	zvrf->mpls_srgb.start_label = MPLS_DEFAULT_MIN_SRGB_LABEL;
	zvrf->mpls_srgb.end_label = MPLS_DEFAULT_MAX_SRGB_LABEL;

	/* Process registered FECs to clear their local label, if needed. */
	fec_evaluate(zvrf);
	return 0;
}

/*
 * Display MPLS global label block configuration (VTY command handler).
 */
int zebra_mpls_write_label_block_config(struct vty *vty, struct zebra_vrf *zvrf)
{
	/* A start label of 0 means no SRGB is configured at all. */
	if (zvrf->mpls_srgb.start_label == 0)
		return 0;

	/* Only emit config when the block differs from the defaults. */
	if ((zvrf->mpls_srgb.start_label != MPLS_DEFAULT_MIN_SRGB_LABEL)
	    || (zvrf->mpls_srgb.end_label != MPLS_DEFAULT_MAX_SRGB_LABEL)) {
		vty_out(vty, "mpls label global-block %u %u\n",
			zvrf->mpls_srgb.start_label, zvrf->mpls_srgb.end_label);
	}

	return 1;
}

/*
 * Called when VRF becomes inactive, cleans up information but keeps
 * the table itself.
 */
void zebra_mpls_cleanup_tables(struct zebra_vrf *zvrf)
{
	struct zebra_vrf *def_zvrf;
	afi_t afi;

	if (zvrf_id(zvrf) == VRF_DEFAULT)
		hash_iterate(zvrf->lsp_table, lsp_uninstall_from_kernel, NULL);
	else {
		/*
		 * For other vrfs, we try to remove associated LSPs; we locate
		 * the LSPs in the default vrf.
		 */
		def_zvrf = zebra_vrf_lookup_by_id(VRF_DEFAULT);

		/* At shutdown, the default may be gone already */
		if (def_zvrf == NULL)
			return;

		for (afi = AFI_IP; afi < AFI_MAX; afi++) {
			if (zvrf->label[afi] != MPLS_LABEL_NONE)
				lsp_uninstall(def_zvrf, zvrf->label[afi]);
		}
	}
}

/*
 * When a vrf label is assigned and the client goes away
 * we should cleanup the vrf labels associated with
 * that zclient.
 */
void zebra_mpls_client_cleanup_vrf_label(uint8_t proto)
{
	struct vrf *vrf;
	/* VRF labels are always installed in the default VRF's LSP table. */
	struct zebra_vrf *def_zvrf = zebra_vrf_lookup_by_id(VRF_DEFAULT);

	if (def_zvrf == NULL)
		return;

	RB_FOREACH (vrf, vrf_id_head, &vrfs_by_id) {
		struct zebra_vrf *zvrf = vrf->info;
		afi_t afi;

		if (!zvrf)
			continue;

		for (afi = AFI_IP; afi < AFI_MAX; afi++) {
			/* Only uninstall labels owned by the departing
			 * client's protocol.
			 */
			if (zvrf->label_proto[afi] == proto
			    && zvrf->label[afi] != MPLS_LABEL_NONE)
				lsp_uninstall(def_zvrf, zvrf->label[afi]);

			/*
			 * Cleanup data structures by fiat
			 */
			zvrf->label_proto[afi] = 0;
			zvrf->label[afi] = MPLS_LABEL_NONE;
		}
	}
}

/* Hash destructor: free one LSP entry and all of its NHLFEs. */
static void lsp_table_free(void *p)
{
	struct zebra_lsp *lsp = p;

	lsp_free_nhlfe(lsp);

	XFREE(MTYPE_LSP, lsp);
}

/*
 * Called upon process exiting, need to delete LSP forwarding
 * entries from the kernel.
 * NOTE: Currently supported only for default VRF.
 */
void zebra_mpls_close_tables(struct zebra_vrf *zvrf)
{
	hash_iterate(zvrf->lsp_table, lsp_uninstall_from_kernel, NULL);
	hash_clean(zvrf->lsp_table, lsp_table_free);
	hash_free(zvrf->lsp_table);
	hash_clean(zvrf->slsp_table, lsp_table_free);
	hash_free(zvrf->slsp_table);
	route_table_finish(zvrf->fec_table[AFI_IP]);
	route_table_finish(zvrf->fec_table[AFI_IP6]);
}

/*
 * Allocate MPLS tables for this VRF and do other initialization.
 * NOTE: Currently supported only for default VRF.
+ */ +void zebra_mpls_init_tables(struct zebra_vrf *zvrf) +{ + char buffer[80]; + + if (!zvrf) + return; + + snprintf(buffer, sizeof(buffer), "ZEBRA SLSP table: %s", + zvrf->vrf->name); + zvrf->slsp_table = hash_create_size(8, label_hash, label_cmp, buffer); + + snprintf(buffer, sizeof(buffer), "ZEBRA LSP table: %s", + zvrf->vrf->name); + zvrf->lsp_table = hash_create_size(8, label_hash, label_cmp, buffer); + zvrf->fec_table[AFI_IP] = route_table_init(); + zvrf->fec_table[AFI_IP6] = route_table_init(); + zvrf->mpls_flags = 0; + zvrf->mpls_srgb.start_label = MPLS_DEFAULT_MIN_SRGB_LABEL; + zvrf->mpls_srgb.end_label = MPLS_DEFAULT_MAX_SRGB_LABEL; +} + +void zebra_mpls_turned_on(void) +{ + if (!mpls_enabled) { + mpls_processq_init(); + mpls_enabled = true; + } + + hook_register(zserv_client_close, zebra_mpls_cleanup_fecs_for_client); + hook_register(zserv_client_close, zebra_mpls_cleanup_zclient_labels); +} + +/* + * Global MPLS initialization. + */ +void zebra_mpls_init(void) +{ + mpls_enabled = false; + mpls_pw_reach_strict = false; + + if (mpls_kernel_init() < 0) { + flog_warn(EC_ZEBRA_MPLS_SUPPORT_DISABLED, + "Disabling MPLS support (no kernel support)"); + return; + } + + zebra_mpls_turned_on(); +} diff --git a/zebra/zebra_mpls.h b/zebra/zebra_mpls.h new file mode 100644 index 0000000..cf24786 --- /dev/null +++ b/zebra/zebra_mpls.h @@ -0,0 +1,599 @@ +/* + * Zebra MPLS Data structures and definitions + * Copyright (C) 2015 Cumulus Networks, Inc. + * + * This file is part of GNU Zebra. + * + * GNU Zebra is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License as published by the + * Free Software Foundation; either version 2, or (at your option) any + * later version. + * + * GNU Zebra is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. 
See the GNU + * General Public License for more details. + * + * You should have received a copy of the GNU General Public License along + * with this program; see the file COPYING; if not, write to the Free Software + * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA + */ + +#ifndef _ZEBRA_MPLS_H +#define _ZEBRA_MPLS_H + +#include "prefix.h" +#include "table.h" +#include "queue.h" +#include "hash.h" +#include "jhash.h" +#include "nexthop.h" +#include "vty.h" +#include "memory.h" +#include "mpls.h" +#include "zebra/zserv.h" +#include "zebra/zebra_vrf.h" +#include "hook.h" + +#ifdef __cplusplus +extern "C" { +#endif + +/* Definitions and macros. */ + +#define NHLFE_FAMILY(nhlfe) \ + (((nhlfe)->nexthop->type == NEXTHOP_TYPE_IPV6 \ + || (nhlfe)->nexthop->type == NEXTHOP_TYPE_IPV6_IFINDEX) \ + ? AF_INET6 \ + : AF_INET) + +/* Declare LSP nexthop list types */ +PREDECL_DLIST(nhlfe_list); + +/* + * (Outgoing) nexthop label forwarding entry + */ +struct zebra_nhlfe { + /* Type of entry - static etc. */ + enum lsp_types_t type; + + /* Nexthop information (with outgoing label) */ + struct nexthop *nexthop; + + /* Backpointer to base entry. */ + struct zebra_lsp *lsp; + + /* Runtime info - flags, pointers etc. */ + uint32_t flags; +#define NHLFE_FLAG_CHANGED (1 << 0) +#define NHLFE_FLAG_SELECTED (1 << 1) +#define NHLFE_FLAG_MULTIPATH (1 << 2) +#define NHLFE_FLAG_DELETED (1 << 3) +#define NHLFE_FLAG_INSTALLED (1 << 4) +#define NHLFE_FLAG_IS_BACKUP (1 << 5) + + uint8_t distance; + + /* Linkage for LSPs' lists */ + struct nhlfe_list_item list; +}; + +/* + * Incoming label entry + */ +struct zebra_ile { + mpls_label_t in_label; +}; + +/* + * Label swap entry (ile -> list of nhlfes) + */ +struct zebra_lsp { + /* Incoming label */ + struct zebra_ile ile; + + /* List of NHLFEs, pointer to best, and num equal-cost. */ + struct nhlfe_list_head nhlfe_list; + + struct zebra_nhlfe *best_nhlfe; + uint32_t num_ecmp; + + /* Backup nhlfes, if present. 
The nexthop in a primary/active nhlfe + * refers to its backup (if any) by index, so the order of this list + * is significant. + */ + struct nhlfe_list_head backup_nhlfe_list; + + /* Flags */ + uint32_t flags; +#define LSP_FLAG_SCHEDULED (1 << 0) +#define LSP_FLAG_INSTALLED (1 << 1) +#define LSP_FLAG_CHANGED (1 << 2) +#define LSP_FLAG_FPM (1 << 3) + + /* Address-family of NHLFE - saved here for delete. All NHLFEs */ + /* have to be of the same AF */ + uint8_t addr_family; +}; + +/* + * FEC to label binding. + */ +struct zebra_fec { + /* FEC (prefix) */ + struct route_node *rn; + + /* In-label - either statically bound or derived from label block. */ + mpls_label_t label; + + /* Label index (into global label block), if valid */ + uint32_t label_index; + + /* Flags. */ + uint32_t flags; +#define FEC_FLAG_CONFIGURED (1 << 0) + + /* Clients interested in this FEC. */ + struct list *client_list; +}; + +/* Declare typesafe list apis/macros */ +DECLARE_DLIST(nhlfe_list, struct zebra_nhlfe, list); + +/* Function declarations. */ + +/* + * Add/update global label block. + */ +int zebra_mpls_label_block_add(struct zebra_vrf *zvrf, uint32_t start_label, + uint32_t end_label); + +/* + * Delete global label block. + */ +int zebra_mpls_label_block_del(struct zebra_vrf *vrf); + +/* + * Display MPLS global label block configuration (VTY command handler). + */ +int zebra_mpls_write_label_block_config(struct vty *vty, struct zebra_vrf *vrf); + +/* + * Install dynamic LSP entry. + */ +int zebra_mpls_lsp_install(struct zebra_vrf *zvrf, struct route_node *rn, + struct route_entry *re); + +/* + * Uninstall dynamic LSP entry, if any. 
+ */ +int zebra_mpls_lsp_uninstall(struct zebra_vrf *zvrf, struct route_node *rn, + struct route_entry *re); + +/* Add an NHLFE to an LSP, return the newly-added object */ +struct zebra_nhlfe * +zebra_mpls_lsp_add_nhlfe(struct zebra_lsp *lsp, enum lsp_types_t lsp_type, + enum nexthop_types_t gtype, const union g_addr *gate, + ifindex_t ifindex, uint8_t num_labels, + const mpls_label_t *out_labels); + +/* Add or update a backup NHLFE for an LSP; return the object */ +struct zebra_nhlfe *zebra_mpls_lsp_add_backup_nhlfe( + struct zebra_lsp *lsp, enum lsp_types_t lsp_type, + enum nexthop_types_t gtype, const union g_addr *gate, ifindex_t ifindex, + uint8_t num_labels, const mpls_label_t *out_labels); + +/* + * Add NHLFE or backup NHLFE to an LSP based on a nexthop. These just maintain + * the LSP and NHLFE objects; nothing is scheduled for processing. + * Return: the newly-added object + */ +struct zebra_nhlfe *zebra_mpls_lsp_add_nh(struct zebra_lsp *lsp, + enum lsp_types_t lsp_type, + const struct nexthop *nh); +struct zebra_nhlfe *zebra_mpls_lsp_add_backup_nh(struct zebra_lsp *lsp, + enum lsp_types_t lsp_type, + const struct nexthop *nh); + +/* Free an allocated NHLFE */ +void zebra_mpls_nhlfe_free(struct zebra_nhlfe *nhlfe); + +int zebra_mpls_fec_register(struct zebra_vrf *zvrf, struct prefix *p, + uint32_t label, uint32_t label_index, + struct zserv *client); + +/* + * Deregistration from a client for the label binding for a FEC. The FEC + * itself is deleted if no other registered clients exist and there is no + * label bound to the FEC. + */ +int zebra_mpls_fec_unregister(struct zebra_vrf *zvrf, struct prefix *p, + struct zserv *client); + +/* + * Return FEC (if any) to which this label is bound. + * Note: Only works for per-prefix binding and when the label is not + * implicit-null. + * TODO: Currently walks entire table, can optimize later with another + * hash.. 
+ */ +struct zebra_fec *zebra_mpls_fec_for_label(struct zebra_vrf *zvrf, + mpls_label_t label); + +/* + * Inform if specified label is currently bound to a FEC or not. + */ +int zebra_mpls_label_already_bound(struct zebra_vrf *zvrf, mpls_label_t label); + +/* + * Add static FEC to label binding. If there are clients registered for this + * FEC, notify them. If there are labeled routes for this FEC, install the + * label forwarding entry. + */ +int zebra_mpls_static_fec_add(struct zebra_vrf *zvrf, struct prefix *p, + mpls_label_t in_label); + +/* + * Remove static FEC to label binding. If there are no clients registered + * for this FEC, delete the FEC; else notify clients. + * Note: Upon delete of static binding, if label index exists for this FEC, + * client may need to be updated with derived label. + */ +int zebra_mpls_static_fec_del(struct zebra_vrf *zvrf, struct prefix *p); + +/* + * Display MPLS FEC to label binding configuration (VTY command handler). + */ +int zebra_mpls_write_fec_config(struct vty *vty, struct zebra_vrf *zvrf); + +/* + * Display MPLS FEC to label binding (VTY command handler). + */ +void zebra_mpls_print_fec_table(struct vty *vty, struct zebra_vrf *zvrf); + +/* + * Display MPLS FEC to label binding for a specific FEC (VTY command handler). + */ +void zebra_mpls_print_fec(struct vty *vty, struct zebra_vrf *zvrf, + struct prefix *p); + +/* + * Handle zapi request to install/uninstall LSP and + * (optionally) FEC-To-NHLFE (FTN) bindings. + * + * mpls_zapi_labels_process -> Installs for future processing + * in the meta-q + * zebra_mpls_labels_process -> called by the meta-q + */ +void mpls_zapi_labels_process(bool add_p, struct zebra_vrf *zvrf, + const struct zapi_labels *zl); +void zebra_mpls_zapi_labels_process(bool add_p, struct zebra_vrf *zvrf, + const struct zapi_labels *zl); + +/* + * Uninstall all NHLFEs bound to a single FEC. 
+ * + * mpls_ftn_uninstall -> Called to enqueue into early label processing + * via the metaq + * zebra_mpls_ftn_uninstall -> Called when we process the meta q + * for this item + */ +void mpls_ftn_uninstall(struct zebra_vrf *zvrf, enum lsp_types_t type, + struct prefix *prefix, uint8_t route_type, + uint8_t route_instance); +void zebra_mpls_ftn_uninstall(struct zebra_vrf *zvrf, enum lsp_types_t type, + struct prefix *prefix, uint8_t route_type, + uint8_t route_instance); +/* + * Install/update a NHLFE for an LSP in the forwarding table. This may be + * a new LSP entry or a new NHLFE for an existing in-label or an update of + * the out-label(s) for an existing NHLFE (update case). + */ +int mpls_lsp_install(struct zebra_vrf *zvrf, enum lsp_types_t type, + mpls_label_t in_label, uint8_t num_out_labels, + const mpls_label_t *out_labels, enum nexthop_types_t gtype, + const union g_addr *gate, ifindex_t ifindex); + +/* + * Lookup LSP by its input label. + */ +struct zebra_lsp *mpls_lsp_find(struct zebra_vrf *zvrf, mpls_label_t in_label); + +/* + * Uninstall a particular NHLFE in the forwarding table. If this is + * the only NHLFE, the entire LSP forwarding entry has to be deleted. + */ +int mpls_lsp_uninstall(struct zebra_vrf *zvrf, enum lsp_types_t type, + mpls_label_t in_label, enum nexthop_types_t gtype, + const union g_addr *gate, ifindex_t ifindex, + bool backup_p); + +/* + * Uninstall all NHLFEs for a particular LSP forwarding entry. + */ +int mpls_lsp_uninstall_all_vrf(struct zebra_vrf *zvrf, enum lsp_types_t type, + mpls_label_t in_label); + +#if defined(HAVE_CUMULUS) +/* + * Check that the label values used in LSP creation are consistent. The + * main criteria is that if there is ECMP, the label operation must still + * be consistent - i.e., all paths either do a swap or do PHP. This is due + * to current HW restrictions. 
+ */ +int zebra_mpls_lsp_label_consistent(struct zebra_vrf *zvrf, + mpls_label_t in_label, + mpls_label_t out_label, + enum nexthop_types_t gtype, + union g_addr *gate, ifindex_t ifindex); +#endif /* HAVE_CUMULUS */ + +/* + * Add static LSP entry. This may be the first entry for this incoming label + * or an additional nexthop; an existing entry may also have outgoing label + * changed. + * Note: The label operation (swap or PHP) is common for the LSP entry (all + * NHLFEs). + */ +int zebra_mpls_static_lsp_add(struct zebra_vrf *zvrf, mpls_label_t in_label, + mpls_label_t out_label, + enum nexthop_types_t gtype, union g_addr *gate, + ifindex_t ifindex); + +/* + * Delete static LSP entry. This may be the delete of one particular + * NHLFE for this incoming label or the delete of the entire entry (i.e., + * all NHLFEs). + * NOTE: Delete of the only NHLFE will also end up deleting the entire + * LSP configuration. + */ +int zebra_mpls_static_lsp_del(struct zebra_vrf *zvrf, mpls_label_t in_label, + enum nexthop_types_t gtype, union g_addr *gate, + ifindex_t ifindex); + +/* + * Process LSP update results from zebra dataplane. + */ +/* Forward ref of dplane update context type */ +struct zebra_dplane_ctx; + +void zebra_mpls_lsp_dplane_result(struct zebra_dplane_ctx *ctx); + +/* Process async dplane notifications. */ +void zebra_mpls_process_dplane_notify(struct zebra_dplane_ctx *ctx); + +/* + * Schedule all MPLS label forwarding entries for processing. + * Called upon changes that may affect one or more of them such as + * interface or nexthop state changes. + */ +void zebra_mpls_lsp_schedule(struct zebra_vrf *zvrf); + +/* + * Display MPLS label forwarding table for a specific LSP + * (VTY command handler). + */ +void zebra_mpls_print_lsp(struct vty *vty, struct zebra_vrf *zvrf, + mpls_label_t label, bool use_json); + +/* + * Display MPLS label forwarding table (VTY command handler). 
+ */ +void zebra_mpls_print_lsp_table(struct vty *vty, struct zebra_vrf *zvrf, + bool use_json); + +/* + * Display MPLS LSP configuration of all static LSPs (VTY command handler). + */ +int zebra_mpls_write_lsp_config(struct vty *vty, struct zebra_vrf *zvrf); + +/* + * Called when VRF becomes inactive, cleans up information but keeps + * the table itself. + * NOTE: Currently supported only for default VRF. + */ +void zebra_mpls_cleanup_tables(struct zebra_vrf *zvrf); + +/* + * Called upon process exiting, need to delete LSP forwarding + * entries from the kernel. + * NOTE: Currently supported only for default VRF. + */ +void zebra_mpls_close_tables(struct zebra_vrf *zvrf); + +/* + * Allocate MPLS tables for this VRF. + * NOTE: Currently supported only for default VRF. + */ +void zebra_mpls_init_tables(struct zebra_vrf *zvrf); + +/* + * If mpls is turned on *after* FRR is brought + * up let's actually notice this and turn on + * the relevant bits to make it work. + */ +void zebra_mpls_turned_on(void); + +/* + * Global MPLS initialization. + */ +void zebra_mpls_init(void); + +/* + * MPLS VTY. + */ +void zebra_mpls_vty_init(void); + +/* + * When cleaning up a client connection ensure that there are no + * vrf labels that need cleaning up too + */ +void zebra_mpls_client_cleanup_vrf_label(uint8_t proto); + +/* Inline functions. */ + +/* + * Distance (priority) definition for LSP NHLFE. + */ +static inline uint8_t lsp_distance(enum lsp_types_t type) +{ + switch (type) { + case ZEBRA_LSP_STATIC: + return (route_distance(ZEBRA_ROUTE_STATIC)); + case ZEBRA_LSP_LDP: + return (route_distance(ZEBRA_ROUTE_LDP)); + case ZEBRA_LSP_BGP: + return (route_distance(ZEBRA_ROUTE_BGP)); + case ZEBRA_LSP_NONE: + case ZEBRA_LSP_SHARP: + case ZEBRA_LSP_OSPF_SR: + case ZEBRA_LSP_ISIS_SR: + case ZEBRA_LSP_SRTE: + return 150; + } + + /* + * For some reason certain compilers do not believe + * that all the cases have been handled. 
And + * WTF does this work differently than when I removed + * the default case???? + */ + return 150; +} + +/* + * Map RIB type to LSP type. Used when labeled-routes from BGP + * are converted into LSPs. + */ +static inline enum lsp_types_t lsp_type_from_re_type(int re_type) +{ + switch (re_type) { + case ZEBRA_ROUTE_STATIC: + return ZEBRA_LSP_STATIC; + case ZEBRA_ROUTE_LDP: + return ZEBRA_LSP_LDP; + case ZEBRA_ROUTE_BGP: + return ZEBRA_LSP_BGP; + case ZEBRA_ROUTE_OSPF: + return ZEBRA_LSP_OSPF_SR; + case ZEBRA_ROUTE_ISIS: + return ZEBRA_LSP_ISIS_SR; + case ZEBRA_ROUTE_SHARP: + return ZEBRA_LSP_SHARP; + case ZEBRA_ROUTE_SRTE: + return ZEBRA_LSP_SRTE; + default: + return ZEBRA_LSP_NONE; + } +} + +/* + * Map LSP type to RIB type. + */ +static inline int re_type_from_lsp_type(enum lsp_types_t lsp_type) +{ + switch (lsp_type) { + case ZEBRA_LSP_STATIC: + return ZEBRA_ROUTE_STATIC; + case ZEBRA_LSP_LDP: + return ZEBRA_ROUTE_LDP; + case ZEBRA_LSP_BGP: + return ZEBRA_ROUTE_BGP; + case ZEBRA_LSP_OSPF_SR: + return ZEBRA_ROUTE_OSPF; + case ZEBRA_LSP_ISIS_SR: + return ZEBRA_ROUTE_ISIS; + case ZEBRA_LSP_NONE: + return ZEBRA_ROUTE_KERNEL; + case ZEBRA_LSP_SHARP: + return ZEBRA_ROUTE_SHARP; + case ZEBRA_LSP_SRTE: + return ZEBRA_ROUTE_SRTE; + } + + /* + * For some reason certain compilers do not believe + * that all the cases have been handled. And + * WTF does this work differently than when I removed + * the default case???? + */ + return ZEBRA_ROUTE_KERNEL; +} + +/* NHLFE type as printable string. 
*/ +static inline const char *nhlfe_type2str(enum lsp_types_t lsp_type) +{ + switch (lsp_type) { + case ZEBRA_LSP_STATIC: + return "Static"; + case ZEBRA_LSP_LDP: + return "LDP"; + case ZEBRA_LSP_BGP: + return "BGP"; + case ZEBRA_LSP_OSPF_SR: + return "SR (OSPF)"; + case ZEBRA_LSP_ISIS_SR: + return "SR (IS-IS)"; + case ZEBRA_LSP_SHARP: + return "SHARP"; + case ZEBRA_LSP_SRTE: + return "SR-TE"; + case ZEBRA_LSP_NONE: + return "Unknown"; + } + + /* + * For some reason certain compilers do not believe + * that all the cases have been handled. And + * WTF does this work differently than when I removed + * the default case???? + */ + return "Unknown"; +} + +static inline void mpls_mark_lsps_for_processing(struct zebra_vrf *zvrf, + struct prefix *p) +{ + struct route_table *table; + struct route_node *rn; + rib_dest_t *dest; + + if (!zvrf) + return; + + table = zvrf->table[family2afi(p->family)][SAFI_UNICAST]; + if (!table) + return; + + rn = route_node_match(table, p); + if (!rn) + return; + + + dest = rib_dest_from_rnode(rn); + SET_FLAG(dest->flags, RIB_DEST_UPDATE_LSPS); +} + +static inline void mpls_unmark_lsps_for_processing(struct route_node *rn) +{ + rib_dest_t *dest = rib_dest_from_rnode(rn); + + UNSET_FLAG(dest->flags, RIB_DEST_UPDATE_LSPS); +} + +static inline int mpls_should_lsps_be_processed(struct route_node *rn) +{ + rib_dest_t *dest = rib_dest_from_rnode(rn); + + return !!CHECK_FLAG(dest->flags, RIB_DEST_UPDATE_LSPS); +} + +/* Global variables. */ +extern bool mpls_enabled; +extern bool mpls_pw_reach_strict; /* Strict pseudowire reachability checking */ + +#ifdef __cplusplus +} +#endif + +#endif /*_ZEBRA_MPLS_H */ diff --git a/zebra/zebra_mpls_netlink.c b/zebra/zebra_mpls_netlink.c new file mode 100644 index 0000000..ce7702b --- /dev/null +++ b/zebra/zebra_mpls_netlink.c @@ -0,0 +1,88 @@ +/* MPLS forwarding table updates using netlink over GNU/Linux system. + * Copyright (C) 2016 Cumulus Networks, Inc. + * + * This file is part of Quagga. 
+ * + * Quagga is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License as published by the + * Free Software Foundation; either version 2, or (at your option) any + * later version. + * + * Quagga is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. + * + * You should have received a copy of the GNU General Public License along + * with this program; see the file COPYING; if not, write to the Free Software + * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA + */ + +#include <zebra.h> + +#ifdef HAVE_NETLINK + +#include "zebra/debug.h" +#include "zebra/rt.h" +#include "zebra/rt_netlink.h" +#include "zebra/zebra_mpls.h" +#include "zebra/kernel_netlink.h" + +ssize_t netlink_lsp_msg_encoder(struct zebra_dplane_ctx *ctx, void *buf, + size_t buflen) +{ + int cmd; + + /* Call to netlink layer based on type of update */ + if (dplane_ctx_get_op(ctx) == DPLANE_OP_LSP_DELETE) { + cmd = RTM_DELROUTE; + } else if (dplane_ctx_get_op(ctx) == DPLANE_OP_LSP_INSTALL || + dplane_ctx_get_op(ctx) == DPLANE_OP_LSP_UPDATE) { + + /* Validate */ + if (dplane_ctx_get_best_nhlfe(ctx) == NULL) { + if (IS_ZEBRA_DEBUG_KERNEL || IS_ZEBRA_DEBUG_MPLS) + zlog_debug("LSP in-label %u: update fails, no best NHLFE", + dplane_ctx_get_in_label(ctx)); + return -1; + } + + cmd = RTM_NEWROUTE; + } else + /* Invalid op? 
*/ + return -1; + + return netlink_mpls_multipath_msg_encode(cmd, ctx, buf, buflen); +} + +enum netlink_msg_status netlink_put_lsp_update_msg(struct nl_batch *bth, + struct zebra_dplane_ctx *ctx) +{ + return netlink_batch_add_msg(bth, ctx, netlink_lsp_msg_encoder, false); +} + +/* + * Pseudowire update api - not supported by netlink as of 12/18, + * but note that the default has been to report 'success' for pw updates + * on unsupported platforms. + */ +enum netlink_msg_status netlink_put_pw_update_msg(struct nl_batch *bth, + struct zebra_dplane_ctx *ctx) +{ + return FRR_NETLINK_SUCCESS; +} + +int mpls_kernel_init(void) +{ + struct stat st; + + /* + * Check if the MPLS module is loaded in the kernel. + */ + if (stat("/proc/sys/net/mpls", &st) != 0) + return -1; + + return 0; +}; + +#endif /* HAVE_NETLINK */ diff --git a/zebra/zebra_mpls_null.c b/zebra/zebra_mpls_null.c new file mode 100644 index 0000000..2cc3f3b --- /dev/null +++ b/zebra/zebra_mpls_null.c @@ -0,0 +1,46 @@ +/* + * Copyright (C) 2016 by Open Source Routing. + * + * This file is part of GNU Zebra. + * + * GNU Zebra is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License as published by the + * Free Software Foundation; either version 2, or (at your option) any + * later version. + * + * GNU Zebra is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. 
+ * + * You should have received a copy of the GNU General Public License along + * with this program; see the file COPYING; if not, write to the Free Software + * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA + */ + +#include <zebra.h> +#include "zebra/rt.h" +#include "zebra/zebra_mpls.h" + +#if !defined(HAVE_NETLINK) && !defined(OPEN_BSD) + +int mpls_kernel_init(void) +{ + return -1; +}; + +/* + * Pseudowire update api - note that the default has been + * to report 'success' for pw updates on unsupported platforms. + */ +enum zebra_dplane_result kernel_pw_update(struct zebra_dplane_ctx *ctx) +{ + return ZEBRA_DPLANE_REQUEST_SUCCESS; +} + +enum zebra_dplane_result kernel_lsp_update(struct zebra_dplane_ctx *ctx) +{ + return ZEBRA_DPLANE_REQUEST_FAILURE; +} + +#endif /* !defined(HAVE_NETLINK) && !defined(OPEN_BSD) */ diff --git a/zebra/zebra_mpls_openbsd.c b/zebra/zebra_mpls_openbsd.c new file mode 100644 index 0000000..cdf3493 --- /dev/null +++ b/zebra/zebra_mpls_openbsd.c @@ -0,0 +1,467 @@ +/* + * Copyright (C) 2016 by Open Source Routing. + * + * This file is part of GNU Zebra. + * + * GNU Zebra is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License as published by the + * Free Software Foundation; either version 2, or (at your option) any + * later version. + * + * GNU Zebra is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. 
+ * + * You should have received a copy of the GNU General Public License along + * with this program; see the file COPYING; if not, write to the Free Software + * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA + */ + +#include <zebra.h> + +#ifdef OPEN_BSD + +#include <netmpls/mpls.h> +#include "zebra/rt.h" +#include "zebra/zebra_mpls.h" +#include "zebra/debug.h" +#include "zebra/zebra_errors.h" +#include "zebra/zebra_router.h" + +#include "privs.h" +#include "prefix.h" +#include "interface.h" +#include "log.h" +#include "lib_errors.h" + +extern struct zebra_privs_t zserv_privs; + +struct { + uint32_t rtseq; + int fd; + int ioctl_fd; +} kr_state; + +static int kernel_send_rtmsg_v4(int action, mpls_label_t in_label, + const struct zebra_nhlfe *nhlfe) +{ + struct iovec iov[5]; + struct rt_msghdr hdr; + struct sockaddr_mpls sa_label_in, sa_label_out; + struct sockaddr_in nexthop; + int iovcnt = 0; + int ret; + + if (IS_ZEBRA_DEBUG_KERNEL) + zlog_debug("%s: 0x%x, label=%u", __func__, action, in_label); + + /* initialize header */ + memset(&hdr, 0, sizeof(hdr)); + hdr.rtm_version = RTM_VERSION; + + hdr.rtm_type = action; + hdr.rtm_flags = RTF_UP; + hdr.rtm_fmask = RTF_MPLS; + hdr.rtm_seq = kr_state.rtseq++; /* overflow doesn't matter */ + hdr.rtm_msglen = sizeof(hdr); + hdr.rtm_hdrlen = sizeof(struct rt_msghdr); + hdr.rtm_priority = 0; + /* adjust iovec */ + iov[iovcnt].iov_base = &hdr; + iov[iovcnt++].iov_len = sizeof(hdr); + + /* in label */ + memset(&sa_label_in, 0, sizeof(sa_label_in)); + sa_label_in.smpls_len = sizeof(sa_label_in); + sa_label_in.smpls_family = AF_MPLS; + sa_label_in.smpls_label = htonl(in_label << MPLS_LABEL_OFFSET); + /* adjust header */ + hdr.rtm_flags |= RTF_MPLS | RTF_MPATH; + hdr.rtm_addrs |= RTA_DST; + hdr.rtm_msglen += sizeof(sa_label_in); + /* adjust iovec */ + iov[iovcnt].iov_base = &sa_label_in; + iov[iovcnt++].iov_len = sizeof(sa_label_in); + + /* nexthop */ + memset(&nexthop, 0, sizeof(nexthop)); + 
nexthop.sin_len = sizeof(nexthop); + nexthop.sin_family = AF_INET; + nexthop.sin_addr = nhlfe->nexthop->gate.ipv4; + /* adjust header */ + hdr.rtm_flags |= RTF_GATEWAY; + hdr.rtm_addrs |= RTA_GATEWAY; + hdr.rtm_msglen += sizeof(nexthop); + /* adjust iovec */ + iov[iovcnt].iov_base = &nexthop; + iov[iovcnt++].iov_len = sizeof(nexthop); + + /* If action is RTM_DELETE we have to get rid of MPLS infos */ + if (action != RTM_DELETE) { + memset(&sa_label_out, 0, sizeof(sa_label_out)); + sa_label_out.smpls_len = sizeof(sa_label_out); + sa_label_out.smpls_family = AF_MPLS; + sa_label_out.smpls_label = + htonl(nhlfe->nexthop->nh_label->label[0] + << MPLS_LABEL_OFFSET); + /* adjust header */ + hdr.rtm_addrs |= RTA_SRC; + hdr.rtm_flags |= RTF_MPLS; + hdr.rtm_msglen += sizeof(sa_label_out); + /* adjust iovec */ + iov[iovcnt].iov_base = &sa_label_out; + iov[iovcnt++].iov_len = sizeof(sa_label_out); + + if (nhlfe->nexthop->nh_label->label[0] == MPLS_LABEL_IMPLNULL) + hdr.rtm_mpls = MPLS_OP_POP; + else + hdr.rtm_mpls = MPLS_OP_SWAP; + } + + frr_with_privs(&zserv_privs) { + ret = writev(kr_state.fd, iov, iovcnt); + } + + if (ret == -1) + flog_err_sys(EC_LIB_SOCKET, "%s: %s", __func__, + safe_strerror(errno)); + + return ret; +} + +#if !defined(ROUNDUP) +#define ROUNDUP(a) \ + (((a) & (sizeof(long) - 1)) ? 
(1 + ((a) | (sizeof(long) - 1))) : (a)) +#endif + +static int kernel_send_rtmsg_v6(int action, mpls_label_t in_label, + const struct zebra_nhlfe *nhlfe) +{ + struct iovec iov[5]; + struct rt_msghdr hdr; + struct sockaddr_mpls sa_label_in, sa_label_out; + struct pad { + struct sockaddr_in6 addr; + char pad[sizeof(long)]; /* thank you IPv6 */ + } nexthop; + int iovcnt = 0; + int ret; + + if (IS_ZEBRA_DEBUG_KERNEL) + zlog_debug("%s: 0x%x, label=%u", __func__, action, in_label); + + /* initialize header */ + memset(&hdr, 0, sizeof(hdr)); + hdr.rtm_version = RTM_VERSION; + + hdr.rtm_type = action; + hdr.rtm_flags = RTF_UP; + hdr.rtm_fmask = RTF_MPLS; + hdr.rtm_seq = kr_state.rtseq++; /* overflow doesn't matter */ + hdr.rtm_msglen = sizeof(hdr); + hdr.rtm_hdrlen = sizeof(struct rt_msghdr); + hdr.rtm_priority = 0; + /* adjust iovec */ + iov[iovcnt].iov_base = &hdr; + iov[iovcnt++].iov_len = sizeof(hdr); + + /* in label */ + memset(&sa_label_in, 0, sizeof(sa_label_in)); + sa_label_in.smpls_len = sizeof(sa_label_in); + sa_label_in.smpls_family = AF_MPLS; + sa_label_in.smpls_label = htonl(in_label << MPLS_LABEL_OFFSET); + /* adjust header */ + hdr.rtm_flags |= RTF_MPLS | RTF_MPATH; + hdr.rtm_addrs |= RTA_DST; + hdr.rtm_msglen += sizeof(sa_label_in); + /* adjust iovec */ + iov[iovcnt].iov_base = &sa_label_in; + iov[iovcnt++].iov_len = sizeof(sa_label_in); + + /* nexthop */ + memset(&nexthop, 0, sizeof(nexthop)); + nexthop.addr.sin6_len = sizeof(struct sockaddr_in6); + nexthop.addr.sin6_family = AF_INET6; + nexthop.addr.sin6_addr = nhlfe->nexthop->gate.ipv6; + if (IN6_IS_ADDR_LINKLOCAL(&nexthop.addr.sin6_addr)) { + uint16_t tmp16; + struct sockaddr_in6 *sin6 = &nexthop.addr; + + nexthop.addr.sin6_scope_id = nhlfe->nexthop->ifindex; + + memcpy(&tmp16, &sin6->sin6_addr.s6_addr[2], sizeof(tmp16)); + tmp16 = htons(sin6->sin6_scope_id); + memcpy(&sin6->sin6_addr.s6_addr[2], &tmp16, sizeof(tmp16)); + sin6->sin6_scope_id = 0; + } + + /* adjust header */ + hdr.rtm_flags |= 
RTF_GATEWAY; + hdr.rtm_addrs |= RTA_GATEWAY; + hdr.rtm_msglen += ROUNDUP(sizeof(struct sockaddr_in6)); + /* adjust iovec */ + iov[iovcnt].iov_base = &nexthop; + iov[iovcnt++].iov_len = ROUNDUP(sizeof(struct sockaddr_in6)); + + /* If action is RTM_DELETE we have to get rid of MPLS infos */ + if (action != RTM_DELETE) { + memset(&sa_label_out, 0, sizeof(sa_label_out)); + sa_label_out.smpls_len = sizeof(sa_label_out); + sa_label_out.smpls_family = AF_MPLS; + sa_label_out.smpls_label = + htonl(nhlfe->nexthop->nh_label->label[0] + << MPLS_LABEL_OFFSET); + /* adjust header */ + hdr.rtm_addrs |= RTA_SRC; + hdr.rtm_flags |= RTF_MPLS; + hdr.rtm_msglen += sizeof(sa_label_out); + /* adjust iovec */ + iov[iovcnt].iov_base = &sa_label_out; + iov[iovcnt++].iov_len = sizeof(sa_label_out); + + if (nhlfe->nexthop->nh_label->label[0] == MPLS_LABEL_IMPLNULL) + hdr.rtm_mpls = MPLS_OP_POP; + else + hdr.rtm_mpls = MPLS_OP_SWAP; + } + + frr_with_privs(&zserv_privs) { + ret = writev(kr_state.fd, iov, iovcnt); + } + + if (ret == -1) + flog_err_sys(EC_LIB_SOCKET, "%s: %s", __func__, + safe_strerror(errno)); + + return ret; +} + +static int kernel_lsp_cmd(struct zebra_dplane_ctx *ctx) +{ + const struct nhlfe_list_head *head; + const struct zebra_nhlfe *nhlfe; + const struct nexthop *nexthop = NULL; + unsigned int nexthop_num = 0; + int action; + + switch (dplane_ctx_get_op(ctx)) { + case DPLANE_OP_LSP_DELETE: + action = RTM_DELETE; + break; + case DPLANE_OP_LSP_INSTALL: + action = RTM_ADD; + break; + case DPLANE_OP_LSP_UPDATE: + action = RTM_CHANGE; + break; + default: + return -1; + } + + head = dplane_ctx_get_nhlfe_list(ctx); + frr_each(nhlfe_list_const, head, nhlfe) { + nexthop = nhlfe->nexthop; + if (!nexthop) + continue; + + if (nexthop_num >= zrouter.multipath_num) + break; + + if (((action == RTM_ADD || action == RTM_CHANGE) + && (CHECK_FLAG(nhlfe->flags, NHLFE_FLAG_SELECTED) + && CHECK_FLAG(nexthop->flags, NEXTHOP_FLAG_ACTIVE))) + || (action == RTM_DELETE + && 
(CHECK_FLAG(nhlfe->flags, NHLFE_FLAG_INSTALLED) + && CHECK_FLAG(nexthop->flags, NEXTHOP_FLAG_FIB)))) { + if (nhlfe->nexthop->nh_label->num_labels > 1) { + flog_warn(EC_ZEBRA_MAX_LABELS_PUSH, + "%s: can't push %u labels at once (maximum is 1)", + __func__, + nhlfe->nexthop->nh_label->num_labels); + continue; + } + + nexthop_num++; + + switch (NHLFE_FAMILY(nhlfe)) { + case AF_INET: + kernel_send_rtmsg_v4( + action, + dplane_ctx_get_in_label(ctx), + nhlfe); + break; + case AF_INET6: + kernel_send_rtmsg_v6( + action, + dplane_ctx_get_in_label(ctx), + nhlfe); + break; + default: + break; + } + } + } + + return 0; +} + +enum zebra_dplane_result kernel_lsp_update(struct zebra_dplane_ctx *ctx) +{ + int ret; + + ret = kernel_lsp_cmd(ctx); + + return (ret == 0 ? + ZEBRA_DPLANE_REQUEST_SUCCESS : ZEBRA_DPLANE_REQUEST_FAILURE); +} + +static enum zebra_dplane_result kmpw_install(struct zebra_dplane_ctx *ctx) +{ + struct ifreq ifr; + struct ifmpwreq imr; + struct sockaddr_storage ss; + struct sockaddr_in *sa_in = (struct sockaddr_in *)&ss; + struct sockaddr_in6 *sa_in6 = (struct sockaddr_in6 *)&ss; + const union g_addr *gaddr; + + memset(&imr, 0, sizeof(imr)); + switch (dplane_ctx_get_pw_type(ctx)) { + case PW_TYPE_ETHERNET: + imr.imr_type = IMR_TYPE_ETHERNET; + break; + case PW_TYPE_ETHERNET_TAGGED: + imr.imr_type = IMR_TYPE_ETHERNET_TAGGED; + break; + default: + zlog_debug("%s: unhandled pseudowire type (%#X)", __func__, + dplane_ctx_get_pw_type(ctx)); + return ZEBRA_DPLANE_REQUEST_FAILURE; + } + + if (dplane_ctx_get_pw_flags(ctx) & F_PSEUDOWIRE_CWORD) + imr.imr_flags |= IMR_FLAG_CONTROLWORD; + + /* pseudowire nexthop */ + memset(&ss, 0, sizeof(ss)); + gaddr = dplane_ctx_get_pw_dest(ctx); + switch (dplane_ctx_get_pw_af(ctx)) { + case AF_INET: + sa_in->sin_family = AF_INET; + sa_in->sin_len = sizeof(struct sockaddr_in); + sa_in->sin_addr = gaddr->ipv4; + break; + case AF_INET6: + sa_in6->sin6_family = AF_INET6; + sa_in6->sin6_len = sizeof(struct sockaddr_in6); + 
sa_in6->sin6_addr = gaddr->ipv6; + break; + default: + zlog_debug("%s: unhandled pseudowire address-family (%u)", + __func__, dplane_ctx_get_pw_af(ctx)); + return ZEBRA_DPLANE_REQUEST_FAILURE; + } + memcpy(&imr.imr_nexthop, (struct sockaddr *)&ss, + sizeof(imr.imr_nexthop)); + + /* pseudowire local/remote labels */ + imr.imr_lshim.shim_label = dplane_ctx_get_pw_local_label(ctx); + imr.imr_rshim.shim_label = dplane_ctx_get_pw_remote_label(ctx); + + /* ioctl */ + memset(&ifr, 0, sizeof(ifr)); + strlcpy(ifr.ifr_name, dplane_ctx_get_ifname(ctx), + sizeof(ifr.ifr_name)); + ifr.ifr_data = (caddr_t)&imr; + if (ioctl(kr_state.ioctl_fd, SIOCSETMPWCFG, &ifr) == -1) { + flog_err_sys(EC_LIB_SYSTEM_CALL, "ioctl SIOCSETMPWCFG: %s", + safe_strerror(errno)); + return ZEBRA_DPLANE_REQUEST_FAILURE; + } + + return ZEBRA_DPLANE_REQUEST_SUCCESS; +} + +static enum zebra_dplane_result kmpw_uninstall(struct zebra_dplane_ctx *ctx) +{ + struct ifreq ifr; + struct ifmpwreq imr; + + memset(&ifr, 0, sizeof(ifr)); + memset(&imr, 0, sizeof(imr)); + strlcpy(ifr.ifr_name, dplane_ctx_get_ifname(ctx), + sizeof(ifr.ifr_name)); + ifr.ifr_data = (caddr_t)&imr; + if (ioctl(kr_state.ioctl_fd, SIOCSETMPWCFG, &ifr) == -1) { + flog_err_sys(EC_LIB_SYSTEM_CALL, "ioctl SIOCSETMPWCFG: %s", + safe_strerror(errno)); + return ZEBRA_DPLANE_REQUEST_FAILURE; + } + + return ZEBRA_DPLANE_REQUEST_SUCCESS; +} + +/* + * Pseudowire update api for openbsd. 
+ */ +enum zebra_dplane_result kernel_pw_update(struct zebra_dplane_ctx *ctx) +{ + enum zebra_dplane_result result = ZEBRA_DPLANE_REQUEST_FAILURE; + + switch (dplane_ctx_get_op(ctx)) { + case DPLANE_OP_PW_INSTALL: + result = kmpw_install(ctx); + break; + case DPLANE_OP_PW_UNINSTALL: + result = kmpw_uninstall(ctx); + break; + default: + break; + } + + return result; +} + +#define MAX_RTSOCK_BUF 128 * 1024 +int mpls_kernel_init(void) +{ + int rcvbuf, default_rcvbuf; + socklen_t optlen; + + if ((kr_state.fd = socket(AF_ROUTE, SOCK_RAW, 0)) == -1) { + flog_err_sys(EC_LIB_SOCKET, "%s: socket", __func__); + return -1; + } + + if ((kr_state.ioctl_fd = socket(AF_INET, SOCK_DGRAM | SOCK_NONBLOCK, 0)) + == -1) { + flog_err_sys(EC_LIB_SOCKET, "%s: ioctl socket", __func__); + return -1; + } + + /* grow receive buffer, don't wanna miss messages */ + optlen = sizeof(default_rcvbuf); + if (getsockopt(kr_state.fd, SOL_SOCKET, SO_RCVBUF, &default_rcvbuf, + &optlen) + == -1) + flog_err_sys(EC_LIB_SOCKET, + "kr_init getsockopt SOL_SOCKET SO_RCVBUF"); + else + for (rcvbuf = MAX_RTSOCK_BUF; + rcvbuf > default_rcvbuf + && setsockopt(kr_state.fd, SOL_SOCKET, SO_RCVBUF, &rcvbuf, + sizeof(rcvbuf)) + == -1 + && errno == ENOBUFS; + rcvbuf /= 2) + ; /* nothing */ + + kr_state.rtseq = 1; + + /* Strict pseudowire reachability checking required for obsd */ + mpls_pw_reach_strict = true; + + return 0; +} + +#endif /* OPEN_BSD */ diff --git a/zebra/zebra_mpls_vty.c b/zebra/zebra_mpls_vty.c new file mode 100644 index 0000000..ca66e03 --- /dev/null +++ b/zebra/zebra_mpls_vty.c @@ -0,0 +1,482 @@ +/* Zebra MPLS VTY functions + * Copyright (C) 2002 Kunihiro Ishiguro + * + * This file is part of GNU Zebra. + * + * GNU Zebra is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License as published by the + * Free Software Foundation; either version 2, or (at your option) any + * later version. 
+ * + * GNU Zebra is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. + * + * You should have received a copy of the GNU General Public License along + * with this program; see the file COPYING; if not, write to the Free Software + * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA + */ + +#include <zebra.h> + +#include "memory.h" +#include "if.h" +#include "prefix.h" +#include "command.h" +#include "table.h" +#include "rib.h" +#include "nexthop.h" +#include "vrf.h" +#include "mpls.h" +#include "lib/json.h" + +#include "zebra/zserv.h" +#include "zebra/zebra_vrf.h" +#include "zebra/zebra_mpls.h" +#include "zebra/zebra_rnh.h" +#include "zebra/redistribute.h" +#include "zebra/zebra_routemap.h" + +static int zebra_mpls_transit_lsp(struct vty *vty, int add_cmd, + const char *inlabel_str, const char *gate_str, + const char *outlabel_str, + const char *flag_str) +{ + struct zebra_vrf *zvrf; + int ret; + enum nexthop_types_t gtype; + union g_addr gate; + mpls_label_t label; + mpls_label_t in_label, out_label; + + if (!mpls_enabled) { + vty_out(vty, + "%% MPLS not turned on in kernel, ignoring command\n"); + return CMD_WARNING_CONFIG_FAILED; + } + + zvrf = vrf_info_lookup(VRF_DEFAULT); + if (!zvrf) { + vty_out(vty, "%% Default VRF does not exist\n"); + return CMD_WARNING_CONFIG_FAILED; + } + + if (!inlabel_str) { + vty_out(vty, "%% No Label Information\n"); + return CMD_WARNING_CONFIG_FAILED; + } + + out_label = MPLS_LABEL_IMPLICIT_NULL; /* as initialization */ + label = atoi(inlabel_str); + if (!IS_MPLS_UNRESERVED_LABEL(label)) { + vty_out(vty, "%% Invalid label\n"); + return CMD_WARNING_CONFIG_FAILED; + } + + if (add_cmd) { + if (!gate_str) { + vty_out(vty, "%% No Nexthop Information\n"); + return CMD_WARNING_CONFIG_FAILED; + } + if (!outlabel_str) { + vty_out(vty, "%% No 
Outgoing label Information\n"); + return CMD_WARNING_CONFIG_FAILED; + } + } + + in_label = label; + gtype = NEXTHOP_TYPE_BLACKHOLE; /* as initialization */ + + if (gate_str) { + /* Gateway is a IPv4 or IPv6 nexthop. */ + ret = inet_pton(AF_INET6, gate_str, &gate.ipv6); + if (ret == 1) + gtype = NEXTHOP_TYPE_IPV6; + else { + ret = inet_pton(AF_INET, gate_str, &gate.ipv4); + if (ret == 1) + gtype = NEXTHOP_TYPE_IPV4; + else { + vty_out(vty, "%% Invalid nexthop\n"); + return CMD_WARNING_CONFIG_FAILED; + } + } + } + + if (outlabel_str) { + if (outlabel_str[0] == 'i') + out_label = MPLS_LABEL_IMPLICIT_NULL; + else if (outlabel_str[0] == 'e' && gtype == NEXTHOP_TYPE_IPV4) + out_label = MPLS_LABEL_IPV4_EXPLICIT_NULL; + else if (outlabel_str[0] == 'e' && gtype == NEXTHOP_TYPE_IPV6) + out_label = MPLS_LABEL_IPV6_EXPLICIT_NULL; + else + out_label = atoi(outlabel_str); + } + + if (add_cmd) { +#if defined(HAVE_CUMULUS) + /* Check that label value is consistent. */ + if (!zebra_mpls_lsp_label_consistent(zvrf, in_label, out_label, + gtype, &gate, 0)) { + vty_out(vty, "%% Label value not consistent\n"); + return CMD_WARNING_CONFIG_FAILED; + } +#endif /* HAVE_CUMULUS */ + + ret = zebra_mpls_static_lsp_add(zvrf, in_label, out_label, + gtype, &gate, 0); + } else + ret = zebra_mpls_static_lsp_del(zvrf, in_label, gtype, &gate, + 0); + + if (ret != 0) { + vty_out(vty, "%% LSP cannot be %s\n", + add_cmd ? 
"added" : "deleted"); + return CMD_WARNING_CONFIG_FAILED; + } + + return CMD_SUCCESS; +} + +DEFUN (mpls_transit_lsp, + mpls_transit_lsp_cmd, + "mpls lsp (16-1048575) <A.B.C.D|X:X::X:X> <(16-1048575)|explicit-null|implicit-null>", + MPLS_STR + "Establish label switched path\n" + "Incoming MPLS label\n" + "IPv4 gateway address\n" + "IPv6 gateway address\n" + "Outgoing MPLS label\n" + "Use Explicit-Null label\n" + "Use Implicit-Null label\n") +{ + return zebra_mpls_transit_lsp(vty, 1, argv[2]->arg, argv[3]->arg, + argv[4]->arg, NULL); +} + +DEFUN (no_mpls_transit_lsp, + no_mpls_transit_lsp_cmd, + "no mpls lsp (16-1048575) <A.B.C.D|X:X::X:X>", + NO_STR + MPLS_STR + "Establish label switched path\n" + "Incoming MPLS label\n" + "IPv4 gateway address\n" + "IPv6 gateway address\n") +{ + return zebra_mpls_transit_lsp(vty, 0, argv[3]->arg, argv[4]->arg, NULL, + NULL); +} + +ALIAS(no_mpls_transit_lsp, no_mpls_transit_lsp_out_label_cmd, + "no mpls lsp (16-1048575) <A.B.C.D|X:X::X:X> <(16-1048575)|explicit-null|implicit-null>", + NO_STR MPLS_STR + "Establish label switched path\n" + "Incoming MPLS label\n" + "IPv4 gateway address\n" + "IPv6 gateway address\n" + "Outgoing MPLS label\n" + "Use Explicit-Null label\n" + "Use Implicit-Null label\n") + +DEFUN (no_mpls_transit_lsp_all, + no_mpls_transit_lsp_all_cmd, + "no mpls lsp (16-1048575)", + NO_STR + MPLS_STR + "Establish label switched path\n" + "Incoming MPLS label\n") +{ + return zebra_mpls_transit_lsp(vty, 0, argv[3]->arg, NULL, NULL, NULL); +} + +static int zebra_mpls_bind(struct vty *vty, int add_cmd, const char *prefix, + const char *label_str) +{ + struct zebra_vrf *zvrf; + struct prefix p; + uint32_t label; + int ret; + + zvrf = vrf_info_lookup(VRF_DEFAULT); + if (!zvrf) { + vty_out(vty, "%% Default VRF does not exist\n"); + return CMD_WARNING_CONFIG_FAILED; + } + + memset(&p, 0, sizeof(p)); + ret = str2prefix(prefix, &p); + if (ret <= 0) { + vty_out(vty, "%% Malformed address\n"); + return CMD_WARNING_CONFIG_FAILED; + 
} + + if (add_cmd) { + if (!label_str) { + vty_out(vty, "%% No label binding specified\n"); + return CMD_WARNING_CONFIG_FAILED; + } + + if (!strcmp(label_str, "implicit-null")) + label = MPLS_LABEL_IMPLICIT_NULL; + else if (!strcmp(label_str, "explicit-null")) { + if (p.family == AF_INET) + label = MPLS_LABEL_IPV4_EXPLICIT_NULL; + else + label = MPLS_LABEL_IPV6_EXPLICIT_NULL; + } else { + label = atoi(label_str); + if (!IS_MPLS_UNRESERVED_LABEL(label)) { + vty_out(vty, "%% Invalid label\n"); + return CMD_WARNING_CONFIG_FAILED; + } + if (zebra_mpls_label_already_bound(zvrf, label)) { + vty_out(vty, + "%% Label already bound to a FEC\n"); + return CMD_WARNING_CONFIG_FAILED; + } + } + + ret = zebra_mpls_static_fec_add(zvrf, &p, label); + } else + ret = zebra_mpls_static_fec_del(zvrf, &p); + + if (ret) { + vty_out(vty, "%% FEC to label binding cannot be %s\n", + add_cmd ? "added" : "deleted"); + return CMD_WARNING_CONFIG_FAILED; + } + + return CMD_SUCCESS; +} + +DEFUN (mpls_label_bind, + mpls_label_bind_cmd, + "mpls label bind <A.B.C.D/M|X:X::X:X/M> <(16-1048575)|implicit-null|explicit-null>", + MPLS_STR + "Label configuration\n" + "Establish FEC to label binding\n" + "IPv4 prefix\n" + "IPv6 prefix\n" + "MPLS Label to bind\n" + "Use Implicit-Null Label\n" + "Use Explicit-Null Label\n") +{ + return zebra_mpls_bind(vty, 1, argv[3]->arg, argv[4]->arg); +} + +DEFUN (no_mpls_label_bind, + no_mpls_label_bind_cmd, + "no mpls label bind <A.B.C.D/M|X:X::X:X/M> [<(16-1048575)|implicit-null>]", + NO_STR + MPLS_STR + "Label configuration\n" + "Establish FEC to label binding\n" + "IPv4 prefix\n" + "IPv6 prefix\n" + "MPLS Label to bind\n" + "Use Implicit-Null Label\n") +{ + return zebra_mpls_bind(vty, 0, argv[4]->arg, NULL); +} + +/* MPLS LSP configuration write function. 
*/ +static int zebra_mpls_config(struct vty *vty) +{ + int write = 0; + struct zebra_vrf *zvrf; + + zvrf = vrf_info_lookup(VRF_DEFAULT); + if (!zvrf) + return 0; + + write += zebra_mpls_write_lsp_config(vty, zvrf); + write += zebra_mpls_write_fec_config(vty, zvrf); + write += zebra_mpls_write_label_block_config(vty, zvrf); + return write; +} + +DEFUN (show_mpls_fec, + show_mpls_fec_cmd, + "show mpls fec [<A.B.C.D/M|X:X::X:X/M>]", + SHOW_STR + MPLS_STR + "MPLS FEC table\n" + "FEC to display information about\n" + "FEC to display information about\n") +{ + struct zebra_vrf *zvrf; + struct prefix p; + int ret; + + zvrf = vrf_info_lookup(VRF_DEFAULT); + if (!zvrf) + return 0; + + if (argc == 3) + zebra_mpls_print_fec_table(vty, zvrf); + else { + memset(&p, 0, sizeof(struct prefix)); + ret = str2prefix(argv[3]->arg, &p); + if (ret <= 0) { + vty_out(vty, "%% Malformed address\n"); + return CMD_WARNING; + } + zebra_mpls_print_fec(vty, zvrf, &p); + } + + return CMD_SUCCESS; +} + +DEFUN (show_mpls_table, + show_mpls_table_cmd, + "show mpls table [json]", + SHOW_STR + MPLS_STR + "MPLS table\n" + JSON_STR) +{ + struct zebra_vrf *zvrf; + bool uj = use_json(argc, argv); + + zvrf = vrf_info_lookup(VRF_DEFAULT); + zebra_mpls_print_lsp_table(vty, zvrf, uj); + return CMD_SUCCESS; +} + +DEFUN (show_mpls_table_lsp, + show_mpls_table_lsp_cmd, + "show mpls table (16-1048575) [json]", + SHOW_STR + MPLS_STR + "MPLS table\n" + "LSP to display information about\n" + JSON_STR) +{ + uint32_t label; + struct zebra_vrf *zvrf; + bool uj = use_json(argc, argv); + + zvrf = vrf_info_lookup(VRF_DEFAULT); + label = atoi(argv[3]->arg); + zebra_mpls_print_lsp(vty, zvrf, label, uj); + return CMD_SUCCESS; +} + +DEFUN (show_mpls_status, + show_mpls_status_cmd, + "show mpls status", + SHOW_STR + "MPLS information\n" + "MPLS status\n") +{ + vty_out(vty, "MPLS support enabled: %s\n", + (mpls_enabled) ? 
"yes" + : "no (mpls kernel extensions not detected)"); + return CMD_SUCCESS; +} + +static int zebra_mpls_global_block(struct vty *vty, int add_cmd, + const char *start_label_str, + const char *end_label_str) +{ + int ret; + uint32_t start_label; + uint32_t end_label; + struct zebra_vrf *zvrf; + + zvrf = zebra_vrf_lookup_by_id(VRF_DEFAULT); + if (!zvrf) { + vty_out(vty, "%% Default VRF does not exist\n"); + return CMD_WARNING_CONFIG_FAILED; + } + + if (add_cmd) { + if (!start_label_str || !end_label_str) { + vty_out(vty, "%% Labels not specified\n"); + return CMD_WARNING_CONFIG_FAILED; + } + + start_label = atoi(start_label_str); + end_label = atoi(end_label_str); + if (!IS_MPLS_UNRESERVED_LABEL(start_label) + || !IS_MPLS_UNRESERVED_LABEL(end_label)) { + vty_out(vty, "%% Invalid label\n"); + return CMD_WARNING_CONFIG_FAILED; + } + if (end_label < start_label) { + vty_out(vty, "%% End label is less than Start label\n"); + return CMD_WARNING_CONFIG_FAILED; + } + + ret = zebra_mpls_label_block_add(zvrf, start_label, end_label); + } else + ret = zebra_mpls_label_block_del(zvrf); + + if (ret) { + vty_out(vty, "%% Global label block could not be %s\n", + add_cmd ? "added" : "deleted"); + return CMD_WARNING_CONFIG_FAILED; + } + + return CMD_SUCCESS; +} + +DEFUN (mpls_label_global_block, + mpls_label_global_block_cmd, + "mpls label global-block (16-1048575) (16-1048575)", + MPLS_STR + "Label configuration\n" + "Configure global label block\n" + "Start label\n" + "End label\n") +{ + return zebra_mpls_global_block(vty, 1, argv[3]->arg, argv[4]->arg); +} + +DEFUN (no_mpls_label_global_block, + no_mpls_label_global_block_cmd, + "no mpls label global-block [(16-1048575) (16-1048575)]", + NO_STR + MPLS_STR + "Label configuration\n" + "Configure global label block\n" + "Start label\n" + "End label\n") +{ + return zebra_mpls_global_block(vty, 0, NULL, NULL); +} + +static int zebra_mpls_config(struct vty *vty); +/* MPLS node for MPLS LSP. 
*/ +static struct cmd_node mpls_node = { + .name = "mpls", + .node = MPLS_NODE, + .prompt = "", + .config_write = zebra_mpls_config, +}; + +/* MPLS VTY. */ +void zebra_mpls_vty_init(void) +{ + install_element(VIEW_NODE, &show_mpls_status_cmd); + + install_node(&mpls_node); + + install_element(CONFIG_NODE, &mpls_transit_lsp_cmd); + install_element(CONFIG_NODE, &no_mpls_transit_lsp_cmd); + install_element(CONFIG_NODE, &no_mpls_transit_lsp_out_label_cmd); + install_element(CONFIG_NODE, &no_mpls_transit_lsp_all_cmd); + + install_element(CONFIG_NODE, &mpls_label_bind_cmd); + install_element(CONFIG_NODE, &no_mpls_label_bind_cmd); + + install_element(CONFIG_NODE, &mpls_label_global_block_cmd); + install_element(CONFIG_NODE, &no_mpls_label_global_block_cmd); + + install_element(VIEW_NODE, &show_mpls_table_cmd); + install_element(VIEW_NODE, &show_mpls_table_lsp_cmd); + install_element(VIEW_NODE, &show_mpls_fec_cmd); +} diff --git a/zebra/zebra_mroute.c b/zebra/zebra_mroute.c new file mode 100644 index 0000000..5d38c37 --- /dev/null +++ b/zebra/zebra_mroute.c @@ -0,0 +1,100 @@ +/* zebra_mroute code + * Copyright (C) 2016 Cumulus Networks, Inc. + * Donald Sharp + * + * This file is part of Quagga + * + * Quagga is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License as published by the + * Free Software Foundation; either version 2, or (at your option) any + * later version. + * + * Quagga is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. 
+ * + * You should have received a copy of the GNU General Public License along + * with this program; see the file COPYING; if not, write to the Free Software + * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA + */ + +#include <zebra.h> + +#include "stream.h" +#include "prefix.h" +#include "vrf.h" +#include "rib.h" + +#include "zebra/zserv.h" +#include "zebra/zebra_vrf.h" +#include "zebra/zebra_mroute.h" +#include "zebra/rt.h" +#include "zebra/debug.h" + +void zebra_ipmr_route_stats(ZAPI_HANDLER_ARGS) +{ + struct mcast_route_data mroute; + struct stream *s; + int suc = -1; + + memset(&mroute, 0, sizeof(mroute)); + STREAM_GETL(msg, mroute.family); + + switch (mroute.family) { + case AF_INET: + SET_IPADDR_V4(&mroute.src); + SET_IPADDR_V4(&mroute.grp); + STREAM_GET(&mroute.src.ipaddr_v4, msg, + sizeof(mroute.src.ipaddr_v4)); + STREAM_GET(&mroute.grp.ipaddr_v4, msg, + sizeof(mroute.grp.ipaddr_v4)); + break; + case AF_INET6: + SET_IPADDR_V6(&mroute.src); + SET_IPADDR_V6(&mroute.grp); + STREAM_GET(&mroute.src.ipaddr_v6, msg, + sizeof(mroute.src.ipaddr_v6)); + STREAM_GET(&mroute.grp.ipaddr_v6, msg, + sizeof(mroute.grp.ipaddr_v6)); + break; + default: + zlog_warn("%s: Invalid address family received while parsing", + __func__); + return; + } + + STREAM_GETL(msg, mroute.ifindex); + + if (IS_ZEBRA_DEBUG_KERNEL) + zlog_debug("Asking for (%pIA,%pIA)[%s(%u)] mroute information", + &mroute.src, &mroute.grp, zvrf->vrf->name, + zvrf->vrf->vrf_id); + + suc = kernel_get_ipmr_sg_stats(zvrf, &mroute); + +stream_failure: + s = stream_new(ZEBRA_MAX_PACKET_SIZ); + + stream_reset(s); + + zclient_create_header(s, ZEBRA_IPMR_ROUTE_STATS, zvrf_id(zvrf)); + + if (mroute.family == AF_INET) { + stream_write(s, &mroute.src.ipaddr_v4, + sizeof(mroute.src.ipaddr_v4)); + stream_write(s, &mroute.grp.ipaddr_v4, + sizeof(mroute.grp.ipaddr_v4)); + } else { + stream_write(s, &mroute.src.ipaddr_v6, + sizeof(mroute.src.ipaddr_v6)); + stream_write(s, &mroute.grp.ipaddr_v6, + 
sizeof(mroute.grp.ipaddr_v6)); + } + + stream_put(s, &mroute.lastused, sizeof(mroute.lastused)); + stream_putl(s, (uint32_t)suc); + + stream_putw_at(s, 0, stream_get_endp(s)); + zserv_send_message(client, s); +} diff --git a/zebra/zebra_mroute.h b/zebra/zebra_mroute.h new file mode 100644 index 0000000..6c56c2e --- /dev/null +++ b/zebra/zebra_mroute.h @@ -0,0 +1,45 @@ +/* zebra_mroute.h + * Copyright (C) 2016 Cumulus Networks, Inc. + * Donald Sharp + * + * This file is part of Quagga. + * + * Quagga is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License as published by the + * Free Software Foundation; either version 2, or (at your option) any + * later version. + * + * Quagga is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. + * + * You should have received a copy of the GNU General Public License along + * with this program; see the file COPYING; if not, write to the Free Software + * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA + */ + +#ifndef __ZEBRA_MROUTE_H__ +#define __ZEBRA_MROUTE_H__ + +#include "zebra/zserv.h" + +#ifdef __cplusplus +extern "C" { +#endif + +struct mcast_route_data { + int family; + struct ipaddr src; + struct ipaddr grp; + unsigned int ifindex; + unsigned long long lastused; +}; + +void zebra_ipmr_route_stats(ZAPI_HANDLER_ARGS); + +#ifdef __cplusplus +} +#endif + +#endif diff --git a/zebra/zebra_nb.c b/zebra/zebra_nb.c new file mode 100644 index 0000000..90d4ee7 --- /dev/null +++ b/zebra/zebra_nb.c @@ -0,0 +1,650 @@ +/* + * Copyright (C) 2020 Cumulus Networks, Inc. 
+ * Chirag Shah + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License as published by the Free + * Software Foundation; either version 2 of the License, or (at your option) + * any later version. + * + * This program is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for + * more details. + * + * You should have received a copy of the GNU General Public License along + * with this program; see the file COPYING; if not, write to the Free Software + * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA + */ + +#include <zebra.h> +#include "northbound.h" +#include "libfrr.h" +#include "zebra_nb.h" + +/* clang-format off */ +const struct frr_yang_module_info frr_zebra_info = { + .name = "frr-zebra", + .nodes = { + { + .xpath = "/frr-zebra:zebra/mcast-rpf-lookup", + .cbs = { + .modify = zebra_mcast_rpf_lookup_modify, + } + }, + { + .xpath = "/frr-zebra:zebra/ip-forwarding", + .cbs = { + .modify = zebra_ip_forwarding_modify, + .destroy = zebra_ip_forwarding_destroy, + } + }, + { + .xpath = "/frr-zebra:zebra/ipv6-forwarding", + .cbs = { + .modify = zebra_ipv6_forwarding_modify, + .destroy = zebra_ipv6_forwarding_destroy, + } + }, + { + .xpath = "/frr-zebra:zebra/workqueue-hold-timer", + .cbs = { + .modify = zebra_workqueue_hold_timer_modify, + } + }, + { + .xpath = "/frr-zebra:zebra/zapi-packets", + .cbs = { + .modify = zebra_zapi_packets_modify, + } + }, + { + .xpath = "/frr-zebra:zebra/import-kernel-table/table-id", + .cbs = { + .modify = zebra_import_kernel_table_table_id_modify, + .destroy = zebra_import_kernel_table_table_id_destroy, + } + }, + { + .xpath = "/frr-zebra:zebra/import-kernel-table/distance", + .cbs = { + .modify = zebra_import_kernel_table_distance_modify, + } + }, + { + .xpath = 
"/frr-zebra:zebra/import-kernel-table/route-map", + .cbs = { + .modify = zebra_import_kernel_table_route_map_modify, + .destroy = zebra_import_kernel_table_route_map_destroy, + } + }, + { + .xpath = "/frr-zebra:zebra/allow-external-route-update", + .cbs = { + .create = zebra_allow_external_route_update_create, + .destroy = zebra_allow_external_route_update_destroy, + } + }, + { + .xpath = "/frr-zebra:zebra/dplane-queue-limit", + .cbs = { + .modify = zebra_dplane_queue_limit_modify, + } + }, + { + .xpath = "/frr-zebra:zebra/debugs/debug-events", + .cbs = { + .modify = zebra_debugs_debug_events_modify, + .destroy = zebra_debugs_debug_events_destroy, + } + }, + { + .xpath = "/frr-zebra:zebra/debugs/debug-zapi-send", + .cbs = { + .modify = zebra_debugs_debug_zapi_send_modify, + .destroy = zebra_debugs_debug_zapi_send_destroy, + } + }, + { + .xpath = "/frr-zebra:zebra/debugs/debug-zapi-recv", + .cbs = { + .modify = zebra_debugs_debug_zapi_recv_modify, + .destroy = zebra_debugs_debug_zapi_recv_destroy, + } + }, + { + .xpath = "/frr-zebra:zebra/debugs/debug-zapi-detail", + .cbs = { + .modify = zebra_debugs_debug_zapi_detail_modify, + .destroy = zebra_debugs_debug_zapi_detail_destroy, + } + }, + { + .xpath = "/frr-zebra:zebra/debugs/debug-kernel", + .cbs = { + .modify = zebra_debugs_debug_kernel_modify, + .destroy = zebra_debugs_debug_kernel_destroy, + } + }, + { + .xpath = "/frr-zebra:zebra/debugs/debug-kernel-msg-send", + .cbs = { + .modify = zebra_debugs_debug_kernel_msg_send_modify, + .destroy = zebra_debugs_debug_kernel_msg_send_destroy, + } + }, + { + .xpath = "/frr-zebra:zebra/debugs/debug-kernel-msg-recv", + .cbs = { + .modify = zebra_debugs_debug_kernel_msg_recv_modify, + .destroy = zebra_debugs_debug_kernel_msg_recv_destroy, + } + }, + { + .xpath = "/frr-zebra:zebra/debugs/debug-rib", + .cbs = { + .modify = zebra_debugs_debug_rib_modify, + .destroy = zebra_debugs_debug_rib_destroy, + } + }, + { + .xpath = "/frr-zebra:zebra/debugs/debug-rib-detail", + .cbs = { + 
.modify = zebra_debugs_debug_rib_detail_modify, + .destroy = zebra_debugs_debug_rib_detail_destroy, + } + }, + { + .xpath = "/frr-zebra:zebra/debugs/debug-fpm", + .cbs = { + .modify = zebra_debugs_debug_fpm_modify, + .destroy = zebra_debugs_debug_fpm_destroy, + } + }, + { + .xpath = "/frr-zebra:zebra/debugs/debug-nht", + .cbs = { + .modify = zebra_debugs_debug_nht_modify, + .destroy = zebra_debugs_debug_nht_destroy, + } + }, + { + .xpath = "/frr-zebra:zebra/debugs/debug-nht-detail", + .cbs = { + .modify = zebra_debugs_debug_nht_detail_modify, + .destroy = zebra_debugs_debug_nht_detail_destroy, + } + }, + { + .xpath = "/frr-zebra:zebra/debugs/debug-mpls", + .cbs = { + .modify = zebra_debugs_debug_mpls_modify, + .destroy = zebra_debugs_debug_mpls_destroy, + } + }, + { + .xpath = "/frr-zebra:zebra/debugs/debug-vxlan", + .cbs = { + .modify = zebra_debugs_debug_vxlan_modify, + .destroy = zebra_debugs_debug_vxlan_destroy, + } + }, + { + .xpath = "/frr-zebra:zebra/debugs/debug-pw", + .cbs = { + .modify = zebra_debugs_debug_pw_modify, + .destroy = zebra_debugs_debug_pw_destroy, + } + }, + { + .xpath = "/frr-zebra:zebra/debugs/debug-dplane", + .cbs = { + .modify = zebra_debugs_debug_dplane_modify, + .destroy = zebra_debugs_debug_dplane_destroy, + } + }, + { + .xpath = "/frr-zebra:zebra/debugs/debug-dplane-detail", + .cbs = { + .modify = zebra_debugs_debug_dplane_detail_modify, + .destroy = zebra_debugs_debug_dplane_detail_destroy, + } + }, + { + .xpath = "/frr-zebra:zebra/debugs/debug-mlag", + .cbs = { + .modify = zebra_debugs_debug_mlag_modify, + .destroy = zebra_debugs_debug_mlag_destroy, + } + }, + { + .xpath = "/frr-zebra:get-route-information", + .cbs = { + .rpc = get_route_information_rpc, + } + }, + { + .xpath = "/frr-zebra:get-v6-mroute-info", + .cbs = { + .rpc = get_v6_mroute_info_rpc, + } + }, + { + .xpath = "/frr-zebra:get-vrf-info", + .cbs = { + .rpc = get_vrf_info_rpc, + } + }, + { + .xpath = "/frr-zebra:get-vrf-vni-info", + .cbs = { + .rpc = 
get_vrf_vni_info_rpc, + } + }, + { + .xpath = "/frr-zebra:get-evpn-info", + .cbs = { + .rpc = get_evpn_info_rpc, + } + }, + { + .xpath = "/frr-zebra:get-vni-info", + .cbs = { + .rpc = get_vni_info_rpc, + } + }, + { + .xpath = "/frr-zebra:get-evpn-vni-rmac", + .cbs = { + .rpc = get_evpn_vni_rmac_rpc, + } + }, + { + .xpath = "/frr-zebra:get-evpn-vni-nexthops", + .cbs = { + .rpc = get_evpn_vni_nexthops_rpc, + } + }, + { + .xpath = "/frr-zebra:clear-evpn-dup-addr", + .cbs = { + .rpc = clear_evpn_dup_addr_rpc, + } + }, + { + .xpath = "/frr-zebra:get-evpn-macs", + .cbs = { + .rpc = get_evpn_macs_rpc, + } + }, + { + .xpath = "/frr-zebra:get-evpn-arp-cache", + .cbs = { + .rpc = get_evpn_arp_cache_rpc, + } + }, + { + .xpath = "/frr-zebra:get-pbr-ipset", + .cbs = { + .rpc = get_pbr_ipset_rpc, + } + }, + { + .xpath = "/frr-zebra:get-pbr-iptable", + .cbs = { + .rpc = get_pbr_iptable_rpc, + } + }, + { + .xpath = "/frr-zebra:get-debugs", + .cbs = { + .rpc = get_debugs_rpc, + } + }, + { + .xpath = "/frr-interface:lib/interface/frr-zebra:zebra/ip-addrs", + .cbs = { + .create = lib_interface_zebra_ip_addrs_create, + .destroy = lib_interface_zebra_ip_addrs_destroy, + } + }, + { + .xpath = "/frr-interface:lib/interface/frr-zebra:zebra/ip-addrs/label", + .cbs = { + .modify = lib_interface_zebra_ip_addrs_label_modify, + .destroy = lib_interface_zebra_ip_addrs_label_destroy, + } + }, + { + .xpath = "/frr-interface:lib/interface/frr-zebra:zebra/ip-addrs/ip4-peer", + .cbs = { + .modify = lib_interface_zebra_ip_addrs_ip4_peer_modify, + .destroy = lib_interface_zebra_ip_addrs_ip4_peer_destroy, + } + }, + { + .xpath = "/frr-interface:lib/interface/frr-zebra:zebra/multicast", + .cbs = { + .modify = lib_interface_zebra_multicast_modify, + .destroy = lib_interface_zebra_multicast_destroy, + } + }, + { + .xpath = "/frr-interface:lib/interface/frr-zebra:zebra/link-detect", + .cbs = { + .modify = lib_interface_zebra_link_detect_modify, + .destroy = lib_interface_zebra_link_detect_destroy, + } + }, 
+ { + .xpath = "/frr-interface:lib/interface/frr-zebra:zebra/shutdown", + .cbs = { + .modify = lib_interface_zebra_shutdown_modify, + .destroy = lib_interface_zebra_shutdown_destroy, + } + }, + { + .xpath = "/frr-interface:lib/interface/frr-zebra:zebra/bandwidth", + .cbs = { + .modify = lib_interface_zebra_bandwidth_modify, + .destroy = lib_interface_zebra_bandwidth_destroy, + } + }, + { + .xpath = "/frr-interface:lib/interface/frr-zebra:zebra/state/up-count", + .cbs = { + .get_elem = lib_interface_zebra_state_up_count_get_elem, + } + }, + { + .xpath = "/frr-interface:lib/interface/frr-zebra:zebra/state/down-count", + .cbs = { + .get_elem = lib_interface_zebra_state_down_count_get_elem, + } + }, + { + .xpath = "/frr-interface:lib/interface/frr-zebra:zebra/state/zif-type", + .cbs = { + .get_elem = lib_interface_zebra_state_zif_type_get_elem, + } + }, + { + .xpath = "/frr-interface:lib/interface/frr-zebra:zebra/state/ptm-status", + .cbs = { + .get_elem = lib_interface_zebra_state_ptm_status_get_elem, + } + }, + { + .xpath = "/frr-interface:lib/interface/frr-zebra:zebra/state/vlan-id", + .cbs = { + .get_elem = lib_interface_zebra_state_vlan_id_get_elem, + } + }, + { + .xpath = "/frr-interface:lib/interface/frr-zebra:zebra/state/vni-id", + .cbs = { + .get_elem = lib_interface_zebra_state_vni_id_get_elem, + } + }, + { + .xpath = "/frr-interface:lib/interface/frr-zebra:zebra/state/remote-vtep", + .cbs = { + .get_elem = lib_interface_zebra_state_remote_vtep_get_elem, + } + }, + { + .xpath = "/frr-interface:lib/interface/frr-zebra:zebra/state/mcast-group", + .cbs = { + .get_elem = lib_interface_zebra_state_mcast_group_get_elem, + } + }, + { + .xpath = "/frr-vrf:lib/vrf/frr-zebra:zebra/ribs/rib", + .cbs = { + .get_next = lib_vrf_zebra_ribs_rib_get_next, + .get_keys = lib_vrf_zebra_ribs_rib_get_keys, + .lookup_entry = lib_vrf_zebra_ribs_rib_lookup_entry, + } + }, + { + .xpath = "/frr-vrf:lib/vrf/frr-zebra:zebra/ribs/rib/afi-safi-name", + .cbs = { + .get_elem = 
lib_vrf_zebra_ribs_rib_afi_safi_name_get_elem, + } + }, + { + .xpath = "/frr-vrf:lib/vrf/frr-zebra:zebra/ribs/rib/table-id", + .cbs = { + .get_elem = lib_vrf_zebra_ribs_rib_table_id_get_elem, + } + }, + { + .xpath = "/frr-vrf:lib/vrf/frr-zebra:zebra/ribs/rib/route", + .cbs = { + .get_next = lib_vrf_zebra_ribs_rib_route_get_next, + .get_keys = lib_vrf_zebra_ribs_rib_route_get_keys, + .lookup_entry = lib_vrf_zebra_ribs_rib_route_lookup_entry, + } + }, + { + .xpath = "/frr-vrf:lib/vrf/frr-zebra:zebra/ribs/rib/route/prefix", + .cbs = { + .get_elem = lib_vrf_zebra_ribs_rib_route_prefix_get_elem, + } + }, + { + .xpath = "/frr-vrf:lib/vrf/frr-zebra:zebra/ribs/rib/route/route-entry", + .cbs = { + .get_next = lib_vrf_zebra_ribs_rib_route_route_entry_get_next, + .get_keys = lib_vrf_zebra_ribs_rib_route_route_entry_get_keys, + .lookup_entry = lib_vrf_zebra_ribs_rib_route_route_entry_lookup_entry, + } + }, + { + .xpath = "/frr-vrf:lib/vrf/frr-zebra:zebra/ribs/rib/route/route-entry/protocol", + .cbs = { + .get_elem = lib_vrf_zebra_ribs_rib_route_route_entry_protocol_get_elem, + } + }, + { + .xpath = "/frr-vrf:lib/vrf/frr-zebra:zebra/ribs/rib/route/route-entry/instance", + .cbs = { + .get_elem = lib_vrf_zebra_ribs_rib_route_route_entry_instance_get_elem, + } + }, + { + .xpath = "/frr-vrf:lib/vrf/frr-zebra:zebra/ribs/rib/route/route-entry/distance", + .cbs = { + .get_elem = lib_vrf_zebra_ribs_rib_route_route_entry_distance_get_elem, + } + }, + { + .xpath = "/frr-vrf:lib/vrf/frr-zebra:zebra/ribs/rib/route/route-entry/metric", + .cbs = { + .get_elem = lib_vrf_zebra_ribs_rib_route_route_entry_metric_get_elem, + } + }, + { + .xpath = "/frr-vrf:lib/vrf/frr-zebra:zebra/ribs/rib/route/route-entry/tag", + .cbs = { + .get_elem = lib_vrf_zebra_ribs_rib_route_route_entry_tag_get_elem, + } + }, + { + .xpath = "/frr-vrf:lib/vrf/frr-zebra:zebra/ribs/rib/route/route-entry/selected", + .cbs = { + .get_elem = lib_vrf_zebra_ribs_rib_route_route_entry_selected_get_elem, + } + }, + { + .xpath = 
"/frr-vrf:lib/vrf/frr-zebra:zebra/ribs/rib/route/route-entry/installed", + .cbs = { + .get_elem = lib_vrf_zebra_ribs_rib_route_route_entry_installed_get_elem, + } + }, + { + .xpath = "/frr-vrf:lib/vrf/frr-zebra:zebra/ribs/rib/route/route-entry/failed", + .cbs = { + .get_elem = lib_vrf_zebra_ribs_rib_route_route_entry_failed_get_elem, + } + }, + { + .xpath = "/frr-vrf:lib/vrf/frr-zebra:zebra/ribs/rib/route/route-entry/queued", + .cbs = { + .get_elem = lib_vrf_zebra_ribs_rib_route_route_entry_queued_get_elem, + } + }, + { + .xpath = "/frr-vrf:lib/vrf/frr-zebra:zebra/ribs/rib/route/route-entry/internal-flags", + .cbs = { + .get_elem = lib_vrf_zebra_ribs_rib_route_route_entry_internal_flags_get_elem, + } + }, + { + .xpath = "/frr-vrf:lib/vrf/frr-zebra:zebra/ribs/rib/route/route-entry/internal-status", + .cbs = { + .get_elem = lib_vrf_zebra_ribs_rib_route_route_entry_internal_status_get_elem, + } + }, + { + .xpath = "/frr-vrf:lib/vrf/frr-zebra:zebra/ribs/rib/route/route-entry/uptime", + .cbs = { + .get_elem = lib_vrf_zebra_ribs_rib_route_route_entry_uptime_get_elem, + } + }, + { + .xpath = "/frr-vrf:lib/vrf/frr-zebra:zebra/ribs/rib/route/route-entry/nexthop-group/id", + .cbs = { + .get_elem = lib_vrf_zebra_ribs_rib_route_route_entry_nexthop_group_id_get_elem, + } + }, + { + .xpath = "/frr-vrf:lib/vrf/frr-zebra:zebra/ribs/rib/route/route-entry/nexthop-group/nexthop", + .cbs = { + .get_next = lib_vrf_zebra_ribs_rib_route_route_entry_nexthop_group_nexthop_get_next, + .get_keys = lib_vrf_zebra_ribs_rib_route_route_entry_nexthop_group_nexthop_get_keys, + .lookup_entry = lib_vrf_zebra_ribs_rib_route_route_entry_nexthop_group_nexthop_lookup_entry, + } + }, + { + .xpath = "/frr-vrf:lib/vrf/frr-zebra:zebra/ribs/rib/route/route-entry/nexthop-group/nexthop/nh-type", + .cbs = { + .get_elem = lib_vrf_zebra_ribs_rib_route_route_entry_nexthop_group_nexthop_nh_type_get_elem, + } + }, + { + .xpath = "/frr-vrf:lib/vrf/frr-zebra:zebra/ribs/rib/route/route-entry/nexthop-group/nexthop/vrf", 
+ .cbs = { + .get_elem = lib_vrf_zebra_ribs_rib_route_route_entry_nexthop_group_nexthop_vrf_get_elem, + } + }, + { + .xpath = "/frr-vrf:lib/vrf/frr-zebra:zebra/ribs/rib/route/route-entry/nexthop-group/nexthop/gateway", + .cbs = { + .get_elem = lib_vrf_zebra_ribs_rib_route_route_entry_nexthop_group_nexthop_gateway_get_elem, + } + }, + { + .xpath = "/frr-vrf:lib/vrf/frr-zebra:zebra/ribs/rib/route/route-entry/nexthop-group/nexthop/interface", + .cbs = { + .get_elem = lib_vrf_zebra_ribs_rib_route_route_entry_nexthop_group_nexthop_interface_get_elem, + } + }, + { + .xpath = "/frr-vrf:lib/vrf/frr-zebra:zebra/ribs/rib/route/route-entry/nexthop-group/nexthop/bh-type", + .cbs = { + .get_elem = lib_vrf_zebra_ribs_rib_route_route_entry_nexthop_group_nexthop_bh_type_get_elem, + } + }, + { + .xpath = "/frr-vrf:lib/vrf/frr-zebra:zebra/ribs/rib/route/route-entry/nexthop-group/nexthop/onlink", + .cbs = { + .get_elem = lib_vrf_zebra_ribs_rib_route_route_entry_nexthop_group_nexthop_onlink_get_elem, + } + }, + { + .xpath = "/frr-vrf:lib/vrf/frr-zebra:zebra/ribs/rib/route/route-entry/nexthop-group/nexthop/srte-color", + .cbs = { + .get_elem = lib_vrf_zebra_ribs_rib_route_route_entry_nexthop_group_nexthop_color_get_elem, + } + }, + { + .xpath = "/frr-vrf:lib/vrf/frr-zebra:zebra/ribs/rib/route/route-entry/nexthop-group/nexthop/mpls-label-stack/entry", + .cbs = { + .get_next = lib_vrf_zebra_ribs_rib_route_route_entry_nexthop_group_nexthop_mpls_label_stack_entry_get_next, + .get_keys = lib_vrf_zebra_ribs_rib_route_route_entry_nexthop_group_nexthop_mpls_label_stack_entry_get_keys, + .lookup_entry = lib_vrf_zebra_ribs_rib_route_route_entry_nexthop_group_nexthop_mpls_label_stack_entry_lookup_entry, + } + }, + { + .xpath = "/frr-vrf:lib/vrf/frr-zebra:zebra/ribs/rib/route/route-entry/nexthop-group/nexthop/mpls-label-stack/entry/id", + .cbs = { + .get_elem = lib_vrf_zebra_ribs_rib_route_route_entry_nexthop_group_nexthop_mpls_label_stack_entry_id_get_elem, + } + }, + { + .xpath = 
"/frr-vrf:lib/vrf/frr-zebra:zebra/ribs/rib/route/route-entry/nexthop-group/nexthop/mpls-label-stack/entry/label", + .cbs = { + .get_elem = lib_vrf_zebra_ribs_rib_route_route_entry_nexthop_group_nexthop_mpls_label_stack_entry_label_get_elem, + } + }, + { + .xpath = "/frr-vrf:lib/vrf/frr-zebra:zebra/ribs/rib/route/route-entry/nexthop-group/nexthop/mpls-label-stack/entry/ttl", + .cbs = { + .get_elem = lib_vrf_zebra_ribs_rib_route_route_entry_nexthop_group_nexthop_mpls_label_stack_entry_ttl_get_elem, + } + }, + { + .xpath = "/frr-vrf:lib/vrf/frr-zebra:zebra/ribs/rib/route/route-entry/nexthop-group/nexthop/mpls-label-stack/entry/traffic-class", + .cbs = { + .get_elem = lib_vrf_zebra_ribs_rib_route_route_entry_nexthop_group_nexthop_mpls_label_stack_entry_traffic_class_get_elem, + } + }, + { + .xpath = "/frr-vrf:lib/vrf/frr-zebra:zebra/ribs/rib/route/route-entry/nexthop-group/nexthop/duplicate", + .cbs = { + .get_elem = lib_vrf_zebra_ribs_rib_route_route_entry_nexthop_group_nexthop_duplicate_get_elem, + } + }, + { + .xpath = "/frr-vrf:lib/vrf/frr-zebra:zebra/ribs/rib/route/route-entry/nexthop-group/nexthop/recursive", + .cbs = { + .get_elem = lib_vrf_zebra_ribs_rib_route_route_entry_nexthop_group_nexthop_recursive_get_elem, + } + }, + { + .xpath = "/frr-vrf:lib/vrf/frr-zebra:zebra/ribs/rib/route/route-entry/nexthop-group/nexthop/active", + .cbs = { + .get_elem = lib_vrf_zebra_ribs_rib_route_route_entry_nexthop_group_nexthop_active_get_elem, + } + }, + { + .xpath = "/frr-vrf:lib/vrf/frr-zebra:zebra/ribs/rib/route/route-entry/nexthop-group/nexthop/fib", + .cbs = { + .get_elem = lib_vrf_zebra_ribs_rib_route_route_entry_nexthop_group_nexthop_fib_get_elem, + } + }, + { + .xpath = "/frr-vrf:lib/vrf/frr-zebra:zebra/ribs/rib/route/route-entry/nexthop-group/nexthop/weight", + .cbs = { + .get_elem = lib_vrf_zebra_ribs_rib_route_route_entry_nexthop_group_nexthop_weight_get_elem, + } + }, + { + .xpath = "/frr-vrf:lib/vrf/frr-zebra:zebra/l3vni-id", + .cbs = { + .modify = 
lib_vrf_zebra_l3vni_id_modify, + .destroy = lib_vrf_zebra_l3vni_id_destroy, + } + }, + { + .xpath = "/frr-vrf:lib/vrf/frr-zebra:zebra/prefix-only", + .cbs = { + .modify = lib_vrf_zebra_prefix_only_modify, + } + }, + { + .xpath = NULL, + }, + } +}; diff --git a/zebra/zebra_nb.h b/zebra/zebra_nb.h new file mode 100644 index 0000000..9590705 --- /dev/null +++ b/zebra/zebra_nb.h @@ -0,0 +1,289 @@ +/* + * Copyright (C) 2020 Cumulus Networks, Inc. + * Chirag Shah + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License as published by the Free + * Software Foundation; either version 2 of the License, or (at your option) + * any later version. + * + * This program is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for + * more details. + * + * You should have received a copy of the GNU General Public License along + * with this program; see the file COPYING; if not, write to the Free Software + * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA + */ + +#ifndef ZEBRA_ZEBRA_NB_H_ +#define ZEBRA_ZEBRA_NB_H_ + +#ifdef __cplusplus +extern "C" { +#endif + +extern const struct frr_yang_module_info frr_zebra_info; + +/* prototypes */ +int get_route_information_rpc(struct nb_cb_rpc_args *args); +int get_v6_mroute_info_rpc(struct nb_cb_rpc_args *args); +int get_vrf_info_rpc(struct nb_cb_rpc_args *args); +int get_vrf_vni_info_rpc(struct nb_cb_rpc_args *args); +int get_evpn_info_rpc(struct nb_cb_rpc_args *args); +int get_vni_info_rpc(struct nb_cb_rpc_args *args); +int get_evpn_vni_rmac_rpc(struct nb_cb_rpc_args *args); +int get_evpn_vni_nexthops_rpc(struct nb_cb_rpc_args *args); +int clear_evpn_dup_addr_rpc(struct nb_cb_rpc_args *args); +int get_evpn_macs_rpc(struct nb_cb_rpc_args *args); +int get_evpn_arp_cache_rpc(struct 
nb_cb_rpc_args *args); +int get_pbr_ipset_rpc(struct nb_cb_rpc_args *args); +int get_pbr_iptable_rpc(struct nb_cb_rpc_args *args); +int get_debugs_rpc(struct nb_cb_rpc_args *args); +int zebra_mcast_rpf_lookup_modify(struct nb_cb_modify_args *args); +int zebra_ip_forwarding_modify(struct nb_cb_modify_args *args); +int zebra_ip_forwarding_destroy(struct nb_cb_destroy_args *args); +int zebra_ipv6_forwarding_modify(struct nb_cb_modify_args *args); +int zebra_ipv6_forwarding_destroy(struct nb_cb_destroy_args *args); +int zebra_workqueue_hold_timer_modify(struct nb_cb_modify_args *args); +int zebra_zapi_packets_modify(struct nb_cb_modify_args *args); +int zebra_import_kernel_table_table_id_modify(struct nb_cb_modify_args *args); +int zebra_import_kernel_table_table_id_destroy(struct nb_cb_destroy_args *args); +int zebra_import_kernel_table_distance_modify(struct nb_cb_modify_args *args); +int zebra_import_kernel_table_route_map_modify(struct nb_cb_modify_args *args); +int zebra_import_kernel_table_route_map_destroy( + struct nb_cb_destroy_args *args); +int zebra_allow_external_route_update_create(struct nb_cb_create_args *args); +int zebra_allow_external_route_update_destroy(struct nb_cb_destroy_args *args); +int zebra_dplane_queue_limit_modify(struct nb_cb_modify_args *args); +int zebra_debugs_debug_events_modify(struct nb_cb_modify_args *args); +int zebra_debugs_debug_events_destroy(struct nb_cb_destroy_args *args); +int zebra_debugs_debug_zapi_send_modify(struct nb_cb_modify_args *args); +int zebra_debugs_debug_zapi_send_destroy(struct nb_cb_destroy_args *args); +int zebra_debugs_debug_zapi_recv_modify(struct nb_cb_modify_args *args); +int zebra_debugs_debug_zapi_recv_destroy(struct nb_cb_destroy_args *args); +int zebra_debugs_debug_zapi_detail_modify(struct nb_cb_modify_args *args); +int zebra_debugs_debug_zapi_detail_destroy(struct nb_cb_destroy_args *args); +int zebra_debugs_debug_kernel_modify(struct nb_cb_modify_args *args); +int 
zebra_debugs_debug_kernel_destroy(struct nb_cb_destroy_args *args); +int zebra_debugs_debug_kernel_msg_send_modify(struct nb_cb_modify_args *args); +int zebra_debugs_debug_kernel_msg_send_destroy(struct nb_cb_destroy_args *args); +int zebra_debugs_debug_kernel_msg_recv_modify(struct nb_cb_modify_args *args); +int zebra_debugs_debug_kernel_msg_recv_destroy(struct nb_cb_destroy_args *args); +int zebra_debugs_debug_rib_modify(struct nb_cb_modify_args *args); +int zebra_debugs_debug_rib_destroy(struct nb_cb_destroy_args *args); +int zebra_debugs_debug_rib_detail_modify(struct nb_cb_modify_args *args); +int zebra_debugs_debug_rib_detail_destroy(struct nb_cb_destroy_args *args); +int zebra_debugs_debug_fpm_modify(struct nb_cb_modify_args *args); +int zebra_debugs_debug_fpm_destroy(struct nb_cb_destroy_args *args); +int zebra_debugs_debug_nht_modify(struct nb_cb_modify_args *args); +int zebra_debugs_debug_nht_destroy(struct nb_cb_destroy_args *args); +int zebra_debugs_debug_nht_detail_modify(struct nb_cb_modify_args *args); +int zebra_debugs_debug_nht_detail_destroy(struct nb_cb_destroy_args *args); +int zebra_debugs_debug_mpls_modify(struct nb_cb_modify_args *args); +int zebra_debugs_debug_mpls_destroy(struct nb_cb_destroy_args *args); +int zebra_debugs_debug_vxlan_modify(struct nb_cb_modify_args *args); +int zebra_debugs_debug_vxlan_destroy(struct nb_cb_destroy_args *args); +int zebra_debugs_debug_pw_modify(struct nb_cb_modify_args *args); +int zebra_debugs_debug_pw_destroy(struct nb_cb_destroy_args *args); +int zebra_debugs_debug_dplane_modify(struct nb_cb_modify_args *args); +int zebra_debugs_debug_dplane_destroy(struct nb_cb_destroy_args *args); +int zebra_debugs_debug_dplane_detail_modify(struct nb_cb_modify_args *args); +int zebra_debugs_debug_dplane_detail_destroy(struct nb_cb_destroy_args *args); +int zebra_debugs_debug_mlag_modify(struct nb_cb_modify_args *args); +int zebra_debugs_debug_mlag_destroy(struct nb_cb_destroy_args *args); +int 
lib_interface_zebra_ip_addrs_create(struct nb_cb_create_args *args); +int lib_interface_zebra_ip_addrs_destroy(struct nb_cb_destroy_args *args); +int lib_interface_zebra_ip_addrs_label_modify(struct nb_cb_modify_args *args); +int lib_interface_zebra_ip_addrs_label_destroy(struct nb_cb_destroy_args *args); +int lib_interface_zebra_ip_addrs_ip4_peer_modify( + struct nb_cb_modify_args *args); +int lib_interface_zebra_ip_addrs_ip4_peer_destroy( + struct nb_cb_destroy_args *args); +int lib_interface_zebra_multicast_modify(struct nb_cb_modify_args *args); +int lib_interface_zebra_multicast_destroy(struct nb_cb_destroy_args *args); +int lib_interface_zebra_link_detect_modify(struct nb_cb_modify_args *args); +int lib_interface_zebra_link_detect_destroy(struct nb_cb_destroy_args *args); +int lib_interface_zebra_shutdown_modify(struct nb_cb_modify_args *args); +int lib_interface_zebra_shutdown_destroy(struct nb_cb_destroy_args *args); +int lib_interface_zebra_bandwidth_modify(struct nb_cb_modify_args *args); +int lib_interface_zebra_bandwidth_destroy(struct nb_cb_destroy_args *args); +struct yang_data * +lib_interface_zebra_state_up_count_get_elem(struct nb_cb_get_elem_args *args); +struct yang_data * +lib_interface_zebra_state_down_count_get_elem(struct nb_cb_get_elem_args *args); +struct yang_data * +lib_interface_zebra_state_zif_type_get_elem(struct nb_cb_get_elem_args *args); +struct yang_data * +lib_interface_zebra_state_ptm_status_get_elem(struct nb_cb_get_elem_args *args); +struct yang_data * +lib_interface_zebra_state_vlan_id_get_elem(struct nb_cb_get_elem_args *args); +struct yang_data * +lib_interface_zebra_state_vni_id_get_elem(struct nb_cb_get_elem_args *args); +struct yang_data *lib_interface_zebra_state_remote_vtep_get_elem( + struct nb_cb_get_elem_args *args); +struct yang_data *lib_interface_zebra_state_mcast_group_get_elem( + struct nb_cb_get_elem_args *args); +const void *lib_vrf_zebra_ribs_rib_get_next(struct nb_cb_get_next_args *args); +int 
lib_vrf_zebra_ribs_rib_get_keys(struct nb_cb_get_keys_args *args); +const void * +lib_vrf_zebra_ribs_rib_lookup_entry(struct nb_cb_lookup_entry_args *args); +struct yang_data * +lib_vrf_zebra_ribs_rib_afi_safi_name_get_elem(struct nb_cb_get_elem_args *args); +struct yang_data * +lib_vrf_zebra_ribs_rib_table_id_get_elem(struct nb_cb_get_elem_args *args); +const void * +lib_vrf_zebra_ribs_rib_route_get_next(struct nb_cb_get_next_args *args); +int lib_vrf_zebra_ribs_rib_route_get_keys(struct nb_cb_get_keys_args *args); +const void * +lib_vrf_zebra_ribs_rib_route_lookup_entry(struct nb_cb_lookup_entry_args *args); +struct yang_data * +lib_vrf_zebra_ribs_rib_route_prefix_get_elem(struct nb_cb_get_elem_args *args); +struct yang_data *lib_vrf_zebra_ribs_rib_route_protocol_get_elem( + struct nb_cb_get_elem_args *args); +struct yang_data *lib_vrf_zebra_ribs_rib_route_protocol_v6_get_elem( + struct nb_cb_get_elem_args *args); +struct yang_data * +lib_vrf_zebra_ribs_rib_route_vrf_get_elem(struct nb_cb_get_elem_args *args); +struct yang_data *lib_vrf_zebra_ribs_rib_route_distance_get_elem( + struct nb_cb_get_elem_args *args); +struct yang_data * +lib_vrf_zebra_ribs_rib_route_metric_get_elem(struct nb_cb_get_elem_args *args); +struct yang_data * +lib_vrf_zebra_ribs_rib_route_tag_get_elem(struct nb_cb_get_elem_args *args); +struct yang_data *lib_vrf_zebra_ribs_rib_route_selected_get_elem( + struct nb_cb_get_elem_args *args); +struct yang_data *lib_vrf_zebra_ribs_rib_route_installed_get_elem( + struct nb_cb_get_elem_args *args); +struct yang_data * +lib_vrf_zebra_ribs_rib_route_failed_get_elem(struct nb_cb_get_elem_args *args); +struct yang_data * +lib_vrf_zebra_ribs_rib_route_queued_get_elem(struct nb_cb_get_elem_args *args); +struct yang_data *lib_vrf_zebra_ribs_rib_route_internal_flags_get_elem( + struct nb_cb_get_elem_args *args); +struct yang_data *lib_vrf_zebra_ribs_rib_route_internal_status_get_elem( + struct nb_cb_get_elem_args *args); +struct yang_data * 
+lib_vrf_zebra_ribs_rib_route_uptime_get_elem(struct nb_cb_get_elem_args *args); +const void *lib_vrf_zebra_ribs_rib_route_nexthop_group_get_next( + struct nb_cb_get_next_args *args); +int lib_vrf_zebra_ribs_rib_route_nexthop_group_get_keys( + struct nb_cb_get_keys_args *args); +const void *lib_vrf_zebra_ribs_rib_route_nexthop_group_lookup_entry( + struct nb_cb_lookup_entry_args *args); +struct yang_data *lib_vrf_zebra_ribs_rib_route_nexthop_group_name_get_elem( + struct nb_cb_get_elem_args *args); +const void * +lib_vrf_zebra_ribs_rib_route_nexthop_group_frr_nexthops_nexthop_get_next( + struct nb_cb_get_next_args *args); +int lib_vrf_zebra_ribs_rib_route_nexthop_group_frr_nexthops_nexthop_get_keys( + struct nb_cb_get_keys_args *args); +const void *lib_vrf_zebra_ribs_rib_route_route_entry_get_next( + struct nb_cb_get_next_args *args); +int lib_vrf_zebra_ribs_rib_route_route_entry_get_keys( + struct nb_cb_get_keys_args *args); +const void *lib_vrf_zebra_ribs_rib_route_route_entry_lookup_entry( + struct nb_cb_lookup_entry_args *args); +struct yang_data *lib_vrf_zebra_ribs_rib_route_route_entry_protocol_get_elem( + struct nb_cb_get_elem_args *args); +struct yang_data *lib_vrf_zebra_ribs_rib_route_route_entry_instance_get_elem( + struct nb_cb_get_elem_args *args); +struct yang_data *lib_vrf_zebra_ribs_rib_route_route_entry_distance_get_elem( + struct nb_cb_get_elem_args *args); +struct yang_data *lib_vrf_zebra_ribs_rib_route_route_entry_metric_get_elem( + struct nb_cb_get_elem_args *args); +struct yang_data *lib_vrf_zebra_ribs_rib_route_route_entry_tag_get_elem( + struct nb_cb_get_elem_args *args); +struct yang_data *lib_vrf_zebra_ribs_rib_route_route_entry_selected_get_elem( + struct nb_cb_get_elem_args *args); +struct yang_data *lib_vrf_zebra_ribs_rib_route_route_entry_installed_get_elem( + struct nb_cb_get_elem_args *args); +struct yang_data *lib_vrf_zebra_ribs_rib_route_route_entry_failed_get_elem( + struct nb_cb_get_elem_args *args); +struct yang_data 
*lib_vrf_zebra_ribs_rib_route_route_entry_queued_get_elem( + struct nb_cb_get_elem_args *args); +struct yang_data * +lib_vrf_zebra_ribs_rib_route_route_entry_internal_flags_get_elem( + struct nb_cb_get_elem_args *args); +struct yang_data * +lib_vrf_zebra_ribs_rib_route_route_entry_internal_status_get_elem( + struct nb_cb_get_elem_args *args); +struct yang_data *lib_vrf_zebra_ribs_rib_route_route_entry_uptime_get_elem( + struct nb_cb_get_elem_args *args); +struct yang_data * +lib_vrf_zebra_ribs_rib_route_route_entry_nexthop_group_id_get_elem( + struct nb_cb_get_elem_args *args); +const void * +lib_vrf_zebra_ribs_rib_route_route_entry_nexthop_group_nexthop_get_next( + struct nb_cb_get_next_args *args); +int lib_vrf_zebra_ribs_rib_route_route_entry_nexthop_group_nexthop_get_keys( + struct nb_cb_get_keys_args *args); +const void * +lib_vrf_zebra_ribs_rib_route_route_entry_nexthop_group_nexthop_lookup_entry( + struct nb_cb_lookup_entry_args *args); +struct yang_data * +lib_vrf_zebra_ribs_rib_route_route_entry_nexthop_group_nexthop_nh_type_get_elem( + struct nb_cb_get_elem_args *args); +struct yang_data * +lib_vrf_zebra_ribs_rib_route_route_entry_nexthop_group_nexthop_vrf_get_elem( + struct nb_cb_get_elem_args *args); +struct yang_data * +lib_vrf_zebra_ribs_rib_route_route_entry_nexthop_group_nexthop_gateway_get_elem( + struct nb_cb_get_elem_args *args); +struct yang_data * +lib_vrf_zebra_ribs_rib_route_route_entry_nexthop_group_nexthop_interface_get_elem( + struct nb_cb_get_elem_args *args); +struct yang_data * +lib_vrf_zebra_ribs_rib_route_route_entry_nexthop_group_nexthop_bh_type_get_elem( + struct nb_cb_get_elem_args *args); +struct yang_data * +lib_vrf_zebra_ribs_rib_route_route_entry_nexthop_group_nexthop_onlink_get_elem( + struct nb_cb_get_elem_args *args); +struct yang_data * +lib_vrf_zebra_ribs_rib_route_route_entry_nexthop_group_nexthop_color_get_elem( + struct nb_cb_get_elem_args *args); +const void * 
+lib_vrf_zebra_ribs_rib_route_route_entry_nexthop_group_nexthop_mpls_label_stack_entry_get_next( + struct nb_cb_get_next_args *args); +int lib_vrf_zebra_ribs_rib_route_route_entry_nexthop_group_nexthop_mpls_label_stack_entry_get_keys( + struct nb_cb_get_keys_args *args); +const void * +lib_vrf_zebra_ribs_rib_route_route_entry_nexthop_group_nexthop_mpls_label_stack_entry_lookup_entry( + struct nb_cb_lookup_entry_args *args); +struct yang_data * +lib_vrf_zebra_ribs_rib_route_route_entry_nexthop_group_nexthop_mpls_label_stack_entry_id_get_elem( + struct nb_cb_get_elem_args *args); +struct yang_data * +lib_vrf_zebra_ribs_rib_route_route_entry_nexthop_group_nexthop_mpls_label_stack_entry_label_get_elem( + struct nb_cb_get_elem_args *args); +struct yang_data * +lib_vrf_zebra_ribs_rib_route_route_entry_nexthop_group_nexthop_mpls_label_stack_entry_ttl_get_elem( + struct nb_cb_get_elem_args *args); +struct yang_data * +lib_vrf_zebra_ribs_rib_route_route_entry_nexthop_group_nexthop_mpls_label_stack_entry_traffic_class_get_elem( + struct nb_cb_get_elem_args *args); +struct yang_data * +lib_vrf_zebra_ribs_rib_route_route_entry_nexthop_group_nexthop_duplicate_get_elem( + struct nb_cb_get_elem_args *args); +struct yang_data * +lib_vrf_zebra_ribs_rib_route_route_entry_nexthop_group_nexthop_recursive_get_elem( + struct nb_cb_get_elem_args *args); +struct yang_data * +lib_vrf_zebra_ribs_rib_route_route_entry_nexthop_group_nexthop_active_get_elem( + struct nb_cb_get_elem_args *args); +struct yang_data * +lib_vrf_zebra_ribs_rib_route_route_entry_nexthop_group_nexthop_fib_get_elem( + struct nb_cb_get_elem_args *args); +struct yang_data * +lib_vrf_zebra_ribs_rib_route_route_entry_nexthop_group_nexthop_weight_get_elem( + struct nb_cb_get_elem_args *args); +int lib_vrf_zebra_l3vni_id_modify(struct nb_cb_modify_args *args); +int lib_vrf_zebra_l3vni_id_destroy(struct nb_cb_destroy_args *args); +int lib_vrf_zebra_prefix_only_modify(struct nb_cb_modify_args *args); + +#ifdef __cplusplus +} 
+#endif + +#endif diff --git a/zebra/zebra_nb_config.c b/zebra/zebra_nb_config.c new file mode 100644 index 0000000..dfb55b0 --- /dev/null +++ b/zebra/zebra_nb_config.c @@ -0,0 +1,1280 @@ +/* + * Copyright (C) 2019 Cumulus Networks, Inc. + * Chirag Shah + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License as published by the Free + * Software Foundation; either version 2 of the License, or (at your option) + * any later version. + * + * This program is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for + * more details. + * + * You should have received a copy of the GNU General Public License along + * with this program; see the file COPYING; if not, write to the Free Software + * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA + */ + +#include <zebra.h> + +#include "lib/log.h" +#include "lib/northbound.h" +#include "lib/printfrr.h" +#include "libfrr.h" +#include "lib/command.h" +#include "lib/routemap.h" +#include "zebra/zebra_nb.h" +#include "zebra/rib.h" +#include "zebra_nb.h" +#include "zebra/interface.h" +#include "zebra/connected.h" +#include "zebra/zebra_router.h" +#include "zebra/debug.h" +#include "zebra/zebra_vxlan_private.h" +#include "zebra/zebra_vxlan.h" + +/* + * XPath: /frr-zebra:zebra/mcast-rpf-lookup + */ +int zebra_mcast_rpf_lookup_modify(struct nb_cb_modify_args *args) +{ + switch (args->event) { + case NB_EV_VALIDATE: + case NB_EV_PREPARE: + case NB_EV_ABORT: + case NB_EV_APPLY: + /* TODO: implement me. 
*/ + break; + } + + return NB_OK; +} + +/* + * XPath: /frr-zebra:zebra/ip-forwarding + */ +int zebra_ip_forwarding_modify(struct nb_cb_modify_args *args) +{ + switch (args->event) { + case NB_EV_VALIDATE: + case NB_EV_PREPARE: + case NB_EV_ABORT: + case NB_EV_APPLY: + /* TODO: implement me. */ + break; + } + + return NB_OK; +} + +int zebra_ip_forwarding_destroy(struct nb_cb_destroy_args *args) +{ + switch (args->event) { + case NB_EV_VALIDATE: + case NB_EV_PREPARE: + case NB_EV_ABORT: + case NB_EV_APPLY: + /* TODO: implement me. */ + break; + } + + return NB_OK; +} + +/* + * XPath: /frr-zebra:zebra/ipv6-forwarding + */ +int zebra_ipv6_forwarding_modify(struct nb_cb_modify_args *args) +{ + switch (args->event) { + case NB_EV_VALIDATE: + case NB_EV_PREPARE: + case NB_EV_ABORT: + case NB_EV_APPLY: + /* TODO: implement me. */ + break; + } + + return NB_OK; +} + +int zebra_ipv6_forwarding_destroy(struct nb_cb_destroy_args *args) +{ + switch (args->event) { + case NB_EV_VALIDATE: + case NB_EV_PREPARE: + case NB_EV_ABORT: + case NB_EV_APPLY: + /* TODO: implement me. */ + break; + } + + return NB_OK; +} + +/* + * XPath: /frr-zebra:zebra/workqueue-hold-timer + */ +int zebra_workqueue_hold_timer_modify(struct nb_cb_modify_args *args) +{ + switch (args->event) { + case NB_EV_VALIDATE: + case NB_EV_PREPARE: + case NB_EV_ABORT: + case NB_EV_APPLY: + /* TODO: implement me. */ + break; + } + + return NB_OK; +} + +/* + * XPath: /frr-zebra:zebra/zapi-packets + */ +int zebra_zapi_packets_modify(struct nb_cb_modify_args *args) +{ + switch (args->event) { + case NB_EV_VALIDATE: + case NB_EV_PREPARE: + case NB_EV_ABORT: + case NB_EV_APPLY: + /* TODO: implement me. */ + break; + } + + return NB_OK; +} + +/* + * XPath: /frr-zebra:zebra/import-kernel-table/table-id + */ +int zebra_import_kernel_table_table_id_modify(struct nb_cb_modify_args *args) +{ + switch (args->event) { + case NB_EV_VALIDATE: + case NB_EV_PREPARE: + case NB_EV_ABORT: + case NB_EV_APPLY: + /* TODO: implement me. 
*/ + break; + } + + return NB_OK; +} + +int zebra_import_kernel_table_table_id_destroy(struct nb_cb_destroy_args *args) +{ + switch (args->event) { + case NB_EV_VALIDATE: + case NB_EV_PREPARE: + case NB_EV_ABORT: + case NB_EV_APPLY: + /* TODO: implement me. */ + break; + } + + return NB_OK; +} + +/* + * XPath: /frr-zebra:zebra/import-kernel-table/distance + */ +int zebra_import_kernel_table_distance_modify(struct nb_cb_modify_args *args) +{ + switch (args->event) { + case NB_EV_VALIDATE: + case NB_EV_PREPARE: + case NB_EV_ABORT: + case NB_EV_APPLY: + /* TODO: implement me. */ + break; + } + + return NB_OK; +} + +/* + * XPath: /frr-zebra:zebra/import-kernel-table/route-map + */ +int zebra_import_kernel_table_route_map_modify(struct nb_cb_modify_args *args) +{ + switch (args->event) { + case NB_EV_VALIDATE: + case NB_EV_PREPARE: + case NB_EV_ABORT: + case NB_EV_APPLY: + /* TODO: implement me. */ + break; + } + + return NB_OK; +} + +int zebra_import_kernel_table_route_map_destroy(struct nb_cb_destroy_args *args) +{ + switch (args->event) { + case NB_EV_VALIDATE: + case NB_EV_PREPARE: + case NB_EV_ABORT: + case NB_EV_APPLY: + /* TODO: implement me. */ + break; + } + + return NB_OK; +} + +/* + * XPath: /frr-zebra:zebra/allow-external-route-update + */ +int zebra_allow_external_route_update_create(struct nb_cb_create_args *args) +{ + switch (args->event) { + case NB_EV_VALIDATE: + case NB_EV_PREPARE: + case NB_EV_ABORT: + case NB_EV_APPLY: + /* TODO: implement me. */ + break; + } + + return NB_OK; +} + +int zebra_allow_external_route_update_destroy(struct nb_cb_destroy_args *args) +{ + switch (args->event) { + case NB_EV_VALIDATE: + case NB_EV_PREPARE: + case NB_EV_ABORT: + case NB_EV_APPLY: + /* TODO: implement me. 
*/ + break; + } + + return NB_OK; +} + +/* + * XPath: /frr-zebra:zebra/dplane-queue-limit + */ +int zebra_dplane_queue_limit_modify(struct nb_cb_modify_args *args) +{ + switch (args->event) { + case NB_EV_VALIDATE: + case NB_EV_PREPARE: + case NB_EV_ABORT: + case NB_EV_APPLY: + /* TODO: implement me. */ + break; + } + + return NB_OK; +} + +/* + * XPath: /frr-zebra:zebra/debugs/debug-events + */ +int zebra_debugs_debug_events_modify(struct nb_cb_modify_args *args) +{ + switch (args->event) { + case NB_EV_VALIDATE: + case NB_EV_PREPARE: + case NB_EV_ABORT: + case NB_EV_APPLY: + /* TODO: implement me. */ + break; + } + + return NB_OK; +} + +int zebra_debugs_debug_events_destroy(struct nb_cb_destroy_args *args) +{ + switch (args->event) { + case NB_EV_VALIDATE: + case NB_EV_PREPARE: + case NB_EV_ABORT: + case NB_EV_APPLY: + /* TODO: implement me. */ + break; + } + + return NB_OK; +} + +/* + * XPath: /frr-zebra:zebra/debugs/debug-zapi-send + */ +int zebra_debugs_debug_zapi_send_modify(struct nb_cb_modify_args *args) +{ + switch (args->event) { + case NB_EV_VALIDATE: + case NB_EV_PREPARE: + case NB_EV_ABORT: + case NB_EV_APPLY: + /* TODO: implement me. */ + break; + } + + return NB_OK; +} + +int zebra_debugs_debug_zapi_send_destroy(struct nb_cb_destroy_args *args) +{ + switch (args->event) { + case NB_EV_VALIDATE: + case NB_EV_PREPARE: + case NB_EV_ABORT: + case NB_EV_APPLY: + /* TODO: implement me. */ + break; + } + + return NB_OK; +} + +/* + * XPath: /frr-zebra:zebra/debugs/debug-zapi-recv + */ +int zebra_debugs_debug_zapi_recv_modify(struct nb_cb_modify_args *args) +{ + switch (args->event) { + case NB_EV_VALIDATE: + case NB_EV_PREPARE: + case NB_EV_ABORT: + case NB_EV_APPLY: + /* TODO: implement me. */ + break; + } + + return NB_OK; +} + +int zebra_debugs_debug_zapi_recv_destroy(struct nb_cb_destroy_args *args) +{ + switch (args->event) { + case NB_EV_VALIDATE: + case NB_EV_PREPARE: + case NB_EV_ABORT: + case NB_EV_APPLY: + /* TODO: implement me. 
*/ + break; + } + + return NB_OK; +} + +/* + * XPath: /frr-zebra:zebra/debugs/debug-zapi-detail + */ +int zebra_debugs_debug_zapi_detail_modify(struct nb_cb_modify_args *args) +{ + switch (args->event) { + case NB_EV_VALIDATE: + case NB_EV_PREPARE: + case NB_EV_ABORT: + case NB_EV_APPLY: + /* TODO: implement me. */ + break; + } + + return NB_OK; +} + +int zebra_debugs_debug_zapi_detail_destroy(struct nb_cb_destroy_args *args) +{ + switch (args->event) { + case NB_EV_VALIDATE: + case NB_EV_PREPARE: + case NB_EV_ABORT: + case NB_EV_APPLY: + /* TODO: implement me. */ + break; + } + + return NB_OK; +} + +/* + * XPath: /frr-zebra:zebra/debugs/debug-kernel + */ +int zebra_debugs_debug_kernel_modify(struct nb_cb_modify_args *args) +{ + switch (args->event) { + case NB_EV_VALIDATE: + case NB_EV_PREPARE: + case NB_EV_ABORT: + case NB_EV_APPLY: + /* TODO: implement me. */ + break; + } + + return NB_OK; +} + +int zebra_debugs_debug_kernel_destroy(struct nb_cb_destroy_args *args) +{ + switch (args->event) { + case NB_EV_VALIDATE: + case NB_EV_PREPARE: + case NB_EV_ABORT: + case NB_EV_APPLY: + /* TODO: implement me. */ + break; + } + + return NB_OK; +} + +/* + * XPath: /frr-zebra:zebra/debugs/debug-kernel-msg-send + */ +int zebra_debugs_debug_kernel_msg_send_modify(struct nb_cb_modify_args *args) +{ + switch (args->event) { + case NB_EV_VALIDATE: + case NB_EV_PREPARE: + case NB_EV_ABORT: + case NB_EV_APPLY: + /* TODO: implement me. */ + break; + } + + return NB_OK; +} + +int zebra_debugs_debug_kernel_msg_send_destroy(struct nb_cb_destroy_args *args) +{ + switch (args->event) { + case NB_EV_VALIDATE: + case NB_EV_PREPARE: + case NB_EV_ABORT: + case NB_EV_APPLY: + /* TODO: implement me. 
*/ + break; + } + + return NB_OK; +} + +/* + * XPath: /frr-zebra:zebra/debugs/debug-kernel-msg-recv + */ +int zebra_debugs_debug_kernel_msg_recv_modify(struct nb_cb_modify_args *args) +{ + switch (args->event) { + case NB_EV_VALIDATE: + case NB_EV_PREPARE: + case NB_EV_ABORT: + case NB_EV_APPLY: + /* TODO: implement me. */ + break; + } + + return NB_OK; +} + +int zebra_debugs_debug_kernel_msg_recv_destroy(struct nb_cb_destroy_args *args) +{ + switch (args->event) { + case NB_EV_VALIDATE: + case NB_EV_PREPARE: + case NB_EV_ABORT: + case NB_EV_APPLY: + /* TODO: implement me. */ + break; + } + + return NB_OK; +} + +/* + * XPath: /frr-zebra:zebra/debugs/debug-rib + */ +int zebra_debugs_debug_rib_modify(struct nb_cb_modify_args *args) +{ + switch (args->event) { + case NB_EV_VALIDATE: + case NB_EV_PREPARE: + case NB_EV_ABORT: + case NB_EV_APPLY: + /* TODO: implement me. */ + break; + } + + return NB_OK; +} + +int zebra_debugs_debug_rib_destroy(struct nb_cb_destroy_args *args) +{ + switch (args->event) { + case NB_EV_VALIDATE: + case NB_EV_PREPARE: + case NB_EV_ABORT: + case NB_EV_APPLY: + /* TODO: implement me. */ + break; + } + + return NB_OK; +} + +/* + * XPath: /frr-zebra:zebra/debugs/debug-rib-detail + */ +int zebra_debugs_debug_rib_detail_modify(struct nb_cb_modify_args *args) +{ + switch (args->event) { + case NB_EV_VALIDATE: + case NB_EV_PREPARE: + case NB_EV_ABORT: + case NB_EV_APPLY: + /* TODO: implement me. */ + break; + } + + return NB_OK; +} + +int zebra_debugs_debug_rib_detail_destroy(struct nb_cb_destroy_args *args) +{ + switch (args->event) { + case NB_EV_VALIDATE: + case NB_EV_PREPARE: + case NB_EV_ABORT: + case NB_EV_APPLY: + /* TODO: implement me. */ + break; + } + + return NB_OK; +} + +/* + * XPath: /frr-zebra:zebra/debugs/debug-fpm + */ +int zebra_debugs_debug_fpm_modify(struct nb_cb_modify_args *args) +{ + switch (args->event) { + case NB_EV_VALIDATE: + case NB_EV_PREPARE: + case NB_EV_ABORT: + case NB_EV_APPLY: + /* TODO: implement me. 
*/ + break; + } + + return NB_OK; +} + +int zebra_debugs_debug_fpm_destroy(struct nb_cb_destroy_args *args) +{ + switch (args->event) { + case NB_EV_VALIDATE: + case NB_EV_PREPARE: + case NB_EV_ABORT: + case NB_EV_APPLY: + /* TODO: implement me. */ + break; + } + + return NB_OK; +} + +/* + * XPath: /frr-zebra:zebra/debugs/debug-nht + */ +int zebra_debugs_debug_nht_modify(struct nb_cb_modify_args *args) +{ + switch (args->event) { + case NB_EV_VALIDATE: + case NB_EV_PREPARE: + case NB_EV_ABORT: + case NB_EV_APPLY: + /* TODO: implement me. */ + break; + } + + return NB_OK; +} + +int zebra_debugs_debug_nht_destroy(struct nb_cb_destroy_args *args) +{ + switch (args->event) { + case NB_EV_VALIDATE: + case NB_EV_PREPARE: + case NB_EV_ABORT: + case NB_EV_APPLY: + /* TODO: implement me. */ + break; + } + + return NB_OK; +} + +/* + * XPath: /frr-zebra:zebra/debugs/debug-nht-detail + */ +int zebra_debugs_debug_nht_detail_modify(struct nb_cb_modify_args *args) +{ + switch (args->event) { + case NB_EV_VALIDATE: + case NB_EV_PREPARE: + case NB_EV_ABORT: + case NB_EV_APPLY: + /* TODO: implement me. */ + break; + } + + return NB_OK; +} + +int zebra_debugs_debug_nht_detail_destroy(struct nb_cb_destroy_args *args) +{ + switch (args->event) { + case NB_EV_VALIDATE: + case NB_EV_PREPARE: + case NB_EV_ABORT: + case NB_EV_APPLY: + /* TODO: implement me. */ + break; + } + + return NB_OK; +} + +/* + * XPath: /frr-zebra:zebra/debugs/debug-mpls + */ +int zebra_debugs_debug_mpls_modify(struct nb_cb_modify_args *args) +{ + switch (args->event) { + case NB_EV_VALIDATE: + case NB_EV_PREPARE: + case NB_EV_ABORT: + case NB_EV_APPLY: + /* TODO: implement me. */ + break; + } + + return NB_OK; +} + +int zebra_debugs_debug_mpls_destroy(struct nb_cb_destroy_args *args) +{ + switch (args->event) { + case NB_EV_VALIDATE: + case NB_EV_PREPARE: + case NB_EV_ABORT: + case NB_EV_APPLY: + /* TODO: implement me. 
*/ + break; + } + + return NB_OK; +} + +/* + * XPath: /frr-zebra:zebra/debugs/debug-vxlan + */ +int zebra_debugs_debug_vxlan_modify(struct nb_cb_modify_args *args) +{ + switch (args->event) { + case NB_EV_VALIDATE: + case NB_EV_PREPARE: + case NB_EV_ABORT: + case NB_EV_APPLY: + /* TODO: implement me. */ + break; + } + + return NB_OK; +} + +int zebra_debugs_debug_vxlan_destroy(struct nb_cb_destroy_args *args) +{ + switch (args->event) { + case NB_EV_VALIDATE: + case NB_EV_PREPARE: + case NB_EV_ABORT: + case NB_EV_APPLY: + /* TODO: implement me. */ + break; + } + + return NB_OK; +} + +/* + * XPath: /frr-zebra:zebra/debugs/debug-pw + */ +int zebra_debugs_debug_pw_modify(struct nb_cb_modify_args *args) +{ + switch (args->event) { + case NB_EV_VALIDATE: + case NB_EV_PREPARE: + case NB_EV_ABORT: + case NB_EV_APPLY: + /* TODO: implement me. */ + break; + } + + return NB_OK; +} + +int zebra_debugs_debug_pw_destroy(struct nb_cb_destroy_args *args) +{ + switch (args->event) { + case NB_EV_VALIDATE: + case NB_EV_PREPARE: + case NB_EV_ABORT: + case NB_EV_APPLY: + /* TODO: implement me. */ + break; + } + + return NB_OK; +} + +/* + * XPath: /frr-zebra:zebra/debugs/debug-dplane + */ +int zebra_debugs_debug_dplane_modify(struct nb_cb_modify_args *args) +{ + switch (args->event) { + case NB_EV_VALIDATE: + case NB_EV_PREPARE: + case NB_EV_ABORT: + case NB_EV_APPLY: + /* TODO: implement me. */ + break; + } + + return NB_OK; +} + +int zebra_debugs_debug_dplane_destroy(struct nb_cb_destroy_args *args) +{ + switch (args->event) { + case NB_EV_VALIDATE: + case NB_EV_PREPARE: + case NB_EV_ABORT: + case NB_EV_APPLY: + /* TODO: implement me. */ + break; + } + + return NB_OK; +} + +/* + * XPath: /frr-zebra:zebra/debugs/debug-dplane-detail + */ +int zebra_debugs_debug_dplane_detail_modify(struct nb_cb_modify_args *args) +{ + switch (args->event) { + case NB_EV_VALIDATE: + case NB_EV_PREPARE: + case NB_EV_ABORT: + case NB_EV_APPLY: + /* TODO: implement me. 
*/ + break; + } + + return NB_OK; +} + +int zebra_debugs_debug_dplane_detail_destroy(struct nb_cb_destroy_args *args) +{ + switch (args->event) { + case NB_EV_VALIDATE: + case NB_EV_PREPARE: + case NB_EV_ABORT: + case NB_EV_APPLY: + /* TODO: implement me. */ + break; + } + + return NB_OK; +} + +/* + * XPath: /frr-zebra:zebra/debugs/debug-mlag + */ +int zebra_debugs_debug_mlag_modify(struct nb_cb_modify_args *args) +{ + switch (args->event) { + case NB_EV_VALIDATE: + case NB_EV_PREPARE: + case NB_EV_ABORT: + case NB_EV_APPLY: + /* TODO: implement me. */ + break; + } + + return NB_OK; +} + +int zebra_debugs_debug_mlag_destroy(struct nb_cb_destroy_args *args) +{ + switch (args->event) { + case NB_EV_VALIDATE: + case NB_EV_PREPARE: + case NB_EV_ABORT: + case NB_EV_APPLY: + /* TODO: implement me. */ + break; + } + + return NB_OK; +} + +/* + * XPath: /frr-interface:lib/interface/frr-zebra:zebra/ip-addrs + */ +int lib_interface_zebra_ip_addrs_create(struct nb_cb_create_args *args) +{ + struct interface *ifp; + struct prefix prefix; + + // addr_family = yang_dnode_get_enum(dnode, "./address-family"); + yang_dnode_get_prefix(&prefix, args->dnode, "./ip-prefix"); + apply_mask(&prefix); + + switch (args->event) { + case NB_EV_VALIDATE: + if (prefix.family == AF_INET + && ipv4_martian(&prefix.u.prefix4)) { + snprintfrr(args->errmsg, args->errmsg_len, + "invalid address %pFX", &prefix); + return NB_ERR_VALIDATION; + } else if (prefix.family == AF_INET6 + && ipv6_martian(&prefix.u.prefix6)) { + snprintfrr(args->errmsg, args->errmsg_len, + "invalid address %pFX", &prefix); + return NB_ERR_VALIDATION; + } + break; + case NB_EV_PREPARE: + case NB_EV_ABORT: + break; + case NB_EV_APPLY: + ifp = nb_running_get_entry(args->dnode, NULL, true); + if (prefix.family == AF_INET) + if_ip_address_install(ifp, &prefix, NULL, NULL); + else if (prefix.family == AF_INET6) + if_ipv6_address_install(ifp, &prefix, NULL); + + break; + } + + return NB_OK; +} + +int 
lib_interface_zebra_ip_addrs_destroy(struct nb_cb_destroy_args *args) +{ + struct interface *ifp; + struct prefix prefix; + struct connected *ifc; + + yang_dnode_get_prefix(&prefix, args->dnode, "./ip-prefix"); + apply_mask(&prefix); + + switch (args->event) { + case NB_EV_VALIDATE: + ifp = nb_running_get_entry(args->dnode, NULL, false); + if (!ifp) + return NB_OK; + + if (prefix.family == AF_INET) { + /* Check current interface address. */ + ifc = connected_check_ptp(ifp, &prefix, NULL); + if (!ifc) { + snprintf(args->errmsg, args->errmsg_len, + "interface %s Can't find address\n", + ifp->name); + return NB_ERR_VALIDATION; + } + } else if (prefix.family == AF_INET6) { + /* Check current interface address. */ + ifc = connected_check(ifp, &prefix); + if (!ifc) { + snprintf(args->errmsg, args->errmsg_len, + "interface can't find address %s", + ifp->name); + return NB_ERR_VALIDATION; + } + } else + return NB_ERR_VALIDATION; + + /* This is not configured address. */ + if (!CHECK_FLAG(ifc->conf, ZEBRA_IFC_CONFIGURED)) { + snprintf(args->errmsg, args->errmsg_len, + "interface %s not configured", ifp->name); + return NB_ERR_VALIDATION; + } + + /* This is not real address or interface is not active. */ + if (!CHECK_FLAG(ifc->conf, ZEBRA_IFC_QUEUED) + || !CHECK_FLAG(ifp->status, ZEBRA_INTERFACE_ACTIVE)) { + listnode_delete(ifp->connected, ifc); + connected_free(&ifc); + return NB_ERR_VALIDATION; + } + break; + case NB_EV_PREPARE: + case NB_EV_ABORT: + break; + case NB_EV_APPLY: + ifp = nb_running_get_entry(args->dnode, NULL, true); + if_ip_address_uinstall(ifp, &prefix); + break; + } + + return NB_OK; +} + +/* + * XPath: /frr-interface:lib/interface/frr-zebra:zebra/ip-addrs/label + */ +int lib_interface_zebra_ip_addrs_label_modify(struct nb_cb_modify_args *args) +{ + switch (args->event) { + case NB_EV_VALIDATE: + case NB_EV_PREPARE: + case NB_EV_ABORT: + case NB_EV_APPLY: + /* TODO: implement me. 
*/ + break; + } + + return NB_OK; +} + +int lib_interface_zebra_ip_addrs_label_destroy(struct nb_cb_destroy_args *args) +{ + switch (args->event) { + case NB_EV_VALIDATE: + case NB_EV_PREPARE: + case NB_EV_ABORT: + case NB_EV_APPLY: + /* TODO: implement me. */ + break; + } + + return NB_OK; +} + +/* + * XPath: /frr-interface:lib/interface/frr-zebra:zebra/ip-addrs/ip4-peer + */ +int lib_interface_zebra_ip_addrs_ip4_peer_modify(struct nb_cb_modify_args *args) +{ + switch (args->event) { + case NB_EV_VALIDATE: + case NB_EV_PREPARE: + case NB_EV_ABORT: + case NB_EV_APPLY: + /* TODO: implement me. */ + break; + } + + return NB_OK; +} + +int lib_interface_zebra_ip_addrs_ip4_peer_destroy( + struct nb_cb_destroy_args *args) +{ + switch (args->event) { + case NB_EV_VALIDATE: + case NB_EV_PREPARE: + case NB_EV_ABORT: + case NB_EV_APPLY: + /* TODO: implement me. */ + break; + } + + return NB_OK; +} + +/* + * XPath: /frr-interface:lib/interface/frr-zebra:zebra/multicast + */ +int lib_interface_zebra_multicast_modify(struct nb_cb_modify_args *args) +{ + if (args->event != NB_EV_APPLY) + return NB_OK; + + struct interface *ifp; + + ifp = nb_running_get_entry(args->dnode, NULL, true); + + if_multicast_set(ifp); + + return NB_OK; +} + +int lib_interface_zebra_multicast_destroy(struct nb_cb_destroy_args *args) +{ + if (args->event != NB_EV_APPLY) + return NB_OK; + + struct interface *ifp; + + ifp = nb_running_get_entry(args->dnode, NULL, true); + + if_multicast_unset(ifp); + + return NB_OK; +} + +/* + * XPath: /frr-interface:lib/interface/frr-zebra:zebra/link-detect + */ +int lib_interface_zebra_link_detect_modify(struct nb_cb_modify_args *args) +{ + if (args->event != NB_EV_APPLY) + return NB_OK; + + struct interface *ifp; + bool link_detect; + + ifp = nb_running_get_entry(args->dnode, NULL, true); + link_detect = yang_dnode_get_bool(args->dnode, "./link-detect"); + + if_linkdetect(ifp, link_detect); + + return NB_OK; +} + +int lib_interface_zebra_link_detect_destroy(struct 
nb_cb_destroy_args *args) +{ + if (args->event != NB_EV_APPLY) + return NB_OK; + + struct interface *ifp; + bool link_detect; + + ifp = nb_running_get_entry(args->dnode, NULL, true); + link_detect = yang_dnode_get_bool(args->dnode, "./link-detect"); + + if_linkdetect(ifp, link_detect); + + return NB_OK; +} + +/* + * XPath: /frr-interface:lib/interface/frr-zebra:zebra/shutdown + */ +int lib_interface_zebra_shutdown_modify(struct nb_cb_modify_args *args) +{ + if (args->event != NB_EV_APPLY) + return NB_OK; + + struct interface *ifp; + + ifp = nb_running_get_entry(args->dnode, NULL, true); + + if_shutdown(ifp); + + return NB_OK; +} + +int lib_interface_zebra_shutdown_destroy(struct nb_cb_destroy_args *args) +{ + if (args->event != NB_EV_APPLY) + return NB_OK; + + struct interface *ifp; + + ifp = nb_running_get_entry(args->dnode, NULL, true); + + if_no_shutdown(ifp); + + return NB_OK; +} + +/* + * XPath: /frr-interface:lib/interface/frr-zebra:zebra/bandwidth + */ +int lib_interface_zebra_bandwidth_modify(struct nb_cb_modify_args *args) +{ + if (args->event != NB_EV_APPLY) + return NB_OK; + + struct interface *ifp; + uint32_t bandwidth; + + ifp = nb_running_get_entry(args->dnode, NULL, true); + bandwidth = yang_dnode_get_uint32(args->dnode, "./bandwidth"); + + ifp->bandwidth = bandwidth; + + /* force protocols to recalculate routes due to cost change */ + if (if_is_operative(ifp)) + zebra_interface_up_update(ifp); + + return NB_OK; +} + +int lib_interface_zebra_bandwidth_destroy(struct nb_cb_destroy_args *args) +{ + if (args->event != NB_EV_APPLY) + return NB_OK; + + struct interface *ifp; + + ifp = nb_running_get_entry(args->dnode, NULL, true); + + ifp->bandwidth = 0; + + /* force protocols to recalculate routes due to cost change */ + if (if_is_operative(ifp)) + zebra_interface_up_update(ifp); + + return NB_OK; +} + +/* + * XPath: /frr-vrf:lib/vrf/frr-zebra:zebra/l3vni-id + */ +int lib_vrf_zebra_l3vni_id_modify(struct nb_cb_modify_args *args) +{ + struct vrf *vrf; + 
struct zebra_vrf *zvrf; + vni_t vni = 0; + struct zebra_l3vni *zl3vni = NULL; + char err[ERR_STR_SZ]; + bool pfx_only = false; + const struct lyd_node *pn_dnode; + const char *vrfname; + + switch (args->event) { + case NB_EV_PREPARE: + case NB_EV_ABORT: + return NB_OK; + case NB_EV_VALIDATE: + vni = yang_dnode_get_uint32(args->dnode, NULL); + /* Get vrf info from parent node, reject configuration + * if zebra vrf already mapped to different vni id. + */ + pn_dnode = yang_dnode_get_parent(args->dnode, "vrf"); + vrfname = yang_dnode_get_string(pn_dnode, "./name"); + zvrf = zebra_vrf_lookup_by_name(vrfname); + if (!zvrf) { + snprintf(args->errmsg, args->errmsg_len, + "zebra vrf info not found for vrf:%s.", + vrfname); + return NB_ERR_VALIDATION; + } + if (zvrf->l3vni && zvrf->l3vni != vni) { + snprintf( + args->errmsg, args->errmsg_len, + "vni %u cannot be configured as vni %u is already configured under the vrf", + vni, zvrf->l3vni); + return NB_ERR_VALIDATION; + } + + /* Check if this VNI is already present in the system */ + zl3vni = zl3vni_lookup(vni); + if (zl3vni) { + snprintf(args->errmsg, args->errmsg_len, + "VNI %u is already configured as L3-VNI", vni); + return NB_ERR_VALIDATION; + } + + break; + case NB_EV_APPLY: + + vrf = nb_running_get_entry(args->dnode, NULL, true); + zvrf = zebra_vrf_lookup_by_name(vrf->name); + vni = yang_dnode_get_uint32(args->dnode, NULL); + /* Note: This covers lib_vrf_zebra_prefix_only_modify() config + * along with l3vni config + */ + pfx_only = yang_dnode_get_bool(args->dnode, "../prefix-only"); + + if (zebra_vxlan_process_vrf_vni_cmd(zvrf, vni, err, ERR_STR_SZ, + pfx_only ? 
1 : 0, 1) + != 0) { + if (IS_ZEBRA_DEBUG_VXLAN) + snprintf( + args->errmsg, args->errmsg_len, + "vrf vni %u mapping failed with error: %s", + vni, err); + return NB_ERR; + } + + break; + } + + return NB_OK; +} + +int lib_vrf_zebra_l3vni_id_destroy(struct nb_cb_destroy_args *args) +{ + struct vrf *vrf; + struct zebra_vrf *zvrf; + vni_t vni = 0; + char err[ERR_STR_SZ]; + uint8_t filter = 0; + + switch (args->event) { + case NB_EV_PREPARE: + case NB_EV_ABORT: + case NB_EV_VALIDATE: + return NB_OK; + case NB_EV_APPLY: + vrf = nb_running_get_entry(args->dnode, NULL, true); + zvrf = zebra_vrf_lookup_by_name(vrf->name); + vni = yang_dnode_get_uint32(args->dnode, NULL); + + if (!zl3vni_lookup(vni)) + return NB_OK; + + if (zvrf->l3vni != vni) { + snprintf(args->errmsg, args->errmsg_len, + "vrf %s has different vni %u mapped", + vrf->name, zvrf->l3vni); + return NB_ERR; + } + + if (is_l3vni_for_prefix_routes_only(zvrf->l3vni)) + filter = 1; + + if (zebra_vxlan_process_vrf_vni_cmd(zvrf, vni, err, ERR_STR_SZ, + filter, 0) + != 0) { + if (IS_ZEBRA_DEBUG_VXLAN) + zlog_debug( + "vrf vni %u unmapping failed with error: %s", + vni, err); + return NB_ERR; + } + + break; + } + + return NB_OK; +} + +/* + * XPath: /frr-vrf:lib/vrf/frr-zebra:zebra/prefix-only + */ +int lib_vrf_zebra_prefix_only_modify(struct nb_cb_modify_args *args) +{ + switch (args->event) { + case NB_EV_VALIDATE: + case NB_EV_PREPARE: + case NB_EV_ABORT: + case NB_EV_APPLY: + /* TODO: implement me. */ + break; + } + + return NB_OK; +} diff --git a/zebra/zebra_nb_rpcs.c b/zebra/zebra_nb_rpcs.c new file mode 100644 index 0000000..e7d438b --- /dev/null +++ b/zebra/zebra_nb_rpcs.c @@ -0,0 +1,201 @@ +/* + * Copyright (C) 2020 Cumulus Networks, Inc. + * Chirag Shah + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License as published by the Free + * Software Foundation; either version 2 of the License, or (at your option) + * any later version. 
+ * + * This program is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for + * more details. + * + * You should have received a copy of the GNU General Public License along + * with this program; see the file COPYING; if not, write to the Free Software + * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA + */ + +#include <zebra.h> +#include "northbound.h" +#include "libfrr.h" + +#include "zebra/zebra_nb.h" +#include "zebra/zebra_router.h" +#include "zebra/zebra_vrf.h" +#include "zebra/zebra_vxlan.h" + +/* + * XPath: /frr-zebra:clear-evpn-dup-addr + */ +int clear_evpn_dup_addr_rpc(struct nb_cb_rpc_args *args) +{ + struct zebra_vrf *zvrf; + int ret = NB_OK; + struct yang_data *yang_dup_choice = NULL, *yang_dup_vni = NULL, + *yang_dup_ip = NULL, *yang_dup_mac = NULL; + + yang_dup_choice = yang_data_list_find(args->input, "%s/%s", args->xpath, + "input/clear-dup-choice"); + + zvrf = zebra_vrf_get_evpn(); + + if (yang_dup_choice + && strcmp(yang_dup_choice->value, "all-case") == 0) { + zebra_vxlan_clear_dup_detect_vni_all(zvrf); + } else { + vni_t vni; + struct ipaddr host_ip = {.ipa_type = IPADDR_NONE}; + struct ethaddr mac; + + yang_dup_vni = yang_data_list_find( + args->input, "%s/%s", args->xpath, + "input/clear-dup-choice/single-case/vni-id"); + if (yang_dup_vni) { + vni = yang_str2uint32(yang_dup_vni->value); + + yang_dup_mac = yang_data_list_find( + args->input, "%s/%s", args->xpath, + "input/clear-dup-choice/single-case/vni-id/mac-addr"); + yang_dup_ip = yang_data_list_find( + args->input, "%s/%s", args->xpath, + "input/clear-dup-choice/single-case/vni-id/vni-ipaddr"); + + if (yang_dup_mac) { + yang_str2mac(yang_dup_mac->value, &mac); + ret = zebra_vxlan_clear_dup_detect_vni_mac( + zvrf, vni, &mac, args->errmsg, + args->errmsg_len); + } else if (yang_dup_ip) { + 
yang_str2ip(yang_dup_ip->value, &host_ip); + ret = zebra_vxlan_clear_dup_detect_vni_ip( + zvrf, vni, &host_ip, args->errmsg, + args->errmsg_len); + } else + ret = zebra_vxlan_clear_dup_detect_vni(zvrf, + vni); + } + } + if (ret < 0) + return NB_ERR; + + return NB_OK; +} + +/* + * XPath: /frr-zebra:get-route-information + */ +int get_route_information_rpc(struct nb_cb_rpc_args *args) +{ + /* TODO: implement me. */ + return NB_ERR_NOT_FOUND; +} + +/* + * XPath: /frr-zebra:get-v6-mroute-info + */ +int get_v6_mroute_info_rpc(struct nb_cb_rpc_args *args) +{ + /* TODO: implement me. */ + return NB_ERR_NOT_FOUND; +} + +/* + * XPath: /frr-zebra:get-vrf-info + */ +int get_vrf_info_rpc(struct nb_cb_rpc_args *args) +{ + /* TODO: implement me. */ + return NB_ERR_NOT_FOUND; +} + +/* + * XPath: /frr-zebra:get-vrf-vni-info + */ +int get_vrf_vni_info_rpc(struct nb_cb_rpc_args *args) +{ + /* TODO: implement me. */ + return NB_ERR_NOT_FOUND; +} + +/* + * XPath: /frr-zebra:get-evpn-info + */ +int get_evpn_info_rpc(struct nb_cb_rpc_args *args) +{ + /* TODO: implement me. */ + return NB_ERR_NOT_FOUND; +} + +/* + * XPath: /frr-zebra:get-vni-info + */ +int get_vni_info_rpc(struct nb_cb_rpc_args *args) +{ + /* TODO: implement me. */ + return NB_ERR_NOT_FOUND; +} + +/* + * XPath: /frr-zebra:get-evpn-vni-rmac + */ +int get_evpn_vni_rmac_rpc(struct nb_cb_rpc_args *args) +{ + /* TODO: implement me. */ + return NB_ERR_NOT_FOUND; +} + +/* + * XPath: /frr-zebra:get-evpn-vni-nexthops + */ +int get_evpn_vni_nexthops_rpc(struct nb_cb_rpc_args *args) +{ + /* TODO: implement me. */ + return NB_ERR_NOT_FOUND; +} + +/* + * XPath: /frr-zebra:get-evpn-macs + */ +int get_evpn_macs_rpc(struct nb_cb_rpc_args *args) +{ + /* TODO: implement me. */ + return NB_ERR_NOT_FOUND; +} + +/* + * XPath: /frr-zebra:get-evpn-arp-cache + */ +int get_evpn_arp_cache_rpc(struct nb_cb_rpc_args *args) +{ + /* TODO: implement me. 
*/ + return NB_ERR_NOT_FOUND; +} + +/* + * XPath: /frr-zebra:get-pbr-ipset + */ +int get_pbr_ipset_rpc(struct nb_cb_rpc_args *args) +{ + /* TODO: implement me. */ + return NB_ERR_NOT_FOUND; +} + +/* + * XPath: /frr-zebra:get-pbr-iptable + */ +int get_pbr_iptable_rpc(struct nb_cb_rpc_args *args) +{ + /* TODO: implement me. */ + return NB_ERR_NOT_FOUND; +} + +/* + * XPath: /frr-zebra:get-debugs + */ +int get_debugs_rpc(struct nb_cb_rpc_args *args) +{ + /* TODO: implement me. */ + return NB_ERR_NOT_FOUND; +} diff --git a/zebra/zebra_nb_state.c b/zebra/zebra_nb_state.c new file mode 100644 index 0000000..219e4d0 --- /dev/null +++ b/zebra/zebra_nb_state.c @@ -0,0 +1,1001 @@ +/* + * Copyright (C) 2020 Cumulus Networks, Inc. + * Chirag Shah + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License as published by the Free + * Software Foundation; either version 2 of the License, or (at your option) + * any later version. + * + * This program is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for + * more details. 
+ * + * You should have received a copy of the GNU General Public License along + * with this program; see the file COPYING; if not, write to the Free Software + * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA + */ + +#include <zebra.h> +#include "northbound.h" +#include "libfrr.h" +#include "zebra_nb.h" +#include "zebra/interface.h" +#include "zebra/zebra_router.h" +#include "zebra/debug.h" +#include "printfrr.h" + +/* + * XPath: /frr-interface:lib/interface/frr-zebra:zebra/state/up-count + */ +struct yang_data * +lib_interface_zebra_state_up_count_get_elem(struct nb_cb_get_elem_args *args) +{ + const struct interface *ifp = args->list_entry; + struct zebra_if *zebra_if; + + zebra_if = ifp->info; + + return yang_data_new_uint16(args->xpath, zebra_if->up_count); +} + +/* + * XPath: /frr-interface:lib/interface/frr-zebra:zebra/state/down-count + */ +struct yang_data * +lib_interface_zebra_state_down_count_get_elem(struct nb_cb_get_elem_args *args) +{ + const struct interface *ifp = args->list_entry; + struct zebra_if *zebra_if; + + zebra_if = ifp->info; + + return yang_data_new_uint16(args->xpath, zebra_if->down_count); +} + +/* + * XPath: /frr-interface:lib/interface/frr-zebra:zebra/state/zif-type + */ +struct yang_data * +lib_interface_zebra_state_zif_type_get_elem(struct nb_cb_get_elem_args *args) +{ + /* TODO: implement me. */ + return NULL; +} + +/* + * XPath: /frr-interface:lib/interface/frr-zebra:zebra/state/ptm-status + */ +struct yang_data * +lib_interface_zebra_state_ptm_status_get_elem(struct nb_cb_get_elem_args *args) +{ + /* TODO: implement me. 
*/ + return NULL; +} + +/* + * XPath: /frr-interface:lib/interface/frr-zebra:zebra/state/vlan-id + */ +struct yang_data * +lib_interface_zebra_state_vlan_id_get_elem(struct nb_cb_get_elem_args *args) +{ + const struct interface *ifp = args->list_entry; + struct zebra_if *zebra_if; + struct zebra_l2info_vlan *vlan_info; + + if (!IS_ZEBRA_IF_VLAN(ifp)) + return NULL; + + zebra_if = ifp->info; + vlan_info = &zebra_if->l2info.vl; + + return yang_data_new_uint16(args->xpath, vlan_info->vid); +} + +/* + * XPath: /frr-interface:lib/interface/frr-zebra:zebra/state/vni-id + */ +struct yang_data * +lib_interface_zebra_state_vni_id_get_elem(struct nb_cb_get_elem_args *args) +{ + const struct interface *ifp = args->list_entry; + struct zebra_if *zebra_if; + struct zebra_l2info_vxlan *vxlan_info; + + if (!IS_ZEBRA_IF_VXLAN(ifp)) + return NULL; + + zebra_if = ifp->info; + vxlan_info = &zebra_if->l2info.vxl; + + return yang_data_new_uint32(args->xpath, vxlan_info->vni); +} + +/* + * XPath: /frr-interface:lib/interface/frr-zebra:zebra/state/remote-vtep + */ +struct yang_data * +lib_interface_zebra_state_remote_vtep_get_elem(struct nb_cb_get_elem_args *args) +{ + const struct interface *ifp = args->list_entry; + struct zebra_if *zebra_if; + struct zebra_l2info_vxlan *vxlan_info; + + if (!IS_ZEBRA_IF_VXLAN(ifp)) + return NULL; + + zebra_if = ifp->info; + vxlan_info = &zebra_if->l2info.vxl; + + return yang_data_new_ipv4(args->xpath, &vxlan_info->vtep_ip); +} + +/* + * XPath: /frr-interface:lib/interface/frr-zebra:zebra/state/mcast-group + */ +struct yang_data * +lib_interface_zebra_state_mcast_group_get_elem(struct nb_cb_get_elem_args *args) +{ + const struct interface *ifp = args->list_entry; + struct zebra_if *zebra_if; + struct zebra_l2info_vxlan *vxlan_info; + + if (!IS_ZEBRA_IF_VXLAN(ifp)) + return NULL; + + zebra_if = ifp->info; + vxlan_info = &zebra_if->l2info.vxl; + + return yang_data_new_ipv4(args->xpath, &vxlan_info->mcast_grp); +} + +const void 
*lib_vrf_zebra_ribs_rib_get_next(struct nb_cb_get_next_args *args) +{ + struct vrf *vrf = (struct vrf *)args->parent_list_entry; + struct zebra_router_table *zrt = + (struct zebra_router_table *)args->list_entry; + + struct zebra_vrf *zvrf; + afi_t afi; + safi_t safi; + + zvrf = zebra_vrf_lookup_by_id(vrf->vrf_id); + + if (args->list_entry == NULL) { + afi = AFI_IP; + safi = SAFI_UNICAST; + + zrt = zebra_router_find_zrt(zvrf, zvrf->table_id, afi, safi); + if (zrt == NULL) + return NULL; + } else { + zrt = RB_NEXT(zebra_router_table_head, zrt); + /* vrf_id/ns_id do not match, only walk for the given VRF */ + while (zrt && zrt->ns_id != zvrf->zns->ns_id) + zrt = RB_NEXT(zebra_router_table_head, zrt); + } + + return zrt; +} + +int lib_vrf_zebra_ribs_rib_get_keys(struct nb_cb_get_keys_args *args) +{ + const struct zebra_router_table *zrt = args->list_entry; + + args->keys->num = 2; + + snprintfrr(args->keys->key[0], sizeof(args->keys->key[0]), "%s", + yang_afi_safi_value2identity(zrt->afi, zrt->safi)); + snprintfrr(args->keys->key[1], sizeof(args->keys->key[1]), "%u", + zrt->tableid); + + return NB_OK; +} + +const void * +lib_vrf_zebra_ribs_rib_lookup_entry(struct nb_cb_lookup_entry_args *args) +{ + struct vrf *vrf = (struct vrf *)args->parent_list_entry; + struct zebra_vrf *zvrf; + afi_t afi; + safi_t safi; + uint32_t table_id = 0; + + zvrf = zebra_vrf_lookup_by_id(vrf->vrf_id); + + yang_afi_safi_identity2value(args->keys->key[0], &afi, &safi); + table_id = yang_str2uint32(args->keys->key[1]); + /* table_id 0 assume vrf's table_id. 
*/ + if (!table_id) + table_id = zvrf->table_id; + + return zebra_router_find_zrt(zvrf, table_id, afi, safi); +} + +/* + * XPath: /frr-vrf:lib/vrf/frr-zebra:zebra/ribs/rib/afi-safi-name + */ +struct yang_data * +lib_vrf_zebra_ribs_rib_afi_safi_name_get_elem(struct nb_cb_get_elem_args *args) +{ + const struct zebra_router_table *zrt = args->list_entry; + + return yang_data_new_string(args->xpath, + yang_afi_safi_value2identity(zrt->afi, zrt->safi)); +} + +/* + * XPath: /frr-vrf:lib/vrf/frr-zebra:zebra/ribs/rib/table-id + */ +struct yang_data * +lib_vrf_zebra_ribs_rib_table_id_get_elem(struct nb_cb_get_elem_args *args) +{ + const struct zebra_router_table *zrt = args->list_entry; + + return yang_data_new_uint32(args->xpath, zrt->tableid); +} + +/* + * XPath: /frr-vrf:lib/vrf/frr-zebra:zebra/ribs/rib/route + */ +const void * +lib_vrf_zebra_ribs_rib_route_get_next(struct nb_cb_get_next_args *args) +{ + const struct zebra_router_table *zrt = args->parent_list_entry; + struct route_node *rn = (struct route_node *)args->list_entry; + + if (args->list_entry == NULL) + rn = route_top(zrt->table); + else + rn = srcdest_route_next(rn); + /* Optimization: skip empty route nodes. */ + while (rn && rn->info == NULL) + rn = route_next(rn); + + /* Skip link-local routes. 
*/ + if (rn && rn->p.family == AF_INET6 + && IN6_IS_ADDR_LINKLOCAL(&rn->p.u.prefix6)) + return NULL; + + return rn; +} + +int lib_vrf_zebra_ribs_rib_route_get_keys(struct nb_cb_get_keys_args *args) +{ + const struct route_node *rn = args->list_entry; + + args->keys->num = 1; + prefix2str(&rn->p, args->keys->key[0], sizeof(args->keys->key[0])); + + return NB_OK; +} + +const void * +lib_vrf_zebra_ribs_rib_route_lookup_entry(struct nb_cb_lookup_entry_args *args) +{ + const struct zebra_router_table *zrt = args->parent_list_entry; + struct prefix p; + struct route_node *rn; + + yang_str2prefix(args->keys->key[0], &p); + + rn = route_node_lookup(zrt->table, &p); + + if (!rn) + return NULL; + + route_unlock_node(rn); + + return rn; +} + +/* + * XPath: /frr-vrf:lib/vrf/frr-zebra:zebra/ribs/rib/route/prefix + */ +struct yang_data * +lib_vrf_zebra_ribs_rib_route_prefix_get_elem(struct nb_cb_get_elem_args *args) +{ + const struct route_node *rn = args->list_entry; + + return yang_data_new_prefix(args->xpath, &rn->p); +} + +/* + * XPath: /frr-vrf:lib/vrf/frr-zebra:zebra/ribs/rib/route/route-entry + */ +const void *lib_vrf_zebra_ribs_rib_route_route_entry_get_next( + struct nb_cb_get_next_args *args) +{ + struct route_entry *re = (struct route_entry *)args->list_entry; + struct route_node *rn = (struct route_node *)args->parent_list_entry; + + if (args->list_entry == NULL) + RNODE_FIRST_RE(rn, re); + else + RNODE_NEXT_RE(rn, re); + + return re; +} + +int lib_vrf_zebra_ribs_rib_route_route_entry_get_keys( + struct nb_cb_get_keys_args *args) +{ + struct route_entry *re = (struct route_entry *)args->list_entry; + + args->keys->num = 1; + + strlcpy(args->keys->key[0], zebra_route_string(re->type), + sizeof(args->keys->key[0])); + + return NB_OK; +} + +const void *lib_vrf_zebra_ribs_rib_route_route_entry_lookup_entry( + struct nb_cb_lookup_entry_args *args) +{ + struct route_node *rn = (struct route_node *)args->parent_list_entry; + struct route_entry *re = NULL; + int proto_type = 
0; + afi_t afi; + + afi = family2afi(rn->p.family); + proto_type = proto_redistnum(afi, args->keys->key[0]); + + RNODE_FOREACH_RE (rn, re) { + if (proto_type == re->type) + return re; + } + + return NULL; +} + +/* + * XPath: /frr-vrf:lib/vrf/frr-zebra:zebra/ribs/rib/route/route-entry/protocol + */ +struct yang_data *lib_vrf_zebra_ribs_rib_route_route_entry_protocol_get_elem( + struct nb_cb_get_elem_args *args) +{ + struct route_entry *re = (struct route_entry *)args->list_entry; + + return yang_data_new_enum(args->xpath, re->type); +} + +/* + * XPath: /frr-vrf:lib/vrf/frr-zebra:zebra/ribs/rib/route/route-entry/instance + */ +struct yang_data *lib_vrf_zebra_ribs_rib_route_route_entry_instance_get_elem( + struct nb_cb_get_elem_args *args) +{ + struct route_entry *re = (struct route_entry *)args->list_entry; + + if (re->instance) + return yang_data_new_uint16(args->xpath, re->instance); + + return NULL; +} + +/* + * XPath: /frr-vrf:lib/vrf/frr-zebra:zebra/ribs/rib/route/route-entry/distance + */ +struct yang_data *lib_vrf_zebra_ribs_rib_route_route_entry_distance_get_elem( + struct nb_cb_get_elem_args *args) +{ + struct route_entry *re = (struct route_entry *)args->list_entry; + + return yang_data_new_uint8(args->xpath, re->distance); +} + +/* + * XPath: /frr-vrf:lib/vrf/frr-zebra:zebra/ribs/rib/route/route-entry/metric + */ +struct yang_data *lib_vrf_zebra_ribs_rib_route_route_entry_metric_get_elem( + struct nb_cb_get_elem_args *args) +{ + struct route_entry *re = (struct route_entry *)args->list_entry; + + return yang_data_new_uint32(args->xpath, re->metric); +} + +/* + * XPath: /frr-vrf:lib/vrf/frr-zebra:zebra/ribs/rib/route/route-entry/tag + */ +struct yang_data *lib_vrf_zebra_ribs_rib_route_route_entry_tag_get_elem( + struct nb_cb_get_elem_args *args) +{ + struct route_entry *re = (struct route_entry *)args->list_entry; + + if (re->tag) + return yang_data_new_uint32(args->xpath, re->tag); + + return NULL; +} + +/* + * XPath: 
/frr-vrf:lib/vrf/frr-zebra:zebra/ribs/rib/route/route-entry/selected + */ +struct yang_data *lib_vrf_zebra_ribs_rib_route_route_entry_selected_get_elem( + struct nb_cb_get_elem_args *args) +{ + struct route_entry *re = (struct route_entry *)args->list_entry; + + if (CHECK_FLAG(re->flags, ZEBRA_FLAG_SELECTED)) + return yang_data_new_empty(args->xpath); + + return NULL; +} + +/* + * XPath: /frr-vrf:lib/vrf/frr-zebra:zebra/ribs/rib/route/route-entry/installed + */ +struct yang_data *lib_vrf_zebra_ribs_rib_route_route_entry_installed_get_elem( + struct nb_cb_get_elem_args *args) +{ + struct route_entry *re = (struct route_entry *)args->list_entry; + + if (CHECK_FLAG(re->status, ROUTE_ENTRY_INSTALLED)) + return yang_data_new_empty(args->xpath); + + return NULL; +} + +/* + * XPath: /frr-vrf:lib/vrf/frr-zebra:zebra/ribs/rib/route/route-entry/failed + */ +struct yang_data *lib_vrf_zebra_ribs_rib_route_route_entry_failed_get_elem( + struct nb_cb_get_elem_args *args) +{ + struct route_entry *re = (struct route_entry *)args->list_entry; + + if (CHECK_FLAG(re->status, ROUTE_ENTRY_FAILED)) + return yang_data_new_empty(args->xpath); + + return NULL; +} + +/* + * XPath: /frr-vrf:lib/vrf/frr-zebra:zebra/ribs/rib/route/route-entry/queued + */ +struct yang_data *lib_vrf_zebra_ribs_rib_route_route_entry_queued_get_elem( + struct nb_cb_get_elem_args *args) +{ + struct route_entry *re = (struct route_entry *)args->list_entry; + + if (CHECK_FLAG(re->status, ROUTE_ENTRY_QUEUED)) + return yang_data_new_empty(args->xpath); + + return NULL; +} + +/* + * XPath: + * /frr-vrf:lib/vrf/frr-zebra:zebra/ribs/rib/route/route-entry/internal-flags + */ +struct yang_data * +lib_vrf_zebra_ribs_rib_route_route_entry_internal_flags_get_elem( + struct nb_cb_get_elem_args *args) +{ + struct route_entry *re = (struct route_entry *)args->list_entry; + + if (re->flags) + return yang_data_new_int32(args->xpath, re->flags); + + return NULL; +} + +/* + * XPath: + * 
/frr-vrf:lib/vrf/frr-zebra:zebra/ribs/rib/route/route-entry/internal-status + */ +struct yang_data * +lib_vrf_zebra_ribs_rib_route_route_entry_internal_status_get_elem( + struct nb_cb_get_elem_args *args) +{ + struct route_entry *re = (struct route_entry *)args->list_entry; + + if (re->status) + return yang_data_new_int32(args->xpath, re->status); + + return NULL; +} + +/* + * XPath: /frr-vrf:lib/vrf/frr-zebra:zebra/ribs/rib/route/route-entry/uptime + */ +struct yang_data *lib_vrf_zebra_ribs_rib_route_route_entry_uptime_get_elem( + struct nb_cb_get_elem_args *args) +{ + struct route_entry *re = (struct route_entry *)args->list_entry; + + return yang_data_new_date_and_time(args->xpath, re->uptime); +} + +/* + * XPath: + * /frr-vrf:lib/vrf/frr-zebra:zebra/ribs/rib/route/route-entry/nexthop-group/id + */ +struct yang_data * +lib_vrf_zebra_ribs_rib_route_route_entry_nexthop_group_id_get_elem( + struct nb_cb_get_elem_args *args) +{ + struct route_entry *re = (struct route_entry *)args->list_entry; + + return yang_data_new_uint32(args->xpath, re->nhe->id); +} + +/* + * XPath: + * /frr-vrf:lib/vrf/frr-zebra:zebra/ribs/rib/route/route-entry/nexthop-group/nexthop + */ +const void * +lib_vrf_zebra_ribs_rib_route_route_entry_nexthop_group_nexthop_get_next( + struct nb_cb_get_next_args *args) +{ + struct nexthop *nexthop = (struct nexthop *)args->list_entry; + struct route_entry *re = (struct route_entry *)args->parent_list_entry; + struct nhg_hash_entry *nhe = re->nhe; + + if (args->list_entry == NULL) { + nexthop = nhe->nhg.nexthop; + } else + nexthop = nexthop_next(nexthop); + + return nexthop; +} + +int lib_vrf_zebra_ribs_rib_route_route_entry_nexthop_group_nexthop_get_keys( + struct nb_cb_get_keys_args *args) +{ + struct nexthop *nexthop = (struct nexthop *)args->list_entry; + + args->keys->num = 4; + + strlcpy(args->keys->key[0], yang_nexthop_type2str(nexthop->type), + sizeof(args->keys->key[0])); + + snprintfrr(args->keys->key[1], sizeof(args->keys->key[1]), "%" PRIu32, 
+ nexthop->vrf_id); + + switch (nexthop->type) { + case NEXTHOP_TYPE_IPV4: + case NEXTHOP_TYPE_IPV4_IFINDEX: + snprintfrr(args->keys->key[2], sizeof(args->keys->key[2]), + "%pI4", &nexthop->gate.ipv4); + if (nexthop->ifindex) + strlcpy(args->keys->key[3], + ifindex2ifname(nexthop->ifindex, + nexthop->vrf_id), + sizeof(args->keys->key[3])); + else + /* no ifindex */ + strlcpy(args->keys->key[3], " ", + sizeof(args->keys->key[3])); + + break; + case NEXTHOP_TYPE_IPV6: + case NEXTHOP_TYPE_IPV6_IFINDEX: + snprintfrr(args->keys->key[2], sizeof(args->keys->key[2]), + "%pI6", &nexthop->gate.ipv6); + + if (nexthop->ifindex) + strlcpy(args->keys->key[3], + ifindex2ifname(nexthop->ifindex, + nexthop->vrf_id), + sizeof(args->keys->key[3])); + else + /* no ifindex */ + strlcpy(args->keys->key[3], " ", + sizeof(args->keys->key[3])); + + break; + case NEXTHOP_TYPE_IFINDEX: + strlcpy(args->keys->key[2], "", sizeof(args->keys->key[2])); + strlcpy(args->keys->key[3], + ifindex2ifname(nexthop->ifindex, nexthop->vrf_id), + sizeof(args->keys->key[3])); + + break; + case NEXTHOP_TYPE_BLACKHOLE: + /* Gateway IP */ + strlcpy(args->keys->key[2], "", sizeof(args->keys->key[2])); + strlcpy(args->keys->key[3], " ", sizeof(args->keys->key[3])); + break; + default: + break; + } + + return NB_OK; +} + +const void * +lib_vrf_zebra_ribs_rib_route_route_entry_nexthop_group_nexthop_lookup_entry( + struct nb_cb_lookup_entry_args *args) +{ + struct nhg_hash_entry *nhe; + struct nexthop nexthop_lookup = {}; + struct nexthop *nexthop; + const char *nh_type_str; + + nhe = (struct nhg_hash_entry *)args->parent_list_entry; + nexthop_lookup.vrf_id = nhe->vrf_id; + + /* + * Get nexthop type. + * TODO: use yang_str2enum() instead. 
+ */ + nh_type_str = args->keys->key[0]; + if (strmatch(nh_type_str, "ifindex")) + nexthop_lookup.type = NEXTHOP_TYPE_IFINDEX; + else if (strmatch(nh_type_str, "ip4")) + nexthop_lookup.type = NEXTHOP_TYPE_IPV4; + else if (strmatch(nh_type_str, "ip4-ifindex")) + nexthop_lookup.type = NEXTHOP_TYPE_IPV4_IFINDEX; + else if (strmatch(nh_type_str, "ip6")) + nexthop_lookup.type = NEXTHOP_TYPE_IPV6; + else if (strmatch(nh_type_str, "ip6-ifindex")) + nexthop_lookup.type = NEXTHOP_TYPE_IPV6_IFINDEX; + else if (strmatch(nh_type_str, "blackhole")) + nexthop_lookup.type = NEXTHOP_TYPE_BLACKHOLE; + else + /* unexpected */ + return NULL; + + /* Get nexthop address. */ + switch (nexthop_lookup.type) { + case NEXTHOP_TYPE_IPV4: + case NEXTHOP_TYPE_IPV4_IFINDEX: + yang_str2ipv4(args->keys->key[1], &nexthop_lookup.gate.ipv4); + break; + case NEXTHOP_TYPE_IPV6: + case NEXTHOP_TYPE_IPV6_IFINDEX: + yang_str2ipv6(args->keys->key[1], &nexthop_lookup.gate.ipv6); + break; + case NEXTHOP_TYPE_IFINDEX: + case NEXTHOP_TYPE_BLACKHOLE: + break; + } + + /* Get nexthop interface. */ + switch (nexthop_lookup.type) { + case NEXTHOP_TYPE_IPV4_IFINDEX: + case NEXTHOP_TYPE_IPV6_IFINDEX: + case NEXTHOP_TYPE_IFINDEX: + nexthop_lookup.ifindex = + ifname2ifindex(args->keys->key[2], nhe->vrf_id); + break; + case NEXTHOP_TYPE_IPV4: + case NEXTHOP_TYPE_IPV6: + case NEXTHOP_TYPE_BLACKHOLE: + break; + } + + /* Lookup requested nexthop (ignore weight and metric). 
*/ + for (ALL_NEXTHOPS(nhe->nhg, nexthop)) { + nexthop_lookup.weight = nexthop->weight; + nexthop_lookup.src = nexthop->src; + if (nexthop_same_no_labels(&nexthop_lookup, nexthop)) + return nexthop; + } + + return NULL; +} + +/* + * XPath: + * /frr-vrf:lib/vrf/frr-zebra:zebra/ribs/rib/route/route-entry/nexthop-group/nexthop/nh-type + */ +struct yang_data * +lib_vrf_zebra_ribs_rib_route_route_entry_nexthop_group_nexthop_nh_type_get_elem( + struct nb_cb_get_elem_args *args) +{ + struct nexthop *nexthop = (struct nexthop *)args->list_entry; + + switch (nexthop->type) { + case NEXTHOP_TYPE_IFINDEX: + return yang_data_new_string(args->xpath, "ifindex"); + break; + case NEXTHOP_TYPE_IPV4: + return yang_data_new_string(args->xpath, "ip4"); + break; + case NEXTHOP_TYPE_IPV4_IFINDEX: + return yang_data_new_string(args->xpath, "ip4-ifindex"); + break; + case NEXTHOP_TYPE_IPV6: + return yang_data_new_string(args->xpath, "ip6"); + break; + case NEXTHOP_TYPE_IPV6_IFINDEX: + return yang_data_new_string(args->xpath, "ip6-ifindex"); + break; + default: + break; + } + + return NULL; +} + +/* + * XPath: + * /frr-vrf:lib/vrf/frr-zebra:zebra/ribs/rib/route/route-entry/nexthop-group/nexthop/vrf + */ +struct yang_data * +lib_vrf_zebra_ribs_rib_route_route_entry_nexthop_group_nexthop_vrf_get_elem( + struct nb_cb_get_elem_args *args) +{ + struct nexthop *nexthop = (struct nexthop *)args->list_entry; + + return yang_data_new_string(args->xpath, + vrf_id_to_name(nexthop->vrf_id)); +} + +/* + * XPath: + * /frr-vrf:lib/vrf/frr-zebra:zebra/ribs/rib/route/route-entry/nexthop-group/nexthop/gateway + */ +struct yang_data * +lib_vrf_zebra_ribs_rib_route_route_entry_nexthop_group_nexthop_gateway_get_elem( + struct nb_cb_get_elem_args *args) +{ + struct nexthop *nexthop = (struct nexthop *)args->list_entry; + struct ipaddr addr; + + switch (nexthop->type) { + case NEXTHOP_TYPE_IPV4: + case NEXTHOP_TYPE_IPV4_IFINDEX: + addr.ipa_type = IPADDR_V4; + memcpy(&addr.ipaddr_v4, &(nexthop->gate.ipv4), + 
sizeof(struct in_addr)); + break; + case NEXTHOP_TYPE_IPV6: + case NEXTHOP_TYPE_IPV6_IFINDEX: + addr.ipa_type = IPADDR_V6; + memcpy(&addr.ipaddr_v6, &(nexthop->gate.ipv6), + sizeof(struct in6_addr)); + break; + case NEXTHOP_TYPE_BLACKHOLE: + case NEXTHOP_TYPE_IFINDEX: + /* No addr here */ + return yang_data_new_string(args->xpath, ""); + break; + default: + break; + } + + return yang_data_new_ip(args->xpath, &addr); +} + +/* + * XPath: + * /frr-vrf:lib/vrf/frr-zebra:zebra/ribs/rib/route/route-entry/nexthop-group/nexthop/interface + */ +struct yang_data * +lib_vrf_zebra_ribs_rib_route_route_entry_nexthop_group_nexthop_interface_get_elem( + struct nb_cb_get_elem_args *args) +{ + struct nexthop *nexthop = (struct nexthop *)args->list_entry; + + if (nexthop->ifindex) + return yang_data_new_string( + args->xpath, + ifindex2ifname(nexthop->ifindex, nexthop->vrf_id)); + + return NULL; +} + +/* + * XPath: + * /frr-vrf:lib/vrf/frr-zebra:zebra/ribs/rib/route/route-entry/nexthop-group/nexthop/bh-type + */ +struct yang_data * +lib_vrf_zebra_ribs_rib_route_route_entry_nexthop_group_nexthop_bh_type_get_elem( + struct nb_cb_get_elem_args *args) +{ + struct nexthop *nexthop = (struct nexthop *)args->list_entry; + const char *type_str = ""; + + if (nexthop->type != NEXTHOP_TYPE_BLACKHOLE) + return NULL; + + switch (nexthop->bh_type) { + case BLACKHOLE_NULL: + type_str = "null"; + break; + case BLACKHOLE_REJECT: + type_str = "reject"; + break; + case BLACKHOLE_ADMINPROHIB: + type_str = "prohibited"; + break; + case BLACKHOLE_UNSPEC: + type_str = "unspec"; + break; + } + + return yang_data_new_string(args->xpath, type_str); +} + +/* + * XPath: + * /frr-vrf:lib/vrf/frr-zebra:zebra/ribs/rib/route/route-entry/nexthop-group/nexthop/onlink + */ +struct yang_data * +lib_vrf_zebra_ribs_rib_route_route_entry_nexthop_group_nexthop_onlink_get_elem( + struct nb_cb_get_elem_args *args) +{ + struct nexthop *nexthop = (struct nexthop *)args->list_entry; + + if (CHECK_FLAG(nexthop->flags, 
NEXTHOP_FLAG_ONLINK)) + return yang_data_new_bool(args->xpath, true); + + return NULL; +} + +/* + * XPath: + * /frr-vrf:lib/vrf/frr-zebra:zebra/ribs/rib/route/route-entry/nexthop-group/nexthop/srte-color + */ +struct yang_data * +lib_vrf_zebra_ribs_rib_route_route_entry_nexthop_group_nexthop_color_get_elem( + struct nb_cb_get_elem_args *args) +{ + struct nexthop *nexthop = (struct nexthop *)args->list_entry; + + if (CHECK_FLAG(nexthop->flags, NEXTHOP_FLAG_SRTE)) + return yang_data_new_uint32(args->xpath, nexthop->srte_color); + + return NULL; +} + +/* + * XPath: + * /frr-vrf:lib/vrf/frr-zebra:zebra/ribs/rib/route/route-entry/nexthop-group/nexthop/mpls-label-stack/entry + */ +const void * +lib_vrf_zebra_ribs_rib_route_route_entry_nexthop_group_nexthop_mpls_label_stack_entry_get_next( + struct nb_cb_get_next_args *args) +{ + /* TODO: implement me. */ + return NULL; +} + +int lib_vrf_zebra_ribs_rib_route_route_entry_nexthop_group_nexthop_mpls_label_stack_entry_get_keys( + struct nb_cb_get_keys_args *args) +{ + /* TODO: implement me. */ + return NB_OK; +} + +const void * +lib_vrf_zebra_ribs_rib_route_route_entry_nexthop_group_nexthop_mpls_label_stack_entry_lookup_entry( + struct nb_cb_lookup_entry_args *args) +{ + /* TODO: implement me. */ + return NULL; +} + +/* + * XPath: + * /frr-vrf:lib/vrf/frr-zebra:zebra/ribs/rib/route/route-entry/nexthop-group/nexthop/mpls-label-stack/entry/id + */ +struct yang_data * +lib_vrf_zebra_ribs_rib_route_route_entry_nexthop_group_nexthop_mpls_label_stack_entry_id_get_elem( + struct nb_cb_get_elem_args *args) +{ + /* TODO: implement me. */ + return NULL; +} + +/* + * XPath: + * /frr-vrf:lib/vrf/frr-zebra:zebra/ribs/rib/route/route-entry/nexthop-group/nexthop/mpls-label-stack/entry/label + */ +struct yang_data * +lib_vrf_zebra_ribs_rib_route_route_entry_nexthop_group_nexthop_mpls_label_stack_entry_label_get_elem( + struct nb_cb_get_elem_args *args) +{ + /* TODO: implement me. 
*/ + return NULL; +} + +/* + * XPath: + * /frr-vrf:lib/vrf/frr-zebra:zebra/ribs/rib/route/route-entry/nexthop-group/nexthop/mpls-label-stack/entry/ttl + */ +struct yang_data * +lib_vrf_zebra_ribs_rib_route_route_entry_nexthop_group_nexthop_mpls_label_stack_entry_ttl_get_elem( + struct nb_cb_get_elem_args *args) +{ + /* TODO: implement me. */ + return NULL; +} + +/* + * XPath: + * /frr-vrf:lib/vrf/frr-zebra:zebra/ribs/rib/route/route-entry/nexthop-group/nexthop/mpls-label-stack/entry/traffic-class + */ +struct yang_data * +lib_vrf_zebra_ribs_rib_route_route_entry_nexthop_group_nexthop_mpls_label_stack_entry_traffic_class_get_elem( + struct nb_cb_get_elem_args *args) +{ + /* TODO: implement me. */ + return NULL; +} + +/* + * XPath: + * /frr-vrf:lib/vrf/frr-zebra:zebra/ribs/rib/route/route-entry/nexthop-group/nexthop/duplicate + */ +struct yang_data * +lib_vrf_zebra_ribs_rib_route_route_entry_nexthop_group_nexthop_duplicate_get_elem( + struct nb_cb_get_elem_args *args) +{ + struct nexthop *nexthop = (struct nexthop *)args->list_entry; + + if (CHECK_FLAG(nexthop->flags, NEXTHOP_FLAG_DUPLICATE)) + return yang_data_new_empty(args->xpath); + + return NULL; +} + +/* + * XPath: + * /frr-vrf:lib/vrf/frr-zebra:zebra/ribs/rib/route/route-entry/nexthop-group/nexthop/recursive + */ +struct yang_data * +lib_vrf_zebra_ribs_rib_route_route_entry_nexthop_group_nexthop_recursive_get_elem( + struct nb_cb_get_elem_args *args) +{ + struct nexthop *nexthop = (struct nexthop *)args->list_entry; + + if (CHECK_FLAG(nexthop->flags, NEXTHOP_FLAG_RECURSIVE)) + return yang_data_new_empty(args->xpath); + + return NULL; +} + +/* + * XPath: + * /frr-vrf:lib/vrf/frr-zebra:zebra/ribs/rib/route/route-entry/nexthop-group/nexthop/active + */ +struct yang_data * +lib_vrf_zebra_ribs_rib_route_route_entry_nexthop_group_nexthop_active_get_elem( + struct nb_cb_get_elem_args *args) +{ + struct nexthop *nexthop = (struct nexthop *)args->list_entry; + + if (CHECK_FLAG(nexthop->flags, NEXTHOP_FLAG_ACTIVE)) + 
return yang_data_new_empty(args->xpath); + + return NULL; +} + +/* + * XPath: + * /frr-vrf:lib/vrf/frr-zebra:zebra/ribs/rib/route/route-entry/nexthop-group/nexthop/fib + */ +struct yang_data * +lib_vrf_zebra_ribs_rib_route_route_entry_nexthop_group_nexthop_fib_get_elem( + struct nb_cb_get_elem_args *args) +{ + struct nexthop *nexthop = (struct nexthop *)args->list_entry; + + if (CHECK_FLAG(nexthop->flags, NEXTHOP_FLAG_FIB)) + return yang_data_new_empty(args->xpath); + + return NULL; +} + +/* + * XPath: + * /frr-vrf:lib/vrf/frr-zebra:zebra/ribs/rib/route/route-entry/nexthop-group/nexthop/weight + */ +struct yang_data * +lib_vrf_zebra_ribs_rib_route_route_entry_nexthop_group_nexthop_weight_get_elem( + struct nb_cb_get_elem_args *args) +{ + struct nexthop *nexthop = (struct nexthop *)args->list_entry; + + if (nexthop->weight) + return yang_data_new_uint8(args->xpath, nexthop->weight); + + return NULL; +} diff --git a/zebra/zebra_neigh.c b/zebra/zebra_neigh.c new file mode 100644 index 0000000..09e9a62 --- /dev/null +++ b/zebra/zebra_neigh.c @@ -0,0 +1,284 @@ +/* + * Zebra neighbor table management + * + * Copyright (C) 2021 Nvidia + * Anuradha Karuppiah + * + * This file is part of FRR. + * + * FRR is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License as published by the + * Free Software Foundation; either version 2, or (at your option) any + * later version. + * + * FRR is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. 
+ */ + +#include <zebra.h> + +#include "command.h" +#include "hash.h" +#include "if.h" +#include "jhash.h" +#include "linklist.h" +#include "log.h" +#include "memory.h" +#include "prefix.h" +#include "stream.h" +#include "table.h" + +#include "zebra/zebra_router.h" +#include "zebra/debug.h" +#include "zebra/interface.h" +#include "zebra/rib.h" +#include "zebra/rt.h" +#include "zebra/rt_netlink.h" +#include "zebra/zebra_errors.h" +#include "zebra/interface.h" +#include "zebra/zebra_neigh.h" +#include "zebra/zebra_pbr.h" + +DEFINE_MTYPE_STATIC(ZEBRA, ZNEIGH_INFO, "Zebra neigh table"); +DEFINE_MTYPE_STATIC(ZEBRA, ZNEIGH_ENT, "Zebra neigh entry"); + +static int zebra_neigh_rb_cmp(const struct zebra_neigh_ent *n1, + const struct zebra_neigh_ent *n2) +{ + if (n1->ifindex < n2->ifindex) + return -1; + + if (n1->ifindex > n2->ifindex) + return 1; + + if (n1->ip.ipa_type < n2->ip.ipa_type) + return -1; + + if (n1->ip.ipa_type > n2->ip.ipa_type) + return 1; + + if (n1->ip.ipa_type == AF_INET) { + if (n1->ip.ipaddr_v4.s_addr < n2->ip.ipaddr_v4.s_addr) + return -1; + + if (n1->ip.ipaddr_v4.s_addr > n2->ip.ipaddr_v4.s_addr) + return 1; + + return 0; + } + + return memcmp(&n1->ip.ipaddr_v6, &n2->ip.ipaddr_v6, IPV6_MAX_BYTELEN); +} +RB_GENERATE(zebra_neigh_rb_head, zebra_neigh_ent, rb_node, zebra_neigh_rb_cmp); + +static struct zebra_neigh_ent *zebra_neigh_find(ifindex_t ifindex, + struct ipaddr *ip) +{ + struct zebra_neigh_ent tmp; + + tmp.ifindex = ifindex; + memcpy(&tmp.ip, ip, sizeof(*ip)); + return RB_FIND(zebra_neigh_rb_head, &zneigh_info->neigh_rb_tree, &tmp); +} + +static struct zebra_neigh_ent * +zebra_neigh_new(ifindex_t ifindex, struct ipaddr *ip, struct ethaddr *mac) +{ + struct zebra_neigh_ent *n; + + n = XCALLOC(MTYPE_ZNEIGH_ENT, sizeof(struct zebra_neigh_ent)); + + memcpy(&n->ip, ip, sizeof(*ip)); + n->ifindex = ifindex; + if (mac) { + memcpy(&n->mac, mac, sizeof(*mac)); + n->flags |= ZEBRA_NEIGH_ENT_ACTIVE; + } + + /* Add to rb_tree */ + if 
(RB_INSERT(zebra_neigh_rb_head, &zneigh_info->neigh_rb_tree, n)) { + XFREE(MTYPE_ZNEIGH_ENT, n); + return NULL; + } + + /* Initialise the pbr rule list */ + n->pbr_rule_list = list_new(); + listset_app_node_mem(n->pbr_rule_list); + + if (IS_ZEBRA_DEBUG_NEIGH) + zlog_debug("zebra neigh new if %d %pIA %pEA", n->ifindex, + &n->ip, &n->mac); + + return n; +} + +static void zebra_neigh_pbr_rules_update(struct zebra_neigh_ent *n) +{ + struct zebra_pbr_rule *rule; + struct listnode *node; + + for (ALL_LIST_ELEMENTS_RO(n->pbr_rule_list, node, rule)) + dplane_pbr_rule_update(rule, rule); +} + +static void zebra_neigh_free(struct zebra_neigh_ent *n) +{ + if (listcount(n->pbr_rule_list)) { + /* if rules are still using the neigh mark it as inactive and + * update the dataplane + */ + if (n->flags & ZEBRA_NEIGH_ENT_ACTIVE) { + n->flags &= ~ZEBRA_NEIGH_ENT_ACTIVE; + memset(&n->mac, 0, sizeof(n->mac)); + } + zebra_neigh_pbr_rules_update(n); + return; + } + if (IS_ZEBRA_DEBUG_NEIGH) + zlog_debug("zebra neigh free if %d %pIA %pEA", n->ifindex, + &n->ip, &n->mac); + + /* cleanup resources maintained against the neigh */ + list_delete(&n->pbr_rule_list); + + RB_REMOVE(zebra_neigh_rb_head, &zneigh_info->neigh_rb_tree, n); + + XFREE(MTYPE_ZNEIGH_ENT, n); +} + +/* kernel neigh del */ +void zebra_neigh_del(struct interface *ifp, struct ipaddr *ip) +{ + struct zebra_neigh_ent *n; + + if (IS_ZEBRA_DEBUG_NEIGH) + zlog_debug("zebra neigh del if %s/%d %pIA", ifp->name, + ifp->ifindex, ip); + + n = zebra_neigh_find(ifp->ifindex, ip); + if (!n) + return; + zebra_neigh_free(n); +} + +/* kernel neigh add */ +void zebra_neigh_add(struct interface *ifp, struct ipaddr *ip, + struct ethaddr *mac) +{ + struct zebra_neigh_ent *n; + + if (IS_ZEBRA_DEBUG_NEIGH) + zlog_debug("zebra neigh add if %s/%d %pIA %pEA", ifp->name, + ifp->ifindex, ip, mac); + + n = zebra_neigh_find(ifp->ifindex, ip); + if (n) { + if (!memcmp(&n->mac, mac, sizeof(*mac))) + return; + + memcpy(&n->mac, mac, sizeof(*mac)); + n->flags 
|= ZEBRA_NEIGH_ENT_ACTIVE; + + /* update rules linked to the neigh */ + zebra_neigh_pbr_rules_update(n); + } else { + zebra_neigh_new(ifp->ifindex, ip, mac); + } +} + +void zebra_neigh_deref(struct zebra_pbr_rule *rule) +{ + struct zebra_neigh_ent *n = rule->action.neigh; + + if (IS_ZEBRA_DEBUG_NEIGH) + zlog_debug("zebra neigh deref if %d %pIA by pbr rule %u", + n->ifindex, &n->ip, rule->rule.seq); + + rule->action.neigh = NULL; + /* remove rule from the list and free if it is inactive */ + list_delete_node(n->pbr_rule_list, &rule->action.neigh_listnode); + if (!(n->flags & ZEBRA_NEIGH_ENT_ACTIVE)) + zebra_neigh_free(n); +} + +/* XXX - this needs to work with evpn's neigh read */ +static void zebra_neigh_read_on_first_ref(void) +{ + static bool neigh_read_done; + + if (!neigh_read_done) { + neigh_read(zebra_ns_lookup(NS_DEFAULT)); + neigh_read_done = true; + } +} + +void zebra_neigh_ref(int ifindex, struct ipaddr *ip, + struct zebra_pbr_rule *rule) +{ + struct zebra_neigh_ent *n; + + if (IS_ZEBRA_DEBUG_NEIGH) + zlog_debug("zebra neigh ref if %d %pIA by pbr rule %u", ifindex, + ip, rule->rule.seq); + + zebra_neigh_read_on_first_ref(); + n = zebra_neigh_find(ifindex, ip); + if (!n) + n = zebra_neigh_new(ifindex, ip, NULL); + + /* link the pbr entry to the neigh */ + if (rule->action.neigh == n) + return; + + if (rule->action.neigh) + zebra_neigh_deref(rule); + + rule->action.neigh = n; + listnode_init(&rule->action.neigh_listnode, rule); + listnode_add(n->pbr_rule_list, &rule->action.neigh_listnode); +} + +static void zebra_neigh_show_one(struct vty *vty, struct zebra_neigh_ent *n) +{ + char mac_buf[ETHER_ADDR_STRLEN]; + char ip_buf[INET6_ADDRSTRLEN]; + struct interface *ifp; + + ifp = if_lookup_by_index_per_ns(zebra_ns_lookup(NS_DEFAULT), + n->ifindex); + ipaddr2str(&n->ip, ip_buf, sizeof(ip_buf)); + prefix_mac2str(&n->mac, mac_buf, sizeof(mac_buf)); + vty_out(vty, "%-20s %-30s %-18s %u\n", ifp ? 
ifp->name : "-", ip_buf, + mac_buf, listcount(n->pbr_rule_list)); +} + +void zebra_neigh_show(struct vty *vty) +{ + struct zebra_neigh_ent *n; + + vty_out(vty, "%-20s %-30s %-18s %s\n", "Interface", "Neighbor", "MAC", + "#Rules"); + RB_FOREACH (n, zebra_neigh_rb_head, &zneigh_info->neigh_rb_tree) + zebra_neigh_show_one(vty, n); +} + +void zebra_neigh_init(void) +{ + zneigh_info = XCALLOC(MTYPE_ZNEIGH_INFO, sizeof(*zrouter.neigh_info)); + RB_INIT(zebra_neigh_rb_head, &zneigh_info->neigh_rb_tree); +} + +void zebra_neigh_terminate(void) +{ + struct zebra_neigh_ent *n, *next; + + if (!zrouter.neigh_info) + return; + + RB_FOREACH_SAFE (n, zebra_neigh_rb_head, &zneigh_info->neigh_rb_tree, + next) + zebra_neigh_free(n); + XFREE(MTYPE_ZNEIGH_INFO, zneigh_info); +} diff --git a/zebra/zebra_neigh.h b/zebra/zebra_neigh.h new file mode 100644 index 0000000..953f2e3 --- /dev/null +++ b/zebra/zebra_neigh.h @@ -0,0 +1,64 @@ +/* + * Zebra neighbor table management + * + * Copyright (C) 2021 Nvidia + * Anuradha Karuppiah + * + * This file is part of FRR. + * + * FRR is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License as published by the + * Free Software Foundation; either version 2, or (at your option) any + * later version. + * + * FRR is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. 
+ */ + +#ifndef _ZEBRA_NEIGH_H +#define _ZEBRA_NEIGH_H + +#include <zebra.h> + +#include "if.h" + +#define zneigh_info zrouter.neigh_info + +struct zebra_neigh_ent { + ifindex_t ifindex; + struct ipaddr ip; + + struct ethaddr mac; + + uint32_t flags; +#define ZEBRA_NEIGH_ENT_ACTIVE (1 << 0) /* can be used for traffic */ + + /* memory used for adding the neigt entry to zneigh_info->es_rb_tree */ + RB_ENTRY(zebra_neigh_ent) rb_node; + + /* list of pbr rules associated with this neigh */ + struct list *pbr_rule_list; +}; +RB_HEAD(zebra_neigh_rb_head, zebra_neigh_ent); +RB_PROTOTYPE(zebra_neigh_rb_head, zebra_neigh_ent, rb_node, zebra_es_rb_cmp); + +struct zebra_neigh_info { + /* RB tree of neighbor entries */ + struct zebra_neigh_rb_head neigh_rb_tree; +}; + + +/****************************************************************************/ +extern void zebra_neigh_add(struct interface *ifp, struct ipaddr *ip, + struct ethaddr *mac); +extern void zebra_neigh_del(struct interface *ifp, struct ipaddr *ip); +extern void zebra_neigh_show(struct vty *vty); +extern void zebra_neigh_init(void); +extern void zebra_neigh_terminate(void); +extern void zebra_neigh_deref(struct zebra_pbr_rule *rule); +extern void zebra_neigh_ref(int ifindex, struct ipaddr *ip, + struct zebra_pbr_rule *rule); + +#endif /* _ZEBRA_NEIGH_H */ diff --git a/zebra/zebra_netns_id.c b/zebra/zebra_netns_id.c new file mode 100644 index 0000000..73d585c --- /dev/null +++ b/zebra/zebra_netns_id.c @@ -0,0 +1,363 @@ +/* zebra NETNS ID handling routines + * those routines are implemented locally to avoid having external dependencies. + * Copyright (C) 2018 6WIND + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License as published by the Free + * Software Foundation; either version 2 of the License, or (at your option) + * any later version. 
+ * + * This program is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for + * more details. + * + * You should have received a copy of the GNU General Public License along + * with this program; see the file COPYING; if not, write to the Free Software + * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA + */ + +#include <zebra.h> + +#include "ns.h" +#include "vrf.h" +#include "log.h" +#include "lib_errors.h" + +#include "zebra/rib.h" +#include "zebra/zebra_dplane.h" +#if defined(HAVE_NETLINK) + +#include <linux/net_namespace.h> +#include <linux/netlink.h> +#include <linux/rtnetlink.h> + +#include "zebra_ns.h" +#include "kernel_netlink.h" +#endif /* defined(HAVE_NETLINK) */ + +#include "zebra/zebra_netns_id.h" +#include "zebra/zebra_errors.h" + +/* in case NEWNSID not available, the NSID will be locally obtained + */ +#define NS_BASE_NSID 0 + +#if defined(HAVE_NETLINK) + +#define NETLINK_SOCKET_BUFFER_SIZE 512 +#define NETLINK_ALIGNTO 4 +#define NETLINK_ALIGN(len) \ + (((len) + NETLINK_ALIGNTO - 1) & ~(NETLINK_ALIGNTO - 1)) +#define NETLINK_NLATTR_LEN(_a, _b) (unsigned int)((char *)_a - (char *)_b) + +#endif /* defined(HAVE_NETLINK) */ + +static ns_id_t zebra_ns_id_get_fallback(const char *netnspath) +{ + static int zebra_ns_id_local; + + return zebra_ns_id_local++; +} + +#if defined(HAVE_NETLINK) + +static struct nlmsghdr *initiate_nlh(char *buf, unsigned int *seq, int type) +{ + struct nlmsghdr *nlh; + + nlh = (struct nlmsghdr *)buf; + nlh->nlmsg_len = NETLINK_ALIGN(sizeof(struct nlmsghdr)); + + nlh->nlmsg_type = type; + nlh->nlmsg_flags = NLM_F_REQUEST; + if (type == RTM_NEWNSID) + nlh->nlmsg_flags |= NLM_F_ACK; + nlh->nlmsg_seq = *seq = time(NULL); + return nlh; +} + +static int send_receive(int sock, struct nlmsghdr *nlh, unsigned int seq, + char *buf) +{ + int ret; + static 
const struct sockaddr_nl snl = {.nl_family = AF_NETLINK}; + + ret = sendto(sock, (const void *)nlh, (size_t)nlh->nlmsg_len, 0, + (struct sockaddr *)&snl, (socklen_t)sizeof(snl)); + if (ret < 0) { + flog_err_sys(EC_LIB_SOCKET, "netlink( %u) sendmsg() error: %s", + sock, safe_strerror(errno)); + return -1; + } + + /* reception */ + struct sockaddr_nl addr; + struct iovec iov = { + .iov_base = buf, .iov_len = NETLINK_SOCKET_BUFFER_SIZE, + }; + struct msghdr msg = { + .msg_name = &addr, + .msg_namelen = sizeof(struct sockaddr_nl), + .msg_iov = &iov, + .msg_iovlen = 1, + .msg_control = NULL, + .msg_controllen = 0, + .msg_flags = 0, + }; + ret = recvmsg(sock, &msg, 0); + if (ret < 0) { + flog_err_sys(EC_LIB_SOCKET, + "netlink recvmsg: error %d (errno %u)", ret, + errno); + return -1; + } + if (msg.msg_flags & MSG_TRUNC) { + flog_err(EC_ZEBRA_NETLINK_LENGTH_ERROR, + "netlink recvmsg : error message truncated"); + return -1; + } + /* nlh already points to buf */ + if (nlh->nlmsg_seq != seq) { + flog_err( + EC_ZEBRA_NETLINK_BAD_SEQUENCE, + "netlink recvmsg: bad sequence number %x (expected %x)", + seq, nlh->nlmsg_seq); + return -1; + } + return ret; +} + +/* extract on a valid nlmsg the nsid + * valid nlmsghdr - not a nlmsgerr + */ +static ns_id_t extract_nsid(struct nlmsghdr *nlh, char *buf) +{ + ns_id_t ns_id = NS_UNKNOWN; + int offset = NETLINK_ALIGN(sizeof(struct nlmsghdr)) + + NETLINK_ALIGN(sizeof(struct rtgenmsg)); + void *tail = (void *)((char *)nlh + NETLINK_ALIGN(nlh->nlmsg_len)); + struct nlattr *attr; + + for (attr = (struct nlattr *)(buf + offset); + NETLINK_NLATTR_LEN(tail, attr) >= sizeof(struct nlattr) + && attr->nla_len >= sizeof(struct nlattr) + && attr->nla_len <= NETLINK_NLATTR_LEN(tail, attr); + attr += NETLINK_ALIGN(attr->nla_len)) { + if ((attr->nla_type & NLA_TYPE_MASK) == NETNSA_NSID) { + uint32_t *ptr = (uint32_t *)(attr); + + ns_id = ptr[1]; + break; + } + } + return ns_id; +} + +/* fd_param = -1 is ignored. + * netnspath set to null is ignored. 
+ *
+ * one of the 2 params is mandatory. netnspath is looked in priority
+ */
+ns_id_t zebra_ns_id_get(const char *netnspath, int fd_param)
+{
+	int ns_id = -1;
+	struct sockaddr_nl snl;
+	int fd = -1, sock, ret;
+	unsigned int seq;
+	ns_id_t return_nsid = NS_UNKNOWN;
+
+	/* netns path check */
+	if (!netnspath && fd_param == -1)
+		return NS_UNKNOWN;
+	if (netnspath) {
+		/* fd opened here is owned by us and closed on every exit
+		 * path below; fd_param stays owned by the caller
+		 */
+		fd = open(netnspath, O_RDONLY);
+		if (fd == -1)
+			return NS_UNKNOWN;
+	} else if (fd_param != -1)
+		fd = fd_param;
+	/* netlink socket */
+	sock = socket(AF_NETLINK, SOCK_RAW, NETLINK_ROUTE);
+	if (sock < 0) {
+		flog_err_sys(EC_LIB_SOCKET, "netlink( %u) socket() error: %s",
+			     sock, safe_strerror(errno));
+		if (netnspath)
+			close(fd);
+		return NS_UNKNOWN;
+	}
+	memset(&snl, 0, sizeof(snl));
+	snl.nl_family = AF_NETLINK;
+	snl.nl_groups = RTNLGRP_NSID;
+	snl.nl_pid = 0; /* AUTO PID */
+	ret = bind(sock, (struct sockaddr *)&snl, sizeof(snl));
+	if (ret < 0) {
+		flog_err_sys(EC_LIB_SOCKET,
+			     "netlink( %u) socket() bind error: %s", sock,
+			     safe_strerror(errno));
+		close(sock);
+		if (netnspath)
+			close(fd);
+		return NS_UNKNOWN;
+	}
+
+	/* message to send to netlink,and response : NEWNSID */
+	char buf[NETLINK_SOCKET_BUFFER_SIZE];
+	struct nlmsghdr *nlh;
+	struct rtgenmsg *rt;
+	int len;
+
+	memset(buf, 0, NETLINK_SOCKET_BUFFER_SIZE);
+	nlh = initiate_nlh(buf, &seq, RTM_NEWNSID);
+	rt = (struct rtgenmsg *)(buf + nlh->nlmsg_len);
+	nlh->nlmsg_len += NETLINK_ALIGN(sizeof(struct rtgenmsg));
+	rt->rtgen_family = AF_UNSPEC;
+
+	/* ns_id == -1 asks the kernel to allocate the next free NSID */
+	nl_attr_put32(nlh, NETLINK_SOCKET_BUFFER_SIZE, NETNSA_FD, fd);
+	nl_attr_put32(nlh, NETLINK_SOCKET_BUFFER_SIZE, NETNSA_NSID, ns_id);
+
+	ret = send_receive(sock, nlh, seq, buf);
+	if (ret < 0) {
+		close(sock);
+		if (netnspath)
+			close(fd);
+		return NS_UNKNOWN;
+	}
+	nlh = (struct nlmsghdr *)buf;
+
+	/* message to analyse : NEWNSID response */
+	ret = 0;
+	if (nlh->nlmsg_type >= NLMSG_MIN_TYPE) {
+		return_nsid = extract_nsid(nlh, buf);
+	} else {
+		if (nlh->nlmsg_type == NLMSG_ERROR) {
+			struct nlmsgerr *err =
+				(struct nlmsgerr
+					 *)((char *)nlh
+					    + NETLINK_ALIGN(
+						      sizeof(struct nlmsghdr)));
+
+			ret = -1;
+			if (err->error < 0)
+				errno = -err->error;
+			else
+				errno = err->error;
+			if (errno == 0) {
+				/* request NEWNSID was successful
+				 * return EEXIST error to get GETNSID
+				 */
+				errno = EEXIST;
+			}
+		} else {
+			/* other errors ignored
+			 * attempt to get nsid
+			 */
+			ret = -1;
+			errno = EEXIST;
+		}
+	}
+
+	/* errno == EEXIST means the NSID already exists; fall through and
+	 * query it with RTM_GETNSID. Any other error is fatal here.
+	 */
+	if (errno != EEXIST && ret != 0) {
+		flog_err(EC_LIB_SOCKET,
+			 "netlink( %u) recvfrom() error 2 when reading: %s", fd,
+			 safe_strerror(errno));
+		close(sock);
+		if (netnspath)
+			close(fd);
+		if (errno == ENOTSUP) {
+			/* kernel without NEWNSID support: allocate locally */
+			zlog_debug("NEWNSID locally generated");
+			return zebra_ns_id_get_fallback(netnspath);
+		}
+		return NS_UNKNOWN;
+	}
+	/* message to send to netlink : GETNSID */
+	memset(buf, 0, NETLINK_SOCKET_BUFFER_SIZE);
+	nlh = initiate_nlh(buf, &seq, RTM_GETNSID);
+	rt = (struct rtgenmsg *)(buf + nlh->nlmsg_len);
+	nlh->nlmsg_len += NETLINK_ALIGN(sizeof(struct rtgenmsg));
+	rt->rtgen_family = AF_UNSPEC;
+
+	nl_attr_put32(nlh, NETLINK_SOCKET_BUFFER_SIZE, NETNSA_FD, fd);
+	nl_attr_put32(nlh, NETLINK_SOCKET_BUFFER_SIZE, NETNSA_NSID, ns_id);
+
+	ret = send_receive(sock, nlh, seq, buf);
+	if (ret < 0) {
+		close(sock);
+		if (netnspath)
+			close(fd);
+		return NS_UNKNOWN;
+	}
+	nlh = (struct nlmsghdr *)buf;
+	len = ret;
+	ret = 0;
+	/* walk all messages in the reply until an NSID or an error is found */
+	do {
+		if (nlh->nlmsg_type >= NLMSG_MIN_TYPE) {
+			return_nsid = extract_nsid(nlh, buf);
+			if (return_nsid != NS_UNKNOWN)
+				break;
+		} else if (nlh->nlmsg_type == NLMSG_ERROR) {
+			struct nlmsgerr *err =
+				(struct nlmsgerr *)((char *)nlh
+						    + NETLINK_ALIGN(sizeof(
+							      struct nlmsghdr)));
+			if (err->error < 0)
+				errno = -err->error;
+			else
+				errno = err->error;
+			break;
+		}
+		len = len - NETLINK_ALIGN(nlh->nlmsg_len);
+		nlh = (struct nlmsghdr *)((char *)nlh
+					  + NETLINK_ALIGN(nlh->nlmsg_len));
+	} while (len != 0 && ret == 0);
+
+	if (netnspath)
+		close(fd);
+	close(sock);
+	return return_nsid;
+}
+
+#else
+ns_id_t 
zebra_ns_id_get(const char *netnspath, int fd __attribute__ ((unused)))
+{
+	/* no netlink: always allocate NSIDs locally */
+	return zebra_ns_id_get_fallback(netnspath);
+}
+
+#endif /* ! defined(HAVE_NETLINK) */
+
+#ifdef HAVE_NETNS
+/* Best-effort creation of the runtime netns directory (/var/run/netns). */
+static void zebra_ns_create_netns_directory(void)
+{
+	/* check that /var/run/netns is created */
+	/* S_IRWXU|S_IRGRP|S_IXGRP|S_IROTH|S_IXOTH */
+	if (mkdir(NS_RUN_DIR, 0755)) {
+		if (errno != EEXIST) {
+			flog_warn(EC_ZEBRA_NAMESPACE_DIR_INACCESSIBLE,
+				  "NS check: failed to access %s", NS_RUN_DIR);
+			return;
+		}
+	}
+}
+#endif /* HAVE_NETNS */
+
+/* Return the NSID of the namespace zebra starts in. With the netns VRF
+ * backend this resolves the default netns via zebra_ns_id_get(); otherwise
+ * (or when the default netns cannot be opened) NS_DEFAULT is returned.
+ */
+ns_id_t zebra_ns_id_get_default(void)
+{
+#ifdef HAVE_NETNS
+	int fd;
+#endif /* HAVE_NETNS */
+
+#ifdef HAVE_NETNS
+	if (vrf_is_backend_netns())
+		zebra_ns_create_netns_directory();
+	/* open() is only a reachability probe for the default netns */
+	fd = open(NS_DEFAULT_NAME, O_RDONLY);
+
+	if (fd == -1)
+		return NS_DEFAULT;
+	if (!vrf_is_backend_netns()) {
+		close(fd);
+		return NS_DEFAULT;
+	}
+	close(fd);
+	return zebra_ns_id_get((char *)NS_DEFAULT_NAME, -1);
+#else /* !HAVE_NETNS */
+	return NS_DEFAULT;
+#endif /* !HAVE_NETNS */
+}
diff --git a/zebra/zebra_netns_id.h b/zebra/zebra_netns_id.h
new file mode 100644
index 0000000..dd9eab1
--- /dev/null
+++ b/zebra/zebra_netns_id.h
@@ -0,0 +1,34 @@
+/* zebra NETNS ID handling routines
+ * Copyright (C) 2018 6WIND
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License as published by the Free
+ * Software Foundation; either version 2 of the License, or (at your option)
+ * any later version.
+ *
+ * This program is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
+ * more details. 
+ *
+ * You should have received a copy of the GNU General Public License along
+ * with this program; see the file COPYING; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+/* NOTE(review): "__ZEBRA_NS_ID_H__" lives in the implementation-reserved
+ * identifier space (leading double underscore); a rename to ZEBRA_NS_ID_H
+ * would be safer, but is left unchanged here.
+ */
+#if !defined(__ZEBRA_NS_ID_H__)
+#define __ZEBRA_NS_ID_H__
+#include "zebra.h"
+#include "ns.h"
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+/* Resolve the kernel NSID of the netns at 'netnspath', or of the already
+ * opened 'fd' when netnspath is NULL; returns NS_UNKNOWN on failure.
+ */
+extern ns_id_t zebra_ns_id_get(const char *netnspath, int fd);
+/* NSID of the namespace zebra starts in. */
+extern ns_id_t zebra_ns_id_get_default(void);
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif /* __ZEBRA_NS_ID_H__ */
diff --git a/zebra/zebra_netns_notify.c b/zebra/zebra_netns_notify.c
new file mode 100644
index 0000000..cc4138e
--- /dev/null
+++ b/zebra/zebra_netns_notify.c
@@ -0,0 +1,446 @@
+/*
+ * Zebra NS collector and notifier for Network NameSpaces
+ * Copyright (C) 2017 6WIND
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License as published by the Free
+ * Software Foundation; either version 2 of the License, or (at your option)
+ * any later version.
+ *
+ * This program is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
+ * more details. 
+ * + * You should have received a copy of the GNU General Public License along + * with this program; see the file COPYING; if not, write to the Free Software + * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA + */ + +#include <zebra.h> + +#ifdef HAVE_NETLINK +#ifdef HAVE_NETNS +#undef _GNU_SOURCE +#define _GNU_SOURCE + +#include <sched.h> +#endif +#include <dirent.h> +#include <sys/inotify.h> +#include <sys/stat.h> + +#include "thread.h" +#include "ns.h" +#include "command.h" +#include "memory.h" +#include "lib_errors.h" + +#include "zebra_router.h" +#endif /* defined(HAVE_NETLINK) */ + +#include "zebra_netns_notify.h" +#include "zebra_netns_id.h" +#include "zebra_errors.h" +#include "interface.h" + +#ifdef HAVE_NETLINK + +/* upon creation of folder under /var/run/netns, + * wait that netns context is bound to + * that folder 10 seconds + */ +#define ZEBRA_NS_POLLING_INTERVAL_MSEC 1000 +#define ZEBRA_NS_POLLING_MAX_RETRIES 200 + +DEFINE_MTYPE_STATIC(ZEBRA, NETNS_MISC, "ZebraNetNSInfo"); +static struct thread *zebra_netns_notify_current; + +struct zebra_netns_info { + const char *netnspath; + unsigned int retries; +}; + +static void zebra_ns_ready_read(struct thread *t); +static void zebra_ns_notify_create_context_from_entry_name(const char *name); +static int zebra_ns_continue_read(struct zebra_netns_info *zns_info, + int stop_retry); +static void zebra_ns_notify_read(struct thread *t); + +static struct vrf *vrf_handler_create(struct vty *vty, const char *vrfname) +{ + if (strlen(vrfname) > VRF_NAMSIZ) { + flog_warn(EC_LIB_VRF_LENGTH, + "%% VRF name %s invalid: length exceeds %d bytes", + vrfname, VRF_NAMSIZ); + return NULL; + } + + return vrf_get(VRF_UNKNOWN, vrfname); +} + +static void zebra_ns_notify_create_context_from_entry_name(const char *name) +{ + char *netnspath = ns_netns_pathname(NULL, name); + struct vrf *vrf; + int ret; + ns_id_t ns_id, ns_id_external, ns_id_relative = NS_UNKNOWN; + struct ns *default_ns; + + if (netnspath 
== NULL) + return; + + frr_with_privs(&zserv_privs) { + ns_id = zebra_ns_id_get(netnspath, -1); + } + if (ns_id == NS_UNKNOWN) + return; + ns_id_external = ns_map_nsid_with_external(ns_id, true); + /* if VRF with NS ID already present */ + vrf = vrf_lookup_by_id((vrf_id_t)ns_id_external); + if (vrf) { + zlog_debug( + "NS notify : same NSID used by VRF %s. Ignore NS %s creation", + vrf->name, netnspath); + return; + } + vrf = vrf_handler_create(NULL, name); + if (!vrf) { + flog_warn(EC_ZEBRA_NS_VRF_CREATION_FAILED, + "NS notify : failed to create VRF %s", name); + ns_map_nsid_with_external(ns_id, false); + return; + } + + default_ns = ns_get_default(); + + /* force kernel ns_id creation in that new vrf */ + frr_with_privs(&zserv_privs) { + ns_switch_to_netns(netnspath); + ns_id_relative = zebra_ns_id_get(NULL, default_ns->fd); + ns_switchback_to_initial(); + } + + frr_with_privs(&zserv_privs) { + ret = zebra_vrf_netns_handler_create(NULL, vrf, netnspath, + ns_id_external, ns_id, + ns_id_relative); + } + if (ret != CMD_SUCCESS) { + flog_warn(EC_ZEBRA_NS_VRF_CREATION_FAILED, + "NS notify : failed to create NS %s", netnspath); + ns_map_nsid_with_external(ns_id, false); + vrf_delete(vrf); + return; + } + zlog_info("NS notify : created VRF %s NS %s", name, netnspath); +} + +static int zebra_ns_continue_read(struct zebra_netns_info *zns_info, + int stop_retry) +{ + void *ns_path_ptr = (void *)zns_info->netnspath; + + if (stop_retry) { + XFREE(MTYPE_NETNS_MISC, ns_path_ptr); + XFREE(MTYPE_NETNS_MISC, zns_info); + return 0; + } + thread_add_timer_msec(zrouter.master, zebra_ns_ready_read, + (void *)zns_info, ZEBRA_NS_POLLING_INTERVAL_MSEC, + NULL); + return 0; +} + +static int zebra_ns_delete(char *name) +{ + struct vrf *vrf = vrf_lookup_by_name(name); + struct interface *ifp, *tmp; + struct ns *ns; + + if (!vrf) { + flog_warn(EC_ZEBRA_NS_DELETION_FAILED_NO_VRF, + "NS notify : no VRF found using NS %s", name); + return 0; + } + + /* + * We don't receive interface down/delete 
notifications from kernel
+	 * when a netns is deleted. Therefore we have to manually replicate
+	 * the necessary actions here.
+	 */
+	RB_FOREACH_SAFE (ifp, if_name_head, &vrf->ifaces_by_name, tmp) {
+		if (!CHECK_FLAG(ifp->status, ZEBRA_INTERFACE_ACTIVE))
+			continue;
+
+		if (if_is_no_ptm_operative(ifp)) {
+			UNSET_FLAG(ifp->flags, IFF_RUNNING);
+			if_down(ifp);
+		}
+
+		if (IS_ZEBRA_IF_BOND(ifp))
+			zebra_l2if_update_bond(ifp, false);
+		if (IS_ZEBRA_IF_BOND_SLAVE(ifp))
+			zebra_l2if_update_bond_slave(ifp, IFINDEX_INTERNAL,
+						     false);
+		/* Special handling for bridge or VxLAN interfaces. */
+		if (IS_ZEBRA_IF_BRIDGE(ifp))
+			zebra_l2_bridge_del(ifp);
+		else if (IS_ZEBRA_IF_VXLAN(ifp))
+			zebra_l2_vxlanif_del(ifp);
+
+		UNSET_FLAG(ifp->flags, IFF_UP);
+		if_delete_update(&ifp);
+	}
+
+	ns = (struct ns *)vrf->ns_ctxt;
+	/* the deletion order is the same
+	 * as the one used on the shutdown/signal path
+	 * (NOTE(review): original comment read "when siging signal is
+	 * received" - presumably a typo; confirm intent)
+	 */
+	vrf->ns_ctxt = NULL;
+	vrf_delete(vrf);
+	if (ns)
+		ns_delete(ns);
+
+	zlog_info("NS notify : deleted VRF %s", name);
+	return 0;
+}
+
+/* Stat /proc/self/ns/net into 'netst'. Returns 0 on success, -1 on error. */
+static int zebra_ns_notify_self_identify(struct stat *netst)
+{
+	char net_path[PATH_MAX];
+	int netns;
+
+	snprintf(net_path, sizeof(net_path), "/proc/self/ns/net");
+	netns = open(net_path, O_RDONLY);
+	if (netns < 0)
+		return -1;
+	if (fstat(netns, netst) < 0) {
+		close(netns);
+		return -1;
+	}
+	close(netns);
+	return 0;
+}
+
+/* True when NS_RUN_DIR/name is the netns zebra itself runs in, compared
+ * by device and inode against /proc/self/ns/net.
+ */
+static bool zebra_ns_notify_is_default_netns(const char *name)
+{
+	struct stat default_netns_stat;
+	struct stat st;
+	char netnspath[PATH_MAX];
+
+	if (zebra_ns_notify_self_identify(&default_netns_stat))
+		return false;
+
+	memset(&st, 0, sizeof(st));
+	snprintf(netnspath, sizeof(netnspath), "%s/%s", NS_RUN_DIR, name);
+	/* compare with local stat */
+	if (stat(netnspath, &st) == 0 &&
+	    (st.st_dev == default_netns_stat.st_dev) &&
+	    (st.st_ino == default_netns_stat.st_ino))
+		return true;
+	return false;
+}
+
+/* Timer callback: retry entering a freshly created netns until it is
+ * usable (or retries are exhausted), then create the VRF context for it.
+ */
+static void zebra_ns_ready_read(struct thread *t)
+{
+	struct zebra_netns_info *zns_info = 
THREAD_ARG(t); + const char *netnspath; + int err, stop_retry = 0; + + if (!zns_info) + return; + if (!zns_info->netnspath) { + XFREE(MTYPE_NETNS_MISC, zns_info); + return; + } + netnspath = zns_info->netnspath; + if (--zns_info->retries == 0) + stop_retry = 1; + frr_with_privs(&zserv_privs) { + err = ns_switch_to_netns(netnspath); + } + if (err < 0) { + zebra_ns_continue_read(zns_info, stop_retry); + return; + } + + /* go back to default ns */ + frr_with_privs(&zserv_privs) { + err = ns_switchback_to_initial(); + } + if (err < 0) { + zebra_ns_continue_read(zns_info, stop_retry); + return; + } + + /* check default name is not already set */ + if (strmatch(VRF_DEFAULT_NAME, basename(netnspath))) { + zlog_warn("NS notify : NS %s is already default VRF.Cancel VRF Creation", basename(netnspath)); + zebra_ns_continue_read(zns_info, 1); + return; + } + if (zebra_ns_notify_is_default_netns(basename(netnspath))) { + zlog_warn( + "NS notify : NS %s is default VRF. Ignore VRF creation", + basename(netnspath)); + zebra_ns_continue_read(zns_info, 1); + return; + } + + /* success : close fd and create zns context */ + zebra_ns_notify_create_context_from_entry_name(basename(netnspath)); + zebra_ns_continue_read(zns_info, 1); +} + +static void zebra_ns_notify_read(struct thread *t) +{ + int fd_monitor = THREAD_FD(t); + struct inotify_event *event; + char buf[BUFSIZ]; + ssize_t len; + + thread_add_read(zrouter.master, zebra_ns_notify_read, NULL, fd_monitor, + &zebra_netns_notify_current); + len = read(fd_monitor, buf, sizeof(buf)); + if (len < 0) { + flog_err_sys(EC_ZEBRA_NS_NOTIFY_READ, + "NS notify read: failed to read (%s)", + safe_strerror(errno)); + return; + } + for (event = (struct inotify_event *)buf; (char *)event < &buf[len]; + event = (struct inotify_event *)((char *)event + sizeof(*event) + + event->len)) { + char *netnspath; + struct zebra_netns_info *netnsinfo; + + if (!(event->mask & (IN_CREATE | IN_DELETE))) + continue; + + if (offsetof(struct inotify_event, name) 
+ event->len + >= sizeof(buf)) { + flog_err(EC_ZEBRA_NS_NOTIFY_READ, + "NS notify read: buffer underflow"); + break; + } + + if (strnlen(event->name, event->len) == event->len) { + flog_err(EC_ZEBRA_NS_NOTIFY_READ, + "NS notify error: bad event name"); + break; + } + + if (event->mask & IN_DELETE) { + zebra_ns_delete(event->name); + continue; + } + netnspath = ns_netns_pathname(NULL, event->name); + if (!netnspath) + continue; + netnspath = XSTRDUP(MTYPE_NETNS_MISC, netnspath); + netnsinfo = XCALLOC(MTYPE_NETNS_MISC, + sizeof(struct zebra_netns_info)); + netnsinfo->retries = ZEBRA_NS_POLLING_MAX_RETRIES; + netnsinfo->netnspath = netnspath; + thread_add_timer_msec(zrouter.master, zebra_ns_ready_read, + (void *)netnsinfo, 0, NULL); + } +} + +void zebra_ns_notify_parse(void) +{ + struct dirent *dent; + DIR *srcdir = opendir(NS_RUN_DIR); + + if (srcdir == NULL) { + flog_err_sys(EC_LIB_SYSTEM_CALL, + "NS parsing init: failed to parse %s", NS_RUN_DIR); + return; + } + while ((dent = readdir(srcdir)) != NULL) { + struct stat st; + + if (strcmp(dent->d_name, ".") == 0 + || strcmp(dent->d_name, "..") == 0) + continue; + if (fstatat(dirfd(srcdir), dent->d_name, &st, 0) < 0) { + flog_err_sys( + EC_LIB_SYSTEM_CALL, + "NS parsing init: failed to parse entry %s", + dent->d_name); + continue; + } + if (S_ISDIR(st.st_mode)) { + zlog_debug("NS parsing init: %s is not a NS", + dent->d_name); + continue; + } + /* check default name is not already set */ + if (strmatch(VRF_DEFAULT_NAME, basename(dent->d_name))) { + zlog_warn("NS notify : NS %s is already default VRF.Cancel VRF Creation", dent->d_name); + continue; + } + if (zebra_ns_notify_is_default_netns(dent->d_name)) { + zlog_warn( + "NS notify : NS %s is default VRF. 
Ignore VRF creation", + dent->d_name); + continue; + } + zebra_ns_notify_create_context_from_entry_name(dent->d_name); + } + closedir(srcdir); +} + +void zebra_ns_notify_init(void) +{ + int fd_monitor; + + fd_monitor = inotify_init(); + if (fd_monitor < 0) { + flog_err_sys( + EC_LIB_SYSTEM_CALL, + "NS notify init: failed to initialize inotify (%s)", + safe_strerror(errno)); + } + if (inotify_add_watch(fd_monitor, NS_RUN_DIR, + IN_CREATE | IN_DELETE) < 0) { + flog_err_sys(EC_LIB_SYSTEM_CALL, + "NS notify watch: failed to add watch (%s)", + safe_strerror(errno)); + } + thread_add_read(zrouter.master, zebra_ns_notify_read, NULL, fd_monitor, + &zebra_netns_notify_current); +} + +void zebra_ns_notify_close(void) +{ + if (zebra_netns_notify_current == NULL) + return; + + int fd = 0; + + if (zebra_netns_notify_current->u.fd > 0) + fd = zebra_netns_notify_current->u.fd; + + if (zebra_netns_notify_current->master != NULL) + THREAD_OFF(zebra_netns_notify_current); + + /* auto-removal of notify items */ + if (fd > 0) + close(fd); +} + +#else +void zebra_ns_notify_parse(void) +{ +} + +void zebra_ns_notify_init(void) +{ +} + +void zebra_ns_notify_close(void) +{ +} +#endif /* !HAVE_NETLINK */ diff --git a/zebra/zebra_netns_notify.h b/zebra/zebra_netns_notify.h new file mode 100644 index 0000000..1893928 --- /dev/null +++ b/zebra/zebra_netns_notify.h @@ -0,0 +1,37 @@ +/* + * Zebra NS collector and notifier for Network NameSpaces + * Copyright (C) 2017 6WIND + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License as published by the Free + * Software Foundation; either version 2 of the License, or (at your option) + * any later version. + * + * This program is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for + * more details. 
+ *
+ * You should have received a copy of the GNU General Public License along
+ * with this program; see the file COPYING; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+#ifndef _NETNS_NOTIFY_H
+#define _NETNS_NOTIFY_H
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+/* Start the inotify watch on the netns run directory. */
+extern void zebra_ns_notify_init(void);
+/* Scan the netns run directory once and create contexts for existing netns. */
+extern void zebra_ns_notify_parse(void);
+/* Tear down the inotify watch and its read task. */
+extern void zebra_ns_notify_close(void);
+
+extern struct zebra_privs_t zserv_privs;
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif /* _NETNS_NOTIFY_H */
diff --git a/zebra/zebra_nhg.c b/zebra/zebra_nhg.c
new file mode 100644
index 0000000..e500232
--- /dev/null
+++ b/zebra/zebra_nhg.c
@@ -0,0 +1,3524 @@
+/* Zebra Nexthop Group Code.
+ * Copyright (C) 2019 Cumulus Networks, Inc.
+ *                    Donald Sharp
+ *                    Stephen Worley
+ *
+ * This file is part of FRR.
+ *
+ * FRR is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License as published by the
+ * Free Software Foundation; either version 2, or (at your option) any
+ * later version.
+ *
+ * FRR is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with FRR; see the file COPYING. If not, write to the Free
+ * Software Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA
+ * 02111-1307, USA. 
+ */ +#include <zebra.h> + +#include "lib/nexthop.h" +#include "lib/nexthop_group_private.h" +#include "lib/routemap.h" +#include "lib/mpls.h" +#include "lib/jhash.h" +#include "lib/debug.h" +#include "lib/lib_errors.h" + +#include "zebra/connected.h" +#include "zebra/debug.h" +#include "zebra/zebra_router.h" +#include "zebra/zebra_nhg_private.h" +#include "zebra/zebra_rnh.h" +#include "zebra/zebra_routemap.h" +#include "zebra/zebra_srte.h" +#include "zebra/zserv.h" +#include "zebra/rt.h" +#include "zebra_errors.h" +#include "zebra_dplane.h" +#include "zebra/interface.h" +#include "zebra/zapi_msg.h" +#include "zebra/rib.h" + +DEFINE_MTYPE_STATIC(ZEBRA, NHG, "Nexthop Group Entry"); +DEFINE_MTYPE_STATIC(ZEBRA, NHG_CONNECTED, "Nexthop Group Connected"); +DEFINE_MTYPE_STATIC(ZEBRA, NHG_CTX, "Nexthop Group Context"); + +/* Map backup nexthop indices between two nhes */ +struct backup_nh_map_s { + int map_count; + + struct { + uint8_t orig_idx; + uint8_t new_idx; + } map[MULTIPATH_NUM]; +}; + +/* id counter to keep in sync with kernel */ +uint32_t id_counter; + +/* Controlled through ui */ +static bool g_nexthops_enabled = true; +static bool proto_nexthops_only; +static bool use_recursive_backups = true; + +static struct nhg_hash_entry *depends_find(const struct nexthop *nh, afi_t afi, + int type, bool from_dplane); +static void depends_add(struct nhg_connected_tree_head *head, + struct nhg_hash_entry *depend); +static struct nhg_hash_entry * +depends_find_add(struct nhg_connected_tree_head *head, struct nexthop *nh, + afi_t afi, int type, bool from_dplane); +static struct nhg_hash_entry * +depends_find_id_add(struct nhg_connected_tree_head *head, uint32_t id); +static void depends_decrement_free(struct nhg_connected_tree_head *head); + +static struct nhg_backup_info * +nhg_backup_copy(const struct nhg_backup_info *orig); + +/* Helper function for getting the next allocatable ID */ +static uint32_t nhg_get_next_id(void) +{ + while (1) { + id_counter++; + + if 
(IS_ZEBRA_DEBUG_NHG_DETAIL) + zlog_debug("%s: ID %u checking", __func__, id_counter); + + if (id_counter == ZEBRA_NHG_PROTO_LOWER) { + if (IS_ZEBRA_DEBUG_NHG_DETAIL) + zlog_debug("%s: ID counter wrapped", __func__); + + id_counter = 0; + continue; + } + + if (zebra_nhg_lookup_id(id_counter)) { + if (IS_ZEBRA_DEBUG_NHG_DETAIL) + zlog_debug("%s: ID already exists", __func__); + + continue; + } + + break; + } + + return id_counter; +} + +static void nhg_connected_free(struct nhg_connected *dep) +{ + XFREE(MTYPE_NHG_CONNECTED, dep); +} + +static struct nhg_connected *nhg_connected_new(struct nhg_hash_entry *nhe) +{ + struct nhg_connected *new = NULL; + + new = XCALLOC(MTYPE_NHG_CONNECTED, sizeof(struct nhg_connected)); + new->nhe = nhe; + + return new; +} + +void nhg_connected_tree_free(struct nhg_connected_tree_head *head) +{ + struct nhg_connected *rb_node_dep = NULL; + + if (!nhg_connected_tree_is_empty(head)) { + frr_each_safe(nhg_connected_tree, head, rb_node_dep) { + nhg_connected_tree_del(head, rb_node_dep); + nhg_connected_free(rb_node_dep); + } + } +} + +bool nhg_connected_tree_is_empty(const struct nhg_connected_tree_head *head) +{ + return nhg_connected_tree_count(head) ? false : true; +} + +struct nhg_connected * +nhg_connected_tree_root(struct nhg_connected_tree_head *head) +{ + return nhg_connected_tree_first(head); +} + +struct nhg_hash_entry * +nhg_connected_tree_del_nhe(struct nhg_connected_tree_head *head, + struct nhg_hash_entry *depend) +{ + struct nhg_connected lookup = {}; + struct nhg_connected *remove = NULL; + struct nhg_hash_entry *removed_nhe; + + lookup.nhe = depend; + + /* Lookup to find the element, then remove it */ + remove = nhg_connected_tree_find(head, &lookup); + if (remove) + /* Re-returning here just in case this API changes.. + * the _del list api's are a bit undefined at the moment. + * + * So hopefully returning here will make it fail if the api + * changes to something different than currently expected. 
+	 */
+		remove = nhg_connected_tree_del(head, remove);
+
+	/* If the entry was successfully removed, free the `connected` struct */
+	if (remove) {
+		removed_nhe = remove->nhe;
+		nhg_connected_free(remove);
+		return removed_nhe;
+	}
+
+	return NULL;
+}
+
+/* Assuming UNIQUE RB tree. If this changes, assumptions here about
+ * insertion need to change.
+ */
+struct nhg_hash_entry *
+nhg_connected_tree_add_nhe(struct nhg_connected_tree_head *head,
+			   struct nhg_hash_entry *depend)
+{
+	struct nhg_connected *new = NULL;
+
+	new = nhg_connected_new(depend);
+
+	/* On success, NULL will be returned from the
+	 * RB code.
+	 */
+	if (new && (nhg_connected_tree_add(head, new) == NULL))
+		return NULL;
+
+	/* If it wasn't successful, it must be a duplicate. We enforce the
+	 * unique property for the `nhg_connected` tree.
+	 */
+	nhg_connected_free(new);
+
+	return depend;
+}
+
+/* Drop one reference on every entry the tree points at. */
+static void
+nhg_connected_tree_decrement_ref(struct nhg_connected_tree_head *head)
+{
+	struct nhg_connected *rb_node_dep = NULL;
+
+	frr_each_safe(nhg_connected_tree, head, rb_node_dep) {
+		zebra_nhg_decrement_ref(rb_node_dep->nhe);
+	}
+}
+
+/* Take one reference on every entry the tree points at. */
+static void
+nhg_connected_tree_increment_ref(struct nhg_connected_tree_head *head)
+{
+	struct nhg_connected *rb_node_dep = NULL;
+
+	frr_each(nhg_connected_tree, head, rb_node_dep) {
+		zebra_nhg_increment_ref(rb_node_dep->nhe);
+	}
+}
+
+/* Follow recursive groups down to the first concrete (non-recursive) nhe. */
+struct nhg_hash_entry *zebra_nhg_resolve(struct nhg_hash_entry *nhe)
+{
+	if (CHECK_FLAG(nhe->flags, NEXTHOP_GROUP_RECURSIVE)
+	    && !zebra_nhg_depends_is_empty(nhe)) {
+		nhe = nhg_connected_tree_root(&nhe->nhg_depends)->nhe;
+		return zebra_nhg_resolve(nhe);
+	}
+
+	return nhe;
+}
+
+unsigned int zebra_nhg_depends_count(const struct nhg_hash_entry *nhe)
+{
+	return nhg_connected_tree_count(&nhe->nhg_depends);
+}
+
+bool zebra_nhg_depends_is_empty(const struct nhg_hash_entry *nhe)
+{
+	return nhg_connected_tree_is_empty(&nhe->nhg_depends);
+}
+
+static void zebra_nhg_depends_del(struct nhg_hash_entry *from,
+				  struct nhg_hash_entry 
*depend) +{ + nhg_connected_tree_del_nhe(&from->nhg_depends, depend); +} + +static void zebra_nhg_depends_init(struct nhg_hash_entry *nhe) +{ + nhg_connected_tree_init(&nhe->nhg_depends); +} + +unsigned int zebra_nhg_dependents_count(const struct nhg_hash_entry *nhe) +{ + return nhg_connected_tree_count(&nhe->nhg_dependents); +} + + +bool zebra_nhg_dependents_is_empty(const struct nhg_hash_entry *nhe) +{ + return nhg_connected_tree_is_empty(&nhe->nhg_dependents); +} + +static void zebra_nhg_dependents_del(struct nhg_hash_entry *from, + struct nhg_hash_entry *dependent) +{ + nhg_connected_tree_del_nhe(&from->nhg_dependents, dependent); +} + +static void zebra_nhg_dependents_add(struct nhg_hash_entry *to, + struct nhg_hash_entry *dependent) +{ + nhg_connected_tree_add_nhe(&to->nhg_dependents, dependent); +} + +static void zebra_nhg_dependents_init(struct nhg_hash_entry *nhe) +{ + nhg_connected_tree_init(&nhe->nhg_dependents); +} + +/* Release this nhe from anything depending on it */ +static void zebra_nhg_dependents_release(struct nhg_hash_entry *nhe) +{ + struct nhg_connected *rb_node_dep = NULL; + + frr_each_safe(nhg_connected_tree, &nhe->nhg_dependents, rb_node_dep) { + zebra_nhg_depends_del(rb_node_dep->nhe, nhe); + /* recheck validity of the dependent */ + zebra_nhg_check_valid(rb_node_dep->nhe); + } +} + +/* Release this nhe from anything that it depends on */ +static void zebra_nhg_depends_release(struct nhg_hash_entry *nhe) +{ + if (!zebra_nhg_depends_is_empty(nhe)) { + struct nhg_connected *rb_node_dep = NULL; + + frr_each_safe(nhg_connected_tree, &nhe->nhg_depends, + rb_node_dep) { + zebra_nhg_dependents_del(rb_node_dep->nhe, nhe); + } + } +} + + +struct nhg_hash_entry *zebra_nhg_lookup_id(uint32_t id) +{ + struct nhg_hash_entry lookup = {}; + + lookup.id = id; + return hash_lookup(zrouter.nhgs_id, &lookup); +} + +static int zebra_nhg_insert_id(struct nhg_hash_entry *nhe) +{ + if (hash_lookup(zrouter.nhgs_id, nhe)) { + flog_err( + 
EC_ZEBRA_NHG_TABLE_INSERT_FAILED, + "Failed inserting NHG %pNG into the ID hash table, entry already exists", + nhe); + return -1; + } + + (void)hash_get(zrouter.nhgs_id, nhe, hash_alloc_intern); + + return 0; +} + +static void zebra_nhg_set_if(struct nhg_hash_entry *nhe, struct interface *ifp) +{ + nhe->ifp = ifp; + if_nhg_dependents_add(ifp, nhe); +} + +static void +zebra_nhg_connect_depends(struct nhg_hash_entry *nhe, + struct nhg_connected_tree_head *nhg_depends) +{ + struct nhg_connected *rb_node_dep = NULL; + + /* This has been allocated higher above in the stack. Could probably + * re-allocate and free the old stuff but just using the same memory + * for now. Otherwise, their might be a time trade-off for repeated + * alloc/frees as startup. + */ + nhe->nhg_depends = *nhg_depends; + + /* Attach backpointer to anything that it depends on */ + zebra_nhg_dependents_init(nhe); + if (!zebra_nhg_depends_is_empty(nhe)) { + frr_each(nhg_connected_tree, &nhe->nhg_depends, rb_node_dep) { + if (IS_ZEBRA_DEBUG_NHG_DETAIL) + zlog_debug("%s: nhe %p (%pNG), dep %p (%pNG)", + __func__, nhe, nhe, rb_node_dep->nhe, + rb_node_dep->nhe); + + zebra_nhg_dependents_add(rb_node_dep->nhe, nhe); + } + } +} + +/* Init an nhe, for use in a hash lookup for example */ +void zebra_nhe_init(struct nhg_hash_entry *nhe, afi_t afi, + const struct nexthop *nh) +{ + memset(nhe, 0, sizeof(struct nhg_hash_entry)); + nhe->vrf_id = VRF_DEFAULT; + nhe->type = ZEBRA_ROUTE_NHG; + nhe->afi = AFI_UNSPEC; + + /* There are some special rules that apply to groups representing + * a single nexthop. + */ + if (nh && (nh->next == NULL)) { + switch (nh->type) { + case NEXTHOP_TYPE_IFINDEX: + case NEXTHOP_TYPE_BLACKHOLE: + /* + * This switch case handles setting the afi different + * for ipv4/v6 routes. Ifindex/blackhole nexthop + * objects cannot be ambiguous, they must be Address + * Family specific. 
If we get here, we will either use + * the AF of the route, or the one we got passed from + * here from the kernel. + */ + nhe->afi = afi; + break; + case NEXTHOP_TYPE_IPV4_IFINDEX: + case NEXTHOP_TYPE_IPV4: + nhe->afi = AFI_IP; + break; + case NEXTHOP_TYPE_IPV6_IFINDEX: + case NEXTHOP_TYPE_IPV6: + nhe->afi = AFI_IP6; + break; + } + } +} + +struct nhg_hash_entry *zebra_nhg_alloc(void) +{ + struct nhg_hash_entry *nhe; + + nhe = XCALLOC(MTYPE_NHG, sizeof(struct nhg_hash_entry)); + + return nhe; +} + +/* + * Allocate new nhe and make shallow copy of 'orig'; no + * recursive info is copied. + */ +struct nhg_hash_entry *zebra_nhe_copy(const struct nhg_hash_entry *orig, + uint32_t id) +{ + struct nhg_hash_entry *nhe; + + nhe = zebra_nhg_alloc(); + + nhe->id = id; + + nexthop_group_copy(&(nhe->nhg), &(orig->nhg)); + + nhe->vrf_id = orig->vrf_id; + nhe->afi = orig->afi; + nhe->type = orig->type ? orig->type : ZEBRA_ROUTE_NHG; + nhe->refcnt = 0; + nhe->dplane_ref = zebra_router_get_next_sequence(); + + /* Copy backup info also, if present */ + if (orig->backup_info) + nhe->backup_info = nhg_backup_copy(orig->backup_info); + + return nhe; +} + +/* Allocation via hash handler */ +static void *zebra_nhg_hash_alloc(void *arg) +{ + struct nhg_hash_entry *nhe = NULL; + struct nhg_hash_entry *copy = arg; + + nhe = zebra_nhe_copy(copy, copy->id); + + /* Mark duplicate nexthops in a group at creation time. */ + nexthop_group_mark_duplicates(&(nhe->nhg)); + + /* + * Add the ifp now if it's not a group or recursive and has ifindex. + * + * A proto-owned ID is always a group. 
 */
	if (!PROTO_OWNED(nhe) && nhe->nhg.nexthop && !nhe->nhg.nexthop->next
	    && !nhe->nhg.nexthop->resolved && nhe->nhg.nexthop->ifindex) {
		struct interface *ifp = NULL;

		/* Singleton with an ifindex: bind the nhe to the interface so
		 * interface events can walk back to dependent NHGs.
		 */
		ifp = if_lookup_by_index(nhe->nhg.nexthop->ifindex,
					 nhe->nhg.nexthop->vrf_id);
		if (ifp)
			zebra_nhg_set_if(nhe, ifp);
		else {
			if (IS_ZEBRA_DEBUG_NHG)
				zlog_debug(
					"Failed to lookup an interface with ifindex=%d in vrf=%u for NHE %pNG",
					nhe->nhg.nexthop->ifindex,
					nhe->nhg.nexthop->vrf_id, nhe);
		}
	}

	return nhe;
}

/* Hash key for the main NHG hash table: combines the primary nexthop
 * group hash, the backup group hash (0 if none), type, vrf and afi.
 */
uint32_t zebra_nhg_hash_key(const void *arg)
{
	const struct nhg_hash_entry *nhe = arg;
	uint32_t key = 0x5a351234;
	uint32_t primary = 0;
	uint32_t backup = 0;

	primary = nexthop_group_hash(&(nhe->nhg));
	if (nhe->backup_info)
		backup = nexthop_group_hash(&(nhe->backup_info->nhe->nhg));

	key = jhash_3words(primary, backup, nhe->type, key);

	key = jhash_2words(nhe->vrf_id, nhe->afi, key);

	return key;
}

/* Hash key for the ID-indexed table: the NHG ID itself. */
uint32_t zebra_nhg_id_key(const void *arg)
{
	const struct nhg_hash_entry *nhe = arg;

	return nhe->id;
}

/* Helper with common nhg/nhe nexthop comparison logic */
static bool nhg_compare_nexthops(const struct nexthop *nh1,
				 const struct nexthop *nh2)
{
	assert(nh1 != NULL && nh2 != NULL);

	/*
	 * We have to check the active flag of each individual one,
	 * not just the overall active_num. This solves the special case
	 * issue of a route with a nexthop group with one nexthop
	 * resolving to itself and thus marking it inactive. If we
	 * have two different routes each wanting to mark a different
	 * nexthop inactive, they need to hash to two different groups.
	 *
	 * If we just hashed on num_active, they would hash the same
	 * which is incorrect.
+ * + * ex) + * 1.1.1.0/24 + * -> 1.1.1.1 dummy1 (inactive) + * -> 1.1.2.1 dummy2 + * + * 1.1.2.0/24 + * -> 1.1.1.1 dummy1 + * -> 1.1.2.1 dummy2 (inactive) + * + * Without checking each individual one, they would hash to + * the same group and both have 1.1.1.1 dummy1 marked inactive. + * + */ + if (CHECK_FLAG(nh1->flags, NEXTHOP_FLAG_ACTIVE) + != CHECK_FLAG(nh2->flags, NEXTHOP_FLAG_ACTIVE)) + return false; + + if (!nexthop_same(nh1, nh2)) + return false; + + return true; +} + +bool zebra_nhg_hash_equal(const void *arg1, const void *arg2) +{ + const struct nhg_hash_entry *nhe1 = arg1; + const struct nhg_hash_entry *nhe2 = arg2; + struct nexthop *nexthop1; + struct nexthop *nexthop2; + + /* No matter what if they equal IDs, assume equal */ + if (nhe1->id && nhe2->id && (nhe1->id == nhe2->id)) + return true; + + if (nhe1->type != nhe2->type) + return false; + + if (nhe1->vrf_id != nhe2->vrf_id) + return false; + + if (nhe1->afi != nhe2->afi) + return false; + + /* Nexthops should be in-order, so we simply compare them in-place */ + for (nexthop1 = nhe1->nhg.nexthop, nexthop2 = nhe2->nhg.nexthop; + nexthop1 && nexthop2; + nexthop1 = nexthop1->next, nexthop2 = nexthop2->next) { + + if (!nhg_compare_nexthops(nexthop1, nexthop2)) + return false; + } + + /* Check for unequal list lengths */ + if (nexthop1 || nexthop2) + return false; + + /* If there's no backup info, comparison is done. 
*/ + if ((nhe1->backup_info == NULL) && (nhe2->backup_info == NULL)) + return true; + + /* Compare backup info also - test the easy things first */ + if (nhe1->backup_info && (nhe2->backup_info == NULL)) + return false; + if (nhe2->backup_info && (nhe1->backup_info == NULL)) + return false; + + /* Compare number of backups before actually comparing any */ + for (nexthop1 = nhe1->backup_info->nhe->nhg.nexthop, + nexthop2 = nhe2->backup_info->nhe->nhg.nexthop; + nexthop1 && nexthop2; + nexthop1 = nexthop1->next, nexthop2 = nexthop2->next) { + ; + } + + /* Did we find the end of one list before the other? */ + if (nexthop1 || nexthop2) + return false; + + /* Have to compare the backup nexthops */ + for (nexthop1 = nhe1->backup_info->nhe->nhg.nexthop, + nexthop2 = nhe2->backup_info->nhe->nhg.nexthop; + nexthop1 && nexthop2; + nexthop1 = nexthop1->next, nexthop2 = nexthop2->next) { + + if (!nhg_compare_nexthops(nexthop1, nexthop2)) + return false; + } + + return true; +} + +bool zebra_nhg_hash_id_equal(const void *arg1, const void *arg2) +{ + const struct nhg_hash_entry *nhe1 = arg1; + const struct nhg_hash_entry *nhe2 = arg2; + + return nhe1->id == nhe2->id; +} + +static int zebra_nhg_process_grp(struct nexthop_group *nhg, + struct nhg_connected_tree_head *depends, + struct nh_grp *grp, uint8_t count) +{ + nhg_connected_tree_init(depends); + + for (int i = 0; i < count; i++) { + struct nhg_hash_entry *depend = NULL; + /* We do not care about nexthop_grp.weight at + * this time. But we should figure out + * how to adapt this to our code in + * the future. + */ + depend = depends_find_id_add(depends, grp[i].id); + + if (!depend) { + flog_err( + EC_ZEBRA_NHG_SYNC, + "Received Nexthop Group from the kernel with a dependent Nexthop ID (%u) which we do not have in our table", + grp[i].id); + return -1; + } + + /* + * If this is a nexthop with its own group + * dependencies, add them as well. Not sure its + * even possible to have a group within a group + * in the kernel. 
+ */ + + copy_nexthops(&nhg->nexthop, depend->nhg.nexthop, NULL); + } + + return 0; +} + +static void handle_recursive_depend(struct nhg_connected_tree_head *nhg_depends, + struct nexthop *nh, afi_t afi, int type) +{ + struct nhg_hash_entry *depend = NULL; + struct nexthop_group resolved_ng = {}; + + resolved_ng.nexthop = nh; + + if (IS_ZEBRA_DEBUG_NHG_DETAIL) + zlog_debug("%s: head %p, nh %pNHv", + __func__, nhg_depends, nh); + + depend = zebra_nhg_rib_find(0, &resolved_ng, afi, type); + + if (IS_ZEBRA_DEBUG_NHG_DETAIL) + zlog_debug("%s: nh %pNHv => %p (%u)", + __func__, nh, depend, + depend ? depend->id : 0); + + if (depend) + depends_add(nhg_depends, depend); +} + +/* + * Lookup an nhe in the global hash, using data from another nhe. If 'lookup' + * has an id value, that's used. Create a new global/shared nhe if not found. + */ +static bool zebra_nhe_find(struct nhg_hash_entry **nhe, /* return value */ + struct nhg_hash_entry *lookup, + struct nhg_connected_tree_head *nhg_depends, + afi_t afi, bool from_dplane) +{ + bool created = false; + bool recursive = false; + struct nhg_hash_entry *newnhe, *backup_nhe; + struct nexthop *nh = NULL; + + if (IS_ZEBRA_DEBUG_NHG_DETAIL) + zlog_debug( + "%s: id %u, lookup %p, vrf %d, type %d, depends %p%s", + __func__, lookup->id, lookup, lookup->vrf_id, + lookup->type, nhg_depends, + (from_dplane ? " (from dplane)" : "")); + + if (lookup->id) + (*nhe) = zebra_nhg_lookup_id(lookup->id); + else + (*nhe) = hash_lookup(zrouter.nhgs, lookup); + + if (IS_ZEBRA_DEBUG_NHG_DETAIL) + zlog_debug("%s: lookup => %p (%pNG)", __func__, *nhe, *nhe); + + /* If we found an existing object, we're done */ + if (*nhe) + goto done; + + /* We're going to create/insert a new nhe: + * assign the next global id value if necessary. + */ + if (lookup->id == 0) + lookup->id = nhg_get_next_id(); + + if (!from_dplane && lookup->id < ZEBRA_NHG_PROTO_LOWER) { + /* + * This is a zebra hashed/owned NHG. + * + * It goes in HASH and ID table. 
+ */ + newnhe = hash_get(zrouter.nhgs, lookup, zebra_nhg_hash_alloc); + zebra_nhg_insert_id(newnhe); + } else { + /* + * This is upperproto owned NHG or one we read in from dataplane + * and should not be hashed to. + * + * It goes in ID table. + */ + newnhe = + hash_get(zrouter.nhgs_id, lookup, zebra_nhg_hash_alloc); + } + + created = true; + + /* Mail back the new object */ + *nhe = newnhe; + + if (IS_ZEBRA_DEBUG_NHG_DETAIL) + zlog_debug("%s: => created %p (%pNG)", __func__, newnhe, + newnhe); + + /* Only hash/lookup the depends if the first lookup + * fails to find something. This should hopefully save a + * lot of cycles for larger ecmp sizes. + */ + if (nhg_depends) { + /* If you don't want to hash on each nexthop in the + * nexthop group struct you can pass the depends + * directly. Kernel-side we do this since it just looks + * them up via IDs. + */ + zebra_nhg_connect_depends(newnhe, nhg_depends); + goto done; + } + + /* Prepare dependency relationships if this is not a + * singleton nexthop. There are two cases: a single + * recursive nexthop, where we need a relationship to the + * resolving nexthop; or a group of nexthops, where we need + * relationships with the corresponding singletons. + */ + zebra_nhg_depends_init(newnhe); + + nh = newnhe->nhg.nexthop; + + if (CHECK_FLAG(nh->flags, NEXTHOP_FLAG_ACTIVE)) + SET_FLAG(newnhe->flags, NEXTHOP_GROUP_VALID); + + if (nh->next == NULL && newnhe->id < ZEBRA_NHG_PROTO_LOWER) { + if (CHECK_FLAG(nh->flags, NEXTHOP_FLAG_RECURSIVE)) { + /* Single recursive nexthop */ + handle_recursive_depend(&newnhe->nhg_depends, + nh->resolved, afi, + newnhe->type); + recursive = true; + } + } else { + /* Proto-owned are groups by default */ + /* List of nexthops */ + for (nh = newnhe->nhg.nexthop; nh; nh = nh->next) { + if (IS_ZEBRA_DEBUG_NHG_DETAIL) + zlog_debug("%s: depends NH %pNHv %s", + __func__, nh, + CHECK_FLAG(nh->flags, + NEXTHOP_FLAG_RECURSIVE) ? 
+ "(R)" : ""); + + depends_find_add(&newnhe->nhg_depends, nh, afi, + newnhe->type, from_dplane); + } + } + + if (recursive) + SET_FLAG(newnhe->flags, NEXTHOP_GROUP_RECURSIVE); + + /* Attach dependent backpointers to singletons */ + zebra_nhg_connect_depends(newnhe, &newnhe->nhg_depends); + + /** + * Backup Nexthops + */ + + if (zebra_nhg_get_backup_nhg(newnhe) == NULL || + zebra_nhg_get_backup_nhg(newnhe)->nexthop == NULL) + goto done; + + /* If there are backup nexthops, add them to the backup + * depends tree. The rules here are a little different. + */ + recursive = false; + backup_nhe = newnhe->backup_info->nhe; + + nh = backup_nhe->nhg.nexthop; + + /* Singleton recursive NH */ + if (nh->next == NULL && + CHECK_FLAG(nh->flags, NEXTHOP_FLAG_RECURSIVE)) { + if (IS_ZEBRA_DEBUG_NHG_DETAIL) + zlog_debug("%s: backup depend NH %pNHv (R)", + __func__, nh); + + /* Single recursive nexthop */ + handle_recursive_depend(&backup_nhe->nhg_depends, nh->resolved, + afi, backup_nhe->type); + recursive = true; + } else { + /* One or more backup NHs */ + for (; nh; nh = nh->next) { + if (IS_ZEBRA_DEBUG_NHG_DETAIL) + zlog_debug("%s: backup depend NH %pNHv %s", + __func__, nh, + CHECK_FLAG(nh->flags, + NEXTHOP_FLAG_RECURSIVE) ? + "(R)" : ""); + + depends_find_add(&backup_nhe->nhg_depends, nh, afi, + backup_nhe->type, from_dplane); + } + } + + if (recursive) + SET_FLAG(backup_nhe->flags, NEXTHOP_GROUP_RECURSIVE); + +done: + /* Reset time since last update */ + (*nhe)->uptime = monotime(NULL); + + return created; +} + +/* + * Lookup or create an nhe, based on an nhg or an nhe id. 
+ */ +static bool zebra_nhg_find(struct nhg_hash_entry **nhe, uint32_t id, + struct nexthop_group *nhg, + struct nhg_connected_tree_head *nhg_depends, + vrf_id_t vrf_id, afi_t afi, int type, + bool from_dplane) +{ + struct nhg_hash_entry lookup = {}; + bool created = false; + + if (IS_ZEBRA_DEBUG_NHG_DETAIL) + zlog_debug("%s: id %u, nhg %p, vrf %d, type %d, depends %p", + __func__, id, nhg, vrf_id, type, + nhg_depends); + + /* Use a temporary nhe and call into the superset/common code */ + lookup.id = id; + lookup.type = type ? type : ZEBRA_ROUTE_NHG; + lookup.nhg = *nhg; + + lookup.vrf_id = vrf_id; + if (nhg_depends || lookup.nhg.nexthop->next) { + /* Groups can have all vrfs and AF's in them */ + lookup.afi = AFI_UNSPEC; + } else { + switch (lookup.nhg.nexthop->type) { + case (NEXTHOP_TYPE_IFINDEX): + case (NEXTHOP_TYPE_BLACKHOLE): + /* + * This switch case handles setting the afi different + * for ipv4/v6 routes. Ifindex/blackhole nexthop + * objects cannot be ambiguous, they must be Address + * Family specific. If we get here, we will either use + * the AF of the route, or the one we got passed from + * here from the kernel. + */ + lookup.afi = afi; + break; + case (NEXTHOP_TYPE_IPV4_IFINDEX): + case (NEXTHOP_TYPE_IPV4): + lookup.afi = AFI_IP; + break; + case (NEXTHOP_TYPE_IPV6_IFINDEX): + case (NEXTHOP_TYPE_IPV6): + lookup.afi = AFI_IP6; + break; + } + } + + created = zebra_nhe_find(nhe, &lookup, nhg_depends, afi, from_dplane); + + return created; +} + +/* Find/create a single nexthop */ +static struct nhg_hash_entry *zebra_nhg_find_nexthop(uint32_t id, + struct nexthop *nh, + afi_t afi, int type, + bool from_dplane) +{ + struct nhg_hash_entry *nhe = NULL; + struct nexthop_group nhg = {}; + vrf_id_t vrf_id = !vrf_is_backend_netns() ? 
VRF_DEFAULT : nh->vrf_id; + + nexthop_group_add_sorted(&nhg, nh); + + zebra_nhg_find(&nhe, id, &nhg, NULL, vrf_id, afi, type, from_dplane); + + if (IS_ZEBRA_DEBUG_NHG_DETAIL) + zlog_debug("%s: nh %pNHv => %p (%pNG)", __func__, nh, nhe, nhe); + + return nhe; +} + +static uint32_t nhg_ctx_get_id(const struct nhg_ctx *ctx) +{ + return ctx->id; +} + +static void nhg_ctx_set_status(struct nhg_ctx *ctx, enum nhg_ctx_status status) +{ + ctx->status = status; +} + +static enum nhg_ctx_status nhg_ctx_get_status(const struct nhg_ctx *ctx) +{ + return ctx->status; +} + +static void nhg_ctx_set_op(struct nhg_ctx *ctx, enum nhg_ctx_op_e op) +{ + ctx->op = op; +} + +static enum nhg_ctx_op_e nhg_ctx_get_op(const struct nhg_ctx *ctx) +{ + return ctx->op; +} + +static vrf_id_t nhg_ctx_get_vrf_id(const struct nhg_ctx *ctx) +{ + return ctx->vrf_id; +} + +static int nhg_ctx_get_type(const struct nhg_ctx *ctx) +{ + return ctx->type; +} + +static int nhg_ctx_get_afi(const struct nhg_ctx *ctx) +{ + return ctx->afi; +} + +static struct nexthop *nhg_ctx_get_nh(struct nhg_ctx *ctx) +{ + return &ctx->u.nh; +} + +static uint8_t nhg_ctx_get_count(const struct nhg_ctx *ctx) +{ + return ctx->count; +} + +static struct nh_grp *nhg_ctx_get_grp(struct nhg_ctx *ctx) +{ + return ctx->u.grp; +} + +static struct nhg_ctx *nhg_ctx_new(void) +{ + struct nhg_ctx *new; + + new = XCALLOC(MTYPE_NHG_CTX, sizeof(struct nhg_ctx)); + + return new; +} + +void nhg_ctx_free(struct nhg_ctx **ctx) +{ + struct nexthop *nh; + + if (ctx == NULL) + return; + + assert((*ctx) != NULL); + + if (nhg_ctx_get_count(*ctx)) + goto done; + + nh = nhg_ctx_get_nh(*ctx); + + nexthop_del_labels(nh); + nexthop_del_srv6_seg6local(nh); + nexthop_del_srv6_seg6(nh); + +done: + XFREE(MTYPE_NHG_CTX, *ctx); +} + +static struct nhg_ctx *nhg_ctx_init(uint32_t id, struct nexthop *nh, + struct nh_grp *grp, vrf_id_t vrf_id, + afi_t afi, int type, uint8_t count) +{ + struct nhg_ctx *ctx = NULL; + + ctx = nhg_ctx_new(); + + ctx->id = id; + 
ctx->vrf_id = vrf_id; + ctx->afi = afi; + ctx->type = type; + ctx->count = count; + + if (count) + /* Copy over the array */ + memcpy(&ctx->u.grp, grp, count * sizeof(struct nh_grp)); + else if (nh) + ctx->u.nh = *nh; + + return ctx; +} + +static void zebra_nhg_set_valid(struct nhg_hash_entry *nhe) +{ + struct nhg_connected *rb_node_dep; + + SET_FLAG(nhe->flags, NEXTHOP_GROUP_VALID); + + frr_each(nhg_connected_tree, &nhe->nhg_dependents, rb_node_dep) + zebra_nhg_set_valid(rb_node_dep->nhe); +} + +static void zebra_nhg_set_invalid(struct nhg_hash_entry *nhe) +{ + struct nhg_connected *rb_node_dep; + + UNSET_FLAG(nhe->flags, NEXTHOP_GROUP_VALID); + + /* If we're in shutdown, this interface event needs to clean + * up installed NHGs, so don't clear that flag directly. + */ + if (!zebra_router_in_shutdown()) + UNSET_FLAG(nhe->flags, NEXTHOP_GROUP_INSTALLED); + + /* Update validity of nexthops depending on it */ + frr_each(nhg_connected_tree, &nhe->nhg_dependents, rb_node_dep) + zebra_nhg_check_valid(rb_node_dep->nhe); +} + +void zebra_nhg_check_valid(struct nhg_hash_entry *nhe) +{ + struct nhg_connected *rb_node_dep = NULL; + bool valid = false; + + /* If anthing else in the group is valid, the group is valid */ + frr_each(nhg_connected_tree, &nhe->nhg_depends, rb_node_dep) { + if (CHECK_FLAG(rb_node_dep->nhe->flags, NEXTHOP_GROUP_VALID)) { + valid = true; + goto done; + } + } + +done: + if (valid) + zebra_nhg_set_valid(nhe); + else + zebra_nhg_set_invalid(nhe); +} + +static void zebra_nhg_release_all_deps(struct nhg_hash_entry *nhe) +{ + /* Remove it from any lists it may be on */ + zebra_nhg_depends_release(nhe); + zebra_nhg_dependents_release(nhe); + if (nhe->ifp) + if_nhg_dependents_del(nhe->ifp, nhe); +} + +static void zebra_nhg_release(struct nhg_hash_entry *nhe) +{ + if (IS_ZEBRA_DEBUG_NHG_DETAIL) + zlog_debug("%s: nhe %p (%pNG)", __func__, nhe, nhe); + + zebra_nhg_release_all_deps(nhe); + + /* + * If its not zebra owned, we didn't store it here and have to be + 
* sure we don't clear one thats actually being used. + */ + if (nhe->id < ZEBRA_NHG_PROTO_LOWER) + hash_release(zrouter.nhgs, nhe); + + hash_release(zrouter.nhgs_id, nhe); +} + +static void zebra_nhg_handle_uninstall(struct nhg_hash_entry *nhe) +{ + zebra_nhg_release(nhe); + zebra_nhg_free(nhe); +} + +static void zebra_nhg_handle_install(struct nhg_hash_entry *nhe) +{ + /* Update validity of groups depending on it */ + struct nhg_connected *rb_node_dep; + + frr_each_safe(nhg_connected_tree, &nhe->nhg_dependents, rb_node_dep) + zebra_nhg_set_valid(rb_node_dep->nhe); +} + +/* + * The kernel/other program has changed the state of a nexthop object we are + * using. + */ +static void zebra_nhg_handle_kernel_state_change(struct nhg_hash_entry *nhe, + bool is_delete) +{ + if (nhe->refcnt) { + flog_err( + EC_ZEBRA_NHG_SYNC, + "Kernel %s a nexthop group with ID (%pNG) that we are still using for a route, sending it back down", + (is_delete ? "deleted" : "updated"), nhe); + + UNSET_FLAG(nhe->flags, NEXTHOP_GROUP_INSTALLED); + zebra_nhg_install_kernel(nhe); + } else + zebra_nhg_handle_uninstall(nhe); +} + +static int nhg_ctx_process_new(struct nhg_ctx *ctx) +{ + struct nexthop_group *nhg = NULL; + struct nhg_connected_tree_head nhg_depends = {}; + struct nhg_hash_entry *lookup = NULL; + struct nhg_hash_entry *nhe = NULL; + + uint32_t id = nhg_ctx_get_id(ctx); + uint8_t count = nhg_ctx_get_count(ctx); + vrf_id_t vrf_id = nhg_ctx_get_vrf_id(ctx); + int type = nhg_ctx_get_type(ctx); + afi_t afi = nhg_ctx_get_afi(ctx); + + lookup = zebra_nhg_lookup_id(id); + + if (IS_ZEBRA_DEBUG_NHG_DETAIL) + zlog_debug("%s: id %u, count %d, lookup => %p", + __func__, id, count, lookup); + + if (lookup) { + /* This is already present in our table, hence an update + * that we did not initate. 
+ */ + zebra_nhg_handle_kernel_state_change(lookup, false); + return 0; + } + + if (nhg_ctx_get_count(ctx)) { + nhg = nexthop_group_new(); + if (zebra_nhg_process_grp(nhg, &nhg_depends, + nhg_ctx_get_grp(ctx), count)) { + depends_decrement_free(&nhg_depends); + nexthop_group_delete(&nhg); + return -ENOENT; + } + + if (!zebra_nhg_find(&nhe, id, nhg, &nhg_depends, vrf_id, afi, + type, true)) + depends_decrement_free(&nhg_depends); + + /* These got copied over in zebra_nhg_alloc() */ + nexthop_group_delete(&nhg); + } else + nhe = zebra_nhg_find_nexthop(id, nhg_ctx_get_nh(ctx), afi, type, + true); + + if (!nhe) { + flog_err( + EC_ZEBRA_TABLE_LOOKUP_FAILED, + "Zebra failed to find or create a nexthop hash entry for ID (%u)", + id); + return -1; + } + + if (IS_ZEBRA_DEBUG_NHG_DETAIL) + zlog_debug("%s: nhe %p (%pNG) is new", __func__, nhe, nhe); + + /* + * If daemon nhg from the kernel, add a refcnt here to indicate the + * daemon owns it. + */ + if (PROTO_OWNED(nhe)) + zebra_nhg_increment_ref(nhe); + + SET_FLAG(nhe->flags, NEXTHOP_GROUP_VALID); + SET_FLAG(nhe->flags, NEXTHOP_GROUP_INSTALLED); + + return 0; +} + +static int nhg_ctx_process_del(struct nhg_ctx *ctx) +{ + struct nhg_hash_entry *nhe = NULL; + uint32_t id = nhg_ctx_get_id(ctx); + + nhe = zebra_nhg_lookup_id(id); + + if (!nhe) { + flog_warn( + EC_ZEBRA_BAD_NHG_MESSAGE, + "Kernel delete message received for nexthop group ID (%u) that we do not have in our ID table", + id); + return -1; + } + + zebra_nhg_handle_kernel_state_change(nhe, true); + + return 0; +} + +static void nhg_ctx_fini(struct nhg_ctx **ctx) +{ + /* + * Just freeing for now, maybe do something more in the future + * based on flag. 
 */

	nhg_ctx_free(ctx);
}

/* Enqueue a dataplane nhg context onto the rib work queue.
 * Returns 0 on success (or if already queued), -1 on enqueue failure.
 */
static int queue_add(struct nhg_ctx *ctx)
{
	/* If its queued or already processed do nothing */
	if (nhg_ctx_get_status(ctx) == NHG_CTX_QUEUED)
		return 0;

	if (rib_queue_nhg_ctx_add(ctx)) {
		nhg_ctx_set_status(ctx, NHG_CTX_FAILURE);
		return -1;
	}

	nhg_ctx_set_status(ctx, NHG_CTX_QUEUED);

	return 0;
}

/* Process one kernel/dataplane nhg context (new or delete).
 * On success the ctx is freed before returning; a NEW ctx that hit a
 * missing dependency is re-queued exactly once instead of being freed.
 */
int nhg_ctx_process(struct nhg_ctx *ctx)
{
	int ret = 0;

	switch (nhg_ctx_get_op(ctx)) {
	case NHG_CTX_OP_NEW:
		ret = nhg_ctx_process_new(ctx);
		if (nhg_ctx_get_count(ctx) && ret == -ENOENT
		    && nhg_ctx_get_status(ctx) != NHG_CTX_REQUEUED) {
			/**
			 * We have entered a situation where we are
			 * processing a group from the kernel
			 * that has a contained nexthop which
			 * we have not yet processed.
			 *
			 * Re-enqueue this ctx to be handled exactly one
			 * more time (indicated by the flag).
			 *
			 * By the time we get back to it, we
			 * should have processed its depends.
			 */
			nhg_ctx_set_status(ctx, NHG_CTX_NONE);
			if (queue_add(ctx) == 0) {
				nhg_ctx_set_status(ctx, NHG_CTX_REQUEUED);
				return 0;
			}
		}
		break;
	case NHG_CTX_OP_DEL:
		ret = nhg_ctx_process_del(ctx);
		/* NOTE(review): no break here — falls through into OP_NONE,
		 * which only breaks, so behavior is unaffected today; confirm
		 * intent and consider an explicit break/fallthrough marker.
		 */
	case NHG_CTX_OP_NONE:
		break;
	}

	nhg_ctx_set_status(ctx, (ret ?
NHG_CTX_FAILURE : NHG_CTX_SUCCESS)); + + nhg_ctx_fini(&ctx); + + return ret; +} + +/* Kernel-side, you either get a single new nexthop or a array of ID's */ +int zebra_nhg_kernel_find(uint32_t id, struct nexthop *nh, struct nh_grp *grp, + uint8_t count, vrf_id_t vrf_id, afi_t afi, int type, + int startup) +{ + struct nhg_ctx *ctx = NULL; + + if (IS_ZEBRA_DEBUG_NHG_DETAIL) + zlog_debug("%s: nh %pNHv, id %u, count %d", + __func__, nh, id, (int)count); + + if (id > id_counter && id < ZEBRA_NHG_PROTO_LOWER) + /* Increase our counter so we don't try to create + * an ID that already exists + */ + id_counter = id; + + ctx = nhg_ctx_init(id, nh, grp, vrf_id, afi, type, count); + nhg_ctx_set_op(ctx, NHG_CTX_OP_NEW); + + /* Under statup conditions, we need to handle them immediately + * like we do for routes. Otherwise, we are going to get a route + * with a nhe_id that we have not handled. + */ + if (startup) + return nhg_ctx_process(ctx); + + if (queue_add(ctx)) { + nhg_ctx_fini(&ctx); + return -1; + } + + return 0; +} + +/* Kernel-side, received delete message */ +int zebra_nhg_kernel_del(uint32_t id, vrf_id_t vrf_id) +{ + struct nhg_ctx *ctx = NULL; + + ctx = nhg_ctx_init(id, NULL, NULL, vrf_id, 0, 0, 0); + + nhg_ctx_set_op(ctx, NHG_CTX_OP_DEL); + + if (queue_add(ctx)) { + nhg_ctx_fini(&ctx); + return -1; + } + + return 0; +} + +/* Some dependency helper functions */ +static struct nhg_hash_entry *depends_find_recursive(const struct nexthop *nh, + afi_t afi, int type) +{ + struct nhg_hash_entry *nhe; + struct nexthop *lookup = NULL; + + lookup = nexthop_dup(nh, NULL); + + nhe = zebra_nhg_find_nexthop(0, lookup, afi, type, false); + + nexthops_free(lookup); + + return nhe; +} + +static struct nhg_hash_entry *depends_find_singleton(const struct nexthop *nh, + afi_t afi, int type, + bool from_dplane) +{ + struct nhg_hash_entry *nhe; + struct nexthop lookup = {}; + + /* Capture a snapshot of this single nh; it might be part of a list, + * so we need to make a standalone 
copy. + */ + nexthop_copy_no_recurse(&lookup, nh, NULL); + + nhe = zebra_nhg_find_nexthop(0, &lookup, afi, type, from_dplane); + + /* The copy may have allocated labels; free them if necessary. */ + nexthop_del_labels(&lookup); + nexthop_del_srv6_seg6local(&lookup); + nexthop_del_srv6_seg6(&lookup); + + if (IS_ZEBRA_DEBUG_NHG_DETAIL) + zlog_debug("%s: nh %pNHv => %p (%pNG)", __func__, nh, nhe, nhe); + + return nhe; +} + +static struct nhg_hash_entry *depends_find(const struct nexthop *nh, afi_t afi, + int type, bool from_dplane) +{ + struct nhg_hash_entry *nhe = NULL; + + if (!nh) + goto done; + + /* We are separating these functions out to increase handling speed + * in the non-recursive case (by not alloc/freeing) + */ + if (CHECK_FLAG(nh->flags, NEXTHOP_FLAG_RECURSIVE)) + nhe = depends_find_recursive(nh, afi, type); + else + nhe = depends_find_singleton(nh, afi, type, from_dplane); + + + if (IS_ZEBRA_DEBUG_NHG_DETAIL) { + zlog_debug("%s: nh %pNHv %s => %p (%pNG)", __func__, nh, + CHECK_FLAG(nh->flags, NEXTHOP_FLAG_RECURSIVE) ? "(R)" + : "", + nhe, nhe); + } + +done: + return nhe; +} + +static void depends_add(struct nhg_connected_tree_head *head, + struct nhg_hash_entry *depend) +{ + if (IS_ZEBRA_DEBUG_NHG_DETAIL) + zlog_debug("%s: head %p nh %pNHv", + __func__, head, depend->nhg.nexthop); + + /* If NULL is returned, it was successfully added and + * needs to have its refcnt incremented. + * + * Else the NHE is already present in the tree and doesn't + * need to increment the refcnt. 
 */
	if (nhg_connected_tree_add_nhe(head, depend) == NULL)
		zebra_nhg_increment_ref(depend);
}

/* Resolve a single nexthop to its nhe and attach it to the depends
 * tree; returns the dependency nhe, or NULL if none could be found.
 */
static struct nhg_hash_entry *
depends_find_add(struct nhg_connected_tree_head *head, struct nexthop *nh,
		 afi_t afi, int type, bool from_dplane)
{
	struct nhg_hash_entry *depend = NULL;

	depend = depends_find(nh, afi, type, from_dplane);

	if (IS_ZEBRA_DEBUG_NHG_DETAIL)
		zlog_debug("%s: nh %pNHv => %p",
			   __func__, nh, depend);

	if (depend)
		depends_add(head, depend);

	return depend;
}

/* Look up a dependency by NHG ID and attach it to the depends tree;
 * returns NULL when the ID is not in our table.
 */
static struct nhg_hash_entry *
depends_find_id_add(struct nhg_connected_tree_head *head, uint32_t id)
{
	struct nhg_hash_entry *depend = NULL;

	depend = zebra_nhg_lookup_id(id);

	if (depend)
		depends_add(head, depend);

	return depend;
}

/* Drop the refcounts held by a depends tree and free the tree itself. */
static void depends_decrement_free(struct nhg_connected_tree_head *head)
{
	nhg_connected_tree_decrement_ref(head);
	nhg_connected_tree_free(head);
}

/* Find an nhe based on a list of nexthops */
struct nhg_hash_entry *zebra_nhg_rib_find(uint32_t id,
					  struct nexthop_group *nhg,
					  afi_t rt_afi, int type)
{
	struct nhg_hash_entry *nhe = NULL;
	vrf_id_t vrf_id;

	/*
	 * CLANG SA is complaining that nexthop may be NULL
	 * Make it happy but this is ridonc
	 */
	assert(nhg->nexthop);
	vrf_id = !vrf_is_backend_netns() ?
VRF_DEFAULT : nhg->nexthop->vrf_id; + + zebra_nhg_find(&nhe, id, nhg, NULL, vrf_id, rt_afi, type, false); + + if (IS_ZEBRA_DEBUG_NHG_DETAIL) + zlog_debug("%s: => nhe %p (%pNG)", __func__, nhe, nhe); + + return nhe; +} + +/* Find an nhe based on a route's nhe */ +struct nhg_hash_entry * +zebra_nhg_rib_find_nhe(struct nhg_hash_entry *rt_nhe, afi_t rt_afi) +{ + struct nhg_hash_entry *nhe = NULL; + + if (!(rt_nhe && rt_nhe->nhg.nexthop)) { + flog_err(EC_ZEBRA_TABLE_LOOKUP_FAILED, + "No nexthop passed to %s", __func__); + return NULL; + } + + if (IS_ZEBRA_DEBUG_NHG_DETAIL) + zlog_debug("%s: rt_nhe %p (%pNG)", __func__, rt_nhe, rt_nhe); + + zebra_nhe_find(&nhe, rt_nhe, NULL, rt_afi, false); + + if (IS_ZEBRA_DEBUG_NHG_DETAIL) + zlog_debug("%s: => nhe %p (%pNG)", __func__, nhe, nhe); + + return nhe; +} + +/* + * Allocate backup nexthop info object. Typically these are embedded in + * nhg_hash_entry objects. + */ +struct nhg_backup_info *zebra_nhg_backup_alloc(void) +{ + struct nhg_backup_info *p; + + p = XCALLOC(MTYPE_NHG, sizeof(struct nhg_backup_info)); + + p->nhe = zebra_nhg_alloc(); + + /* Identify the embedded group used to hold the list of backups */ + SET_FLAG(p->nhe->flags, NEXTHOP_GROUP_BACKUP); + + return p; +} + +/* + * Free backup nexthop info object, deal with any embedded allocations + */ +void zebra_nhg_backup_free(struct nhg_backup_info **p) +{ + if (p && *p) { + if ((*p)->nhe) + zebra_nhg_free((*p)->nhe); + + XFREE(MTYPE_NHG, (*p)); + } +} + +/* Accessor for backup nexthop group */ +struct nexthop_group *zebra_nhg_get_backup_nhg(struct nhg_hash_entry *nhe) +{ + struct nexthop_group *p = NULL; + + if (nhe) { + if (nhe->backup_info && nhe->backup_info->nhe) + p = &(nhe->backup_info->nhe->nhg); + } + + return p; +} + +/* + * Helper to return a copy of a backup_info - note that this is a shallow + * copy, meant to be used when creating a new nhe from info passed in with + * a route e.g. 
 */
static struct nhg_backup_info *
nhg_backup_copy(const struct nhg_backup_info *orig)
{
	struct nhg_backup_info *b;

	b = zebra_nhg_backup_alloc();

	/* Copy list of nexthops */
	nexthop_group_copy(&(b->nhe->nhg), &(orig->nhe->nhg));

	return b;
}

/* Free an nhe's embedded allocations: nexthop list, backup info and
 * the depends/dependents connection trees (dropping depend refs).
 */
static void zebra_nhg_free_members(struct nhg_hash_entry *nhe)
{
	nexthops_free(nhe->nhg.nexthop);

	zebra_nhg_backup_free(&nhe->backup_info);

	/* Decrement to remove connection ref */
	nhg_connected_tree_decrement_ref(&nhe->nhg_depends);
	nhg_connected_tree_free(&nhe->nhg_depends);
	nhg_connected_tree_free(&nhe->nhg_dependents);
}

/* Full teardown of one nhe: cancel its timer, release members, free it. */
void zebra_nhg_free(struct nhg_hash_entry *nhe)
{
	if (IS_ZEBRA_DEBUG_NHG_DETAIL) {
		/* Group or singleton? */
		if (nhe->nhg.nexthop && nhe->nhg.nexthop->next)
			zlog_debug("%s: nhe %p (%pNG), refcnt %d", __func__,
				   nhe, nhe, nhe->refcnt);
		else
			zlog_debug("%s: nhe %p (%pNG), refcnt %d, NH %pNHv",
				   __func__, nhe, nhe, nhe->refcnt,
				   nhe->nhg.nexthop);
	}

	THREAD_OFF(nhe->timer);

	zebra_nhg_free_members(nhe);

	XFREE(MTYPE_NHG, nhe);
}

/*
 * Let's just drop the memory associated with each item
 */
/* Hash-table free callback. Unlike zebra_nhg_free() this only frees the
 * nexthop list — presumably backup info and the connection trees have
 * already been dismantled by the shutdown path; verify against callers.
 */
void zebra_nhg_hash_free(void *p)
{
	struct nhg_hash_entry *nhe = p;

	if (IS_ZEBRA_DEBUG_NHG_DETAIL) {
		/* Group or singleton? */
		if (nhe->nhg.nexthop && nhe->nhg.nexthop->next)
			zlog_debug("%s: nhe %p (%u), refcnt %d", __func__, nhe,
				   nhe->id, nhe->refcnt);
		else
			zlog_debug("%s: nhe %p (%pNG), refcnt %d, NH %pNHv",
				   __func__, nhe, nhe, nhe->refcnt,
				   nhe->nhg.nexthop);
	}

	THREAD_OFF(nhe->timer);

	nexthops_free(nhe->nhg.nexthop);

	XFREE(MTYPE_NHG, nhe);
}

/*
 * On cleanup there are nexthop groups that have not
 * been resolved at all( a nhe->id of 0 ). As such
 * zebra needs to clean up the memory associated with
 * those entries.
 */
void zebra_nhg_hash_free_zero_id(struct hash_bucket *b, void *arg)
{
	struct nhg_hash_entry *nhe = b->data;
	struct nhg_connected *dep;

	/* 'arg' is unused; signature is dictated by the hash-walk callback. */

	while ((dep = nhg_connected_tree_pop(&nhe->nhg_depends))) {
		/* Only id==0 depends are owned here; others are in the hash */
		if (dep->nhe->id == 0)
			zebra_nhg_hash_free(dep->nhe);

		nhg_connected_free(dep);
	}

	while ((dep = nhg_connected_tree_pop(&nhe->nhg_dependents)))
		nhg_connected_free(dep);

	if (nhe->backup_info && nhe->backup_info->nhe->id == 0) {
		while ((dep = nhg_connected_tree_pop(
				&nhe->backup_info->nhe->nhg_depends)))
			nhg_connected_free(dep);

		zebra_nhg_hash_free(nhe->backup_info->nhe);

		XFREE(MTYPE_NHG, nhe->backup_info);
	}
}

/* Keep-around timer callback (armed by zebra_nhg_decrement_ref): drop the
 * artificial final reference if nothing else claimed the nhe meanwhile.
 */
static void zebra_nhg_timer(struct thread *thread)
{
	struct nhg_hash_entry *nhe = THREAD_ARG(thread);

	if (IS_ZEBRA_DEBUG_NHG_DETAIL)
		zlog_debug("Nexthop Timer for nhe: %pNG", nhe);

	if (nhe->refcnt == 1)
		zebra_nhg_decrement_ref(nhe);
}

/* Drop one reference from 'nhe'.  When the count reaches zero for an
 * installed entry (and not during shutdown), the entry is parked with an
 * artificial refcnt of 1 plus a keep-around timer instead of being torn
 * down immediately; otherwise depends are unref'd and the entry may be
 * uninstalled from the kernel.
 */
void zebra_nhg_decrement_ref(struct nhg_hash_entry *nhe)
{
	if (IS_ZEBRA_DEBUG_NHG_DETAIL)
		zlog_debug("%s: nhe %p (%pNG) %d => %d", __func__, nhe, nhe,
			   nhe->refcnt, nhe->refcnt - 1);

	nhe->refcnt--;

	if (!zebra_router_in_shutdown() && nhe->refcnt <= 0 &&
	    CHECK_FLAG(nhe->flags, NEXTHOP_GROUP_INSTALLED) &&
	    !CHECK_FLAG(nhe->flags, NEXTHOP_GROUP_KEEP_AROUND)) {
		/* Park the entry: hold it for zrouter.nhg_keep seconds */
		nhe->refcnt = 1;
		SET_FLAG(nhe->flags, NEXTHOP_GROUP_KEEP_AROUND);
		thread_add_timer(zrouter.master, zebra_nhg_timer, nhe,
				 zrouter.nhg_keep, &nhe->timer);
		return;
	}

	if (!zebra_nhg_depends_is_empty(nhe))
		nhg_connected_tree_decrement_ref(&nhe->nhg_depends);

	if (ZEBRA_NHG_CREATED(nhe) && nhe->refcnt <= 0)
		zebra_nhg_uninstall_kernel(nhe);
}

/* Take one reference on 'nhe'.  If a keep-around timer is pending, cancel
 * it and release the artificial reference it was holding.
 */
void zebra_nhg_increment_ref(struct nhg_hash_entry *nhe)
{
	if (IS_ZEBRA_DEBUG_NHG_DETAIL)
		zlog_debug("%s: nhe %p (%pNG) %d => %d", __func__, nhe, nhe,
			   nhe->refcnt, nhe->refcnt + 1);

	nhe->refcnt++;

	if (thread_is_scheduled(nhe->timer)) {
		THREAD_OFF(nhe->timer);
		/* Undo the artificial ref taken when the timer was armed */
		nhe->refcnt--;
		UNSET_FLAG(nhe->flags, NEXTHOP_GROUP_KEEP_AROUND);
	}

	if (!zebra_nhg_depends_is_empty(nhe))
		nhg_connected_tree_increment_ref(&nhe->nhg_depends);
}

/* Build a new resolved nexthop from 'newhop' (the nexthop of the resolving
 * route), attach it to 'nexthop''s resolved list, and return it.  Label
 * stacks are merged: SR policy labels (if 'policy') or newhop's labels,
 * followed by the parent nexthop's own labels.
 */
static struct nexthop *nexthop_set_resolved(afi_t afi,
					    const struct nexthop *newhop,
					    struct nexthop *nexthop,
					    struct zebra_sr_policy *policy)
{
	struct nexthop *resolved_hop;
	uint8_t num_labels = 0;
	mpls_label_t labels[MPLS_MAX_LABELS];
	enum lsp_types_t label_type = ZEBRA_LSP_NONE;
	int i = 0;

	resolved_hop = nexthop_new();
	SET_FLAG(resolved_hop->flags, NEXTHOP_FLAG_ACTIVE);

	resolved_hop->vrf_id = nexthop->vrf_id;
	switch (newhop->type) {
	case NEXTHOP_TYPE_IPV4:
	case NEXTHOP_TYPE_IPV4_IFINDEX:
		/* If the resolving route specifies a gateway, use it */
		resolved_hop->type = newhop->type;
		resolved_hop->gate.ipv4 = newhop->gate.ipv4;

		if (newhop->ifindex) {
			resolved_hop->type = NEXTHOP_TYPE_IPV4_IFINDEX;
			resolved_hop->ifindex = newhop->ifindex;
		}
		break;
	case NEXTHOP_TYPE_IPV6:
	case NEXTHOP_TYPE_IPV6_IFINDEX:
		resolved_hop->type = newhop->type;
		resolved_hop->gate.ipv6 = newhop->gate.ipv6;

		if (newhop->ifindex) {
			resolved_hop->type = NEXTHOP_TYPE_IPV6_IFINDEX;
			resolved_hop->ifindex = newhop->ifindex;
		}
		break;
	case NEXTHOP_TYPE_IFINDEX:
		/* If the resolving route is an interface route,
		 * it means the gateway we are looking up is connected
		 * to that interface. (The actual network is _not_ onlink).
		 * Therefore, the resolved route should have the original
		 * gateway as nexthop as it is directly connected.
		 *
		 * On Linux, we have to set the onlink netlink flag because
		 * otherwise, the kernel won't accept the route.
		 */
		resolved_hop->flags |= NEXTHOP_FLAG_ONLINK;
		if (afi == AFI_IP) {
			resolved_hop->type = NEXTHOP_TYPE_IPV4_IFINDEX;
			resolved_hop->gate.ipv4 = nexthop->gate.ipv4;
		} else if (afi == AFI_IP6) {
			resolved_hop->type = NEXTHOP_TYPE_IPV6_IFINDEX;
			resolved_hop->gate.ipv6 = nexthop->gate.ipv6;
		}
		resolved_hop->ifindex = newhop->ifindex;
		break;
	case NEXTHOP_TYPE_BLACKHOLE:
		resolved_hop->type = NEXTHOP_TYPE_BLACKHOLE;
		resolved_hop->bh_type = newhop->bh_type;
		break;
	}

	if (newhop->flags & NEXTHOP_FLAG_ONLINK)
		resolved_hop->flags |= NEXTHOP_FLAG_ONLINK;

	/* Copy labels of the resolved route and the parent resolving to it */
	if (policy) {
		int i = 0;

		/*
		 * Don't push the first SID if the corresponding action in the
		 * LFIB is POP.
		 */
		if (!newhop->nh_label || !newhop->nh_label->num_labels
		    || newhop->nh_label->label[0] == MPLS_LABEL_IMPLICIT_NULL)
			i = 1;

		/* NOTE(review): unlike the branches below, this loop has no
		 * MPLS_MAX_LABELS bound check; presumably the segment list
		 * length is already bounded — confirm at the policy source.
		 */
		for (; i < policy->segment_list.label_num; i++)
			labels[num_labels++] = policy->segment_list.labels[i];
		label_type = policy->segment_list.type;
	} else if (newhop->nh_label) {
		for (i = 0; i < newhop->nh_label->num_labels; i++) {
			/* Be a bit picky about overrunning the local array */
			if (num_labels >= MPLS_MAX_LABELS) {
				if (IS_ZEBRA_DEBUG_NHG || IS_ZEBRA_DEBUG_RIB)
					zlog_debug("%s: too many labels in newhop %pNHv",
						   __func__, newhop);
				break;
			}
			labels[num_labels++] = newhop->nh_label->label[i];
		}
		/* Use the "outer" type */
		label_type = newhop->nh_label_type;
	}

	if (nexthop->nh_label) {
		for (i = 0; i < nexthop->nh_label->num_labels; i++) {
			/* Be a bit picky about overrunning the local array */
			if (num_labels >= MPLS_MAX_LABELS) {
				if (IS_ZEBRA_DEBUG_NHG || IS_ZEBRA_DEBUG_RIB)
					zlog_debug("%s: too many labels in nexthop %pNHv",
						   __func__, nexthop);
				break;
			}
			labels[num_labels++] = nexthop->nh_label->label[i];
		}

		/* If the parent has labels, use its type if
		 * we don't already have one.
		 */
		if (label_type == ZEBRA_LSP_NONE)
			label_type = nexthop->nh_label_type;
	}

	if (num_labels)
		nexthop_add_labels(resolved_hop, label_type, num_labels,
				   labels);

	if (nexthop->nh_srv6) {
		nexthop_add_srv6_seg6local(resolved_hop,
					   nexthop->nh_srv6->seg6local_action,
					   &nexthop->nh_srv6->seg6local_ctx);
		nexthop_add_srv6_seg6(resolved_hop,
				      &nexthop->nh_srv6->seg6_segs);
	}

	resolved_hop->rparent = nexthop;
	_nexthop_add(&nexthop->resolved, resolved_hop);

	return resolved_hop;
}

/* Checks if nexthop we are trying to resolve to is valid */
static bool nexthop_valid_resolve(const struct nexthop *nexthop,
				  const struct nexthop *resolved)
{
	/* Can't resolve to a recursive nexthop */
	if (CHECK_FLAG(resolved->flags, NEXTHOP_FLAG_RECURSIVE))
		return false;

	/* Must be ACTIVE */
	if (!CHECK_FLAG(resolved->flags, NEXTHOP_FLAG_ACTIVE))
		return false;

	/* Must not be duplicate */
	if (CHECK_FLAG(resolved->flags, NEXTHOP_FLAG_DUPLICATE))
		return false;

	switch (nexthop->type) {
	case NEXTHOP_TYPE_IPV4_IFINDEX:
	case NEXTHOP_TYPE_IPV6_IFINDEX:
		/* If the nexthop we are resolving to does not match the
		 * ifindex for the nexthop the route wanted, its not valid.
		 */
		if (nexthop->ifindex != resolved->ifindex)
			return false;
		break;
	case NEXTHOP_TYPE_IPV4:
	case NEXTHOP_TYPE_IPV6:
	case NEXTHOP_TYPE_IFINDEX:
	case NEXTHOP_TYPE_BLACKHOLE:
		break;
	}

	return true;
}

/*
 * When resolving a recursive nexthop, capture backup nexthop(s) also
 * so they can be conveyed through the dataplane to the FIB. We'll look
 * at the backups in the resolving nh 'nexthop' and its nhe, and copy them
 * into the route's resolved nh 'resolved' and its nhe 'nhe'.
 */
static int resolve_backup_nexthops(const struct nexthop *nexthop,
				   const struct nhg_hash_entry *nhe,
				   struct nexthop *resolved,
				   struct nhg_hash_entry *resolve_nhe,
				   struct backup_nh_map_s *map)
{
	int i, j, idx;
	const struct nexthop *bnh;
	struct nexthop *nh, *newnh;
	mpls_label_t labels[MPLS_MAX_LABELS];
	uint8_t num_labels;

	assert(nexthop->backup_num <= NEXTHOP_MAX_BACKUPS);

	/* Locate backups from the original nexthop's backup index and nhe */
	for (i = 0; i < nexthop->backup_num; i++) {
		idx = nexthop->backup_idx[i];

		/* Do we already know about this particular backup? */
		for (j = 0; j < map->map_count; j++) {
			if (map->map[j].orig_idx == idx)
				break;
		}

		if (j < map->map_count) {
			/* Already copied on a previous call; just record its
			 * index in the new backup list.
			 */
			resolved->backup_idx[resolved->backup_num] =
				map->map[j].new_idx;
			resolved->backup_num++;

			SET_FLAG(resolved->flags, NEXTHOP_FLAG_HAS_BACKUP);

			if (IS_ZEBRA_DEBUG_RIB_DETAILED)
				zlog_debug("%s: found map idx orig %d, new %d",
					   __func__, map->map[j].orig_idx,
					   map->map[j].new_idx);

			continue;
		}

		/* We can't handle any new map entries at this point. */
		if (map->map_count == MULTIPATH_NUM)
			break;

		/* Need to create/copy a new backup: walk the resolving nhe's
		 * backup list to position 'idx'.
		 */
		bnh = nhe->backup_info->nhe->nhg.nexthop;
		for (j = 0; j < idx; j++) {
			if (bnh == NULL)
				break;
			bnh = bnh->next;
		}

		/* Whoops - bad index in the nexthop? */
		if (bnh == NULL)
			continue;

		if (resolve_nhe->backup_info == NULL)
			resolve_nhe->backup_info = zebra_nhg_backup_alloc();

		/* Update backup info in the resolving nexthop and its nhe */
		newnh = nexthop_dup_no_recurse(bnh, NULL);

		/* We may need some special handling for mpls labels: the new
		 * backup needs to carry the recursive nexthop's labels,
		 * if any: they may be vrf labels e.g.
		 * The original/inner labels are in the stack of 'resolve_nhe',
		 * if that is longer than the stack in 'nexthop'.
		 */
		if (newnh->nh_label && resolved->nh_label &&
		    nexthop->nh_label) {
			if (resolved->nh_label->num_labels >
			    nexthop->nh_label->num_labels) {
				/* Prepare new label stack */
				num_labels = 0;
				for (j = 0; j < newnh->nh_label->num_labels;
				     j++) {
					labels[j] = newnh->nh_label->label[j];
					num_labels++;
				}

				/* Include inner labels */
				for (j = nexthop->nh_label->num_labels;
				     j < resolved->nh_label->num_labels;
				     j++) {
					labels[num_labels] =
						resolved->nh_label->label[j];
					num_labels++;
				}

				/* Replace existing label stack in the backup */
				nexthop_del_labels(newnh);
				nexthop_add_labels(newnh, bnh->nh_label_type,
						   num_labels, labels);
			}
		}

		/* Need to compute the new backup index in the new
		 * backup list, and add to map struct.
		 */
		j = 0;
		nh = resolve_nhe->backup_info->nhe->nhg.nexthop;
		if (nh) {
			/* Append at the tail; 'j' ends as newnh's index */
			while (nh->next) {
				nh = nh->next;
				j++;
			}

			nh->next = newnh;
			j++;

		} else /* First one */
			resolve_nhe->backup_info->nhe->nhg.nexthop = newnh;

		/* Capture index */
		resolved->backup_idx[resolved->backup_num] = j;
		resolved->backup_num++;

		SET_FLAG(resolved->flags, NEXTHOP_FLAG_HAS_BACKUP);

		if (IS_ZEBRA_DEBUG_RIB_DETAILED)
			zlog_debug("%s: added idx orig %d, new %d",
				   __func__, idx, j);

		/* Update map/cache */
		map->map[map->map_count].orig_idx = idx;
		map->map[map->map_count].new_idx = j;
		map->map_count++;
	}

	/* Always returns 0; callers ignore the value. */
	return 0;
}

/*
 * So this nexthop resolution has decided that a connected route
 * is the correct choice. At this point in time if FRR has multiple
 * connected routes that all point to the same prefix one will be
 * selected, *but* the particular interface may not be the one
 * that the nexthop points at. Let's look at all the available
 * connected routes on this node and if any of them auto match
 * the routes nexthops ifindex that is good enough for a match
 *
 * This code is depending on the fact that a nexthop->ifindex is 0
 * if it is not known, if this assumption changes, yummy!
 * Additionally a ifindx of 0 means figure it out for us.
 */
static struct route_entry *
zebra_nhg_connected_ifindex(struct route_node *rn, struct route_entry *match,
			    int32_t curr_ifindex)
{
	struct nexthop *newhop = match->nhe->nhg.nexthop;
	struct route_entry *re;

	assert(newhop); /* What a kick in the patooey */

	if (curr_ifindex == 0)
		return match;

	if (curr_ifindex == newhop->ifindex)
		return match;

	/*
	 * At this point we know that this route is matching a connected
	 * but there are possibly a bunch of connected routes that are
	 * alive that should be considered as well. So let's iterate over
	 * all the re's and see if they are connected as well and maybe one
	 * of those ifindexes match as well.
	 */
	RNODE_FOREACH_RE (rn, re) {
		if (re->type != ZEBRA_ROUTE_CONNECT)
			continue;

		if (CHECK_FLAG(re->status, ROUTE_ENTRY_REMOVED))
			continue;

		/*
		 * zebra has a connected route that is not removed
		 * let's test if it is good
		 */
		newhop = re->nhe->nhg.nexthop;
		assert(newhop);
		if (curr_ifindex == newhop->ifindex)
			return re;
	}

	/* Nothing better found: fall back to the originally selected entry */
	return match;
}

/*
 * Given a nexthop we need to properly recursively resolve,
 * do a table lookup to find and match if at all possible.
 * Set the nexthop->ifindex and resolution info as appropriate.
 */
static int nexthop_active(struct nexthop *nexthop, struct nhg_hash_entry *nhe,
			  const struct prefix *top, int type, uint32_t flags,
			  uint32_t *pmtu, vrf_id_t vrf_id)
{
	/* Returns 1 if the nexthop is usable (possibly after recursive
	 * resolution), 0 otherwise.  On success via recursion, resolved
	 * nexthops are attached to 'nexthop' and *pmtu may be set from the
	 * resolving route.
	 */
	struct prefix p;
	struct route_table *table;
	struct route_node *rn;
	struct route_entry *match = NULL;
	int resolved;
	struct zebra_nhlfe *nhlfe;
	struct nexthop *newhop;
	struct interface *ifp;
	rib_dest_t *dest;
	struct zebra_vrf *zvrf;
	struct in_addr local_ipv4;
	struct in_addr *ipv4;
	afi_t afi = AFI_IP;

	/* Reset some nexthop attributes that we'll recompute if necessary */
	if ((nexthop->type == NEXTHOP_TYPE_IPV4)
	    || (nexthop->type == NEXTHOP_TYPE_IPV6))
		nexthop->ifindex = 0;

	UNSET_FLAG(nexthop->flags, NEXTHOP_FLAG_RECURSIVE);
	nexthops_free(nexthop->resolved);
	nexthop->resolved = NULL;

	/*
	 * Set afi based on nexthop type.
	 * Some nexthop types get special handling, possibly skipping
	 * the normal processing.
	 */
	switch (nexthop->type) {
	case NEXTHOP_TYPE_IFINDEX:

		ifp = if_lookup_by_index(nexthop->ifindex, nexthop->vrf_id);
		/*
		 * If the interface exists and its operative or its a kernel
		 * route and interface is up, its active. We trust kernel routes
		 * to be good.
		 */
		if (ifp && (if_is_operative(ifp)))
			return 1;
		else
			return 0;
		break;

	case NEXTHOP_TYPE_IPV6_IFINDEX:
		afi = AFI_IP6;

		/* Link-local v6 gateways resolve purely by interface state */
		if (IN6_IS_ADDR_LINKLOCAL(&nexthop->gate.ipv6)) {
			ifp = if_lookup_by_index(nexthop->ifindex,
						 nexthop->vrf_id);
			if (ifp && if_is_operative(ifp))
				return 1;
			else
				return 0;
		}
		break;

	case NEXTHOP_TYPE_IPV4:
	case NEXTHOP_TYPE_IPV4_IFINDEX:
		afi = AFI_IP;
		break;
	case NEXTHOP_TYPE_IPV6:
		afi = AFI_IP6;
		break;

	case NEXTHOP_TYPE_BLACKHOLE:
		return 1;
	}

	/*
	 * If the nexthop has been marked as 'onlink' we just need to make
	 * sure the nexthop's interface is known and is operational.
	 */
	if (CHECK_FLAG(nexthop->flags, NEXTHOP_FLAG_ONLINK)) {
		ifp = if_lookup_by_index(nexthop->ifindex, nexthop->vrf_id);
		if (!ifp) {
			if (IS_ZEBRA_DEBUG_RIB_DETAILED)
				zlog_debug("nexthop %pNHv marked onlink but nhif %u doesn't exist",
					   nexthop, nexthop->ifindex);
			return 0;
		}
		if (!if_is_operative(ifp)) {
			if (IS_ZEBRA_DEBUG_RIB_DETAILED)
				zlog_debug("nexthop %pNHv marked onlink but nhif %s is not operational",
					   nexthop, ifp->name);
			return 0;
		}
		return 1;
	}

	/* Refuse a host route whose gateway is the route's own prefix in the
	 * same vrf: it would resolve through itself.
	 */
	if (top &&
	    ((top->family == AF_INET && top->prefixlen == IPV4_MAX_BITLEN &&
	      nexthop->gate.ipv4.s_addr == top->u.prefix4.s_addr) ||
	     (top->family == AF_INET6 && top->prefixlen == IPV6_MAX_BITLEN &&
	      memcmp(&nexthop->gate.ipv6, &top->u.prefix6, IPV6_MAX_BYTELEN) ==
		      0)) &&
	    nexthop->vrf_id == vrf_id) {
		if (IS_ZEBRA_DEBUG_RIB_DETAILED)
			zlog_debug(
				" :%s: Attempting to install a max prefixlength route through itself",
				__func__);
		return 0;
	}

	/* Validation for ipv4 mapped ipv6 nexthop. */
	if (IS_MAPPED_IPV6(&nexthop->gate.ipv6)) {
		afi = AFI_IP;
		ipv4 = &local_ipv4;
		ipv4_mapped_ipv6_to_ipv4(&nexthop->gate.ipv6, ipv4);
	} else {
		ipv4 = &nexthop->gate.ipv4;
	}

	/* Processing for nexthops with SR 'color' attribute, using
	 * the corresponding SR policy object.
	 */
	if (nexthop->srte_color) {
		struct ipaddr endpoint = {0};
		struct zebra_sr_policy *policy;

		switch (afi) {
		case AFI_IP:
			endpoint.ipa_type = IPADDR_V4;
			endpoint.ipaddr_v4 = *ipv4;
			break;
		case AFI_IP6:
			endpoint.ipa_type = IPADDR_V6;
			endpoint.ipaddr_v6 = nexthop->gate.ipv6;
			break;
		default:
			flog_err(EC_LIB_DEVELOPMENT,
				 "%s: unknown address-family: %u", __func__,
				 afi);
			exit(1);
		}

		policy = zebra_sr_policy_find(nexthop->srte_color, &endpoint);
		if (policy && policy->status == ZEBRA_SR_POLICY_UP) {
			resolved = 0;
			frr_each_safe (nhlfe_list, &policy->lsp->nhlfe_list,
				       nhlfe) {
				if (!CHECK_FLAG(nhlfe->flags,
						NHLFE_FLAG_SELECTED)
				    || CHECK_FLAG(nhlfe->flags,
						  NHLFE_FLAG_DELETED))
					continue;
				SET_FLAG(nexthop->flags,
					 NEXTHOP_FLAG_RECURSIVE);
				nexthop_set_resolved(afi, nhlfe->nexthop,
						     nexthop, policy);
				resolved = 1;
			}
			if (resolved)
				return 1;
			/* If the policy didn't resolve, fall through to the
			 * normal table lookup below.
			 */
		}
	}

	/* Make lookup prefix. */
	memset(&p, 0, sizeof(struct prefix));
	switch (afi) {
	case AFI_IP:
		p.family = AF_INET;
		p.prefixlen = IPV4_MAX_BITLEN;
		p.u.prefix4 = *ipv4;
		break;
	case AFI_IP6:
		p.family = AF_INET6;
		p.prefixlen = IPV6_MAX_BITLEN;
		p.u.prefix6 = nexthop->gate.ipv6;
		break;
	default:
		assert(afi != AFI_IP && afi != AFI_IP6);
		break;
	}
	/* Lookup table. */
	table = zebra_vrf_table(afi, SAFI_UNICAST, nexthop->vrf_id);
	/* get zvrf */
	zvrf = zebra_vrf_lookup_by_id(nexthop->vrf_id);
	if (!table || !zvrf) {
		if (IS_ZEBRA_DEBUG_RIB_DETAILED)
			zlog_debug(" %s: Table not found", __func__);
		return 0;
	}

	rn = route_node_match(table, (struct prefix *)&p);
	while (rn) {
		route_unlock_node(rn);

		/* Lookup should halt if we've matched against ourselves ('top',
		 * if specified) - i.e., we cannot have a nexthop NH1 is
		 * resolved by a route NH1. The exception is if the route is a
		 * host route.
		 */
		if (prefix_same(&rn->p, top))
			if (((afi == AFI_IP)
			     && (rn->p.prefixlen != IPV4_MAX_BITLEN))
			    || ((afi == AFI_IP6)
				&& (rn->p.prefixlen != IPV6_MAX_BITLEN))) {
				if (IS_ZEBRA_DEBUG_RIB_DETAILED)
					zlog_debug(
						" %s: Matched against ourself and prefix length is not max bit length",
						__func__);
				return 0;
			}

		/* Pick up selected route. */
		/* However, do not resolve over default route unless explicitly
		 * allowed.
		 */
		if (is_default_prefix(&rn->p)
		    && !rnh_resolve_via_default(zvrf, p.family)) {
			if (IS_ZEBRA_DEBUG_RIB_DETAILED)
				zlog_debug(
					" :%s: Resolved against default route",
					__func__);
			return 0;
		}

		dest = rib_dest_from_rnode(rn);
		if (dest && dest->selected_fib
		    && !CHECK_FLAG(dest->selected_fib->status,
				   ROUTE_ENTRY_REMOVED)
		    && dest->selected_fib->type != ZEBRA_ROUTE_TABLE)
			match = dest->selected_fib;

		/* If there is no selected route or matched route is EGP, go up
		 * tree.
		 */
		if (!match) {
			do {
				rn = rn->parent;
			} while (rn && rn->info == NULL);
			if (rn)
				route_lock_node(rn);

			continue;
		}

		if ((match->type == ZEBRA_ROUTE_CONNECT) ||
		    (RIB_SYSTEM_ROUTE(match) && RSYSTEM_ROUTE(type))) {
			/* Directly connected: adopt the connected route's
			 * interface (possibly from a sibling connected re).
			 */
			match = zebra_nhg_connected_ifindex(rn, match,
							    nexthop->ifindex);

			newhop = match->nhe->nhg.nexthop;
			if (nexthop->type == NEXTHOP_TYPE_IPV4 ||
			    nexthop->type == NEXTHOP_TYPE_IPV6)
				nexthop->ifindex = newhop->ifindex;
			else if (nexthop->ifindex != newhop->ifindex) {
				if (IS_ZEBRA_DEBUG_RIB_DETAILED)
					zlog_debug(
						"%s: %pNHv given ifindex does not match nexthops ifindex found: %pNHv",
						__func__, nexthop, newhop);
				/*
				 * NEXTHOP_TYPE_*_IFINDEX but ifindex
				 * doesn't match what we found.
				 */
				return 0;
			}

			if (IS_ZEBRA_DEBUG_NHG_DETAIL)
				zlog_debug(
					"%s: CONNECT match %p (%pNG), newhop %pNHv",
					__func__, match, match->nhe, newhop);

			return 1;
		} else if (CHECK_FLAG(flags, ZEBRA_FLAG_ALLOW_RECURSION)) {
			struct nexthop_group *nhg;
			struct nexthop *resolver;
			struct backup_nh_map_s map = {};

			resolved = 0;

			/* Only useful if installed */
			if (!CHECK_FLAG(match->status, ROUTE_ENTRY_INSTALLED)) {
				if (IS_ZEBRA_DEBUG_RIB_DETAILED)
					zlog_debug(
						"%s: match %p (%pNG) not installed",
						__func__, match, match->nhe);

				goto done_with_match;
			}

			/* Examine installed nexthops; note that there
			 * may not be any installed primary nexthops if
			 * only backups are installed.
			 */
			nhg = rib_get_fib_nhg(match);
			for (ALL_NEXTHOPS_PTR(nhg, newhop)) {
				if (!nexthop_valid_resolve(nexthop, newhop))
					continue;

				if (IS_ZEBRA_DEBUG_NHG_DETAIL)
					zlog_debug(
						"%s: RECURSIVE match %p (%pNG), newhop %pNHv",
						__func__, match, match->nhe,
						newhop);

				SET_FLAG(nexthop->flags,
					 NEXTHOP_FLAG_RECURSIVE);
				resolver = nexthop_set_resolved(afi, newhop,
								nexthop, NULL);
				resolved = 1;

				/* If there are backup nexthops, capture
				 * that info with the resolving nexthop.
				 */
				if (resolver && newhop->backup_num > 0) {
					resolve_backup_nexthops(newhop,
								match->nhe,
								resolver, nhe,
								&map);
				}
			}

			/* Examine installed backup nexthops, if any. There
			 * are only installed backups *if* there is a
			 * dedicated fib list. The UI can also control use
			 * of backups for resolution.
			 */
			nhg = rib_get_fib_backup_nhg(match);
			if (!use_recursive_backups ||
			    nhg == NULL || nhg->nexthop == NULL)
				goto done_with_match;

			for (ALL_NEXTHOPS_PTR(nhg, newhop)) {
				if (!nexthop_valid_resolve(nexthop, newhop))
					continue;

				if (IS_ZEBRA_DEBUG_NHG_DETAIL)
					zlog_debug(
						"%s: RECURSIVE match backup %p (%pNG), newhop %pNHv",
						__func__, match, match->nhe,
						newhop);

				SET_FLAG(nexthop->flags,
					 NEXTHOP_FLAG_RECURSIVE);
				nexthop_set_resolved(afi, newhop, nexthop,
						     NULL);
				resolved = 1;
			}

done_with_match:
			/* Capture resolving mtu */
			if (resolved) {
				if (pmtu)
					*pmtu = match->mtu;

			} else if (IS_ZEBRA_DEBUG_RIB_DETAILED)
				zlog_debug(
					" %s: Recursion failed to find",
					__func__);

			return resolved;
		} else {
			if (IS_ZEBRA_DEBUG_RIB_DETAILED) {
				zlog_debug(
					" %s: Route Type %s has not turned on recursion",
					__func__, zebra_route_string(type));
				if (type == ZEBRA_ROUTE_BGP
				    && !CHECK_FLAG(flags, ZEBRA_FLAG_IBGP))
					zlog_debug(
						" EBGP: see \"disable-ebgp-connected-route-check\" or \"disable-connected-check\"");
			}
			return 0;
		}
	}
	if (IS_ZEBRA_DEBUG_RIB_DETAILED)
		zlog_debug(" %s: Nexthop did not lookup in table",
			   __func__);
	return 0;
}

/* This function verifies reachability of one given nexthop, which can be
 * numbered or unnumbered, IPv4 or IPv6. The result is unconditionally stored
 * in nexthop->flags field. The nexthop->ifindex will be updated
 * appropriately as well.
 *
 * An existing route map can turn an otherwise active nexthop into inactive,
 * but not vice versa.
 *
 * The return value is the final value of 'ACTIVE' flag.
+ */ +static unsigned nexthop_active_check(struct route_node *rn, + struct route_entry *re, + struct nexthop *nexthop, + struct nhg_hash_entry *nhe) +{ + route_map_result_t ret = RMAP_PERMITMATCH; + afi_t family; + const struct prefix *p, *src_p; + struct zebra_vrf *zvrf; + uint32_t mtu = 0; + vrf_id_t vrf_id; + + srcdest_rnode_prefixes(rn, &p, &src_p); + + if (rn->p.family == AF_INET) + family = AFI_IP; + else if (rn->p.family == AF_INET6) + family = AFI_IP6; + else + family = AF_UNSPEC; + + if (IS_ZEBRA_DEBUG_NHG_DETAIL) + zlog_debug("%s: re %p, nexthop %pNHv", __func__, re, nexthop); + + /* + * If this is a kernel route, then if the interface is *up* then + * by golly gee whiz it's a good route. + */ + if (re->type == ZEBRA_ROUTE_KERNEL || re->type == ZEBRA_ROUTE_SYSTEM) { + struct interface *ifp; + + ifp = if_lookup_by_index(nexthop->ifindex, nexthop->vrf_id); + + if (ifp && (if_is_operative(ifp) || if_is_up(ifp))) { + SET_FLAG(nexthop->flags, NEXTHOP_FLAG_ACTIVE); + goto skip_check; + } + } + + vrf_id = zvrf_id(rib_dest_vrf(rib_dest_from_rnode(rn))); + switch (nexthop->type) { + case NEXTHOP_TYPE_IFINDEX: + if (nexthop_active(nexthop, nhe, &rn->p, re->type, re->flags, + &mtu, vrf_id)) + SET_FLAG(nexthop->flags, NEXTHOP_FLAG_ACTIVE); + else + UNSET_FLAG(nexthop->flags, NEXTHOP_FLAG_ACTIVE); + break; + case NEXTHOP_TYPE_IPV4: + case NEXTHOP_TYPE_IPV4_IFINDEX: + family = AFI_IP; + if (nexthop_active(nexthop, nhe, &rn->p, re->type, re->flags, + &mtu, vrf_id)) + SET_FLAG(nexthop->flags, NEXTHOP_FLAG_ACTIVE); + else + UNSET_FLAG(nexthop->flags, NEXTHOP_FLAG_ACTIVE); + break; + case NEXTHOP_TYPE_IPV6: + family = AFI_IP6; + if (nexthop_active(nexthop, nhe, &rn->p, re->type, re->flags, + &mtu, vrf_id)) + SET_FLAG(nexthop->flags, NEXTHOP_FLAG_ACTIVE); + else + UNSET_FLAG(nexthop->flags, NEXTHOP_FLAG_ACTIVE); + break; + case NEXTHOP_TYPE_IPV6_IFINDEX: + /* RFC 5549, v4 prefix with v6 NH */ + if (rn->p.family != AF_INET) + family = AFI_IP6; + + if (nexthop_active(nexthop, 
nhe, &rn->p, re->type, re->flags, + &mtu, vrf_id)) + SET_FLAG(nexthop->flags, NEXTHOP_FLAG_ACTIVE); + else + UNSET_FLAG(nexthop->flags, NEXTHOP_FLAG_ACTIVE); + break; + case NEXTHOP_TYPE_BLACKHOLE: + SET_FLAG(nexthop->flags, NEXTHOP_FLAG_ACTIVE); + break; + default: + break; + } + +skip_check: + + if (!CHECK_FLAG(nexthop->flags, NEXTHOP_FLAG_ACTIVE)) { + if (IS_ZEBRA_DEBUG_RIB_DETAILED) + zlog_debug(" %s: Unable to find active nexthop", + __func__); + return 0; + } + + /* Capture recursive nexthop mtu. + * TODO -- the code used to just reset the re's value to zero + * for each nexthop, and then jam any resolving route's mtu value in, + * whether or not that was zero, or lt/gt any existing value? The + * way this is used appears to be as a floor value, so let's try + * using it that way here. + */ + if (mtu > 0) { + if (re->nexthop_mtu == 0 || re->nexthop_mtu > mtu) + re->nexthop_mtu = mtu; + } + + /* XXX: What exactly do those checks do? Do we support + * e.g. IPv4 routes with IPv6 nexthops or vice versa? + */ + if (RIB_SYSTEM_ROUTE(re) || (family == AFI_IP && p->family != AF_INET) + || (family == AFI_IP6 && p->family != AF_INET6)) + return CHECK_FLAG(nexthop->flags, NEXTHOP_FLAG_ACTIVE); + + /* The original code didn't determine the family correctly + * e.g. for NEXTHOP_TYPE_IFINDEX. Retrieve the correct afi + * from the rib_table_info in those cases. + * Possibly it may be better to use only the rib_table_info + * in every case. 
+ */ + if (family == 0) { + struct rib_table_info *info; + + info = srcdest_rnode_table_info(rn); + family = info->afi; + } + + memset(&nexthop->rmap_src.ipv6, 0, sizeof(union g_addr)); + + zvrf = zebra_vrf_lookup_by_id(re->vrf_id); + if (!zvrf) { + if (IS_ZEBRA_DEBUG_RIB_DETAILED) + zlog_debug(" %s: zvrf is NULL", __func__); + return CHECK_FLAG(nexthop->flags, NEXTHOP_FLAG_ACTIVE); + } + + /* It'll get set if required inside */ + ret = zebra_route_map_check(family, re->type, re->instance, p, nexthop, + zvrf, re->tag); + if (ret == RMAP_DENYMATCH) { + if (IS_ZEBRA_DEBUG_RIB) { + zlog_debug( + "%u:%pRN: Filtering out with NH %pNHv due to route map", + re->vrf_id, rn, nexthop); + } + UNSET_FLAG(nexthop->flags, NEXTHOP_FLAG_ACTIVE); + } + return CHECK_FLAG(nexthop->flags, NEXTHOP_FLAG_ACTIVE); +} + +/* Helper function called after resolution to walk nhg rb trees + * and toggle the NEXTHOP_GROUP_VALID flag if the nexthop + * is active on singleton NHEs. + */ +static bool zebra_nhg_set_valid_if_active(struct nhg_hash_entry *nhe) +{ + struct nhg_connected *rb_node_dep = NULL; + bool valid = false; + + if (!zebra_nhg_depends_is_empty(nhe)) { + /* Is at least one depend valid? */ + frr_each(nhg_connected_tree, &nhe->nhg_depends, rb_node_dep) { + if (zebra_nhg_set_valid_if_active(rb_node_dep->nhe)) + valid = true; + } + + goto done; + } + + /* should be fully resolved singleton at this point */ + if (CHECK_FLAG(nhe->nhg.nexthop->flags, NEXTHOP_FLAG_ACTIVE)) + valid = true; + +done: + if (valid) + SET_FLAG(nhe->flags, NEXTHOP_GROUP_VALID); + + return valid; +} + +/* + * Process a list of nexthops, given an nhe, determining + * whether each one is ACTIVE/installable at this time. 
 */
static uint32_t nexthop_list_active_update(struct route_node *rn,
					   struct route_entry *re,
					   struct nhg_hash_entry *nhe,
					   bool is_backup)
{
	/* Returns the number of nexthops left ACTIVE after the walk. */
	union g_addr prev_src;
	unsigned int prev_active, new_active;
	ifindex_t prev_index;
	uint32_t counter = 0;
	struct nexthop *nexthop;
	struct nexthop_group *nhg = &nhe->nhg;

	nexthop = nhg->nexthop;

	/* Init recursive nh mtu */
	re->nexthop_mtu = 0;

	/* Process nexthops one-by-one */
	for ( ; nexthop; nexthop = nexthop->next) {

		/* No protocol daemon provides src and so we're skipping
		 * tracking it
		 */
		prev_src = nexthop->rmap_src;
		prev_active = CHECK_FLAG(nexthop->flags, NEXTHOP_FLAG_ACTIVE);
		prev_index = nexthop->ifindex;

		/* Include the containing nhe for primary nexthops: if there's
		 * recursive resolution, we capture the backup info also.
		 */
		new_active =
			nexthop_active_check(rn, re, nexthop,
					     (is_backup ? NULL : nhe));

		/*
		 * We need to respect the multipath_num here
		 * as that what we should be able to install from
		 * a multipath perspective should not be a data plane
		 * decision point.
		 */
		if (new_active && counter >= zrouter.multipath_num) {
			struct nexthop *nh;

			/* Set it and its resolved nexthop as inactive. */
			for (nh = nexthop; nh; nh = nh->resolved)
				UNSET_FLAG(nh->flags, NEXTHOP_FLAG_ACTIVE);

			new_active = 0;
		}

		if (new_active)
			counter++;

		/* Check for changes to the nexthop - set ROUTE_ENTRY_CHANGED */
		if (prev_active != new_active || prev_index != nexthop->ifindex
		    || ((nexthop->type >= NEXTHOP_TYPE_IFINDEX
			 && nexthop->type < NEXTHOP_TYPE_IPV6)
			&& prev_src.ipv4.s_addr
				   != nexthop->rmap_src.ipv4.s_addr)
		    || ((nexthop->type >= NEXTHOP_TYPE_IPV6
			 && nexthop->type < NEXTHOP_TYPE_BLACKHOLE)
			&& !(IPV6_ADDR_SAME(&prev_src.ipv6,
					    &nexthop->rmap_src.ipv6)))
		    || CHECK_FLAG(re->status, ROUTE_ENTRY_LABELS_CHANGED))
			SET_FLAG(re->status, ROUTE_ENTRY_CHANGED);
	}

	return counter;
}


/* Protocol-owned (daemon-supplied) groups are trusted: mark every nexthop
 * ACTIVE and return the count, without any resolution work.
 */
static uint32_t proto_nhg_nexthop_active_update(struct nexthop_group *nhg)
{
	struct nexthop *nh;
	uint32_t curr_active = 0;

	/* Assume all active for now */

	for (nh = nhg->nexthop; nh; nh = nh->next) {
		SET_FLAG(nh->flags, NEXTHOP_FLAG_ACTIVE);
		curr_active++;
	}

	return curr_active;
}

/*
 * Iterate over all nexthops of the given RIB entry and refresh their
 * ACTIVE flag. If any nexthop is found to toggle the ACTIVE flag,
 * the whole re structure is flagged with ROUTE_ENTRY_CHANGED.
 *
 * Return value is the new number of active nexthops.
 */
int nexthop_active_update(struct route_node *rn, struct route_entry *re)
{
	struct nhg_hash_entry *curr_nhe;
	uint32_t curr_active = 0, backup_active = 0;

	if (PROTO_OWNED(re->nhe))
		return proto_nhg_nexthop_active_update(&re->nhe->nhg);

	afi_t rt_afi = family2afi(rn->p.family);

	UNSET_FLAG(re->status, ROUTE_ENTRY_CHANGED);

	/* Make a local copy of the existing nhe, so we don't work on/modify
	 * the shared nhe.
	 */
	curr_nhe = zebra_nhe_copy(re->nhe, re->nhe->id);

	if (IS_ZEBRA_DEBUG_NHG_DETAIL)
		zlog_debug("%s: re %p nhe %p (%pNG), curr_nhe %p", __func__, re,
			   re->nhe, re->nhe, curr_nhe);

	/* Clear the existing id, if any: this will avoid any confusion
	 * if the id exists, and will also force the creation
	 * of a new nhe reflecting the changes we may make in this local copy.
	 */
	curr_nhe->id = 0;

	/* Process nexthops */
	curr_active = nexthop_list_active_update(rn, re, curr_nhe, false);

	if (IS_ZEBRA_DEBUG_NHG_DETAIL)
		zlog_debug("%s: re %p curr_active %u", __func__, re,
			   curr_active);

	/* If there are no backup nexthops, we are done */
	if (zebra_nhg_get_backup_nhg(curr_nhe) == NULL)
		goto backups_done;

	backup_active = nexthop_list_active_update(
		rn, re, curr_nhe->backup_info->nhe, true /*is_backup*/);

	if (IS_ZEBRA_DEBUG_NHG_DETAIL)
		zlog_debug("%s: re %p backup_active %u", __func__, re,
			   backup_active);

backups_done:

	/*
	 * Ref or create an nhe that matches the current state of the
	 * nexthop(s).
	 */
	if (CHECK_FLAG(re->status, ROUTE_ENTRY_CHANGED)) {
		struct nhg_hash_entry *new_nhe = NULL;

		new_nhe = zebra_nhg_rib_find_nhe(curr_nhe, rt_afi);

		if (IS_ZEBRA_DEBUG_NHG_DETAIL)
			zlog_debug(
				"%s: re %p CHANGED: nhe %p (%pNG) => new_nhe %p (%pNG)",
				__func__, re, re->nhe, re->nhe, new_nhe,
				new_nhe);

		route_entry_update_nhe(re, new_nhe);
	}


	/* Walk the NHE depends tree and toggle NEXTHOP_GROUP_VALID
	 * flag where appropriate.
	 */
	if (curr_active)
		zebra_nhg_set_valid_if_active(re->nhe);

	/*
	 * Do not need the old / copied nhe anymore since it
	 * was either copied over into a new nhe or not
	 * used at all.
	 */
	zebra_nhg_free(curr_nhe);
	return curr_active;
}

/* Recursively construct a grp array of fully resolved IDs.
 *
 * This function allows us to account for groups within groups,
 * by converting them into a flat array of IDs.
 *
 * nh_grp is modified at every level of recursion to append
 * to it the next unique, fully resolved ID from the entire tree.
 *
 *
 * Note:
 * I'm pretty sure we only allow ONE level of group within group currently.
 * But making this recursive just in case that ever changes.
 */
static uint8_t zebra_nhg_nhe2grp_internal(struct nh_grp *grp,
					  uint8_t curr_index,
					  struct nhg_hash_entry *nhe,
					  int max_num)
{
	/* Returns the next free index in 'grp' (i.e. the number of entries
	 * filled so far), never exceeding 'max_num'.
	 */
	struct nhg_connected *rb_node_dep = NULL;
	struct nhg_hash_entry *depend = NULL;
	uint8_t i = curr_index;

	frr_each(nhg_connected_tree, &nhe->nhg_depends, rb_node_dep) {
		bool duplicate = false;

		if (i >= max_num)
			goto done;

		depend = rb_node_dep->nhe;

		/*
		 * If its recursive, use its resolved nhe in the group
		 */
		if (CHECK_FLAG(depend->flags, NEXTHOP_GROUP_RECURSIVE)) {
			depend = zebra_nhg_resolve(depend);
			if (!depend) {
				flog_err(
					EC_ZEBRA_NHG_FIB_UPDATE,
					"Failed to recursively resolve Nexthop Hash Entry in the group id=%pNG",
					nhe);
				continue;
			}
		}

		if (!zebra_nhg_depends_is_empty(depend)) {
			/* This is a group within a group */
			i = zebra_nhg_nhe2grp_internal(grp, i, depend, max_num);
		} else {
			if (!CHECK_FLAG(depend->flags, NEXTHOP_GROUP_VALID)) {
				if (IS_ZEBRA_DEBUG_RIB_DETAILED
				    || IS_ZEBRA_DEBUG_NHG)
					zlog_debug(
						"%s: Nexthop ID (%u) not valid, not appending to dataplane install group",
						__func__, depend->id);
				continue;
			}

			/* If the nexthop not installed/queued for install don't
			 * put in the ID array.
			 */
			if (!(CHECK_FLAG(depend->flags, NEXTHOP_GROUP_INSTALLED)
			      || CHECK_FLAG(depend->flags,
					    NEXTHOP_GROUP_QUEUED))) {
				if (IS_ZEBRA_DEBUG_RIB_DETAILED
				    || IS_ZEBRA_DEBUG_NHG)
					zlog_debug(
						"%s: Nexthop ID (%u) not installed or queued for install, not appending to dataplane install group",
						__func__, depend->id);
				continue;
			}

			/* Check for duplicate IDs, ignore if found. */
			for (int j = 0; j < i; j++) {
				if (depend->id == grp[j].id) {
					duplicate = true;
					break;
				}
			}

			if (duplicate) {
				if (IS_ZEBRA_DEBUG_RIB_DETAILED
				    || IS_ZEBRA_DEBUG_NHG)
					zlog_debug(
						"%s: Nexthop ID (%u) is duplicate, not appending to dataplane install group",
						__func__, depend->id);
				continue;
			}

			grp[i].id = depend->id;
			grp[i].weight = depend->nhg.nexthop->weight;
			i++;
		}
	}

	if (nhe->backup_info == NULL || nhe->backup_info->nhe == NULL)
		goto done;

	/* TODO -- For now, we are not trying to use or install any
	 * backup info in this nexthop-id path: we aren't prepared
	 * to use the backups here yet. We're just debugging what we find.
	 */
	if (IS_ZEBRA_DEBUG_NHG_DETAIL)
		zlog_debug("%s: skipping backup nhe", __func__);

done:
	return i;
}

/* Convert a nhe into a group array */
uint8_t zebra_nhg_nhe2grp(struct nh_grp *grp, struct nhg_hash_entry *nhe,
			  int max_num)
{
	/* Call into the recursive function */
	return zebra_nhg_nhe2grp_internal(grp, 0, nhe, max_num);
}

/* Push an nhe (and, recursively, all its depends) toward the dataplane for
 * kernel installation, if valid and not already installed/queued.
 */
void zebra_nhg_install_kernel(struct nhg_hash_entry *nhe)
{
	struct nhg_connected *rb_node_dep = NULL;

	/* Resolve it first */
	nhe = zebra_nhg_resolve(nhe);

	/* Make sure all depends are installed/queued */
	frr_each(nhg_connected_tree, &nhe->nhg_depends, rb_node_dep) {
		zebra_nhg_install_kernel(rb_node_dep->nhe);
	}

	if (CHECK_FLAG(nhe->flags, NEXTHOP_GROUP_VALID)
	    && !CHECK_FLAG(nhe->flags, NEXTHOP_GROUP_INSTALLED)
	    && !CHECK_FLAG(nhe->flags, NEXTHOP_GROUP_QUEUED)) {
		/* Change its type to us since we are installing it */
		if (!ZEBRA_NHG_CREATED(nhe))
			nhe->type = ZEBRA_ROUTE_NHG;

		int ret = dplane_nexthop_add(nhe);

		switch (ret) {
		case ZEBRA_DPLANE_REQUEST_QUEUED:
			SET_FLAG(nhe->flags, NEXTHOP_GROUP_QUEUED);
			break;
		case ZEBRA_DPLANE_REQUEST_FAILURE:
			flog_err(
				EC_ZEBRA_DP_INSTALL_FAIL,
				"Failed to install Nexthop ID (%pNG) into the kernel",
				nhe);
			break;
		case ZEBRA_DPLANE_REQUEST_SUCCESS:
SET_FLAG(nhe->flags, NEXTHOP_GROUP_INSTALLED); + zebra_nhg_handle_install(nhe); + break; + } + } +} + +void zebra_nhg_uninstall_kernel(struct nhg_hash_entry *nhe) +{ + if (CHECK_FLAG(nhe->flags, NEXTHOP_GROUP_INSTALLED)) { + int ret = dplane_nexthop_delete(nhe); + + switch (ret) { + case ZEBRA_DPLANE_REQUEST_QUEUED: + SET_FLAG(nhe->flags, NEXTHOP_GROUP_QUEUED); + break; + case ZEBRA_DPLANE_REQUEST_FAILURE: + flog_err( + EC_ZEBRA_DP_DELETE_FAIL, + "Failed to uninstall Nexthop ID (%pNG) from the kernel", + nhe); + break; + case ZEBRA_DPLANE_REQUEST_SUCCESS: + UNSET_FLAG(nhe->flags, NEXTHOP_GROUP_INSTALLED); + break; + } + } + + zebra_nhg_handle_uninstall(nhe); +} + +void zebra_nhg_dplane_result(struct zebra_dplane_ctx *ctx) +{ + enum dplane_op_e op; + enum zebra_dplane_result status; + uint32_t id = 0; + struct nhg_hash_entry *nhe = NULL; + + op = dplane_ctx_get_op(ctx); + status = dplane_ctx_get_status(ctx); + + id = dplane_ctx_get_nhe_id(ctx); + + if (IS_ZEBRA_DEBUG_DPLANE_DETAIL || IS_ZEBRA_DEBUG_NHG_DETAIL) + zlog_debug( + "Nexthop dplane ctx %p, op %s, nexthop ID (%u), result %s", + ctx, dplane_op2str(op), id, dplane_res2str(status)); + + switch (op) { + case DPLANE_OP_NH_DELETE: + if (status != ZEBRA_DPLANE_REQUEST_SUCCESS) + flog_err( + EC_ZEBRA_DP_DELETE_FAIL, + "Failed to uninstall Nexthop ID (%u) from the kernel", + id); + + /* We already free'd the data, nothing to do */ + break; + case DPLANE_OP_NH_INSTALL: + case DPLANE_OP_NH_UPDATE: + nhe = zebra_nhg_lookup_id(id); + + if (!nhe) { + if (IS_ZEBRA_DEBUG_NHG) + zlog_debug( + "%s operation preformed on Nexthop ID (%u) in the kernel, that we no longer have in our table", + dplane_op2str(op), id); + + break; + } + + UNSET_FLAG(nhe->flags, NEXTHOP_GROUP_QUEUED); + if (status == ZEBRA_DPLANE_REQUEST_SUCCESS) { + SET_FLAG(nhe->flags, NEXTHOP_GROUP_VALID); + SET_FLAG(nhe->flags, NEXTHOP_GROUP_INSTALLED); + zebra_nhg_handle_install(nhe); + + /* If daemon nhg, send it an update */ + if (PROTO_OWNED(nhe)) + 
zsend_nhg_notify(nhe->type, nhe->zapi_instance, + nhe->zapi_session, nhe->id, + ZAPI_NHG_INSTALLED); + } else { + /* If daemon nhg, send it an update */ + if (PROTO_OWNED(nhe)) + zsend_nhg_notify(nhe->type, nhe->zapi_instance, + nhe->zapi_session, nhe->id, + ZAPI_NHG_FAIL_INSTALL); + + if (!(zebra_nhg_proto_nexthops_only() && + !PROTO_OWNED(nhe))) + flog_err( + EC_ZEBRA_DP_INSTALL_FAIL, + "Failed to install Nexthop (%pNG) into the kernel", + nhe); + } + break; + + case DPLANE_OP_ROUTE_INSTALL: + case DPLANE_OP_ROUTE_UPDATE: + case DPLANE_OP_ROUTE_DELETE: + case DPLANE_OP_ROUTE_NOTIFY: + case DPLANE_OP_LSP_INSTALL: + case DPLANE_OP_LSP_UPDATE: + case DPLANE_OP_LSP_DELETE: + case DPLANE_OP_LSP_NOTIFY: + case DPLANE_OP_PW_INSTALL: + case DPLANE_OP_PW_UNINSTALL: + case DPLANE_OP_SYS_ROUTE_ADD: + case DPLANE_OP_SYS_ROUTE_DELETE: + case DPLANE_OP_ADDR_INSTALL: + case DPLANE_OP_ADDR_UNINSTALL: + case DPLANE_OP_MAC_INSTALL: + case DPLANE_OP_MAC_DELETE: + case DPLANE_OP_NEIGH_INSTALL: + case DPLANE_OP_NEIGH_UPDATE: + case DPLANE_OP_NEIGH_DELETE: + case DPLANE_OP_NEIGH_IP_INSTALL: + case DPLANE_OP_NEIGH_IP_DELETE: + case DPLANE_OP_VTEP_ADD: + case DPLANE_OP_VTEP_DELETE: + case DPLANE_OP_RULE_ADD: + case DPLANE_OP_RULE_DELETE: + case DPLANE_OP_RULE_UPDATE: + case DPLANE_OP_NEIGH_DISCOVER: + case DPLANE_OP_BR_PORT_UPDATE: + case DPLANE_OP_NONE: + case DPLANE_OP_IPTABLE_ADD: + case DPLANE_OP_IPTABLE_DELETE: + case DPLANE_OP_IPSET_ADD: + case DPLANE_OP_IPSET_DELETE: + case DPLANE_OP_IPSET_ENTRY_ADD: + case DPLANE_OP_IPSET_ENTRY_DELETE: + case DPLANE_OP_NEIGH_TABLE_UPDATE: + case DPLANE_OP_GRE_SET: + case DPLANE_OP_INTF_ADDR_ADD: + case DPLANE_OP_INTF_ADDR_DEL: + case DPLANE_OP_INTF_NETCONFIG: + case DPLANE_OP_INTF_INSTALL: + case DPLANE_OP_INTF_UPDATE: + case DPLANE_OP_INTF_DELETE: + case DPLANE_OP_TC_INSTALL: + case DPLANE_OP_TC_UPDATE: + case DPLANE_OP_TC_DELETE: + break; + } +} + +static int zebra_nhg_sweep_entry(struct hash_bucket *bucket, void *arg) +{ + struct 
nhg_hash_entry *nhe = NULL; + + nhe = (struct nhg_hash_entry *)bucket->data; + + /* + * same logic as with routes. + * + * If older than startup time, we know we read them in from the + * kernel and have not gotten and update for them since startup + * from an upper level proto. + */ + if (zrouter.startup_time < nhe->uptime) + return HASHWALK_CONTINUE; + + /* + * If it's proto-owned and not being used by a route, remove it since + * we haven't gotten an update about it from the proto since startup. + * This means that either the config for it was removed or the daemon + * didn't get started. This handles graceful restart & retain scenario. + */ + if (PROTO_OWNED(nhe) && nhe->refcnt == 1) { + zebra_nhg_decrement_ref(nhe); + return HASHWALK_ABORT; + } + + /* + * If its being ref'd by routes, just let it be uninstalled via a route + * removal. + */ + if (ZEBRA_NHG_CREATED(nhe) && nhe->refcnt <= 0) { + zebra_nhg_uninstall_kernel(nhe); + return HASHWALK_ABORT; + } + + return HASHWALK_CONTINUE; +} + +void zebra_nhg_sweep_table(struct hash *hash) +{ + uint32_t count; + + /* + * Yes this is extremely odd. Effectively nhg's have + * other nexthop groups that depend on them and when you + * remove them, you can have other entries blown up. + * our hash code does not work with deleting multiple + * entries at a time and will possibly cause crashes + * So what to do? Whenever zebra_nhg_sweep_entry + * deletes an entry it will return HASHWALK_ABORT, + * cause that deletion might have triggered more. + * then we can just keep sweeping this table + * until nothing more is found to do. 
+ */ + do { + count = hashcount(hash); + hash_walk(hash, zebra_nhg_sweep_entry, NULL); + } while (count != hashcount(hash)); +} + +static void zebra_nhg_mark_keep_entry(struct hash_bucket *bucket, void *arg) +{ + struct nhg_hash_entry *nhe = bucket->data; + + UNSET_FLAG(nhe->flags, NEXTHOP_GROUP_INSTALLED); +} + +/* + * When we are shutting down and we have retain mode enabled + * in zebra the process is to mark each vrf that it's + * routes should not be deleted. The problem with that + * is that shutdown actually free's up memory which + * causes the nexthop group's ref counts to go to zero + * we need a way to subtly tell the system to not remove + * the nexthop groups from the kernel at the same time. + * The easiest just looks like that we should not mark + * the nhg's as installed any more and when the ref count + * goes to zero we'll attempt to delete and do nothing + */ +void zebra_nhg_mark_keep(void) +{ + hash_iterate(zrouter.nhgs_id, zebra_nhg_mark_keep_entry, NULL); +} + +/* Global control to disable use of kernel nexthops, if available. We can't + * force the kernel to support nexthop ids, of course, but we can disable + * zebra's use of them, for testing e.g. By default, if the kernel supports + * nexthop ids, zebra uses them. + */ +void zebra_nhg_enable_kernel_nexthops(bool set) +{ + g_nexthops_enabled = set; +} + +bool zebra_nhg_kernel_nexthops_enabled(void) +{ + return g_nexthops_enabled; +} + +/* Global control for use of activated backups for recursive resolution. */ +void zebra_nhg_set_recursive_use_backups(bool set) +{ + use_recursive_backups = set; +} + +bool zebra_nhg_recursive_use_backups(void) +{ + return use_recursive_backups; +} + +/* + * Global control to only use kernel nexthops for protocol created NHGs. + * There are some use cases where you may not want zebra to implicitly + * create kernel nexthops for all routes and only create them for NHGs + * passed down by upper level protos. + * + * Default is off. 
+ */ +void zebra_nhg_set_proto_nexthops_only(bool set) +{ + proto_nexthops_only = set; +} + +bool zebra_nhg_proto_nexthops_only(void) +{ + return proto_nexthops_only; +} + +/* Add NHE from upper level proto */ +struct nhg_hash_entry *zebra_nhg_proto_add(uint32_t id, int type, + uint16_t instance, uint32_t session, + struct nexthop_group *nhg, afi_t afi) +{ + struct nhg_hash_entry lookup; + struct nhg_hash_entry *new, *old; + struct nhg_connected *rb_node_dep = NULL; + struct nexthop *newhop; + bool replace = false; + + if (!nhg->nexthop) { + if (IS_ZEBRA_DEBUG_NHG) + zlog_debug("%s: id %u, no nexthops passed to add", + __func__, id); + return NULL; + } + + + /* Set nexthop list as active, since they wont go through rib + * processing. + * + * Assuming valid/onlink for now. + * + * Once resolution is figured out, we won't need this! + */ + for (ALL_NEXTHOPS_PTR(nhg, newhop)) { + if (CHECK_FLAG(newhop->flags, NEXTHOP_FLAG_HAS_BACKUP)) { + if (IS_ZEBRA_DEBUG_NHG) + zlog_debug( + "%s: id %u, backup nexthops not supported", + __func__, id); + return NULL; + } + + if (newhop->type == NEXTHOP_TYPE_BLACKHOLE) { + if (IS_ZEBRA_DEBUG_NHG) + zlog_debug( + "%s: id %u, blackhole nexthop not supported", + __func__, id); + return NULL; + } + + if (newhop->type == NEXTHOP_TYPE_IFINDEX) { + if (IS_ZEBRA_DEBUG_NHG) + zlog_debug( + "%s: id %u, nexthop without gateway not supported", + __func__, id); + return NULL; + } + + if (!newhop->ifindex) { + if (IS_ZEBRA_DEBUG_NHG) + zlog_debug( + "%s: id %u, nexthop without ifindex is not supported", + __func__, id); + return NULL; + } + SET_FLAG(newhop->flags, NEXTHOP_FLAG_ACTIVE); + } + + zebra_nhe_init(&lookup, afi, nhg->nexthop); + lookup.nhg.nexthop = nhg->nexthop; + lookup.id = id; + lookup.type = type; + + old = zebra_nhg_lookup_id(id); + + if (old) { + /* + * This is a replace, just release NHE from ID for now, The + * depends/dependents may still be used in the replacement so + * we don't touch them other than to remove their refs to 
their + * old parent. + */ + replace = true; + hash_release(zrouter.nhgs_id, old); + + /* Free all the things */ + zebra_nhg_release_all_deps(old); + } + + new = zebra_nhg_rib_find_nhe(&lookup, afi); + + zebra_nhg_increment_ref(new); + + /* Capture zapi client info */ + new->zapi_instance = instance; + new->zapi_session = session; + + zebra_nhg_set_valid_if_active(new); + + zebra_nhg_install_kernel(new); + + if (old) { + /* + * Check to handle recving DEL while routes still in use then + * a replace. + * + * In this case we would have decremented the refcnt already + * but set the FLAG here. Go ahead and increment once to fix + * the misordering we have been sent. + */ + if (CHECK_FLAG(old->flags, NEXTHOP_GROUP_PROTO_RELEASED)) + zebra_nhg_increment_ref(old); + + rib_handle_nhg_replace(old, new); + + /* We have to decrement its singletons + * because some might not exist in NEW. + */ + if (!zebra_nhg_depends_is_empty(old)) { + frr_each (nhg_connected_tree, &old->nhg_depends, + rb_node_dep) + zebra_nhg_decrement_ref(rb_node_dep->nhe); + } + + /* Dont call the dec API, we dont want to uninstall the ID */ + old->refcnt = 0; + THREAD_OFF(old->timer); + zebra_nhg_free(old); + old = NULL; + } + + if (IS_ZEBRA_DEBUG_NHG_DETAIL) + zlog_debug("%s: %s nhe %p (%u), vrf %d, type %s", __func__, + (replace ? 
"replaced" : "added"), new, new->id, + new->vrf_id, zebra_route_string(new->type)); + + return new; +} + +/* Delete NHE from upper level proto, caller must decrement ref */ +struct nhg_hash_entry *zebra_nhg_proto_del(uint32_t id, int type) +{ + struct nhg_hash_entry *nhe; + + nhe = zebra_nhg_lookup_id(id); + + if (!nhe) { + if (IS_ZEBRA_DEBUG_NHG) + zlog_debug("%s: id %u, lookup failed", __func__, id); + + return NULL; + } + + if (type != nhe->type) { + if (IS_ZEBRA_DEBUG_NHG) + zlog_debug( + "%s: id %u, type %s mismatch, sent by %s, ignoring", + __func__, id, zebra_route_string(nhe->type), + zebra_route_string(type)); + return NULL; + } + + if (CHECK_FLAG(nhe->flags, NEXTHOP_GROUP_PROTO_RELEASED)) { + if (IS_ZEBRA_DEBUG_NHG) + zlog_debug("%s: id %u, already released", __func__, id); + + return NULL; + } + + SET_FLAG(nhe->flags, NEXTHOP_GROUP_PROTO_RELEASED); + + if (nhe->refcnt > 1) { + if (IS_ZEBRA_DEBUG_NHG) + zlog_debug( + "%s: %pNG, still being used by routes refcnt %u", + __func__, nhe, nhe->refcnt); + return nhe; + } + + if (IS_ZEBRA_DEBUG_NHG_DETAIL) + zlog_debug("%s: deleted nhe %p (%pNG), vrf %d, type %s", + __func__, nhe, nhe, nhe->vrf_id, + zebra_route_string(nhe->type)); + + return nhe; +} + +struct nhg_score_proto_iter { + int type; + struct list *found; +}; + +static void zebra_nhg_score_proto_entry(struct hash_bucket *bucket, void *arg) +{ + struct nhg_hash_entry *nhe; + struct nhg_score_proto_iter *iter; + + nhe = (struct nhg_hash_entry *)bucket->data; + iter = arg; + + /* Needs to match type and outside zebra ID space */ + if (nhe->type == iter->type && PROTO_OWNED(nhe)) { + if (IS_ZEBRA_DEBUG_NHG_DETAIL) + zlog_debug( + "%s: found nhe %p (%pNG), vrf %d, type %s after client disconnect", + __func__, nhe, nhe, nhe->vrf_id, + zebra_route_string(nhe->type)); + + /* Add to removal list */ + listnode_add(iter->found, nhe); + } +} + +/* Remove specific by proto NHGs */ +unsigned long zebra_nhg_score_proto(int type) +{ + struct nhg_hash_entry *nhe; + 
struct nhg_score_proto_iter iter = {}; + struct listnode *ln; + unsigned long count; + + iter.type = type; + iter.found = list_new(); + + /* Find matching entries to remove */ + hash_iterate(zrouter.nhgs_id, zebra_nhg_score_proto_entry, &iter); + + /* Now remove them */ + for (ALL_LIST_ELEMENTS_RO(iter.found, ln, nhe)) { + /* + * This should be the last ref if we remove client routes too, + * and thus should remove and free them. + */ + zebra_nhg_decrement_ref(nhe); + } + + count = iter.found->count; + list_delete(&iter.found); + + return count; +} + +printfrr_ext_autoreg_p("NG", printfrr_nhghe); +static ssize_t printfrr_nhghe(struct fbuf *buf, struct printfrr_eargs *ea, + const void *ptr) +{ + const struct nhg_hash_entry *nhe = ptr; + const struct nhg_connected *dep; + ssize_t ret = 0; + + if (!nhe) + return bputs(buf, "[NULL]"); + + ret += bprintfrr(buf, "%u[", nhe->id); + if (nhe->ifp) + ret += printfrr_nhs(buf, nhe->nhg.nexthop); + else { + int count = zebra_nhg_depends_count(nhe); + + frr_each (nhg_connected_tree_const, &nhe->nhg_depends, dep) { + ret += bprintfrr(buf, "%u", dep->nhe->id); + if (count > 1) + ret += bputs(buf, "/"); + count--; + } + } + + ret += bputs(buf, "]"); + return ret; +} diff --git a/zebra/zebra_nhg.h b/zebra/zebra_nhg.h new file mode 100644 index 0000000..62f71f9 --- /dev/null +++ b/zebra/zebra_nhg.h @@ -0,0 +1,403 @@ +/* Zebra Nexthop Group header. + * Copyright (C) 2019 Cumulus Networks, Inc. + * Donald Sharp + * Stephen Worley + * + * This file is part of FRR. + * + * FRR is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License as published by the + * Free Software Foundation; either version 2, or (at your option) any + * later version. + * + * FRR is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. 
+ * + * You should have received a copy of the GNU General Public License + * along with FRR; see the file COPYING. If not, write to the Free + * Software Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA + * 02111-1307, USA. + */ +#ifndef __ZEBRA_NHG_H__ +#define __ZEBRA_NHG_H__ + +#include "lib/nexthop.h" +#include "lib/nexthop_group.h" + +#ifdef __cplusplus +extern "C" { +#endif + +/* This struct is used exclusively for dataplane + * interaction via a dataplane context. + * + * It is designed to mimic the netlink nexthop_grp + * struct in include/linux/nexthop.h + */ +struct nh_grp { + uint32_t id; + uint8_t weight; +}; + +PREDECL_RBTREE_UNIQ(nhg_connected_tree); + +/* + * Hashtables containing nhg entries is in `zebra_router`. + */ +struct nhg_hash_entry { + uint32_t id; + afi_t afi; + vrf_id_t vrf_id; + + /* Time since last update */ + time_t uptime; + + /* Source protocol - zebra or another daemon */ + int type; + + /* zapi instance and session id, for groups from other daemons */ + uint16_t zapi_instance; + uint32_t zapi_session; + + struct nexthop_group nhg; + + /* If supported, a mapping of backup nexthops. */ + struct nhg_backup_info *backup_info; + + /* If this is not a group, it + * will be a single nexthop + * and must have an interface + * associated with it. + * Otherwise, this will be null. + */ + struct interface *ifp; + + uint32_t refcnt; + uint32_t dplane_ref; + + uint32_t flags; + + /* Dependency trees for other entries. + * For instance a group with two + * nexthops will have two dependencies + * pointing to those nhg_hash_entries. + * + * Using a rb tree here to make lookups + * faster with ID's. + * + * nhg_depends the RB tree of entries that this + * group contains. 
+ * + * nhg_dependents the RB tree of entries that + * this group is being used by + * + * NHG id 3 with nexthops id 1/2 + * nhg(3)->nhg_depends has 1 and 2 in the tree + * nhg(3)->nhg_dependents is empty + * + * nhg(1)->nhg_depends is empty + * nhg(1)->nhg_dependents is 3 in the tree + * + * nhg(2)->nhg_depends is empty + * nhg(3)->nhg_dependents is 3 in the tree + */ + struct nhg_connected_tree_head nhg_depends, nhg_dependents; + + struct thread *timer; + +/* + * Is this nexthop group valid, ie all nexthops are fully resolved. + * What is fully resolved? It's a nexthop that is either self contained + * and correct( ie no recursive pointer ) or a nexthop that is recursively + * resolved and correct. + */ +#define NEXTHOP_GROUP_VALID (1 << 0) +/* + * Has this nexthop group been installed? At this point in time, this + * means that the data-plane has been told about this nexthop group + * and it's possible usage by a route entry. + */ +#define NEXTHOP_GROUP_INSTALLED (1 << 1) +/* + * Has the nexthop group been queued to be send to the FIB? + * The NEXTHOP_GROUP_VALID flag should also be set by this point. + */ +#define NEXTHOP_GROUP_QUEUED (1 << 2) +/* + * Is this a nexthop that is recursively resolved? + */ +#define NEXTHOP_GROUP_RECURSIVE (1 << 3) + +/* + * Backup nexthop support - identify groups that are backups for + * another group. + */ +#define NEXTHOP_GROUP_BACKUP (1 << 4) + +/* + * The NHG has been release by an upper level protocol via the + * `zebra_nhg_proto_del()` API. + * + * We use this flag to track this state in case the NHG is still being used + * by routes therefore holding their refcnts as well. Otherwise, the NHG will + * be removed and uninstalled. + * + */ +#define NEXTHOP_GROUP_PROTO_RELEASED (1 << 5) + +/* + * When deleting a NHG notice that it is still installed + * and if it is, slightly delay the actual removal to + * the future. 
So that upper level protocols might + * be able to take advantage of some NHG's that + * are there + */ +#define NEXTHOP_GROUP_KEEP_AROUND (1 << 6) + +/* + * Track FPM installation status.. + */ +#define NEXTHOP_GROUP_FPM (1 << 6) +}; + +/* Upper 4 bits of the NHG are reserved for indicating the NHG type */ +#define NHG_ID_TYPE_POS 28 +enum nhg_type { + NHG_TYPE_L3 = 0, + NHG_TYPE_L2_NH, /* NHs in a L2 NHG used as a MAC/FDB dest */ + NHG_TYPE_L2, /* L2 NHG used as a MAC/FDB dest */ +}; + +/* Was this one we created, either this session or previously? */ +#define ZEBRA_NHG_CREATED(NHE) \ + (((NHE->type) <= ZEBRA_ROUTE_MAX) && (NHE->type != ZEBRA_ROUTE_KERNEL)) + +/* Is this an NHE owned by zebra and not an upper level protocol? */ +#define ZEBRA_OWNED(NHE) (NHE->type == ZEBRA_ROUTE_NHG) + +#define PROTO_OWNED(NHE) (NHE->id >= ZEBRA_NHG_PROTO_LOWER) + +/* + * Backup nexthops: this is a group object itself, so + * that the backup nexthops can use the same code as a normal object. + */ +struct nhg_backup_info { + struct nhg_hash_entry *nhe; +}; + +enum nhg_ctx_op_e { + NHG_CTX_OP_NONE = 0, + NHG_CTX_OP_NEW, + NHG_CTX_OP_DEL, +}; + +enum nhg_ctx_status { + NHG_CTX_NONE = 0, + NHG_CTX_QUEUED, + NHG_CTX_REQUEUED, + NHG_CTX_SUCCESS, + NHG_CTX_FAILURE, +}; + +/* + * Context needed to queue nhg updates on the + * work queue. + */ +struct nhg_ctx { + + /* Unique ID */ + uint32_t id; + + vrf_id_t vrf_id; + afi_t afi; + + /* + * This should only ever be ZEBRA_ROUTE_NHG unless we get a a kernel + * created nexthop not made by us. + */ + int type; + + /* If its a group array, how many? */ + uint8_t count; + + /* Its either a single nexthop or an array of ID's */ + union { + struct nexthop nh; + struct nh_grp grp[MULTIPATH_NUM]; + } u; + + enum nhg_ctx_op_e op; + enum nhg_ctx_status status; +}; + +/* Global control to disable use of kernel nexthops, if available. 
We can't + * force the kernel to support nexthop ids, of course, but we can disable + * zebra's use of them, for testing e.g. By default, if the kernel supports + * nexthop ids, zebra uses them. + */ +void zebra_nhg_enable_kernel_nexthops(bool set); +bool zebra_nhg_kernel_nexthops_enabled(void); + +/* Global control for zebra to only use proto-owned nexthops */ +void zebra_nhg_set_proto_nexthops_only(bool set); +bool zebra_nhg_proto_nexthops_only(void); + +/* Global control for use of activated backups for recursive resolution. */ +void zebra_nhg_set_recursive_use_backups(bool set); +bool zebra_nhg_recursive_use_backups(void); + +/** + * NHE abstracted tree functions. + * Use these where possible instead of direct access. + */ +struct nhg_hash_entry *zebra_nhg_alloc(void); +void zebra_nhg_free(struct nhg_hash_entry *nhe); +/* In order to clear a generic hash, we need a generic api, sigh. */ +void zebra_nhg_hash_free(void *p); +void zebra_nhg_hash_free_zero_id(struct hash_bucket *b, void *arg); + +/* Init an nhe, for use in a hash lookup for example. There's some fuzziness + * if the nhe represents only a single nexthop, so we try to capture that + * variant also. + */ +void zebra_nhe_init(struct nhg_hash_entry *nhe, afi_t afi, + const struct nexthop *nh); + +/* + * Shallow copy of 'orig', into new/allocated nhe. 
+ */ +struct nhg_hash_entry *zebra_nhe_copy(const struct nhg_hash_entry *orig, + uint32_t id); + +/* Allocate, free backup nexthop info objects */ +struct nhg_backup_info *zebra_nhg_backup_alloc(void); +void zebra_nhg_backup_free(struct nhg_backup_info **p); + +struct nexthop_group *zebra_nhg_get_backup_nhg(struct nhg_hash_entry *nhe); + +extern struct nhg_hash_entry *zebra_nhg_resolve(struct nhg_hash_entry *nhe); + +extern unsigned int zebra_nhg_depends_count(const struct nhg_hash_entry *nhe); +extern bool zebra_nhg_depends_is_empty(const struct nhg_hash_entry *nhe); + +extern unsigned int +zebra_nhg_dependents_count(const struct nhg_hash_entry *nhe); +extern bool zebra_nhg_dependents_is_empty(const struct nhg_hash_entry *nhe); + +/* Lookup ID, doesn't create */ +extern struct nhg_hash_entry *zebra_nhg_lookup_id(uint32_t id); + +/* Hash functions */ +extern uint32_t zebra_nhg_hash_key(const void *arg); +extern uint32_t zebra_nhg_id_key(const void *arg); + +extern bool zebra_nhg_hash_equal(const void *arg1, const void *arg2); +extern bool zebra_nhg_hash_id_equal(const void *arg1, const void *arg2); + +/* + * Process a context off of a queue. + * Specifically this should be from + * the rib meta queue. 
+ */ +extern int nhg_ctx_process(struct nhg_ctx *ctx); +void nhg_ctx_free(struct nhg_ctx **ctx); + +/* Find via kernel nh creation */ +extern int zebra_nhg_kernel_find(uint32_t id, struct nexthop *nh, + struct nh_grp *grp, uint8_t count, + vrf_id_t vrf_id, afi_t afi, int type, + int startup); +/* Del via kernel */ +extern int zebra_nhg_kernel_del(uint32_t id, vrf_id_t vrf_id); + +/* Find an nhe based on a nexthop_group */ +extern struct nhg_hash_entry *zebra_nhg_rib_find(uint32_t id, + struct nexthop_group *nhg, + afi_t rt_afi, int type); + +/* Find an nhe based on a route's nhe, used during route creation */ +struct nhg_hash_entry * +zebra_nhg_rib_find_nhe(struct nhg_hash_entry *rt_nhe, afi_t rt_afi); + + +/** + * Functions for Add/Del/Replace via protocol NHG creation. + * + * The NHEs will not be hashed. They will only be present in the + * ID table and therefore not sharable. + * + * It is the owning protocols job to manage these. + */ + +/* + * Add NHE. If already exists, Replace. + * + * Returns allocated NHE on success, otherwise NULL. + */ +struct nhg_hash_entry *zebra_nhg_proto_add(uint32_t id, int type, + uint16_t instance, uint32_t session, + struct nexthop_group *nhg, + afi_t afi); + +/* + * Del NHE. + * + * Returns deleted NHE on success, otherwise NULL. + * + * Caller must decrement ref with zebra_nhg_decrement_ref() when done. + */ +struct nhg_hash_entry *zebra_nhg_proto_del(uint32_t id, int type); + +/* + * Remove specific by proto NHGs. + * + * Called after client disconnect. 
+ * + */ +unsigned long zebra_nhg_score_proto(int type); + +/* Reference counter functions */ +extern void zebra_nhg_decrement_ref(struct nhg_hash_entry *nhe); +extern void zebra_nhg_increment_ref(struct nhg_hash_entry *nhe); + +/* Check validity of nhe, if invalid will update dependents as well */ +extern void zebra_nhg_check_valid(struct nhg_hash_entry *nhe); + +/* Convert nhe depends to a grp context that can be passed around safely */ +extern uint8_t zebra_nhg_nhe2grp(struct nh_grp *grp, struct nhg_hash_entry *nhe, + int size); + +/* Dataplane install/uninstall */ +extern void zebra_nhg_install_kernel(struct nhg_hash_entry *nhe); +extern void zebra_nhg_uninstall_kernel(struct nhg_hash_entry *nhe); + +/* Forward ref of dplane update context type */ +struct zebra_dplane_ctx; +extern void zebra_nhg_dplane_result(struct zebra_dplane_ctx *ctx); + + +/* Sweep the nhg hash tables for old entries on restart */ +extern void zebra_nhg_sweep_table(struct hash *hash); + +/* + * We are shutting down but the nexthops should be kept + * as that -r has been specified and we don't want to delete + * the routes unintentionally + */ +extern void zebra_nhg_mark_keep(void); + +/* Nexthop resolution processing */ +struct route_entry; /* Forward ref to avoid circular includes */ +extern int nexthop_active_update(struct route_node *rn, struct route_entry *re); + +#ifdef _FRR_ATTRIBUTE_PRINTFRR +#pragma FRR printfrr_ext "%pNG" (const struct nhg_hash_entry *) +#endif + +#ifdef __cplusplus +} +#endif + +#endif /* __ZEBRA_NHG_H__ */ diff --git a/zebra/zebra_nhg_private.h b/zebra/zebra_nhg_private.h new file mode 100644 index 0000000..2504825 --- /dev/null +++ b/zebra/zebra_nhg_private.h @@ -0,0 +1,81 @@ +/* + * Nexthop Group Private Functions. + * Copyright (C) 2019 Cumulus Networks, Inc. 
+ * Stephen Worley + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License as published by the Free + * Software Foundation; either version 2 of the License, or (at your option) + * any later version. + * + * This program is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for + * more details. + * + * You should have received a copy of the GNU General Public License along + * with this program; see the file COPYING; if not, write to the Free Software + * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA + */ + +/** + * These functions should only be used internally for nhg_hash_entry + * manipulation and in certain special cases. + * + * Please use `zebra/zebra_nhg.h` for any general nhg_hash_entry api needs. + */ + +#ifndef __ZEBRA_NHG_PRIVATE_H__ +#define __ZEBRA_NHG_PRIVATE_H__ + +#include "zebra/zebra_nhg.h" + +#ifdef __cplusplus +extern "C" { +#endif + +/* Abstraction for connected trees */ +struct nhg_connected { + struct nhg_connected_tree_item tree_item; + struct nhg_hash_entry *nhe; +}; + +static int nhg_connected_cmp(const struct nhg_connected *con1, + const struct nhg_connected *con2) +{ + return (con1->nhe->id - con2->nhe->id); +} + +DECLARE_RBTREE_UNIQ(nhg_connected_tree, struct nhg_connected, tree_item, + nhg_connected_cmp); + +/* nhg connected tree direct access functions */ +extern void nhg_connected_tree_init(struct nhg_connected_tree_head *head); +extern void nhg_connected_tree_free(struct nhg_connected_tree_head *head); +extern bool +nhg_connected_tree_is_empty(const struct nhg_connected_tree_head *head); +extern struct nhg_connected * +nhg_connected_tree_root(struct nhg_connected_tree_head *head); + +/* I realize _add/_del returns are backwords. 
+ * + * Currently the list APIs are not standardized for what happens in + * the _del() function when the item isn't present. + * + * We are choosing to return NULL if not found in the _del case for now. + */ + +/* Delete NHE from the tree. On success, return the NHE, otherwise NULL. */ +extern struct nhg_hash_entry * +nhg_connected_tree_del_nhe(struct nhg_connected_tree_head *head, + struct nhg_hash_entry *nhe); +/* ADD NHE to the tree. On success, return NULL, otherwise return the NHE. */ +extern struct nhg_hash_entry * +nhg_connected_tree_add_nhe(struct nhg_connected_tree_head *head, + struct nhg_hash_entry *nhe); + +#ifdef __cplusplus +} +#endif + +#endif /* __ZEBRA_NHG_PRIVATE_H__ */ diff --git a/zebra/zebra_ns.c b/zebra/zebra_ns.c new file mode 100644 index 0000000..13fd972 --- /dev/null +++ b/zebra/zebra_ns.c @@ -0,0 +1,233 @@ +/* zebra NS Routines + * Copyright (C) 2016 Cumulus Networks, Inc. + * Donald Sharp + * Copyright (C) 2017/2018 6WIND + * + * This file is part of Quagga. + * + * Quagga is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License as published by the + * Free Software Foundation; either version 2, or (at your option) any + * later version. + * + * Quagga is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. 
+ * + * You should have received a copy of the GNU General Public License along + * with this program; see the file COPYING; if not, write to the Free Software + * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA + */ +#include "zebra.h" + +#include "lib/ns.h" +#include "lib/vrf.h" +#include "lib/prefix.h" +#include "lib/memory.h" + +#include "zebra_ns.h" +#include "zebra_vrf.h" +#include "rt.h" +#include "zebra_vxlan.h" +#include "debug.h" +#include "zebra_netns_notify.h" +#include "zebra_netns_id.h" +#include "zebra_pbr.h" +#include "rib.h" +#include "table_manager.h" +#include "zebra_errors.h" + +extern struct zebra_privs_t zserv_privs; + +DEFINE_MTYPE_STATIC(ZEBRA, ZEBRA_NS, "Zebra Name Space"); + +static struct zebra_ns *dzns; + +static int zebra_ns_disable_internal(struct zebra_ns *zns, bool complete); + +struct zebra_ns *zebra_ns_lookup(ns_id_t ns_id) +{ + if (ns_id == NS_DEFAULT) + return dzns; + struct zebra_ns *info = (struct zebra_ns *)ns_info_lookup(ns_id); + + return (info == NULL) ? dzns : info; +} + +static struct zebra_ns *zebra_ns_alloc(void) +{ + return XCALLOC(MTYPE_ZEBRA_NS, sizeof(struct zebra_ns)); +} + +static int zebra_ns_new(struct ns *ns) +{ + struct zebra_ns *zns; + + if (!ns) + return -1; + + if (IS_ZEBRA_DEBUG_EVENT) + zlog_info("ZNS %s with id %u (created)", ns->name, ns->ns_id); + + zns = zebra_ns_alloc(); + ns->info = zns; + zns->ns = ns; + zns->ns_id = ns->ns_id; + + /* Do any needed per-NS data structure allocation. 
*/ + zns->if_table = route_table_init(); + + return 0; +} + +static int zebra_ns_delete(struct ns *ns) +{ + struct zebra_ns *zns = (struct zebra_ns *)ns->info; + + if (IS_ZEBRA_DEBUG_EVENT) + zlog_info("ZNS %s with id %u (deleted)", ns->name, ns->ns_id); + if (!zns) + return 0; + XFREE(MTYPE_ZEBRA_NS, ns->info); + return 0; +} + +static int zebra_ns_enabled(struct ns *ns) +{ + struct zebra_ns *zns = ns->info; + + if (IS_ZEBRA_DEBUG_EVENT) + zlog_info("ZNS %s with id %u (enabled)", ns->name, ns->ns_id); + if (!zns) + return 0; + return zebra_ns_enable(ns->ns_id, (void **)&zns); +} + +int zebra_ns_disabled(struct ns *ns) +{ + struct zebra_ns *zns = ns->info; + + if (IS_ZEBRA_DEBUG_EVENT) + zlog_info("ZNS %s with id %u (disabled)", ns->name, ns->ns_id); + if (!zns) + return 0; + return zebra_ns_disable_internal(zns, true); +} + +/* Do global enable actions - open sockets, read kernel config etc. */ +int zebra_ns_enable(ns_id_t ns_id, void **info) +{ + struct zebra_ns *zns = (struct zebra_ns *)(*info); + + zns->ns_id = ns_id; + + kernel_init(zns); + zebra_dplane_ns_enable(zns, true); + interface_list(zns); + route_read(zns); + kernel_read_pbr_rules(zns); + + return 0; +} + +/* Common handler for ns disable - this can be called during ns config, + * or during zebra shutdown. + */ +static int zebra_ns_disable_internal(struct zebra_ns *zns, bool complete) +{ + route_table_finish(zns->if_table); + + zebra_dplane_ns_enable(zns, false /*Disable*/); + + kernel_terminate(zns, complete); + + zns->ns_id = NS_DEFAULT; + + return 0; +} + +/* During zebra shutdown, do partial cleanup while the async dataplane + * is still running. 
+ */ +int zebra_ns_early_shutdown(struct ns *ns, + void *param_in __attribute__((unused)), + void **param_out __attribute__((unused))) +{ + struct zebra_ns *zns = ns->info; + + if (zns == NULL) + return 0; + + zebra_ns_disable_internal(zns, false); + return NS_WALK_CONTINUE; +} + +/* During zebra shutdown, do final cleanup + * after all dataplane work is complete. + */ +int zebra_ns_final_shutdown(struct ns *ns, + void *param_in __attribute__((unused)), + void **param_out __attribute__((unused))) +{ + struct zebra_ns *zns = ns->info; + + if (zns == NULL) + return 0; + + kernel_terminate(zns, true); + + return NS_WALK_CONTINUE; +} + +int zebra_ns_init(void) +{ + struct ns *default_ns; + ns_id_t ns_id; + ns_id_t ns_id_external; + struct ns *ns; + + frr_with_privs(&zserv_privs) { + ns_id = zebra_ns_id_get_default(); + } + ns_id_external = ns_map_nsid_with_external(ns_id, true); + ns_init_management(ns_id_external, ns_id); + ns = ns_get_default(); + if (ns) + ns->relative_default_ns = ns_id; + + default_ns = ns_lookup(NS_DEFAULT); + if (!default_ns) { + flog_err(EC_ZEBRA_NS_NO_DEFAULT, + "%s: failed to find default ns", __func__); + exit(EXIT_FAILURE); /* This is non-recoverable */ + } + + /* Do any needed per-NS data structure allocation. */ + zebra_ns_new(default_ns); + dzns = default_ns->info; + + /* Register zebra VRF callbacks, create and activate default VRF. 
*/ + zebra_vrf_init(); + + /* Default NS is activated */ + zebra_ns_enable(ns_id_external, (void **)&dzns); + + if (vrf_is_backend_netns()) { + ns_add_hook(NS_NEW_HOOK, zebra_ns_new); + ns_add_hook(NS_ENABLE_HOOK, zebra_ns_enabled); + ns_add_hook(NS_DISABLE_HOOK, zebra_ns_disabled); + ns_add_hook(NS_DELETE_HOOK, zebra_ns_delete); + zebra_ns_notify_parse(); + zebra_ns_notify_init(); + } + + return 0; +} + +int zebra_ns_config_write(struct vty *vty, struct ns *ns) +{ + if (ns && ns->name != NULL) + vty_out(vty, " netns %s\n", ns->name); + return 0; +} diff --git a/zebra/zebra_ns.h b/zebra/zebra_ns.h new file mode 100644 index 0000000..7a0ffbc --- /dev/null +++ b/zebra/zebra_ns.h @@ -0,0 +1,90 @@ +/* + * Zebra NS header + * Copyright (C) 2016 Cumulus Networks, Inc. + * Donald Sharp + * + * This file is part of Quagga. + * + * Quagga is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License as published by the + * Free Software Foundation; either version 2, or (at your option) any + * later version. + * + * Quagga is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. + * + * You should have received a copy of the GNU General Public License along + * with this program; see the file COPYING; if not, write to the Free Software + * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA + */ +#if !defined(__ZEBRA_NS_H__) +#define __ZEBRA_NS_H__ + +#include <lib/ns.h> +#include <lib/vrf.h> + +#include "zebra/rib.h" +#include "zebra/zebra_vrf.h" + +#ifdef __cplusplus +extern "C" { +#endif + +#ifdef HAVE_NETLINK +/* Socket interface to kernel */ +struct nlsock { + int sock; + int seq; + struct sockaddr_nl snl; + char name[64]; + + uint8_t *buf; + size_t buflen; +}; +#endif + +struct zebra_ns { + /* net-ns name. 
*/ + char name[VRF_NAMSIZ]; + + /* Identifier. */ + ns_id_t ns_id; + +#ifdef HAVE_NETLINK + struct nlsock netlink; /* kernel messages */ + struct nlsock netlink_cmd; /* command channel */ + + /* dplane system's channels: one for outgoing programming, + * for the FIB e.g., and one for incoming events from the OS. + */ + struct nlsock netlink_dplane_out; + struct nlsock netlink_dplane_in; + struct thread *t_netlink; +#endif + + struct route_table *if_table; + + /* Back pointer */ + struct ns *ns; +}; + +struct zebra_ns *zebra_ns_lookup(ns_id_t ns_id); + +int zebra_ns_init(void); +int zebra_ns_enable(ns_id_t ns_id, void **info); +int zebra_ns_disabled(struct ns *ns); +int zebra_ns_early_shutdown(struct ns *ns, + void *param_in __attribute__((unused)), + void **param_out __attribute__((unused))); +int zebra_ns_final_shutdown(struct ns *ns, + void *param_in __attribute__((unused)), + void **param_out __attribute__((unused))); +int zebra_ns_config_write(struct vty *vty, struct ns *ns); + +#ifdef __cplusplus +} +#endif + +#endif diff --git a/zebra/zebra_opaque.c b/zebra/zebra_opaque.c new file mode 100644 index 0000000..d18c5fd --- /dev/null +++ b/zebra/zebra_opaque.c @@ -0,0 +1,719 @@ +/* + * Zebra opaque message handler module + * Copyright (c) 2020 Volta Networks, Inc. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. 
+ * + * You should have received a copy of the GNU General Public License along + * with this program; see the file COPYING; if not, write to the Free Software + * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA + */ + + +#include <zebra.h> +#include "lib/debug.h" +#include "lib/frr_pthread.h" +#include "lib/stream.h" +#include "zebra/debug.h" +#include "zebra/zserv.h" +#include "zebra/zebra_opaque.h" +#include "zebra/rib.h" + +/* Mem type */ +DEFINE_MTYPE_STATIC(ZEBRA, OPQ, "ZAPI Opaque Information"); + +/* Hash to hold message registration info from zapi clients */ +PREDECL_HASH(opq_regh); + +/* Registered client info */ +struct opq_client_reg { + int proto; + int instance; + uint32_t session_id; + + struct opq_client_reg *next; + struct opq_client_reg *prev; +}; + +/* Opaque message registration info */ +struct opq_msg_reg { + struct opq_regh_item item; + + /* Message type */ + uint32_t type; + + struct opq_client_reg *clients; +}; + +/* Registration helper prototypes */ +static uint32_t registration_hash(const struct opq_msg_reg *reg); +static int registration_compare(const struct opq_msg_reg *reg1, + const struct opq_msg_reg *reg2); + +DECLARE_HASH(opq_regh, struct opq_msg_reg, item, registration_compare, + registration_hash); + +static struct opq_regh_head opq_reg_hash; + +/* + * Globals + */ +static struct zebra_opaque_globals { + + /* Sentinel for run or start of shutdown */ + _Atomic uint32_t run; + + /* Limit number of pending, unprocessed updates */ + _Atomic uint32_t max_queued_updates; + + /* Limit number of new messages dequeued at once, to pace an + * incoming burst. + */ + uint32_t msgs_per_cycle; + + /* Stats: counters of incoming messages, errors, and yields (when + * the limit has been reached.) 
+ */ + _Atomic uint32_t msgs_in; + _Atomic uint32_t msg_errors; + _Atomic uint32_t yields; + + /* pthread */ + struct frr_pthread *pthread; + + /* Event-delivery context 'master' for the module */ + struct thread_master *master; + + /* Event/'thread' pointer for queued zapi messages */ + struct thread *t_msgs; + + /* Input fifo queue to the module, and lock to protect it. */ + pthread_mutex_t mutex; + struct stream_fifo in_fifo; + +} zo_info; + +/* Name string for debugs/logs */ +static const char LOG_NAME[] = "Zebra Opaque"; + +/* Prototypes */ + +/* Main event loop, processing incoming message queue */ +static void process_messages(struct thread *event); +static int handle_opq_registration(const struct zmsghdr *hdr, + struct stream *msg); +static int handle_opq_unregistration(const struct zmsghdr *hdr, + struct stream *msg); +static int dispatch_opq_messages(struct stream_fifo *msg_fifo); +static struct opq_msg_reg *opq_reg_lookup(uint32_t type); +static bool opq_client_match(const struct opq_client_reg *client, + const struct zapi_opaque_reg_info *info); +static struct opq_msg_reg *opq_reg_alloc(uint32_t type); +static void opq_reg_free(struct opq_msg_reg **reg); +static struct opq_client_reg *opq_client_alloc( + const struct zapi_opaque_reg_info *info); +static void opq_client_free(struct opq_client_reg **client); +static const char *opq_client2str(char *buf, size_t buflen, + const struct opq_client_reg *client); + +/* + * Initialize the module at startup + */ +void zebra_opaque_init(void) +{ + memset(&zo_info, 0, sizeof(zo_info)); + + pthread_mutex_init(&zo_info.mutex, NULL); + stream_fifo_init(&zo_info.in_fifo); + + zo_info.msgs_per_cycle = ZEBRA_OPAQUE_MSG_LIMIT; +} + +/* + * Start the module pthread. This step is run later than the + * 'init' step, in case zebra has fork-ed. 
+ */ +void zebra_opaque_start(void) +{ + struct frr_pthread_attr pattr = { + .start = frr_pthread_attr_default.start, + .stop = frr_pthread_attr_default.stop + }; + + if (IS_ZEBRA_DEBUG_EVENT) + zlog_debug("%s module starting", LOG_NAME); + + /* Start pthread */ + zo_info.pthread = frr_pthread_new(&pattr, "Zebra Opaque thread", + "zebra_opaque"); + + /* Associate event 'master' */ + zo_info.master = zo_info.pthread->master; + + atomic_store_explicit(&zo_info.run, 1, memory_order_relaxed); + + /* Enqueue an initial event for the pthread */ + thread_add_event(zo_info.master, process_messages, NULL, 0, + &zo_info.t_msgs); + + /* And start the pthread */ + frr_pthread_run(zo_info.pthread, NULL); +} + +/* + * Module stop, halting the dedicated pthread; called from the main pthread. + */ +void zebra_opaque_stop(void) +{ + if (IS_ZEBRA_DEBUG_EVENT) + zlog_debug("%s module stop", LOG_NAME); + + atomic_store_explicit(&zo_info.run, 0, memory_order_relaxed); + + frr_pthread_stop(zo_info.pthread, NULL); + + frr_pthread_destroy(zo_info.pthread); + + if (IS_ZEBRA_DEBUG_EVENT) + zlog_debug("%s module stop complete", LOG_NAME); +} + +/* + * Module final cleanup, called from the zebra main pthread. + */ +void zebra_opaque_finish(void) +{ + struct opq_msg_reg *reg; + struct opq_client_reg *client; + + if (IS_ZEBRA_DEBUG_EVENT) + zlog_debug("%s module shutdown", LOG_NAME); + + /* Clear out registration info */ + while ((reg = opq_regh_pop(&opq_reg_hash)) != NULL) { + client = reg->clients; + while (client) { + reg->clients = client->next; + opq_client_free(&client); + client = reg->clients; + } + + opq_reg_free(®); + } + + opq_regh_fini(&opq_reg_hash); + + pthread_mutex_destroy(&zo_info.mutex); + stream_fifo_deinit(&zo_info.in_fifo); +} + +/* + * Does this module handle (intercept) the specified zapi message type? 
+ */ +bool zebra_opaque_handles_msgid(uint16_t id) +{ + bool ret = false; + + switch (id) { + case ZEBRA_OPAQUE_MESSAGE: + case ZEBRA_OPAQUE_REGISTER: + case ZEBRA_OPAQUE_UNREGISTER: + ret = true; + break; + default: + break; + } + + return ret; +} + +/* + * Enqueue a batch of messages for processing - this is the public api + * used from the zapi processing threads. + */ +uint32_t zebra_opaque_enqueue_batch(struct stream_fifo *batch) +{ + uint32_t counter = 0; + struct stream *msg; + + /* Dequeue messages from the incoming batch, and save them + * on the module fifo. + */ + frr_with_mutex (&zo_info.mutex) { + msg = stream_fifo_pop(batch); + while (msg) { + stream_fifo_push(&zo_info.in_fifo, msg); + counter++; + msg = stream_fifo_pop(batch); + } + } + + /* Schedule module pthread to process the batch */ + if (counter > 0) { + if (IS_ZEBRA_DEBUG_RECV && IS_ZEBRA_DEBUG_DETAIL) + zlog_debug("%s: received %u messages", + __func__, counter); + thread_add_event(zo_info.master, process_messages, NULL, 0, + &zo_info.t_msgs); + } + + return counter; +} + +/* + * Pthread event loop, process the incoming message queue. 
+ */ +static void process_messages(struct thread *event) +{ + struct stream_fifo fifo; + struct stream *msg; + uint32_t i; + bool need_resched = false; + + stream_fifo_init(&fifo); + + /* Check for zebra shutdown */ + if (atomic_load_explicit(&zo_info.run, memory_order_relaxed) == 0) + goto done; + + /* + * Dequeue some messages from the incoming queue, temporarily + * save them on the local fifo + */ + frr_with_mutex (&zo_info.mutex) { + + for (i = 0; i < zo_info.msgs_per_cycle; i++) { + msg = stream_fifo_pop(&zo_info.in_fifo); + if (msg == NULL) + break; + + stream_fifo_push(&fifo, msg); + } + + /* + * We may need to reschedule, if there are still + * queued messages + */ + if (stream_fifo_head(&zo_info.in_fifo) != NULL) + need_resched = true; + } + + /* Update stats */ + atomic_fetch_add_explicit(&zo_info.msgs_in, i, memory_order_relaxed); + + /* Check for zebra shutdown */ + if (atomic_load_explicit(&zo_info.run, memory_order_relaxed) == 0) { + need_resched = false; + goto done; + } + + if (IS_ZEBRA_DEBUG_RECV && IS_ZEBRA_DEBUG_DETAIL) + zlog_debug("%s: processing %u messages", __func__, i); + + /* + * Process the messages from the temporary fifo. We send the whole + * fifo so that we can take advantage of batching internally. Note + * that registration/deregistration messages are handled here also. + */ + dispatch_opq_messages(&fifo); + +done: + + if (need_resched) { + atomic_fetch_add_explicit(&zo_info.yields, 1, + memory_order_relaxed); + thread_add_event(zo_info.master, process_messages, NULL, 0, + &zo_info.t_msgs); + } + + /* This will also free any leftover messages, in the shutdown case */ + stream_fifo_deinit(&fifo); +} + +/* + * Process (dispatch) or drop opaque messages. 
+ */ +static int dispatch_opq_messages(struct stream_fifo *msg_fifo) +{ + struct stream *msg, *dup; + struct zmsghdr hdr; + struct zapi_opaque_msg info; + struct opq_msg_reg *reg; + int ret; + struct opq_client_reg *client; + struct zserv *zclient; + char buf[50]; + + while ((msg = stream_fifo_pop(msg_fifo)) != NULL) { + zapi_parse_header(msg, &hdr); + hdr.length -= ZEBRA_HEADER_SIZE; + + /* Handle client registration messages */ + if (hdr.command == ZEBRA_OPAQUE_REGISTER) { + handle_opq_registration(&hdr, msg); + continue; + } else if (hdr.command == ZEBRA_OPAQUE_UNREGISTER) { + handle_opq_unregistration(&hdr, msg); + continue; + } + + /* We only process OPAQUE messages - drop anything else */ + if (hdr.command != ZEBRA_OPAQUE_MESSAGE) + goto drop_it; + + /* Dispatch to any registered ZAPI client(s) */ + + /* Extract subtype and flags */ + ret = zclient_opaque_decode(msg, &info); + if (ret != 0) + goto drop_it; + + /* Look up registered ZAPI client(s) */ + reg = opq_reg_lookup(info.type); + if (reg == NULL) { + if (IS_ZEBRA_DEBUG_RECV && IS_ZEBRA_DEBUG_DETAIL) + zlog_debug("%s: no registrations for opaque type %u, flags %#x", + __func__, info.type, info.flags); + goto drop_it; + } + + /* Reset read pointer, since we'll be re-sending message */ + stream_set_getp(msg, 0); + + /* Send a copy of the message to all registered clients */ + for (client = reg->clients; client; client = client->next) { + dup = NULL; + + if (CHECK_FLAG(info.flags, ZAPI_OPAQUE_FLAG_UNICAST)) { + + if (client->proto != info.proto || + client->instance != info.instance || + client->session_id != info.session_id) + continue; + + if (IS_ZEBRA_DEBUG_RECV && + IS_ZEBRA_DEBUG_DETAIL) + zlog_debug("%s: found matching unicast client %s", + __func__, + opq_client2str(buf, + sizeof(buf), + client)); + + } else { + /* Copy message if more clients */ + if (client->next) + dup = stream_dup(msg); + } + + /* + * TODO -- this isn't ideal: we're going through an + * acquire/release cycle for each client for 
each + * message. Replace this with a batching version. + */ + zclient = zserv_acquire_client(client->proto, + client->instance, + client->session_id); + if (zclient) { + if (IS_ZEBRA_DEBUG_SEND && + IS_ZEBRA_DEBUG_DETAIL) + zlog_debug("%s: sending %s to client %s", + __func__, + (dup ? "dup" : "msg"), + opq_client2str(buf, + sizeof(buf), + client)); + + /* + * Sending a message actually means enqueuing + * it for a zapi io pthread to send - so we + * don't touch the message after this call. + */ + zserv_send_message(zclient, dup ? dup : msg); + if (dup) + dup = NULL; + else + msg = NULL; + + zserv_release_client(zclient); + } else { + if (IS_ZEBRA_DEBUG_RECV && + IS_ZEBRA_DEBUG_DETAIL) + zlog_debug("%s: type %u: no zclient for %s", + __func__, info.type, + opq_client2str(buf, + sizeof(buf), + client)); + /* Registered but gone? */ + if (dup) + stream_free(dup); + } + + /* If unicast, we're done */ + if (CHECK_FLAG(info.flags, ZAPI_OPAQUE_FLAG_UNICAST)) + break; + } + +drop_it: + + if (msg) + stream_free(msg); + } + + return 0; +} + +/* + * Process a register/unregister message + */ +static int handle_opq_registration(const struct zmsghdr *hdr, + struct stream *msg) +{ + int ret = 0; + struct zapi_opaque_reg_info info; + struct opq_client_reg *client; + struct opq_msg_reg key, *reg; + char buf[50]; + + memset(&info, 0, sizeof(info)); + + if (zapi_opaque_reg_decode(msg, &info) < 0) { + ret = -1; + goto done; + } + + memset(&key, 0, sizeof(key)); + + key.type = info.type; + + reg = opq_regh_find(&opq_reg_hash, &key); + if (reg) { + /* Look for dup client */ + for (client = reg->clients; client != NULL; + client = client->next) { + if (opq_client_match(client, &info)) + break; + } + + if (client) { + /* Oops - duplicate registration? 
*/ + if (IS_ZEBRA_DEBUG_RECV) + zlog_debug("%s: duplicate opq reg for client %s", + __func__, + opq_client2str(buf, sizeof(buf), + client)); + goto done; + } + + client = opq_client_alloc(&info); + + if (IS_ZEBRA_DEBUG_RECV) + zlog_debug("%s: client %s registers for %u", + __func__, + opq_client2str(buf, sizeof(buf), client), + info.type); + + /* Link client into registration */ + client->next = reg->clients; + if (reg->clients) + reg->clients->prev = client; + reg->clients = client; + } else { + /* + * No existing registrations - create one, add the + * client, and add registration to hash. + */ + reg = opq_reg_alloc(info.type); + client = opq_client_alloc(&info); + + if (IS_ZEBRA_DEBUG_RECV) + zlog_debug("%s: client %s registers for new reg %u", + __func__, + opq_client2str(buf, sizeof(buf), client), + info.type); + + reg->clients = client; + + opq_regh_add(&opq_reg_hash, reg); + } + +done: + + stream_free(msg); + return ret; +} + +/* + * Process a register/unregister message + */ +static int handle_opq_unregistration(const struct zmsghdr *hdr, + struct stream *msg) +{ + int ret = 0; + struct zapi_opaque_reg_info info; + struct opq_client_reg *client; + struct opq_msg_reg key, *reg; + char buf[50]; + + memset(&info, 0, sizeof(info)); + + if (zapi_opaque_reg_decode(msg, &info) < 0) { + ret = -1; + goto done; + } + + memset(&key, 0, sizeof(key)); + + key.type = info.type; + + reg = opq_regh_find(&opq_reg_hash, &key); + if (reg == NULL) { + /* Weird: unregister for unknown message? */ + if (IS_ZEBRA_DEBUG_RECV) + zlog_debug("%s: unknown client %s/%u/%u unregisters for unknown type %u", + __func__, + zebra_route_string(info.proto), + info.instance, info.session_id, info.type); + goto done; + } + + /* Look for client */ + for (client = reg->clients; client != NULL; + client = client->next) { + if (opq_client_match(client, &info)) + break; + } + + if (client == NULL) { + /* Oops - unregister for unknown client? 
*/ + if (IS_ZEBRA_DEBUG_RECV) + zlog_debug("%s: unknown client %s/%u/%u unregisters for %u", + __func__, zebra_route_string(info.proto), + info.instance, info.session_id, info.type); + goto done; + } + + if (IS_ZEBRA_DEBUG_RECV) + zlog_debug("%s: client %s unregisters for %u", + __func__, opq_client2str(buf, sizeof(buf), client), + info.type); + + if (client->prev) + client->prev->next = client->next; + if (client->next) + client->next->prev = client->prev; + if (reg->clients == client) + reg->clients = client->next; + + opq_client_free(&client); + + /* Is registration empty now? */ + if (reg->clients == NULL) { + opq_regh_del(&opq_reg_hash, reg); + opq_reg_free(®); + } + +done: + + stream_free(msg); + return ret; +} + +/* Compare utility for registered clients */ +static bool opq_client_match(const struct opq_client_reg *client, + const struct zapi_opaque_reg_info *info) +{ + if (client->proto == info->proto && + client->instance == info->instance && + client->session_id == info->session_id) + return true; + else + return false; +} + +static struct opq_msg_reg *opq_reg_lookup(uint32_t type) +{ + struct opq_msg_reg key, *reg; + + memset(&key, 0, sizeof(key)); + + key.type = type; + + reg = opq_regh_find(&opq_reg_hash, &key); + + return reg; +} + +static struct opq_msg_reg *opq_reg_alloc(uint32_t type) +{ + struct opq_msg_reg *reg; + + reg = XCALLOC(MTYPE_OPQ, sizeof(struct opq_msg_reg)); + + reg->type = type; + INIT_HASH(®->item); + + return reg; +} + +static void opq_reg_free(struct opq_msg_reg **reg) +{ + XFREE(MTYPE_OPQ, (*reg)); +} + +static struct opq_client_reg *opq_client_alloc( + const struct zapi_opaque_reg_info *info) +{ + struct opq_client_reg *client; + + client = XCALLOC(MTYPE_OPQ, sizeof(struct opq_client_reg)); + + client->proto = info->proto; + client->instance = info->instance; + client->session_id = info->session_id; + + return client; +} + +static void opq_client_free(struct opq_client_reg **client) +{ + XFREE(MTYPE_OPQ, (*client)); +} + +static 
const char *opq_client2str(char *buf, size_t buflen, + const struct opq_client_reg *client) +{ + char sbuf[20]; + + snprintf(buf, buflen, "%s/%u", zebra_route_string(client->proto), + client->instance); + if (client->session_id > 0) { + snprintf(sbuf, sizeof(sbuf), "/%u", client->session_id); + strlcat(buf, sbuf, buflen); + } + + return buf; +} + +/* Hash function for clients registered for messages */ +static uint32_t registration_hash(const struct opq_msg_reg *reg) +{ + return reg->type; +} + +/* Comparison function for client registrations */ +static int registration_compare(const struct opq_msg_reg *reg1, + const struct opq_msg_reg *reg2) +{ + if (reg1->type == reg2->type) + return 0; + else + return -1; +} diff --git a/zebra/zebra_opaque.h b/zebra/zebra_opaque.h new file mode 100644 index 0000000..a9610bf --- /dev/null +++ b/zebra/zebra_opaque.h @@ -0,0 +1,63 @@ +/* + * Zebra opaque message zapi message handler + * Copyright (c) 2020 Volta Networks, Inc. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. 
+ * + * You should have received a copy of the GNU General Public License along + * with this program; see the file COPYING; if not, write to the Free Software + * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA + */ + +#include <zebra.h> + +#ifndef _ZEBRA_OPAQUE_H +#define _ZEBRA_OPAQUE_H 1 + +/* Default for number of messages to dequeue per lock cycle */ +#define ZEBRA_OPAQUE_MSG_LIMIT 1000 + +/* + * Initialize the module at startup + */ +void zebra_opaque_init(void); + +/* + * Start the module pthread. This step is run later than the + * 'init' step, in case zebra has fork-ed. + */ +void zebra_opaque_start(void); + +/* + * Does this module handle (intercept) the specified zapi message type? + */ +bool zebra_opaque_handles_msgid(uint16_t id); + +/* + * Module stop, called from the main pthread. This is synchronous: + * once it returns, the pthread has stopped and exited. + */ +void zebra_opaque_stop(void); + +/* + * Module cleanup, called from the zebra main pthread. When it returns, + * all module cleanup is complete. + */ +void zebra_opaque_finish(void); + +/* + * Enqueue a batch of messages for processing. Returns the number dequeued + * from the batch fifo. + */ +uint32_t zebra_opaque_enqueue_batch(struct stream_fifo *batch); + + +#endif /* _ZEBRA_OPAQUE_H */ diff --git a/zebra/zebra_pbr.c b/zebra/zebra_pbr.c new file mode 100644 index 0000000..43e21a6 --- /dev/null +++ b/zebra/zebra_pbr.c @@ -0,0 +1,1454 @@ +/* Zebra Policy Based Routing (PBR) main handling. + * Copyright (C) 2018 Cumulus Networks, Inc. + * + * This file is part of FRR. + * + * FRR is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License as published by the + * Free Software Foundation; either version 2, or (at your option) any + * later version. 
+ * + * FRR is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with FRR; see the file COPYING. If not, write to the Free + * Software Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA + * 02111-1307, USA. + */ + +#include <zebra.h> + +#include <jhash.h> +#include <hash.h> +#include <memory.h> +#include <hook.h> + +#include "zebra/zebra_router.h" +#include "zebra/zebra_pbr.h" +#include "zebra/rt.h" +#include "zebra/zapi_msg.h" +#include "zebra/zserv.h" +#include "zebra/debug.h" +#include "zebra/zebra_neigh.h" + +/* definitions */ +DEFINE_MTYPE_STATIC(ZEBRA, PBR_IPTABLE_IFNAME, "PBR interface list"); + +/* definitions */ +static const struct message ipset_type_msg[] = { + {IPSET_NET_PORT_NET, "net,port,net"}, + {IPSET_NET_PORT, "net,port"}, + {IPSET_NET_NET, "net,net"}, + {IPSET_NET, "net"}, + {0} +}; + +const struct message icmp_typecode_str[] = { + { 0 << 8, "echo-reply"}, + { 0 << 8, "pong"}, + { 3 << 8, "network-unreachable"}, + { (3 << 8) + 1, "host-unreachable"}, + { (3 << 8) + 2, "protocol-unreachable"}, + { (3 << 8) + 3, "port-unreachable"}, + { (3 << 8) + 4, "fragmentation-needed"}, + { (3 << 8) + 5, "source-route-failed"}, + { (3 << 8) + 6, "network-unknown"}, + { (3 << 8) + 7, "host-unknown"}, + { (3 << 8) + 9, "network-prohibited"}, + { (3 << 8) + 10, "host-prohibited"}, + { (3 << 8) + 11, "TOS-network-unreachable"}, + { (3 << 8) + 12, "TOS-host-unreachable"}, + { (3 << 8) + 13, "communication-prohibited"}, + { (3 << 8) + 14, "host-precedence-violation"}, + { (3 << 8) + 15, "precedence-cutoff"}, + { 4 << 8, "source-quench"}, + { 5 << 8, "network-redirect"}, + { (5 << 8) + 1, "host-redirect"}, + { (5 << 8) + 2, "TOS-network-redirect"}, + { (5 << 8) + 3, "TOS-host-redirect"}, + { 8 << 8, 
"echo-request"}, + { 8 << 8, "ping"}, + { 9 << 8, "router-advertisement"}, + { 10 << 8, "router-solicitation"}, + { 11 << 8, "ttl-zero-during-transit"}, + { (11 << 8) + 1, "ttl-zero-during-reassembly"}, + { 12 << 8, "ip-header-bad"}, + { (12 << 8) + 1, "required-option-missing"}, + { 13 << 8, "timestamp-request"}, + { 14 << 8, "timestamp-reply"}, + { 17 << 8, "address-mask-request"}, + { 18 << 8, "address-mask-reply"}, + {0} +}; + +const struct message icmpv6_typecode_str[] = { + { 128 << 8, "echo-request"}, + { 129 << 8, "echo-reply"}, + { 1 << 8, "no-route"}, + { (1 << 8) + 1, "communication-prohibited"}, + { (1 << 8) + 3, "address-unreachable"}, + { (1 << 8) + 4, "port-unreachable"}, + { (2 << 8), "packet-too-big"}, + { 3 << 0, "ttl-zero-during-transit"}, + { (3 << 8) + 1, "ttl-zero-during-reassembly"}, + { 4 << 0, "bad-header"}, + { (4 << 0) + 1, "unknown-header-type"}, + { (4 << 0) + 2, "unknown-option"}, + { 133 << 8, "router-solicitation"}, + { 134 << 8, "router-advertisement"}, + { 135 << 8, "neighbor-solicitation"}, + { 136 << 8, "neighbor-advertisement"}, + { 137 << 8, "redirect"}, + {0} +}; + +/* definitions */ +static const struct message tcp_value_str[] = { + {TCP_HEADER_FIN, "FIN"}, + {TCP_HEADER_SYN, "SYN"}, + {TCP_HEADER_RST, "RST"}, + {TCP_HEADER_PSH, "PSH"}, + {TCP_HEADER_ACK, "ACK"}, + {TCP_HEADER_URG, "URG"}, + {0} +}; + +static const struct message fragment_value_str[] = { + {1, "dont-fragment"}, + {2, "is-fragment"}, + {4, "first-fragment"}, + {8, "last-fragment"}, + {0} +}; + +struct zebra_pbr_env_display { + struct zebra_ns *zns; + struct vty *vty; + char *name; +}; + +/* static function declarations */ +DEFINE_HOOK(zebra_pbr_ipset_entry_get_stat, + (struct zebra_pbr_ipset_entry *ipset, uint64_t *pkts, + uint64_t *bytes), + (ipset, pkts, bytes)); + +DEFINE_HOOK(zebra_pbr_iptable_get_stat, + (struct zebra_pbr_iptable *iptable, uint64_t *pkts, + uint64_t *bytes), + (iptable, pkts, bytes)); + +DEFINE_HOOK(zebra_pbr_iptable_update, + (int cmd, 
struct zebra_pbr_iptable *iptable), (cmd, iptable)); + +DEFINE_HOOK(zebra_pbr_ipset_entry_update, + (int cmd, struct zebra_pbr_ipset_entry *ipset), (cmd, ipset)); + +DEFINE_HOOK(zebra_pbr_ipset_update, + (int cmd, struct zebra_pbr_ipset *ipset), (cmd, ipset)); + +/* resolve nexthop for dataplane (dpdk) programming */ +static bool zebra_pbr_expand_action; + +/* Private functions */ + +/* Public functions */ +void zebra_pbr_rules_free(void *arg) +{ + struct zebra_pbr_rule *rule; + + rule = (struct zebra_pbr_rule *)arg; + + (void)dplane_pbr_rule_delete(rule); + XFREE(MTYPE_TMP, rule); +} + +uint32_t zebra_pbr_rules_hash_key(const void *arg) +{ + const struct zebra_pbr_rule *rule; + uint32_t key; + + rule = arg; + key = jhash_3words(rule->rule.seq, rule->rule.priority, + rule->rule.action.table, + prefix_hash_key(&rule->rule.filter.src_ip)); + + key = jhash_3words(rule->rule.filter.fwmark, rule->vrf_id, + rule->rule.filter.ip_proto, key); + + key = jhash(rule->ifname, strlen(rule->ifname), key); + + return jhash_3words(rule->rule.filter.src_port, + rule->rule.filter.dst_port, + prefix_hash_key(&rule->rule.filter.dst_ip), + jhash_1word(rule->rule.unique, key)); +} + +bool zebra_pbr_rules_hash_equal(const void *arg1, const void *arg2) +{ + const struct zebra_pbr_rule *r1, *r2; + + r1 = (const struct zebra_pbr_rule *)arg1; + r2 = (const struct zebra_pbr_rule *)arg2; + + if (r1->rule.seq != r2->rule.seq) + return false; + + if (r1->rule.priority != r2->rule.priority) + return false; + + if (r1->rule.unique != r2->rule.unique) + return false; + + if (r1->rule.action.table != r2->rule.action.table) + return false; + + if (r1->rule.filter.src_port != r2->rule.filter.src_port) + return false; + + if (r1->rule.filter.dst_port != r2->rule.filter.dst_port) + return false; + + if (r1->rule.filter.fwmark != r2->rule.filter.fwmark) + return false; + + if (r1->rule.filter.ip_proto != r2->rule.filter.ip_proto) + return false; + + if (!prefix_same(&r1->rule.filter.src_ip, 
&r2->rule.filter.src_ip)) + return false; + + if (!prefix_same(&r1->rule.filter.dst_ip, &r2->rule.filter.dst_ip)) + return false; + + if (strcmp(r1->rule.ifname, r2->rule.ifname) != 0) + return false; + + if (r1->vrf_id != r2->vrf_id) + return false; + + return true; +} + +struct pbr_rule_unique_lookup { + struct zebra_pbr_rule *rule; + uint32_t unique; + char ifname[INTERFACE_NAMSIZ + 1]; + vrf_id_t vrf_id; +}; + +static int pbr_rule_lookup_unique_walker(struct hash_bucket *b, void *data) +{ + struct pbr_rule_unique_lookup *pul = data; + struct zebra_pbr_rule *rule = b->data; + + if (pul->unique == rule->rule.unique + && strncmp(pul->ifname, rule->rule.ifname, INTERFACE_NAMSIZ) == 0 + && pul->vrf_id == rule->vrf_id) { + pul->rule = rule; + return HASHWALK_ABORT; + } + + return HASHWALK_CONTINUE; +} + +static struct zebra_pbr_rule * +pbr_rule_lookup_unique(struct zebra_pbr_rule *zrule) +{ + struct pbr_rule_unique_lookup pul; + + pul.unique = zrule->rule.unique; + strlcpy(pul.ifname, zrule->rule.ifname, INTERFACE_NAMSIZ); + pul.rule = NULL; + pul.vrf_id = zrule->vrf_id; + hash_walk(zrouter.rules_hash, &pbr_rule_lookup_unique_walker, &pul); + + return pul.rule; +} + +void zebra_pbr_ipset_free(void *arg) +{ + struct zebra_pbr_ipset *ipset; + + ipset = (struct zebra_pbr_ipset *)arg; + hook_call(zebra_pbr_ipset_update, 0, ipset); + XFREE(MTYPE_TMP, ipset); +} + +uint32_t zebra_pbr_ipset_hash_key(const void *arg) +{ + const struct zebra_pbr_ipset *ipset = arg; + uint32_t *pnt = (uint32_t *)&ipset->ipset_name; + uint32_t key = jhash_1word(ipset->vrf_id, 0x63ab42de); + + key = jhash_1word(ipset->family, key); + + return jhash2(pnt, ZEBRA_IPSET_NAME_HASH_SIZE, key); +} + +bool zebra_pbr_ipset_hash_equal(const void *arg1, const void *arg2) +{ + const struct zebra_pbr_ipset *r1, *r2; + + r1 = (const struct zebra_pbr_ipset *)arg1; + r2 = (const struct zebra_pbr_ipset *)arg2; + + if (r1->type != r2->type) + return false; + if (r1->unique != r2->unique) + return false; + if 
(r1->vrf_id != r2->vrf_id) + return false; + if (r1->family != r2->family) + return false; + + if (strncmp(r1->ipset_name, r2->ipset_name, + ZEBRA_IPSET_NAME_SIZE)) + return false; + return true; +} + +void zebra_pbr_ipset_entry_free(void *arg) +{ + struct zebra_pbr_ipset_entry *ipset; + + ipset = (struct zebra_pbr_ipset_entry *)arg; + + hook_call(zebra_pbr_ipset_entry_update, 0, ipset); + + XFREE(MTYPE_TMP, ipset); +} + +uint32_t zebra_pbr_ipset_entry_hash_key(const void *arg) +{ + const struct zebra_pbr_ipset_entry *ipset; + uint32_t key; + + ipset = arg; + key = prefix_hash_key(&ipset->src); + key = jhash_1word(ipset->unique, key); + key = jhash_1word(prefix_hash_key(&ipset->dst), key); + key = jhash(&ipset->dst_port_min, 2, key); + key = jhash(&ipset->dst_port_max, 2, key); + key = jhash(&ipset->src_port_min, 2, key); + key = jhash(&ipset->src_port_max, 2, key); + key = jhash(&ipset->proto, 1, key); + + return key; +} + +bool zebra_pbr_ipset_entry_hash_equal(const void *arg1, const void *arg2) +{ + const struct zebra_pbr_ipset_entry *r1, *r2; + + r1 = (const struct zebra_pbr_ipset_entry *)arg1; + r2 = (const struct zebra_pbr_ipset_entry *)arg2; + + if (r1->unique != r2->unique) + return false; + + if (!prefix_same(&r1->src, &r2->src)) + return false; + + if (!prefix_same(&r1->dst, &r2->dst)) + return false; + + if (r1->src_port_min != r2->src_port_min) + return false; + + if (r1->src_port_max != r2->src_port_max) + return false; + + if (r1->dst_port_min != r2->dst_port_min) + return false; + + if (r1->dst_port_max != r2->dst_port_max) + return false; + + if (r1->proto != r2->proto) + return false; + return true; +} + +/* this function gives option to flush plugin memory contexts + * with all parameter. 
set it to true to flush all + * set it to false to flush only passed arg argument + */ +static void _zebra_pbr_iptable_free_all(void *arg, bool all) +{ + struct zebra_pbr_iptable *iptable; + struct listnode *node, *nnode; + char *name; + + iptable = (struct zebra_pbr_iptable *)arg; + + if (all) + hook_call(zebra_pbr_iptable_update, 0, iptable); + + if (iptable->interface_name_list) { + for (ALL_LIST_ELEMENTS(iptable->interface_name_list, node, + nnode, name)) { + XFREE(MTYPE_PBR_IPTABLE_IFNAME, name); + list_delete_node(iptable->interface_name_list, node); + } + list_delete(&iptable->interface_name_list); + } + XFREE(MTYPE_TMP, iptable); +} + +void zebra_pbr_iptable_free(void *arg) +{ + _zebra_pbr_iptable_free_all(arg, false); +} + +uint32_t zebra_pbr_iptable_hash_key(const void *arg) +{ + const struct zebra_pbr_iptable *iptable = arg; + uint32_t *pnt = (uint32_t *)&(iptable->ipset_name); + uint32_t key; + + key = jhash2(pnt, ZEBRA_IPSET_NAME_HASH_SIZE, + 0x63ab42de); + key = jhash_1word(iptable->fwmark, key); + key = jhash_1word(iptable->family, key); + key = jhash_1word(iptable->flow_label, key); + key = jhash_1word(iptable->pkt_len_min, key); + key = jhash_1word(iptable->pkt_len_max, key); + key = jhash_1word(iptable->tcp_flags, key); + key = jhash_1word(iptable->tcp_mask_flags, key); + key = jhash_1word(iptable->dscp_value, key); + key = jhash_1word(iptable->protocol, key); + key = jhash_1word(iptable->fragment, key); + key = jhash_1word(iptable->vrf_id, key); + + return jhash_3words(iptable->filter_bm, iptable->type, + iptable->unique, key); +} + +bool zebra_pbr_iptable_hash_equal(const void *arg1, const void *arg2) +{ + const struct zebra_pbr_iptable *r1, *r2; + + r1 = (const struct zebra_pbr_iptable *)arg1; + r2 = (const struct zebra_pbr_iptable *)arg2; + + if (r1->vrf_id != r2->vrf_id) + return false; + if (r1->type != r2->type) + return false; + if (r1->unique != r2->unique) + return false; + if (r1->filter_bm != r2->filter_bm) + return false; + if 
(r1->fwmark != r2->fwmark) + return false; + if (r1->action != r2->action) + return false; + if (strncmp(r1->ipset_name, r2->ipset_name, + ZEBRA_IPSET_NAME_SIZE)) + return false; + if (r1->family != r2->family) + return false; + if (r1->flow_label != r2->flow_label) + return false; + if (r1->pkt_len_min != r2->pkt_len_min) + return false; + if (r1->pkt_len_max != r2->pkt_len_max) + return false; + if (r1->tcp_flags != r2->tcp_flags) + return false; + if (r1->tcp_mask_flags != r2->tcp_mask_flags) + return false; + if (r1->dscp_value != r2->dscp_value) + return false; + if (r1->fragment != r2->fragment) + return false; + if (r1->protocol != r2->protocol) + return false; + return true; +} + +static void *pbr_rule_alloc_intern(void *arg) +{ + struct zebra_pbr_rule *zpr; + struct zebra_pbr_rule *new; + + zpr = (struct zebra_pbr_rule *)arg; + + new = XCALLOC(MTYPE_TMP, sizeof(*new)); + + memcpy(new, zpr, sizeof(*zpr)); + + return new; +} + +static struct zebra_pbr_rule *pbr_rule_free(struct zebra_pbr_rule *hash_data, + bool free_data) +{ + if (hash_data->action.neigh) + zebra_neigh_deref(hash_data); + hash_release(zrouter.rules_hash, hash_data); + if (free_data) { + XFREE(MTYPE_TMP, hash_data); + return NULL; + } + + return hash_data; +} + +static struct zebra_pbr_rule *pbr_rule_release(struct zebra_pbr_rule *rule, + bool free_data) +{ + struct zebra_pbr_rule *lookup; + + lookup = hash_lookup(zrouter.rules_hash, rule); + + if (!lookup) + return NULL; + + return pbr_rule_free(lookup, free_data); +} + +void zebra_pbr_show_rule_unit(struct zebra_pbr_rule *rule, struct vty *vty) +{ + struct pbr_rule *prule = &rule->rule; + struct zebra_pbr_action *zaction = &rule->action; + + vty_out(vty, "Rules if %s\n", rule->ifname); + vty_out(vty, " Seq %u pri %u\n", prule->seq, prule->priority); + if (prule->filter.filter_bm & PBR_FILTER_SRC_IP) + vty_out(vty, " SRC IP Match: %pFX\n", &prule->filter.src_ip); + if (prule->filter.filter_bm & PBR_FILTER_DST_IP) + vty_out(vty, " DST IP 
Match: %pFX\n", &prule->filter.dst_ip); + if (prule->filter.filter_bm & PBR_FILTER_IP_PROTOCOL) + vty_out(vty, " IP protocol Match: %u\n", + prule->filter.ip_proto); + if (prule->filter.filter_bm & PBR_FILTER_SRC_PORT) + vty_out(vty, " SRC Port Match: %u\n", prule->filter.src_port); + if (prule->filter.filter_bm & PBR_FILTER_DST_PORT) + vty_out(vty, " DST Port Match: %u\n", prule->filter.dst_port); + + if (prule->filter.filter_bm & PBR_FILTER_DSFIELD) { + vty_out(vty, " DSCP Match: %u\n", + (prule->filter.dsfield & PBR_DSFIELD_DSCP) >> 2); + vty_out(vty, " ECN Match: %u\n", + prule->filter.dsfield & PBR_DSFIELD_ECN); + } + + if (prule->filter.filter_bm & PBR_FILTER_FWMARK) + vty_out(vty, " MARK Match: %u\n", prule->filter.fwmark); + + vty_out(vty, " Tableid: %u\n", prule->action.table); + if (zaction->afi == AFI_IP) + vty_out(vty, " Action: nh: %pI4 intf: %s\n", + &zaction->gate.ipv4, + ifindex2ifname(zaction->ifindex, rule->vrf_id)); + if (zaction->afi == AFI_IP6) + vty_out(vty, " Action: nh: %pI6 intf: %s\n", + &zaction->gate.ipv6, + ifindex2ifname(zaction->ifindex, rule->vrf_id)); + if (zaction->neigh && (zaction->neigh->flags & ZEBRA_NEIGH_ENT_ACTIVE)) + vty_out(vty, " Action: mac: %pEA\n", &zaction->neigh->mac); +} + +static int zebra_pbr_show_rules_walkcb(struct hash_bucket *bucket, void *arg) +{ + struct zebra_pbr_rule *rule = (struct zebra_pbr_rule *)bucket->data; + struct zebra_pbr_env_display *env = (struct zebra_pbr_env_display *)arg; + struct vty *vty = env->vty; + + zebra_pbr_show_rule_unit(rule, vty); + + return HASHWALK_CONTINUE; +} + +void zebra_pbr_show_rule(struct vty *vty) +{ + struct zebra_pbr_env_display env; + + env.vty = vty; + hash_walk(zrouter.rules_hash, zebra_pbr_show_rules_walkcb, &env); +} + +void zebra_pbr_config_write(struct vty *vty) +{ + if (zebra_pbr_expand_action) + vty_out(vty, "pbr nexthop-resolve\n"); +} + +void zebra_pbr_expand_action_update(bool enable) +{ + zebra_pbr_expand_action = enable; +} + +static void 
zebra_pbr_expand_rule(struct zebra_pbr_rule *rule) +{ + struct prefix p; + struct route_table *table; + struct route_node *rn; + rib_dest_t *dest; + struct route_entry *re; + const struct nexthop_group *nhg; + const struct nexthop *nexthop; + struct zebra_pbr_action *action = &rule->action; + struct ipaddr ip; + + if (!zebra_pbr_expand_action) + return; + + table = zebra_vrf_get_table_with_table_id( + AFI_IP, SAFI_UNICAST, VRF_DEFAULT, rule->rule.action.table); + if (!table) + return; + + memset(&p, 0, sizeof(p)); + p.family = AF_INET; + + rn = route_node_lookup(table, &p); + if (!rn) + return; + + dest = rib_dest_from_rnode(rn); + re = dest->selected_fib; + if (!re) { + route_unlock_node(rn); + return; + } + + nhg = rib_get_fib_nhg(re); + if (!nhg) { + route_unlock_node(rn); + return; + } + + nexthop = nhg->nexthop; + if (nexthop) { + switch (nexthop->type) { + case NEXTHOP_TYPE_IPV4: + case NEXTHOP_TYPE_IPV4_IFINDEX: + action->afi = AFI_IP; + action->gate.ipv4 = nexthop->gate.ipv4; + action->ifindex = nexthop->ifindex; + ip.ipa_type = AF_INET; + ip.ipaddr_v4 = action->gate.ipv4; + zebra_neigh_ref(action->ifindex, &ip, rule); + break; + + case NEXTHOP_TYPE_IPV6: + case NEXTHOP_TYPE_IPV6_IFINDEX: + action->afi = AFI_IP6; + action->gate.ipv6 = nexthop->gate.ipv6; + action->ifindex = nexthop->ifindex; + ip.ipa_type = AF_INET6; + ip.ipaddr_v6 = action->gate.ipv6; + zebra_neigh_ref(action->ifindex, &ip, rule); + break; + + default: + action->afi = AFI_UNSPEC; + } + } + + route_unlock_node(rn); +} + +void zebra_pbr_add_rule(struct zebra_pbr_rule *rule) +{ + struct zebra_pbr_rule *found; + struct zebra_pbr_rule *old; + struct zebra_pbr_rule *new; + + /** + * Check if we already have it (this checks via a unique ID, walking + * over the hash table, not via a hash operation). 
+ */ + found = pbr_rule_lookup_unique(rule); + + /* If found, this is an update */ + if (found) { + if (IS_ZEBRA_DEBUG_PBR) + zlog_debug( + "%s: seq: %d, prior: %d, unique: %d, ifname: %s -- update", + __func__, rule->rule.seq, rule->rule.priority, + rule->rule.unique, rule->rule.ifname); + + /* remove the old entry from the hash but don't free the hash + * data yet as we need it for the dplane update + */ + old = pbr_rule_release(found, false); + + /* insert new entry into hash */ + new = hash_get(zrouter.rules_hash, rule, pbr_rule_alloc_intern); + /* expand the action if needed */ + zebra_pbr_expand_rule(new); + /* update dataplane */ + (void)dplane_pbr_rule_update(found, new); + /* release the old hash data */ + if (old) + XFREE(MTYPE_TMP, old); + } else { + if (IS_ZEBRA_DEBUG_PBR) + zlog_debug( + "%s: seq: %d, prior: %d, unique: %d, ifname: %s -- new", + __func__, rule->rule.seq, rule->rule.priority, + rule->rule.unique, rule->rule.ifname); + + /* insert new entry into hash */ + new = hash_get(zrouter.rules_hash, rule, pbr_rule_alloc_intern); + /* expand the action if needed */ + zebra_pbr_expand_rule(new); + (void)dplane_pbr_rule_add(new); + } + +} + +void zebra_pbr_del_rule(struct zebra_pbr_rule *rule) +{ + if (IS_ZEBRA_DEBUG_PBR) + zlog_debug("%s: seq: %d, prior: %d, unique: %d, ifname: %s", + __func__, rule->rule.seq, rule->rule.priority, + rule->rule.unique, rule->rule.ifname); + + (void)dplane_pbr_rule_delete(rule); + + if (pbr_rule_release(rule, true)) + zlog_debug("%s: Rule being deleted we know nothing about", + __func__); +} + +void zebra_pbr_process_iptable(struct zebra_dplane_ctx *ctx) +{ + int mode, ret = 0; + struct zebra_pbr_iptable ipt; + + if (dplane_ctx_get_op(ctx) == DPLANE_OP_IPTABLE_ADD) + mode = 1; + else + mode = 0; + + dplane_ctx_get_pbr_iptable(ctx, &ipt); + + ret = hook_call(zebra_pbr_iptable_update, mode, &ipt); + if (ret) + dplane_ctx_set_status(ctx, ZEBRA_DPLANE_REQUEST_SUCCESS); + else + dplane_ctx_set_status(ctx, 
ZEBRA_DPLANE_REQUEST_FAILURE); +} + +void zebra_pbr_process_ipset(struct zebra_dplane_ctx *ctx) +{ + int mode, ret = 0; + struct zebra_pbr_ipset ipset; + + if (dplane_ctx_get_op(ctx) == DPLANE_OP_IPSET_ADD) + mode = 1; + else + mode = 0; + + dplane_ctx_get_pbr_ipset(ctx, &ipset); + + ret = hook_call(zebra_pbr_ipset_update, mode, &ipset); + if (ret) + dplane_ctx_set_status(ctx, ZEBRA_DPLANE_REQUEST_SUCCESS); + else + dplane_ctx_set_status(ctx, ZEBRA_DPLANE_REQUEST_FAILURE); +} + +void zebra_pbr_process_ipset_entry(struct zebra_dplane_ctx *ctx) +{ + int mode, ret = 0; + struct zebra_pbr_ipset_entry ipset_entry; + struct zebra_pbr_ipset ipset; + + if (dplane_ctx_get_op(ctx) == DPLANE_OP_IPSET_ENTRY_ADD) + mode = 1; + else + mode = 0; + + dplane_ctx_get_pbr_ipset_entry(ctx, &ipset_entry); + dplane_ctx_get_pbr_ipset(ctx, &ipset); + + ipset_entry.backpointer = &ipset; + + ret = hook_call(zebra_pbr_ipset_entry_update, mode, &ipset_entry); + if (ret) + dplane_ctx_set_status(ctx, ZEBRA_DPLANE_REQUEST_SUCCESS); + else + dplane_ctx_set_status(ctx, ZEBRA_DPLANE_REQUEST_FAILURE); +} + +static void zebra_pbr_cleanup_rules(struct hash_bucket *b, void *data) +{ + struct zebra_pbr_rule *rule = b->data; + int *sock = data; + + if (rule->sock == *sock) { + (void)dplane_pbr_rule_delete(rule); + pbr_rule_free(rule, true); + } +} + +static void zebra_pbr_cleanup_ipset(struct hash_bucket *b, void *data) +{ + struct zebra_pbr_ipset *ipset = b->data; + int *sock = data; + + if (ipset->sock == *sock) { + if (hash_release(zrouter.ipset_hash, ipset)) + zebra_pbr_ipset_free(ipset); + else + hook_call(zebra_pbr_ipset_update, 0, ipset); + } +} + +static void zebra_pbr_cleanup_ipset_entry(struct hash_bucket *b, void *data) +{ + struct zebra_pbr_ipset_entry *ipset = b->data; + int *sock = data; + + if (ipset->sock == *sock) { + if (hash_release(zrouter.ipset_entry_hash, ipset)) + zebra_pbr_ipset_entry_free(ipset); + else + hook_call(zebra_pbr_ipset_entry_update, 0, ipset); + } +} + +static void 
zebra_pbr_cleanup_iptable(struct hash_bucket *b, void *data) +{ + struct zebra_pbr_iptable *iptable = b->data; + int *sock = data; + + if (iptable->sock == *sock) { + if (hash_release(zrouter.iptable_hash, iptable)) + _zebra_pbr_iptable_free_all(iptable, true); + else + hook_call(zebra_pbr_iptable_update, 0, iptable); + } +} + +static int zebra_pbr_client_close_cleanup(struct zserv *client) +{ + int sock = client->sock; + + if (!sock) + return 0; + hash_iterate(zrouter.rules_hash, zebra_pbr_cleanup_rules, &sock); + hash_iterate(zrouter.iptable_hash, zebra_pbr_cleanup_iptable, &sock); + hash_iterate(zrouter.ipset_entry_hash, zebra_pbr_cleanup_ipset_entry, + &sock); + hash_iterate(zrouter.ipset_hash, zebra_pbr_cleanup_ipset, &sock); + return 1; +} + +void zebra_pbr_init(void) +{ + hook_register(zserv_client_close, zebra_pbr_client_close_cleanup); +} + +static void *pbr_ipset_alloc_intern(void *arg) +{ + struct zebra_pbr_ipset *zpi; + struct zebra_pbr_ipset *new; + + zpi = (struct zebra_pbr_ipset *)arg; + + new = XCALLOC(MTYPE_TMP, sizeof(struct zebra_pbr_ipset)); + + memcpy(new, zpi, sizeof(*zpi)); + + return new; +} + +void zebra_pbr_create_ipset(struct zebra_pbr_ipset *ipset) +{ + (void)hash_get(zrouter.ipset_hash, ipset, pbr_ipset_alloc_intern); + (void)dplane_pbr_ipset_add(ipset); +} + +void zebra_pbr_destroy_ipset(struct zebra_pbr_ipset *ipset) +{ + struct zebra_pbr_ipset *lookup; + + lookup = hash_lookup(zrouter.ipset_hash, ipset); + (void)dplane_pbr_ipset_delete(ipset); + if (lookup) { + hash_release(zrouter.ipset_hash, lookup); + XFREE(MTYPE_TMP, lookup); + } else + zlog_debug( + "%s: IPSet Entry being deleted we know nothing about", + __func__); +} + +struct pbr_ipset_name_lookup { + struct zebra_pbr_ipset *ipset; + char ipset_name[ZEBRA_IPSET_NAME_SIZE]; +}; + +const char *zebra_pbr_ipset_type2str(uint32_t type) +{ + return lookup_msg(ipset_type_msg, type, + "Unrecognized IPset Type"); +} + +static int zebra_pbr_ipset_pername_walkcb(struct hash_bucket 
*bucket, void *arg) +{ + struct pbr_ipset_name_lookup *pinl = + (struct pbr_ipset_name_lookup *)arg; + struct zebra_pbr_ipset *zpi = (struct zebra_pbr_ipset *)bucket->data; + + if (!strncmp(pinl->ipset_name, zpi->ipset_name, + ZEBRA_IPSET_NAME_SIZE)) { + pinl->ipset = zpi; + return HASHWALK_ABORT; + } + return HASHWALK_CONTINUE; +} + +struct zebra_pbr_ipset *zebra_pbr_lookup_ipset_pername(char *ipsetname) +{ + struct pbr_ipset_name_lookup pinl; + struct pbr_ipset_name_lookup *ptr = &pinl; + + if (!ipsetname) + return NULL; + memset(ptr, 0, sizeof(struct pbr_ipset_name_lookup)); + snprintf((char *)ptr->ipset_name, ZEBRA_IPSET_NAME_SIZE, "%s", + ipsetname); + hash_walk(zrouter.ipset_hash, zebra_pbr_ipset_pername_walkcb, ptr); + return ptr->ipset; +} + +static void *pbr_ipset_entry_alloc_intern(void *arg) +{ + struct zebra_pbr_ipset_entry *zpi; + struct zebra_pbr_ipset_entry *new; + + zpi = (struct zebra_pbr_ipset_entry *)arg; + + new = XCALLOC(MTYPE_TMP, sizeof(struct zebra_pbr_ipset_entry)); + + memcpy(new, zpi, sizeof(*zpi)); + + return new; +} + +void zebra_pbr_add_ipset_entry(struct zebra_pbr_ipset_entry *ipset) +{ + (void)hash_get(zrouter.ipset_entry_hash, ipset, + pbr_ipset_entry_alloc_intern); + (void)dplane_pbr_ipset_entry_add(ipset); +} + +void zebra_pbr_del_ipset_entry(struct zebra_pbr_ipset_entry *ipset) +{ + struct zebra_pbr_ipset_entry *lookup; + + lookup = hash_lookup(zrouter.ipset_entry_hash, ipset); + (void)dplane_pbr_ipset_entry_delete(ipset); + if (lookup) { + hash_release(zrouter.ipset_entry_hash, lookup); + XFREE(MTYPE_TMP, lookup); + } else + zlog_debug("%s: IPSet being deleted we know nothing about", + __func__); +} + +static void *pbr_iptable_alloc_intern(void *arg) +{ + struct zebra_pbr_iptable *zpi; + struct zebra_pbr_iptable *new; + struct listnode *ln; + char *ifname; + + zpi = (struct zebra_pbr_iptable *)arg; + + new = XCALLOC(MTYPE_TMP, sizeof(struct zebra_pbr_iptable)); + + /* Deep structure copy */ + memcpy(new, zpi, sizeof(*zpi)); + 
new->interface_name_list = list_new(); + + if (zpi->interface_name_list) { + for (ALL_LIST_ELEMENTS_RO(zpi->interface_name_list, ln, ifname)) + listnode_add(new->interface_name_list, + XSTRDUP(MTYPE_PBR_IPTABLE_IFNAME, ifname)); + } + + return new; +} + +void zebra_pbr_add_iptable(struct zebra_pbr_iptable *iptable) +{ + struct zebra_pbr_iptable *ipt_hash; + + ipt_hash = hash_get(zrouter.iptable_hash, iptable, + pbr_iptable_alloc_intern); + (void)dplane_pbr_iptable_add(ipt_hash); +} + +void zebra_pbr_del_iptable(struct zebra_pbr_iptable *iptable) +{ + struct zebra_pbr_iptable *lookup; + + lookup = hash_lookup(zrouter.iptable_hash, iptable); + (void)dplane_pbr_iptable_delete(iptable); + if (lookup) { + struct listnode *node, *nnode; + char *name; + + hash_release(zrouter.iptable_hash, lookup); + for (ALL_LIST_ELEMENTS(iptable->interface_name_list, + node, nnode, name)) { + XFREE(MTYPE_PBR_IPTABLE_IFNAME, name); + list_delete_node(iptable->interface_name_list, + node); + } + list_delete(&iptable->interface_name_list); + XFREE(MTYPE_TMP, lookup); + } else + zlog_debug("%s: IPTable being deleted we know nothing about", + __func__); +} + +/* + * Handle success or failure of rule (un)install in the kernel. + */ +void zebra_pbr_dplane_result(struct zebra_dplane_ctx *ctx) +{ + enum zebra_dplane_result res; + enum dplane_op_e op; + + res = dplane_ctx_get_status(ctx); + op = dplane_ctx_get_op(ctx); + if (op == DPLANE_OP_RULE_ADD || op == DPLANE_OP_RULE_UPDATE) + zsend_rule_notify_owner(ctx, res == ZEBRA_DPLANE_REQUEST_SUCCESS + ? ZAPI_RULE_INSTALLED + : ZAPI_RULE_FAIL_INSTALL); + else if (op == DPLANE_OP_RULE_DELETE) + zsend_rule_notify_owner(ctx, res == ZEBRA_DPLANE_REQUEST_SUCCESS + ? ZAPI_RULE_REMOVED + : ZAPI_RULE_FAIL_REMOVE); + else if (op == DPLANE_OP_IPTABLE_ADD) + zsend_iptable_notify_owner(ctx, + res == ZEBRA_DPLANE_REQUEST_SUCCESS + ? 
ZAPI_IPTABLE_INSTALLED + : ZAPI_IPTABLE_FAIL_INSTALL); + else if (op == DPLANE_OP_IPTABLE_DELETE) + zsend_iptable_notify_owner(ctx, + res == ZEBRA_DPLANE_REQUEST_SUCCESS + ? ZAPI_IPTABLE_REMOVED + : ZAPI_IPTABLE_FAIL_REMOVE); + else if (op == DPLANE_OP_IPSET_ADD) + zsend_ipset_notify_owner(ctx, + res == ZEBRA_DPLANE_REQUEST_SUCCESS + ? ZAPI_IPSET_INSTALLED + : ZAPI_IPSET_FAIL_INSTALL); + else if (op == DPLANE_OP_IPSET_DELETE) + zsend_ipset_notify_owner(ctx, + res == ZEBRA_DPLANE_REQUEST_SUCCESS + ? ZAPI_IPSET_REMOVED + : ZAPI_IPSET_FAIL_REMOVE); + else if (op == DPLANE_OP_IPSET_ENTRY_ADD) + zsend_ipset_entry_notify_owner( + ctx, res == ZEBRA_DPLANE_REQUEST_SUCCESS + ? ZAPI_IPSET_ENTRY_INSTALLED + : ZAPI_IPSET_ENTRY_FAIL_INSTALL); + else if (op == DPLANE_OP_IPSET_ENTRY_DELETE) + zsend_ipset_entry_notify_owner( + ctx, res == ZEBRA_DPLANE_REQUEST_SUCCESS + ? ZAPI_IPSET_ENTRY_REMOVED + : ZAPI_IPSET_ENTRY_FAIL_REMOVE); + else + flog_err( + EC_ZEBRA_PBR_RULE_UPDATE, + "Context received in pbr rule dplane result handler with incorrect OP code (%u)", + op); +} + +/* + * Handle rule delete notification from kernel. 
+ */ +int kernel_pbr_rule_del(struct zebra_pbr_rule *rule) +{ + return 0; +} + +struct zebra_pbr_ipset_entry_unique_display { + struct zebra_pbr_ipset *zpi; + struct vty *vty; + struct zebra_ns *zns; +}; + + +static const char *zebra_pbr_prefix2str(union prefixconstptr pu, + char *str, int size) +{ + const struct prefix *p = pu.p; + char buf[PREFIX2STR_BUFFER]; + + if ((p->family == AF_INET && p->prefixlen == IPV4_MAX_BITLEN) + || (p->family == AF_INET6 && p->prefixlen == IPV6_MAX_BITLEN)) { + snprintf(str, size, "%s", inet_ntop(p->family, &p->u.prefix, + buf, PREFIX2STR_BUFFER)); + return str; + } + return prefix2str(pu, str, size); +} + +static void zebra_pbr_display_icmp(struct vty *vty, + struct zebra_pbr_ipset_entry *zpie) +{ + char decoded_str[20]; + uint16_t port; + struct zebra_pbr_ipset *zpi; + + zpi = zpie->backpointer; + + /* range icmp type */ + if (zpie->src_port_max || zpie->dst_port_max) { + vty_out(vty, ":icmp:[type <%u:%u>;code <%u:%u>", + zpie->src_port_min, zpie->src_port_max, + zpie->dst_port_min, zpie->dst_port_max); + } else { + port = ((zpie->src_port_min << 8) & 0xff00) + + (zpie->dst_port_min & 0xff); + memset(decoded_str, 0, sizeof(decoded_str)); + snprintf(decoded_str, sizeof(decoded_str), "%u/%u", + zpie->src_port_min, zpie->dst_port_min); + vty_out(vty, ":%s:%s", + zpi->family == AF_INET6 ? "ipv6-icmp" : "icmp", + lookup_msg(zpi->family == AF_INET6 ? 
+ icmpv6_typecode_str : icmp_typecode_str, + port, decoded_str)); + } +} + +static void zebra_pbr_display_port(struct vty *vty, uint32_t filter_bm, + uint16_t port_min, uint16_t port_max, + uint8_t proto) +{ + if (!(filter_bm & PBR_FILTER_PROTO)) { + if (port_max) + vty_out(vty, ":udp/tcp:%d-%d", + port_min, port_max); + else + vty_out(vty, ":udp/tcp:%d", + port_min); + } else { + if (port_max) + vty_out(vty, ":proto %d:%d-%d", + proto, port_min, port_max); + else + vty_out(vty, ":proto %d:%d", + proto, port_min); + } +} + +static int zebra_pbr_show_ipset_entry_walkcb(struct hash_bucket *bucket, + void *arg) +{ + struct zebra_pbr_ipset_entry_unique_display *unique = + (struct zebra_pbr_ipset_entry_unique_display *)arg; + struct zebra_pbr_ipset *zpi = unique->zpi; + struct vty *vty = unique->vty; + struct zebra_pbr_ipset_entry *zpie = + (struct zebra_pbr_ipset_entry *)bucket->data; + uint64_t pkts = 0, bytes = 0; + int ret = 0; + + if (zpie->backpointer != zpi) + return HASHWALK_CONTINUE; + + if ((zpi->type == IPSET_NET_NET) || + (zpi->type == IPSET_NET_PORT_NET)) { + char buf[PREFIX_STRLEN]; + + zebra_pbr_prefix2str(&(zpie->src), buf, sizeof(buf)); + vty_out(vty, "\tfrom %s", buf); + if (zpie->filter_bm & PBR_FILTER_SRC_PORT && + zpie->proto != IPPROTO_ICMP) + zebra_pbr_display_port(vty, zpie->filter_bm, + zpie->src_port_min, + zpie->src_port_max, + zpie->proto); + vty_out(vty, " to "); + zebra_pbr_prefix2str(&(zpie->dst), buf, sizeof(buf)); + vty_out(vty, "%s", buf); + if (zpie->filter_bm & PBR_FILTER_DST_PORT && + zpie->proto != IPPROTO_ICMP) + zebra_pbr_display_port(vty, zpie->filter_bm, + zpie->dst_port_min, + zpie->dst_port_max, + zpie->proto); + if (zpie->proto == IPPROTO_ICMP) + zebra_pbr_display_icmp(vty, zpie); + } else if ((zpi->type == IPSET_NET) || + (zpi->type == IPSET_NET_PORT)) { + char buf[PREFIX_STRLEN]; + + if (zpie->filter_bm & PBR_FILTER_SRC_IP) { + zebra_pbr_prefix2str(&(zpie->src), buf, sizeof(buf)); + vty_out(vty, "\tfrom %s", buf); + } + if 
(zpie->filter_bm & PBR_FILTER_SRC_PORT && + zpie->proto != IPPROTO_ICMP) + zebra_pbr_display_port(vty, zpie->filter_bm, + zpie->src_port_min, + zpie->src_port_max, + zpie->proto); + if (zpie->filter_bm & PBR_FILTER_DST_IP) { + zebra_pbr_prefix2str(&(zpie->dst), buf, sizeof(buf)); + vty_out(vty, "\tto %s", buf); + } + if (zpie->filter_bm & PBR_FILTER_DST_PORT && + zpie->proto != IPPROTO_ICMP) + zebra_pbr_display_port(vty, zpie->filter_bm, + zpie->dst_port_min, + zpie->dst_port_max, + zpie->proto); + if (zpie->proto == IPPROTO_ICMP) + zebra_pbr_display_icmp(vty, zpie); + } + vty_out(vty, " (%u)\n", zpie->unique); + + ret = hook_call(zebra_pbr_ipset_entry_get_stat, zpie, &pkts, + &bytes); + if (ret && pkts > 0) + vty_out(vty, "\t pkts %" PRIu64 ", bytes %" PRIu64"\n", + pkts, bytes); + return HASHWALK_CONTINUE; +} + +static int zebra_pbr_show_ipset_walkcb(struct hash_bucket *bucket, void *arg) +{ + struct zebra_pbr_env_display *uniqueipset = + (struct zebra_pbr_env_display *)arg; + struct zebra_pbr_ipset *zpi = (struct zebra_pbr_ipset *)bucket->data; + struct zebra_pbr_ipset_entry_unique_display unique; + struct vty *vty = uniqueipset->vty; + struct zebra_ns *zns = uniqueipset->zns; + + vty_out(vty, "IPset %s type %s family %s\n", zpi->ipset_name, + zebra_pbr_ipset_type2str(zpi->type), + family2str(zpi->family)); + unique.vty = vty; + unique.zpi = zpi; + unique.zns = zns; + hash_walk(zrouter.ipset_entry_hash, zebra_pbr_show_ipset_entry_walkcb, + &unique); + vty_out(vty, "\n"); + return HASHWALK_CONTINUE; +} + +size_t zebra_pbr_tcpflags_snprintf(char *buffer, size_t len, + uint16_t tcp_val) +{ + size_t len_written = 0; + static struct message nt = {0}; + const struct message *pnt; + int incr = 0; + + for (pnt = tcp_value_str; + memcmp(pnt, &nt, sizeof(struct message)); pnt++) + if (pnt->key & tcp_val) { + len_written += snprintf(buffer + len_written, + len - len_written, + "%s%s", incr ? 
+ ",":"", pnt->str); + incr++; + } + return len_written; +} + +/* + */ +void zebra_pbr_show_ipset_list(struct vty *vty, char *ipsetname) +{ + struct zebra_pbr_ipset *zpi; + struct zebra_ns *zns = zebra_ns_lookup(NS_DEFAULT); + struct zebra_pbr_ipset_entry_unique_display unique; + struct zebra_pbr_env_display uniqueipset; + + if (ipsetname) { + zpi = zebra_pbr_lookup_ipset_pername(ipsetname); + if (!zpi) { + vty_out(vty, "No IPset %s found\n", ipsetname); + return; + } + vty_out(vty, "IPset %s type %s family %s\n", ipsetname, + zebra_pbr_ipset_type2str(zpi->type), + family2str(zpi->family)); + unique.vty = vty; + unique.zpi = zpi; + unique.zns = zns; + hash_walk(zrouter.ipset_entry_hash, + zebra_pbr_show_ipset_entry_walkcb, &unique); + return; + } + uniqueipset.zns = zns; + uniqueipset.vty = vty; + uniqueipset.name = NULL; + hash_walk(zrouter.ipset_hash, zebra_pbr_show_ipset_walkcb, + &uniqueipset); +} + +struct pbr_rule_fwmark_lookup { + struct zebra_pbr_rule *ptr; + uint32_t fwmark; +}; + +static int zebra_pbr_rule_lookup_fwmark_walkcb(struct hash_bucket *bucket, + void *arg) +{ + struct pbr_rule_fwmark_lookup *iprule = + (struct pbr_rule_fwmark_lookup *)arg; + struct zebra_pbr_rule *zpr = (struct zebra_pbr_rule *)bucket->data; + + if (iprule->fwmark == zpr->rule.filter.fwmark) { + iprule->ptr = zpr; + return HASHWALK_ABORT; + } + return HASHWALK_CONTINUE; +} + +static void zebra_pbr_show_iptable_unit(struct zebra_pbr_iptable *iptable, + struct vty *vty, + struct zebra_ns *zns) +{ + int ret; + uint64_t pkts = 0, bytes = 0; + + vty_out(vty, "IPtable %s family %s action %s (%u)\n", + iptable->ipset_name, + family2str(iptable->family), + iptable->action == ZEBRA_IPTABLES_DROP ? 
"drop" : "redirect", + iptable->unique); + if (iptable->type == IPSET_NET_PORT || + iptable->type == IPSET_NET_PORT_NET) { + if (!(iptable->filter_bm & MATCH_ICMP_SET)) { + if (iptable->filter_bm & PBR_FILTER_DST_PORT) + vty_out(vty, "\t lookup dst port\n"); + else if (iptable->filter_bm & PBR_FILTER_SRC_PORT) + vty_out(vty, "\t lookup src port\n"); + } + } + if (iptable->pkt_len_min || iptable->pkt_len_max) { + if (!iptable->pkt_len_max) + vty_out(vty, "\t pkt len %u\n", + iptable->pkt_len_min); + else + vty_out(vty, "\t pkt len [%u;%u]\n", + iptable->pkt_len_min, + iptable->pkt_len_max); + } + if (iptable->tcp_flags || iptable->tcp_mask_flags) { + char tcp_flag_str[64]; + char tcp_flag_mask_str[64]; + + zebra_pbr_tcpflags_snprintf(tcp_flag_str, + sizeof(tcp_flag_str), + iptable->tcp_flags); + zebra_pbr_tcpflags_snprintf(tcp_flag_mask_str, + sizeof(tcp_flag_mask_str), + iptable->tcp_mask_flags); + vty_out(vty, "\t tcpflags [%s/%s]\n", + tcp_flag_str, tcp_flag_mask_str); + } + if (iptable->filter_bm & (MATCH_DSCP_SET | MATCH_DSCP_INVERSE_SET)) { + vty_out(vty, "\t dscp %s %d\n", + iptable->filter_bm & MATCH_DSCP_INVERSE_SET ? + "not" : "", iptable->dscp_value); + } + if (iptable->filter_bm & (MATCH_FLOW_LABEL_SET | + MATCH_FLOW_LABEL_INVERSE_SET)) { + vty_out(vty, "\t flowlabel %s %d\n", + iptable->filter_bm & MATCH_FLOW_LABEL_INVERSE_SET ? + "not" : "", iptable->flow_label); + } + if (iptable->fragment) { + char val_str[10]; + + snprintf(val_str, sizeof(val_str), "%d", iptable->fragment); + vty_out(vty, "\t fragment%s %s\n", + iptable->filter_bm & MATCH_FRAGMENT_INVERSE_SET ? 
+ " not" : "", lookup_msg(fragment_value_str, + iptable->fragment, val_str)); + } + if (iptable->protocol) { + vty_out(vty, "\t protocol %d\n", + iptable->protocol); + } + ret = hook_call(zebra_pbr_iptable_get_stat, iptable, &pkts, + &bytes); + if (ret && pkts > 0) + vty_out(vty, "\t pkts %" PRIu64 ", bytes %" PRIu64"\n", + pkts, bytes); + if (iptable->action != ZEBRA_IPTABLES_DROP) { + struct pbr_rule_fwmark_lookup prfl; + + prfl.fwmark = iptable->fwmark; + prfl.ptr = NULL; + hash_walk(zrouter.rules_hash, + &zebra_pbr_rule_lookup_fwmark_walkcb, &prfl); + if (prfl.ptr) { + struct zebra_pbr_rule *zpr = prfl.ptr; + + vty_out(vty, "\t table %u, fwmark %u\n", + zpr->rule.action.table, + prfl.fwmark); + } + } +} + +static int zebra_pbr_show_iptable_walkcb(struct hash_bucket *bucket, void *arg) +{ + struct zebra_pbr_iptable *iptable = + (struct zebra_pbr_iptable *)bucket->data; + struct zebra_pbr_env_display *env = (struct zebra_pbr_env_display *)arg; + struct vty *vty = env->vty; + struct zebra_ns *zns = env->zns; + char *iptable_name = env->name; + + if (!iptable_name) + zebra_pbr_show_iptable_unit(iptable, vty, zns); + else if (!strncmp(iptable_name, + iptable->ipset_name, + ZEBRA_IPSET_NAME_SIZE)) + zebra_pbr_show_iptable_unit(iptable, vty, zns); + return HASHWALK_CONTINUE; +} + +void zebra_pbr_show_iptable(struct vty *vty, char *iptable_name) +{ + struct zebra_ns *zns = zebra_ns_lookup(NS_DEFAULT); + struct zebra_pbr_env_display env; + + env.vty = vty; + env.zns = zns; + env.name = iptable_name; + hash_walk(zrouter.iptable_hash, zebra_pbr_show_iptable_walkcb, &env); +} + +void zebra_pbr_iptable_update_interfacelist(struct stream *s, + struct zebra_pbr_iptable *zpi) +{ + uint32_t i = 0, index; + struct interface *ifp; + char *name; + + for (i = 0; i < zpi->nb_interface; i++) { + STREAM_GETL(s, index); + ifp = if_lookup_by_index(index, zpi->vrf_id); + if (!ifp) + continue; + name = XSTRDUP(MTYPE_PBR_IPTABLE_IFNAME, ifp->name); + listnode_add(zpi->interface_name_list, 
name); + } +stream_failure: + return; +} diff --git a/zebra/zebra_pbr.h b/zebra/zebra_pbr.h new file mode 100644 index 0000000..baa8755 --- /dev/null +++ b/zebra/zebra_pbr.h @@ -0,0 +1,299 @@ +/* + * Zebra Policy Based Routing (PBR) Data structures and definitions + * These are public definitions referenced by multiple files. + * Copyright (C) 2018 Cumulus Networks, Inc. + * + * This file is part of FRR. + * + * FRR is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License as published by the + * Free Software Foundation; either version 2, or (at your option) any + * later version. + * + * FRR is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with FRR; see the file COPYING. If not, write to the Free + * Software Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA + * 02111-1307, USA. 
+ */ + +#ifndef _ZEBRA_PBR_H +#define _ZEBRA_PBR_H + +#include <zebra.h> + +#include "prefix.h" +#include "if.h" + +#include "rt.h" +#include "pbr.h" + +#ifdef __cplusplus +extern "C" { +#endif + +struct zebra_pbr_action { + afi_t afi; + + /* currently only one nexthop is supported */ + union g_addr gate; + + /* dest-interface */ + ifindex_t ifindex; + + /* dataplane info */ + intptr_t dp_flow_ptr; + + /* neigh */ + struct zebra_neigh_ent *neigh; + /* zebra_pbr_rule is linked to neigh via neigh_listnode */ + struct listnode neigh_listnode; +}; + +struct zebra_pbr_rule { + int sock; + + struct pbr_rule rule; + + char ifname[INTERFACE_NAMSIZ]; + + struct zebra_pbr_action action; + + vrf_id_t vrf_id; +}; + +#define IS_RULE_FILTERING_ON_SRC_IP(r) \ + (r->rule.filter.filter_bm & PBR_FILTER_SRC_IP) +#define IS_RULE_FILTERING_ON_DST_IP(r) \ + (r->rule.filter.filter_bm & PBR_FILTER_DST_IP) +#define IS_RULE_FILTERING_ON_SRC_PORT(r) \ + (r->rule.filter.filter_bm & PBR_FILTER_SRC_PORT) +#define IS_RULE_FILTERING_ON_DST_PORT(r) \ + (r->rule.filter.filter_bm & PBR_FILTER_DST_PORT) +#define IS_RULE_FILTERING_ON_DSFIELD(r) \ + (r->rule.filter.filter_bm & PBR_FILTER_DSFIELD) +#define IS_RULE_FILTERING_ON_FWMARK(r) \ + (r->rule.filter.filter_bm & PBR_FILTER_FWMARK) + +/* + * An IPSet Entry Filter + * + * This is a filter mapped on ipset entries + */ +struct zebra_pbr_ipset_info { + /* type is encoded as uint32_t + * but value is an enum ipset_type + */ + uint32_t type; + + uint8_t family; + + char ipset_name[ZEBRA_IPSET_NAME_SIZE]; +}; + +struct zebra_pbr_ipset { + /* + * Originating zclient sock fd, so we can know who to send + * back to. 
+ */ + int sock; + + vrf_id_t vrf_id; + + uint32_t unique; + + /* type is encoded as uint32_t + * but value is an enum ipset_type + */ + uint32_t type; + + uint8_t family; + + char ipset_name[ZEBRA_IPSET_NAME_SIZE]; +}; + + +/* + * An IPSet Entry Filter + * + * This is a filter mapped on ipset entries + */ +struct zebra_pbr_ipset_entry { + /* + * Originating zclient sock fd, so we can know who to send + * back to. + */ + int sock; + + uint32_t unique; + + struct prefix src; + struct prefix dst; + + /* udp/tcp src port or icmp type */ + uint16_t src_port_min; + uint16_t src_port_max; + /* udp/tcp dst port or icmp code */ + uint16_t dst_port_min; + uint16_t dst_port_max; + + uint8_t proto; + + uint32_t filter_bm; + + struct zebra_pbr_ipset *backpointer; +}; + +/* + * An IPTables Action + * + * This is a filter mapped on ipset entries + */ +struct zebra_pbr_iptable { + /* + * Originating zclient sock fd, so we can know who to send + * back to. + */ + int sock; + + vrf_id_t vrf_id; + + uint32_t unique; + + /* include ipset type + */ + uint32_t type; + + /* include which IP is to be filtered + */ + uint32_t filter_bm; + + uint32_t fwmark; + + uint32_t action; + + uint16_t pkt_len_min; + uint16_t pkt_len_max; + uint16_t tcp_flags; + uint16_t tcp_mask_flags; + uint8_t dscp_value; + uint8_t fragment; + uint8_t protocol; + + uint32_t nb_interface; + uint16_t flow_label; + + uint8_t family; + + struct list *interface_name_list; + +#define IPTABLE_INSTALL_QUEUED 1 << 1 +#define IPTABLE_UNINSTALL_QUEUED 1 << 2 + uint8_t internal_flags; + char ipset_name[ZEBRA_IPSET_NAME_SIZE]; +}; + +extern const struct message icmp_typecode_str[]; +extern const struct message icmpv6_typecode_str[]; + +const char *zebra_pbr_ipset_type2str(uint32_t type); + +void zebra_pbr_add_rule(struct zebra_pbr_rule *rule); +void zebra_pbr_del_rule(struct zebra_pbr_rule *rule); +void zebra_pbr_create_ipset(struct zebra_pbr_ipset *ipset); +void zebra_pbr_destroy_ipset(struct zebra_pbr_ipset *ipset); +struct 
zebra_pbr_ipset *zebra_pbr_lookup_ipset_pername(char *ipsetname); +void zebra_pbr_add_ipset_entry(struct zebra_pbr_ipset_entry *ipset); +void zebra_pbr_del_ipset_entry(struct zebra_pbr_ipset_entry *ipset); + +void zebra_pbr_add_iptable(struct zebra_pbr_iptable *iptable); +void zebra_pbr_del_iptable(struct zebra_pbr_iptable *iptable); +void zebra_pbr_process_iptable(struct zebra_dplane_ctx *ctx); +void zebra_pbr_process_ipset(struct zebra_dplane_ctx *ctx); +void zebra_pbr_process_ipset_entry(struct zebra_dplane_ctx *ctx); + +/* + * Get to know existing PBR rules in the kernel - typically called at startup. + */ +extern void kernel_read_pbr_rules(struct zebra_ns *zns); + +/* + * Handle success or failure of rule (un)install in the kernel. + */ +extern void zebra_pbr_dplane_result(struct zebra_dplane_ctx *ctx); + +/* + * Handle success or failure of ipset kinds (un)install in the kernel. + */ +extern void kernel_pbr_ipset_add_del_status(struct zebra_pbr_ipset *ipset, + enum zebra_dplane_status res); + +extern void kernel_pbr_ipset_entry_add_del_status( + struct zebra_pbr_ipset_entry *ipset, + enum zebra_dplane_status res); + +/* + * Handle rule delete notification from kernel. 
+ */ +extern int kernel_pbr_rule_del(struct zebra_pbr_rule *rule); + +extern void zebra_pbr_rules_free(void *arg); +extern uint32_t zebra_pbr_rules_hash_key(const void *arg); +extern bool zebra_pbr_rules_hash_equal(const void *arg1, const void *arg2); + +/* has operates on 32bit pointer + * and field is a string of 8bit + */ +#define ZEBRA_IPSET_NAME_HASH_SIZE (ZEBRA_IPSET_NAME_SIZE / 4) + +extern void zebra_pbr_ipset_free(void *arg); +extern uint32_t zebra_pbr_ipset_hash_key(const void *arg); +extern bool zebra_pbr_ipset_hash_equal(const void *arg1, const void *arg2); + +extern void zebra_pbr_ipset_entry_free(void *arg); +extern uint32_t zebra_pbr_ipset_entry_hash_key(const void *arg); +extern bool zebra_pbr_ipset_entry_hash_equal(const void *arg1, + const void *arg2); + +extern void zebra_pbr_iptable_free(void *arg); +extern uint32_t zebra_pbr_iptable_hash_key(const void *arg); +extern bool zebra_pbr_iptable_hash_equal(const void *arg1, const void *arg2); + +extern void zebra_pbr_config_write(struct vty *vty); +extern void zebra_pbr_expand_action_update(bool enable); +extern void zebra_pbr_init(void); +extern void zebra_pbr_show_ipset_list(struct vty *vty, char *ipsetname); +extern void zebra_pbr_show_iptable(struct vty *vty, char *iptable); +extern void zebra_pbr_iptable_update_interfacelist(struct stream *s, + struct zebra_pbr_iptable *zpi); +size_t zebra_pbr_tcpflags_snprintf(char *buffer, size_t len, + uint16_t tcp_val); +extern void zebra_pbr_show_rule(struct vty *vty); +extern void zebra_pbr_show_rule_unit(struct zebra_pbr_rule *rule, + struct vty *vty); + +DECLARE_HOOK(zebra_pbr_ipset_entry_get_stat, + (struct zebra_pbr_ipset_entry *ipset, uint64_t *pkts, + uint64_t *bytes), + (ipset, pkts, bytes)); +DECLARE_HOOK(zebra_pbr_iptable_get_stat, + (struct zebra_pbr_iptable *iptable, uint64_t *pkts, + uint64_t *bytes), + (iptable, pkts, bytes)); +DECLARE_HOOK(zebra_pbr_iptable_update, + (int cmd, struct zebra_pbr_iptable *iptable), (cmd, iptable)); + 
+DECLARE_HOOK(zebra_pbr_ipset_entry_update, + (int cmd, struct zebra_pbr_ipset_entry *ipset), (cmd, ipset)); +DECLARE_HOOK(zebra_pbr_ipset_update, + (int cmd, struct zebra_pbr_ipset *ipset), (cmd, ipset)); + +#ifdef __cplusplus +} +#endif + +#endif /* _ZEBRA_PBR_H */ diff --git a/zebra/zebra_ptm.c b/zebra/zebra_ptm.c new file mode 100644 index 0000000..4a18eb0 --- /dev/null +++ b/zebra/zebra_ptm.c @@ -0,0 +1,1568 @@ +/* Kernel routing table updates using netlink over GNU/Linux system. + * Copyright (C) 1997, 98, 99 Kunihiro Ishiguro + * + * This file is part of GNU Zebra. + * + * GNU Zebra is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License as published by the + * Free Software Foundation; either version 2, or (at your option) any + * later version. + * + * GNU Zebra is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. + * + * You should have received a copy of the GNU General Public License along + * with this program; see the file COPYING; if not, write to the Free Software + * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA + */ + +#include <zebra.h> + +#include <sys/un.h> /* for sockaddr_un */ +#include <net/if.h> + +#include "bfd.h" +#include "buffer.h" +#include "command.h" +#include "if.h" +#include "network.h" +#include "ptm_lib.h" +#include "rib.h" +#include "stream.h" +#include "lib/version.h" +#include "vrf.h" +#include "vty.h" +#include "lib_errors.h" + +#include "zebra/debug.h" +#include "zebra/interface.h" +#include "zebra/zebra_errors.h" +#include "zebra/zebra_ptm.h" +#include "zebra/zebra_ptm_redistribute.h" +#include "zebra/zebra_router.h" +#include "zebra_vrf.h" + +/* + * Choose the BFD implementation that we'll use. 
+ *
+ * There are two implementations:
+ * - PTM BFD: which uses an external daemon;
+ * - bfdd: FRR's own BFD daemon;
+ */
+#if HAVE_BFDD == 0
+
+/* Bounds (seconds) for the reconnect timer toward the ptmd socket. */
+#define ZEBRA_PTM_RECONNECT_TIME_INITIAL 1 /* initial reconnect is 1s */
+#define ZEBRA_PTM_RECONNECT_TIME_MAX 300
+
+#define PTM_MSG_LEN 4
+#define PTM_HEADER_LEN 37
+
+/* Command strings sent to the PTM daemon. */
+const char ZEBRA_PTM_GET_STATUS_CMD[] = "get-status";
+const char ZEBRA_PTM_BFD_START_CMD[] = "start-bfd-sess";
+const char ZEBRA_PTM_BFD_STOP_CMD[] = "stop-bfd-sess";
+const char ZEBRA_PTM_BFD_CLIENT_REG_CMD[] = "reg-bfd-client";
+const char ZEBRA_PTM_BFD_CLIENT_DEREG_CMD[] = "dereg-bfd-client";
+
+/* Keys and well-known values looked up in messages received from PTM. */
+const char ZEBRA_PTM_CMD_STR[] = "cmd";
+const char ZEBRA_PTM_CMD_STATUS_STR[] = "cmd_status";
+const char ZEBRA_PTM_PORT_STR[] = "port";
+const char ZEBRA_PTM_CBL_STR[] = "cbl status";
+const char ZEBRA_PTM_PASS_STR[] = "pass";
+const char ZEBRA_PTM_FAIL_STR[] = "fail";
+const char ZEBRA_PTM_BFDSTATUS_STR[] = "state";
+const char ZEBRA_PTM_BFDSTATUS_UP_STR[] = "Up";
+const char ZEBRA_PTM_BFDSTATUS_DOWN_STR[] = "Down";
+const char ZEBRA_PTM_BFDDEST_STR[] = "peer";
+const char ZEBRA_PTM_BFDSRC_STR[] = "local";
+const char ZEBRA_PTM_BFDVRF_STR[] = "vrf";
+const char ZEBRA_PTM_INVALID_PORT_NAME[] = "N/A";
+const char ZEBRA_PTM_INVALID_SRC_IP[] = "N/A";
+const char ZEBRA_PTM_INVALID_VRF[] = "N/A";
+
+/* Field names appended when building BFD session requests for PTM. */
+const char ZEBRA_PTM_BFD_DST_IP_FIELD[] = "dstIPaddr";
+const char ZEBRA_PTM_BFD_SRC_IP_FIELD[] = "srcIPaddr";
+const char ZEBRA_PTM_BFD_MIN_RX_FIELD[] = "requiredMinRx";
+const char ZEBRA_PTM_BFD_MIN_TX_FIELD[] = "upMinTx";
+const char ZEBRA_PTM_BFD_DETECT_MULT_FIELD[] = "detectMult";
+const char ZEBRA_PTM_BFD_MULTI_HOP_FIELD[] = "multiHop";
+const char ZEBRA_PTM_BFD_CLIENT_FIELD[] = "client";
+const char ZEBRA_PTM_BFD_SEQID_FIELD[] = "seqid";
+const char ZEBRA_PTM_BFD_IFNAME_FIELD[] = "ifName";
+const char ZEBRA_PTM_BFD_MAX_HOP_CNT_FIELD[] = "maxHopCnt";
+const char ZEBRA_PTM_BFD_SEND_EVENT[] = "sendEvent";
+const char ZEBRA_PTM_BFD_VRF_NAME_FIELD[] = "vrfName";
+const char ZEBRA_PTM_BFD_CBIT_FIELD[] = "bfdcbit"; + +static ptm_lib_handle_t *ptm_hdl; + +struct zebra_ptm_cb ptm_cb; + +static int zebra_ptm_socket_init(void); +void zebra_ptm_sock_read(struct thread *thread); +static void zebra_ptm_install_commands(void); +static int zebra_ptm_handle_msg_cb(void *arg, void *in_ctxt); +void zebra_bfd_peer_replay_req(void); +void zebra_ptm_send_status_req(void); +void zebra_ptm_reset_status(int ptm_disable); +static int zebra_ptm_bfd_client_deregister(struct zserv *client); + +const char ZEBRA_PTM_SOCK_NAME[] = "\0/var/run/ptmd.socket"; + +void zebra_ptm_init(void) +{ + char buf[64]; + + memset(&ptm_cb, 0, sizeof(ptm_cb)); + + ptm_cb.out_data = calloc(1, ZEBRA_PTM_SEND_MAX_SOCKBUF); + if (!ptm_cb.out_data) { + zlog_debug("%s: Allocation of send data failed", __func__); + return; + } + + ptm_cb.in_data = calloc(1, ZEBRA_PTM_MAX_SOCKBUF); + if (!ptm_cb.in_data) { + zlog_debug("%s: Allocation of recv data failed", __func__); + free(ptm_cb.out_data); + return; + } + + ptm_cb.pid = getpid(); + zebra_ptm_install_commands(); + + snprintf(buf, sizeof(buf), "%s", FRR_PTM_NAME); + ptm_hdl = ptm_lib_register(buf, NULL, zebra_ptm_handle_msg_cb, + zebra_ptm_handle_msg_cb); + ptm_cb.wb = buffer_new(0); + + ptm_cb.reconnect_time = ZEBRA_PTM_RECONNECT_TIME_INITIAL; + + ptm_cb.ptm_sock = -1; + + hook_register(zserv_client_close, zebra_ptm_bfd_client_deregister); +} + +void zebra_ptm_finish(void) +{ + buffer_flush_all(ptm_cb.wb, ptm_cb.ptm_sock); + + free(ptm_hdl); + + if (ptm_cb.out_data) + free(ptm_cb.out_data); + + if (ptm_cb.in_data) + free(ptm_cb.in_data); + + /* Cancel events. 
*/ + THREAD_OFF(ptm_cb.t_read); + THREAD_OFF(ptm_cb.t_write); + THREAD_OFF(ptm_cb.t_timer); + + if (ptm_cb.wb) + buffer_free(ptm_cb.wb); + + if (ptm_cb.ptm_sock >= 0) + close(ptm_cb.ptm_sock); +} + +static void zebra_ptm_flush_messages(struct thread *thread) +{ + ptm_cb.t_write = NULL; + + if (ptm_cb.ptm_sock == -1) + return; + + errno = 0; + + switch (buffer_flush_available(ptm_cb.wb, ptm_cb.ptm_sock)) { + case BUFFER_ERROR: + flog_err_sys(EC_LIB_SOCKET, "%s ptm socket error: %s", __func__, + safe_strerror(errno)); + close(ptm_cb.ptm_sock); + ptm_cb.ptm_sock = -1; + zebra_ptm_reset_status(0); + ptm_cb.t_timer = NULL; + thread_add_timer(zrouter.master, zebra_ptm_connect, NULL, + ptm_cb.reconnect_time, &ptm_cb.t_timer); + return; + case BUFFER_PENDING: + ptm_cb.t_write = NULL; + thread_add_write(zrouter.master, zebra_ptm_flush_messages, NULL, + ptm_cb.ptm_sock, &ptm_cb.t_write); + break; + case BUFFER_EMPTY: + break; + } +} + +static int zebra_ptm_send_message(char *data, int size) +{ + errno = 0; + switch (buffer_write(ptm_cb.wb, ptm_cb.ptm_sock, data, size)) { + case BUFFER_ERROR: + flog_err_sys(EC_LIB_SOCKET, "%s ptm socket error: %s", __func__, + safe_strerror(errno)); + close(ptm_cb.ptm_sock); + ptm_cb.ptm_sock = -1; + zebra_ptm_reset_status(0); + ptm_cb.t_timer = NULL; + thread_add_timer(zrouter.master, zebra_ptm_connect, NULL, + ptm_cb.reconnect_time, &ptm_cb.t_timer); + return -1; + case BUFFER_EMPTY: + THREAD_OFF(ptm_cb.t_write); + break; + case BUFFER_PENDING: + thread_add_write(zrouter.master, zebra_ptm_flush_messages, NULL, + ptm_cb.ptm_sock, &ptm_cb.t_write); + break; + } + + return 0; +} + +void zebra_ptm_connect(struct thread *t) +{ + int init = 0; + + if (ptm_cb.ptm_sock == -1) { + zebra_ptm_socket_init(); + init = 1; + } + + if (ptm_cb.ptm_sock != -1) { + if (init) { + ptm_cb.t_read = NULL; + thread_add_read(zrouter.master, zebra_ptm_sock_read, + NULL, ptm_cb.ptm_sock, &ptm_cb.t_read); + zebra_bfd_peer_replay_req(); + } + 
zebra_ptm_send_status_req(); + ptm_cb.reconnect_time = ZEBRA_PTM_RECONNECT_TIME_INITIAL; + } else if (ptm_cb.reconnect_time < ZEBRA_PTM_RECONNECT_TIME_MAX) { + ptm_cb.reconnect_time *= 2; + if (ptm_cb.reconnect_time > ZEBRA_PTM_RECONNECT_TIME_MAX) + ptm_cb.reconnect_time = ZEBRA_PTM_RECONNECT_TIME_MAX; + + ptm_cb.t_timer = NULL; + thread_add_timer(zrouter.master, zebra_ptm_connect, NULL, + ptm_cb.reconnect_time, &ptm_cb.t_timer); + } else if (ptm_cb.reconnect_time >= ZEBRA_PTM_RECONNECT_TIME_MAX) { + ptm_cb.reconnect_time = ZEBRA_PTM_RECONNECT_TIME_INITIAL; + } +} + +DEFUN (zebra_ptm_enable, + zebra_ptm_enable_cmd, + "ptm-enable", + "Enable neighbor check with specified topology\n") +{ + struct vrf *vrf; + struct interface *ifp; + struct zebra_if *if_data; + + ptm_cb.ptm_enable = ZEBRA_IF_PTM_ENABLE_ON; + + RB_FOREACH (vrf, vrf_name_head, &vrfs_by_name) + FOR_ALL_INTERFACES (vrf, ifp) + if (!ifp->ptm_enable) { + if_data = (struct zebra_if *)ifp->info; + if (if_data + && (if_data->ptm_enable + == ZEBRA_IF_PTM_ENABLE_UNSPEC)) { + ifp->ptm_enable = + ZEBRA_IF_PTM_ENABLE_ON; + } + /* Assign a default unknown status */ + ifp->ptm_status = ZEBRA_PTM_STATUS_UNKNOWN; + } + + zebra_ptm_connect(NULL); + + return CMD_SUCCESS; +} + +DEFUN (no_zebra_ptm_enable, + no_zebra_ptm_enable_cmd, + "no ptm-enable", + NO_STR + "Enable neighbor check with specified topology\n") +{ + ptm_cb.ptm_enable = ZEBRA_IF_PTM_ENABLE_OFF; + zebra_ptm_reset_status(1); + return CMD_SUCCESS; +} + +DEFUN (zebra_ptm_enable_if, + zebra_ptm_enable_if_cmd, + "ptm-enable", + "Enable neighbor check with specified topology\n") +{ + VTY_DECLVAR_CONTEXT(interface, ifp); + struct zebra_if *if_data; + int old_ptm_enable; + int send_linkdown = 0; + + if_data = ifp->info; + if_data->ptm_enable = ZEBRA_IF_PTM_ENABLE_UNSPEC; + + if (ifp->ifindex == IFINDEX_INTERNAL) { + return CMD_SUCCESS; + } + + old_ptm_enable = ifp->ptm_enable; + ifp->ptm_enable = ptm_cb.ptm_enable; + + if (if_is_no_ptm_operative(ifp)) + 
send_linkdown = 1; + + if (!old_ptm_enable && ptm_cb.ptm_enable) { + if (!if_is_operative(ifp) && send_linkdown) { + if (IS_ZEBRA_DEBUG_EVENT) + zlog_debug("%s: Bringing down interface %s", + __func__, ifp->name); + if_down(ifp); + } + } + + return CMD_SUCCESS; +} + +DEFUN (no_zebra_ptm_enable_if, + no_zebra_ptm_enable_if_cmd, + "no ptm-enable", + NO_STR + "Enable neighbor check with specified topology\n") +{ + VTY_DECLVAR_CONTEXT(interface, ifp); + int send_linkup = 0; + struct zebra_if *if_data; + + if ((ifp->ifindex != IFINDEX_INTERNAL) && (ifp->ptm_enable)) { + if (!if_is_operative(ifp)) + send_linkup = 1; + + ifp->ptm_enable = ZEBRA_IF_PTM_ENABLE_OFF; + if (if_is_no_ptm_operative(ifp) && send_linkup) { + if (IS_ZEBRA_DEBUG_EVENT) + zlog_debug("%s: Bringing up interface %s", + __func__, ifp->name); + if_up(ifp, true); + } + } + + if_data = ifp->info; + if_data->ptm_enable = ZEBRA_IF_PTM_ENABLE_OFF; + + return CMD_SUCCESS; +} + + +void zebra_ptm_write(struct vty *vty) +{ + if (ptm_cb.ptm_enable) + vty_out(vty, "ptm-enable\n"); + + return; +} + +static int zebra_ptm_socket_init(void) +{ + int ret; + int sock; + struct sockaddr_un addr; + + ptm_cb.ptm_sock = -1; + + sock = socket(PF_UNIX, SOCK_STREAM, 0); + if (sock < 0) + return -1; + if (set_nonblocking(sock) < 0) { + if (IS_ZEBRA_DEBUG_EVENT) + zlog_debug("%s: Unable to set socket non blocking[%s]", + __func__, safe_strerror(errno)); + close(sock); + return -1; + } + + /* Make server socket. 
*/ + memset(&addr, 0, sizeof(addr)); + addr.sun_family = AF_UNIX; + memcpy(&addr.sun_path, ZEBRA_PTM_SOCK_NAME, + sizeof(ZEBRA_PTM_SOCK_NAME)); + + ret = connect(sock, (struct sockaddr *)&addr, + sizeof(addr.sun_family) + sizeof(ZEBRA_PTM_SOCK_NAME) + - 1); + if (ret < 0) { + if (IS_ZEBRA_DEBUG_EVENT) + zlog_debug("%s: Unable to connect to socket %s [%s]", + __func__, ZEBRA_PTM_SOCK_NAME, + safe_strerror(errno)); + close(sock); + return -1; + } + ptm_cb.ptm_sock = sock; + return sock; +} + +static void zebra_ptm_install_commands(void) +{ + install_element(CONFIG_NODE, &zebra_ptm_enable_cmd); + install_element(CONFIG_NODE, &no_zebra_ptm_enable_cmd); + install_element(INTERFACE_NODE, &zebra_ptm_enable_if_cmd); + install_element(INTERFACE_NODE, &no_zebra_ptm_enable_if_cmd); +} + +/* BFD session goes down, send message to the protocols. */ +static void if_bfd_session_update(struct interface *ifp, struct prefix *dp, + struct prefix *sp, int status, + vrf_id_t vrf_id) +{ + if (IS_ZEBRA_DEBUG_EVENT) { + char buf[2][INET6_ADDRSTRLEN]; + + if (ifp) { + zlog_debug( + "MESSAGE: ZEBRA_INTERFACE_BFD_DEST_UPDATE %s/%d on %s %s event", + inet_ntop(dp->family, &dp->u.prefix, buf[0], + INET6_ADDRSTRLEN), + dp->prefixlen, ifp->name, + bfd_get_status_str(status)); + } else { + struct vrf *vrf = vrf_lookup_by_id(vrf_id); + + zlog_debug( + "MESSAGE: ZEBRA_INTERFACE_BFD_DEST_UPDATE %s/%d with src %s/%d and vrf %s(%u) %s event", + inet_ntop(dp->family, &dp->u.prefix, buf[0], + INET6_ADDRSTRLEN), + dp->prefixlen, + inet_ntop(sp->family, &sp->u.prefix, buf[1], + INET6_ADDRSTRLEN), + sp->prefixlen, VRF_LOGNAME(vrf), vrf_id, + bfd_get_status_str(status)); + } + } + + zebra_interface_bfd_update(ifp, dp, sp, status, vrf_id); +} + +static int zebra_ptm_handle_bfd_msg(void *arg, void *in_ctxt, + struct interface *ifp) +{ + char bfdst_str[32]; + char dest_str[64]; + char src_str[64]; + char vrf_str[64]; + struct prefix dest_prefix; + struct prefix src_prefix; + vrf_id_t vrf_id; + + 
ptm_lib_find_key_in_msg(in_ctxt, ZEBRA_PTM_BFDSTATUS_STR, bfdst_str); + + if (bfdst_str[0] == '\0') { + return -1; + } + + ptm_lib_find_key_in_msg(in_ctxt, ZEBRA_PTM_BFDDEST_STR, dest_str); + + if (dest_str[0] == '\0') { + zlog_debug("%s: Key %s not found in PTM msg", __func__, + ZEBRA_PTM_BFDDEST_STR); + return -1; + } + + ptm_lib_find_key_in_msg(in_ctxt, ZEBRA_PTM_BFDSRC_STR, src_str); + + if (src_str[0] == '\0') { + zlog_debug("%s: Key %s not found in PTM msg", __func__, + ZEBRA_PTM_BFDSRC_STR); + return -1; + } + + ptm_lib_find_key_in_msg(in_ctxt, ZEBRA_PTM_BFDVRF_STR, vrf_str); + + if (vrf_str[0] == '\0') { + zlog_debug("%s: Key %s not found in PTM msg", __func__, + ZEBRA_PTM_BFDVRF_STR); + return -1; + } + + if (IS_ZEBRA_DEBUG_EVENT) + zlog_debug( + "%s: Recv Port [%s] bfd status [%s] vrf [%s] peer [%s] local [%s]", + __func__, ifp ? ifp->name : "N/A", bfdst_str, vrf_str, + dest_str, src_str); + + if (str2prefix(dest_str, &dest_prefix) == 0) { + flog_err(EC_ZEBRA_PREFIX_PARSE_ERROR, + "%s: Peer addr %s not found", __func__, dest_str); + return -1; + } + + memset(&src_prefix, 0, sizeof(src_prefix)); + if (strcmp(ZEBRA_PTM_INVALID_SRC_IP, src_str)) { + if (str2prefix(src_str, &src_prefix) == 0) { + flog_err(EC_ZEBRA_PREFIX_PARSE_ERROR, + "%s: Local addr %s not found", __func__, + src_str); + return -1; + } + } + + if (!strcmp(ZEBRA_PTM_INVALID_VRF, vrf_str) && ifp) { + vrf_id = ifp->vrf->vrf_id; + } else { + struct vrf *pVrf; + + pVrf = vrf_lookup_by_name(vrf_str); + if (pVrf) + vrf_id = pVrf->vrf_id; + else + vrf_id = VRF_DEFAULT; + } + + if (!strcmp(bfdst_str, ZEBRA_PTM_BFDSTATUS_DOWN_STR)) { + if_bfd_session_update(ifp, &dest_prefix, &src_prefix, + BFD_STATUS_DOWN, vrf_id); + } else { + if_bfd_session_update(ifp, &dest_prefix, &src_prefix, + BFD_STATUS_UP, vrf_id); + } + + return 0; +} + +static int zebra_ptm_handle_cbl_msg(void *arg, void *in_ctxt, + struct interface *ifp, char *cbl_str) +{ + int send_linkup = 0; + + if (IS_ZEBRA_DEBUG_EVENT) + 
zlog_debug("%s: Recv Port [%s] cbl status [%s]", __func__, + ifp->name, cbl_str); + + if (!strcmp(cbl_str, ZEBRA_PTM_PASS_STR) + && (ifp->ptm_status != ZEBRA_PTM_STATUS_UP)) { + + if (ifp->ptm_status == ZEBRA_PTM_STATUS_DOWN) + send_linkup = 1; + ifp->ptm_status = ZEBRA_PTM_STATUS_UP; + if (ifp->ptm_enable && if_is_no_ptm_operative(ifp) + && send_linkup) + if_up(ifp, true); + } else if (!strcmp(cbl_str, ZEBRA_PTM_FAIL_STR) + && (ifp->ptm_status != ZEBRA_PTM_STATUS_DOWN)) { + ifp->ptm_status = ZEBRA_PTM_STATUS_DOWN; + if (ifp->ptm_enable && if_is_no_ptm_operative(ifp)) + if_down(ifp); + } + + return 0; +} + +/* + * zebra_ptm_handle_msg_cb - The purpose of this callback function is to handle + * all the command responses and notifications received from PTM. + * + * Command responses: Upon establishing connection with PTM, Zebra requests + * status of all interfaces using 'get-status' command if global ptm-enable + * knob is enabled. As a response to the get-status command PTM sends status + * of all the interfaces as command responses. All other type of command + * responses with cmd_status key word are dropped. The sole purpose of + * registering this function as callback for the command responses is to + * handle the responses to get-status command. + * + * Notifications: Cable status and BFD session status changes are sent as + * notifications by PTM. So, this function is also the callback function for + * processing all the notifications from the PTM. 
+ * + */ +static int zebra_ptm_handle_msg_cb(void *arg, void *in_ctxt) +{ + struct interface *ifp = NULL; + char port_str[128]; + char cbl_str[32]; + char cmd_status_str[32]; + + ptm_lib_find_key_in_msg(in_ctxt, ZEBRA_PTM_CMD_STATUS_STR, + cmd_status_str); + + /* Drop command response messages */ + if (cmd_status_str[0] != '\0') { + return 0; + } + + ptm_lib_find_key_in_msg(in_ctxt, ZEBRA_PTM_PORT_STR, port_str); + + if (port_str[0] == '\0') { + zlog_debug("%s: Key %s not found in PTM msg", __func__, + ZEBRA_PTM_PORT_STR); + return -1; + } + + if (strcmp(ZEBRA_PTM_INVALID_PORT_NAME, port_str)) { + struct vrf *vrf; + int count = 0; + + RB_FOREACH (vrf, vrf_id_head, &vrfs_by_id) { + ifp = if_lookup_by_name_vrf(port_str, vrf); + if (ifp) { + count++; + if (!vrf_is_backend_netns()) + break; + } + } + + if (!ifp) { + flog_warn(EC_ZEBRA_UNKNOWN_INTERFACE, + "%s: %s not found in interface list", + __func__, port_str); + return -1; + } + if (count > 1) { + flog_warn(EC_ZEBRA_UNKNOWN_INTERFACE, + "%s: multiple interface with name %s", + __func__, port_str); + return -1; + } + } + + ptm_lib_find_key_in_msg(in_ctxt, ZEBRA_PTM_CBL_STR, cbl_str); + + if (cbl_str[0] == '\0') { + return zebra_ptm_handle_bfd_msg(arg, in_ctxt, ifp); + } else { + if (ifp) { + return zebra_ptm_handle_cbl_msg(arg, in_ctxt, ifp, + cbl_str); + } else { + return -1; + } + } +} + +void zebra_ptm_sock_read(struct thread *thread) +{ + int sock; + int rc; + + errno = 0; + sock = THREAD_FD(thread); + + if (sock == -1) + return; + + /* PTM communicates in CSV format */ + do { + rc = ptm_lib_process_msg(ptm_hdl, sock, ptm_cb.in_data, + ZEBRA_PTM_MAX_SOCKBUF, NULL); + } while (rc > 0); + + if (((rc == 0) && !errno) + || (errno && (errno != EWOULDBLOCK) && (errno != EAGAIN))) { + flog_err_sys(EC_LIB_SOCKET, + "%s routing socket error: %s(%d) bytes %d", + __func__, safe_strerror(errno), errno, rc); + + close(ptm_cb.ptm_sock); + ptm_cb.ptm_sock = -1; + zebra_ptm_reset_status(0); + ptm_cb.t_timer = NULL; + 
thread_add_timer(zrouter.master, zebra_ptm_connect, NULL, + ptm_cb.reconnect_time, + &ptm_cb.t_timer); + return; + } + + ptm_cb.t_read = NULL; + thread_add_read(zrouter.master, zebra_ptm_sock_read, NULL, + ptm_cb.ptm_sock, &ptm_cb.t_read); +} + +/* BFD peer/dst register/update */ +void zebra_ptm_bfd_dst_register(ZAPI_HANDLER_ARGS) +{ + struct stream *s; + struct prefix src_p; + struct prefix dst_p; + uint8_t multi_hop; + uint8_t multi_hop_cnt; + uint8_t detect_mul; + unsigned int min_rx_timer; + unsigned int min_tx_timer; + char if_name[INTERFACE_NAMSIZ]; + uint8_t len; + void *out_ctxt; + char buf[INET6_ADDRSTRLEN]; + char tmp_buf[64]; + int data_len = ZEBRA_PTM_SEND_MAX_SOCKBUF; + unsigned int pid; + uint8_t cbit_set; + + if (hdr->command == ZEBRA_BFD_DEST_UPDATE) + client->bfd_peer_upd8_cnt++; + else + client->bfd_peer_add_cnt++; + + if (IS_ZEBRA_DEBUG_EVENT) + zlog_debug("bfd_dst_register msg from client %s: length=%d", + zebra_route_string(client->proto), hdr->length); + + if (ptm_cb.ptm_sock == -1) { + ptm_cb.t_timer = NULL; + thread_add_timer(zrouter.master, zebra_ptm_connect, NULL, + ptm_cb.reconnect_time, &ptm_cb.t_timer); + return; + } + + ptm_lib_init_msg(ptm_hdl, 0, PTMLIB_MSG_TYPE_CMD, NULL, &out_ctxt); + snprintf(tmp_buf, sizeof(tmp_buf), "%s", ZEBRA_PTM_BFD_START_CMD); + ptm_lib_append_msg(ptm_hdl, out_ctxt, ZEBRA_PTM_CMD_STR, tmp_buf); + snprintf(tmp_buf, sizeof(tmp_buf), "%s", + zebra_route_string(client->proto)); + ptm_lib_append_msg(ptm_hdl, out_ctxt, ZEBRA_PTM_BFD_CLIENT_FIELD, + tmp_buf); + + s = msg; + + STREAM_GETL(s, pid); + snprintf(tmp_buf, sizeof(tmp_buf), "%d", pid); + ptm_lib_append_msg(ptm_hdl, out_ctxt, ZEBRA_PTM_BFD_SEQID_FIELD, + tmp_buf); + + STREAM_GETW(s, dst_p.family); + + if (dst_p.family == AF_INET) + dst_p.prefixlen = IPV4_MAX_BYTELEN; + else + dst_p.prefixlen = IPV6_MAX_BYTELEN; + + STREAM_GET(&dst_p.u.prefix, s, dst_p.prefixlen); + if (dst_p.family == AF_INET) { + inet_ntop(AF_INET, &dst_p.u.prefix4, buf, sizeof(buf)); + 
ptm_lib_append_msg(ptm_hdl, out_ctxt, + ZEBRA_PTM_BFD_DST_IP_FIELD, buf); + } else { + inet_ntop(AF_INET6, &dst_p.u.prefix6, buf, sizeof(buf)); + ptm_lib_append_msg(ptm_hdl, out_ctxt, + ZEBRA_PTM_BFD_DST_IP_FIELD, buf); + } + + STREAM_GETL(s, min_rx_timer); + snprintf(tmp_buf, sizeof(tmp_buf), "%d", min_rx_timer); + ptm_lib_append_msg(ptm_hdl, out_ctxt, ZEBRA_PTM_BFD_MIN_RX_FIELD, + tmp_buf); + STREAM_GETL(s, min_tx_timer); + snprintf(tmp_buf, sizeof(tmp_buf), "%d", min_tx_timer); + ptm_lib_append_msg(ptm_hdl, out_ctxt, ZEBRA_PTM_BFD_MIN_TX_FIELD, + tmp_buf); + STREAM_GETC(s, detect_mul); + snprintf(tmp_buf, sizeof(tmp_buf), "%d", detect_mul); + ptm_lib_append_msg(ptm_hdl, out_ctxt, ZEBRA_PTM_BFD_DETECT_MULT_FIELD, + tmp_buf); + + STREAM_GETC(s, multi_hop); + if (multi_hop) { + snprintf(tmp_buf, sizeof(tmp_buf), "%d", 1); + ptm_lib_append_msg(ptm_hdl, out_ctxt, + ZEBRA_PTM_BFD_MULTI_HOP_FIELD, tmp_buf); + STREAM_GETW(s, src_p.family); + + if (src_p.family == AF_INET) + src_p.prefixlen = IPV4_MAX_BYTELEN; + else + src_p.prefixlen = IPV6_MAX_BYTELEN; + + STREAM_GET(&src_p.u.prefix, s, src_p.prefixlen); + if (src_p.family == AF_INET) { + inet_ntop(AF_INET, &src_p.u.prefix4, buf, sizeof(buf)); + ptm_lib_append_msg(ptm_hdl, out_ctxt, + ZEBRA_PTM_BFD_SRC_IP_FIELD, buf); + } else { + inet_ntop(AF_INET6, &src_p.u.prefix6, buf, sizeof(buf)); + ptm_lib_append_msg(ptm_hdl, out_ctxt, + ZEBRA_PTM_BFD_SRC_IP_FIELD, buf); + } + + STREAM_GETC(s, multi_hop_cnt); + snprintf(tmp_buf, sizeof(tmp_buf), "%d", multi_hop_cnt); + ptm_lib_append_msg(ptm_hdl, out_ctxt, + ZEBRA_PTM_BFD_MAX_HOP_CNT_FIELD, tmp_buf); + + if (zvrf_id(zvrf) != VRF_DEFAULT) + ptm_lib_append_msg(ptm_hdl, out_ctxt, + ZEBRA_PTM_BFD_VRF_NAME_FIELD, + zvrf_name(zvrf)); + } else { + if (dst_p.family == AF_INET6) { + STREAM_GETW(s, src_p.family); + + if (src_p.family == AF_INET) + src_p.prefixlen = IPV4_MAX_BYTELEN; + else + src_p.prefixlen = IPV6_MAX_BYTELEN; + + STREAM_GET(&src_p.u.prefix, s, src_p.prefixlen); + if 
(src_p.family == AF_INET) { + inet_ntop(AF_INET, &src_p.u.prefix4, buf, + sizeof(buf)); + ptm_lib_append_msg(ptm_hdl, out_ctxt, + ZEBRA_PTM_BFD_SRC_IP_FIELD, + buf); + } else { + inet_ntop(AF_INET6, &src_p.u.prefix6, buf, + sizeof(buf)); + ptm_lib_append_msg(ptm_hdl, out_ctxt, + ZEBRA_PTM_BFD_SRC_IP_FIELD, + buf); + } + } + STREAM_GETC(s, len); + STREAM_GET(if_name, s, len); + if_name[len] = '\0'; + + ptm_lib_append_msg(ptm_hdl, out_ctxt, + ZEBRA_PTM_BFD_IFNAME_FIELD, if_name); + } + STREAM_GETC(s, cbit_set); + snprintf(tmp_buf, sizeof(tmp_buf), "%d", cbit_set); + ptm_lib_append_msg(ptm_hdl, out_ctxt, + ZEBRA_PTM_BFD_CBIT_FIELD, tmp_buf); + + snprintf(tmp_buf, sizeof(tmp_buf), "%d", 1); + ptm_lib_append_msg(ptm_hdl, out_ctxt, ZEBRA_PTM_BFD_SEND_EVENT, + tmp_buf); + + ptm_lib_complete_msg(ptm_hdl, out_ctxt, ptm_cb.out_data, &data_len); + + if (IS_ZEBRA_DEBUG_SEND) + zlog_debug("%s: Sent message (%d) %s", __func__, data_len, + ptm_cb.out_data); + zebra_ptm_send_message(ptm_cb.out_data, data_len); + + return; + +stream_failure: + ptm_lib_cleanup_msg(ptm_hdl, out_ctxt); +} + +/* BFD peer/dst deregister */ +void zebra_ptm_bfd_dst_deregister(ZAPI_HANDLER_ARGS) +{ + struct stream *s; + struct prefix src_p; + struct prefix dst_p; + uint8_t multi_hop; + char if_name[INTERFACE_NAMSIZ]; + uint8_t len; + char buf[INET6_ADDRSTRLEN]; + char tmp_buf[64]; + int data_len = ZEBRA_PTM_SEND_MAX_SOCKBUF; + void *out_ctxt; + unsigned int pid; + + client->bfd_peer_del_cnt++; + + if (IS_ZEBRA_DEBUG_EVENT) + zlog_debug("bfd_dst_deregister msg from client %s: length=%d", + zebra_route_string(client->proto), hdr->length); + + if (ptm_cb.ptm_sock == -1) { + ptm_cb.t_timer = NULL; + thread_add_timer(zrouter.master, zebra_ptm_connect, NULL, + ptm_cb.reconnect_time, &ptm_cb.t_timer); + return; + } + + ptm_lib_init_msg(ptm_hdl, 0, PTMLIB_MSG_TYPE_CMD, NULL, &out_ctxt); + + snprintf(tmp_buf, sizeof(tmp_buf), "%s", ZEBRA_PTM_BFD_STOP_CMD); + ptm_lib_append_msg(ptm_hdl, out_ctxt, ZEBRA_PTM_CMD_STR, 
tmp_buf); + + snprintf(tmp_buf, sizeof(tmp_buf), "%s", + zebra_route_string(client->proto)); + ptm_lib_append_msg(ptm_hdl, out_ctxt, ZEBRA_PTM_BFD_CLIENT_FIELD, + tmp_buf); + + s = msg; + + STREAM_GETL(s, pid); + snprintf(tmp_buf, sizeof(tmp_buf), "%d", pid); + ptm_lib_append_msg(ptm_hdl, out_ctxt, ZEBRA_PTM_BFD_SEQID_FIELD, + tmp_buf); + + STREAM_GETW(s, dst_p.family); + + if (dst_p.family == AF_INET) + dst_p.prefixlen = IPV4_MAX_BYTELEN; + else + dst_p.prefixlen = IPV6_MAX_BYTELEN; + + STREAM_GET(&dst_p.u.prefix, s, dst_p.prefixlen); + if (dst_p.family == AF_INET) + inet_ntop(AF_INET, &dst_p.u.prefix4, buf, sizeof(buf)); + else + inet_ntop(AF_INET6, &dst_p.u.prefix6, buf, sizeof(buf)); + ptm_lib_append_msg(ptm_hdl, out_ctxt, ZEBRA_PTM_BFD_DST_IP_FIELD, buf); + + + STREAM_GETC(s, multi_hop); + if (multi_hop) { + snprintf(tmp_buf, sizeof(tmp_buf), "%d", 1); + ptm_lib_append_msg(ptm_hdl, out_ctxt, + ZEBRA_PTM_BFD_MULTI_HOP_FIELD, tmp_buf); + + STREAM_GETW(s, src_p.family); + + if (src_p.family == AF_INET) + src_p.prefixlen = IPV4_MAX_BYTELEN; + else + src_p.prefixlen = IPV6_MAX_BYTELEN; + + STREAM_GET(&src_p.u.prefix, s, src_p.prefixlen); + if (src_p.family == AF_INET) + inet_ntop(AF_INET, &src_p.u.prefix4, buf, sizeof(buf)); + else + inet_ntop(AF_INET6, &src_p.u.prefix6, buf, sizeof(buf)); + ptm_lib_append_msg(ptm_hdl, out_ctxt, + ZEBRA_PTM_BFD_SRC_IP_FIELD, buf); + + if (zvrf_id(zvrf) != VRF_DEFAULT) + ptm_lib_append_msg(ptm_hdl, out_ctxt, + ZEBRA_PTM_BFD_VRF_NAME_FIELD, + zvrf_name(zvrf)); + } else { + if (dst_p.family == AF_INET6) { + STREAM_GETW(s, src_p.family); + + if (src_p.family == AF_INET) + src_p.prefixlen = IPV4_MAX_BYTELEN; + else + src_p.prefixlen = IPV6_MAX_BYTELEN; + + STREAM_GET(&src_p.u.prefix, s, src_p.prefixlen); + if (src_p.family == AF_INET) { + inet_ntop(AF_INET, &src_p.u.prefix4, buf, + sizeof(buf)); + ptm_lib_append_msg(ptm_hdl, out_ctxt, + ZEBRA_PTM_BFD_SRC_IP_FIELD, + buf); + } else { + inet_ntop(AF_INET6, &src_p.u.prefix6, buf, + 
sizeof(buf)); + ptm_lib_append_msg(ptm_hdl, out_ctxt, + ZEBRA_PTM_BFD_SRC_IP_FIELD, + buf); + } + } + + STREAM_GETC(s, len); + STREAM_GET(if_name, s, len); + if_name[len] = '\0'; + + ptm_lib_append_msg(ptm_hdl, out_ctxt, + ZEBRA_PTM_BFD_IFNAME_FIELD, if_name); + } + + ptm_lib_complete_msg(ptm_hdl, out_ctxt, ptm_cb.out_data, &data_len); + if (IS_ZEBRA_DEBUG_SEND) + zlog_debug("%s: Sent message (%d) %s", __func__, data_len, + ptm_cb.out_data); + + zebra_ptm_send_message(ptm_cb.out_data, data_len); + + return; + +stream_failure: + ptm_lib_cleanup_msg(ptm_hdl, out_ctxt); +} + +/* BFD client register */ +void zebra_ptm_bfd_client_register(ZAPI_HANDLER_ARGS) +{ + struct stream *s; + unsigned int pid; + void *out_ctxt = NULL; + char tmp_buf[64]; + int data_len = ZEBRA_PTM_SEND_MAX_SOCKBUF; + + client->bfd_client_reg_cnt++; + + if (IS_ZEBRA_DEBUG_EVENT) + zlog_debug("bfd_client_register msg from client %s: length=%d", + zebra_route_string(client->proto), hdr->length); + + s = msg; + STREAM_GETL(s, pid); + + if (ptm_cb.ptm_sock == -1) { + ptm_cb.t_timer = NULL; + thread_add_timer(zrouter.master, zebra_ptm_connect, NULL, + ptm_cb.reconnect_time, &ptm_cb.t_timer); + return; + } + + ptm_lib_init_msg(ptm_hdl, 0, PTMLIB_MSG_TYPE_CMD, NULL, &out_ctxt); + + snprintf(tmp_buf, sizeof(tmp_buf), "%s", ZEBRA_PTM_BFD_CLIENT_REG_CMD); + ptm_lib_append_msg(ptm_hdl, out_ctxt, ZEBRA_PTM_CMD_STR, tmp_buf); + + snprintf(tmp_buf, sizeof(tmp_buf), "%s", + zebra_route_string(client->proto)); + ptm_lib_append_msg(ptm_hdl, out_ctxt, ZEBRA_PTM_BFD_CLIENT_FIELD, + tmp_buf); + + snprintf(tmp_buf, sizeof(tmp_buf), "%d", pid); + ptm_lib_append_msg(ptm_hdl, out_ctxt, ZEBRA_PTM_BFD_SEQID_FIELD, + tmp_buf); + + ptm_lib_complete_msg(ptm_hdl, out_ctxt, ptm_cb.out_data, &data_len); + + if (IS_ZEBRA_DEBUG_SEND) + zlog_debug("%s: Sent message (%d) %s", __func__, data_len, + ptm_cb.out_data); + zebra_ptm_send_message(ptm_cb.out_data, data_len); + + SET_FLAG(ptm_cb.client_flags[client->proto], + 
ZEBRA_PTM_BFD_CLIENT_FLAG_REG); + + return; + +stream_failure: + /* + * IF we ever add more STREAM_GETXXX functions after the out_ctxt + * is allocated then we need to add this code back in + * + * if (out_ctxt) + * ptm_lib_cleanup_msg(ptm_hdl, out_ctxt); + */ + return; +} + +/* BFD client deregister */ +int zebra_ptm_bfd_client_deregister(struct zserv *client) +{ + uint8_t proto = client->proto; + void *out_ctxt; + char tmp_buf[64]; + int data_len = ZEBRA_PTM_SEND_MAX_SOCKBUF; + + if (!IS_BFD_ENABLED_PROTOCOL(proto)) + return 0; + + if (IS_ZEBRA_DEBUG_EVENT) + zlog_debug("bfd_client_deregister msg for client %s", + zebra_route_string(proto)); + + if (ptm_cb.ptm_sock == -1) { + ptm_cb.t_timer = NULL; + thread_add_timer(zrouter.master, zebra_ptm_connect, NULL, + ptm_cb.reconnect_time, &ptm_cb.t_timer); + return 0; + } + + ptm_lib_init_msg(ptm_hdl, 0, PTMLIB_MSG_TYPE_CMD, NULL, &out_ctxt); + + snprintf(tmp_buf, sizeof(tmp_buf), "%s", + ZEBRA_PTM_BFD_CLIENT_DEREG_CMD); + ptm_lib_append_msg(ptm_hdl, out_ctxt, ZEBRA_PTM_CMD_STR, tmp_buf); + + snprintf(tmp_buf, sizeof(tmp_buf), "%s", zebra_route_string(proto)); + ptm_lib_append_msg(ptm_hdl, out_ctxt, ZEBRA_PTM_BFD_CLIENT_FIELD, + tmp_buf); + + ptm_lib_complete_msg(ptm_hdl, out_ctxt, ptm_cb.out_data, &data_len); + + if (IS_ZEBRA_DEBUG_SEND) + zlog_debug("%s: Sent message (%d) %s", __func__, data_len, + ptm_cb.out_data); + + zebra_ptm_send_message(ptm_cb.out_data, data_len); + UNSET_FLAG(ptm_cb.client_flags[proto], ZEBRA_PTM_BFD_CLIENT_FLAG_REG); + + return 0; +} + +int zebra_ptm_get_enable_state(void) +{ + return ptm_cb.ptm_enable; +} + +/* + * zebra_ptm_get_status_str - Convert status to a display string. 
+ */ +static const char *zebra_ptm_get_status_str(int status) +{ + switch (status) { + case ZEBRA_PTM_STATUS_DOWN: + return "fail"; + case ZEBRA_PTM_STATUS_UP: + return "pass"; + case ZEBRA_PTM_STATUS_UNKNOWN: + default: + return "n/a"; + } +} + +void zebra_ptm_show_status(struct vty *vty, json_object *json, + struct interface *ifp) +{ + const char *status; + + if (ifp->ptm_enable) + status = zebra_ptm_get_status_str(ifp->ptm_status); + else + status = "disabled"; + + if (json) + json_object_string_add(json, "ptmStatus", status); + else + vty_out(vty, " PTM status: %s\n", status); +} + +void zebra_ptm_send_status_req(void) +{ + void *out_ctxt; + int len = ZEBRA_PTM_SEND_MAX_SOCKBUF; + + if (ptm_cb.ptm_enable) { + ptm_lib_init_msg(ptm_hdl, 0, PTMLIB_MSG_TYPE_CMD, NULL, + &out_ctxt); + ptm_lib_append_msg(ptm_hdl, out_ctxt, ZEBRA_PTM_CMD_STR, + ZEBRA_PTM_GET_STATUS_CMD); + ptm_lib_complete_msg(ptm_hdl, out_ctxt, ptm_cb.out_data, &len); + + zebra_ptm_send_message(ptm_cb.out_data, len); + } +} + +void zebra_ptm_reset_status(int ptm_disable) +{ + struct vrf *vrf; + struct interface *ifp; + int send_linkup; + + RB_FOREACH (vrf, vrf_id_head, &vrfs_by_id) + FOR_ALL_INTERFACES (vrf, ifp) { + send_linkup = 0; + if (ifp->ptm_enable) { + if (!if_is_operative(ifp)) + send_linkup = 1; + + if (ptm_disable) + ifp->ptm_enable = + ZEBRA_IF_PTM_ENABLE_OFF; + ifp->ptm_status = ZEBRA_PTM_STATUS_UNKNOWN; + + if (if_is_operative(ifp) && send_linkup) { + if (IS_ZEBRA_DEBUG_EVENT) + zlog_debug( + "%s: Bringing up interface %s", + __func__, ifp->name); + if_up(ifp, true); + } + } + } +} + +void zebra_ptm_if_init(struct zebra_if *zebra_ifp) +{ + zebra_ifp->ptm_enable = ZEBRA_IF_PTM_ENABLE_UNSPEC; +} + +void zebra_ptm_if_set_ptm_state(struct interface *ifp, + struct zebra_if *zebra_ifp) +{ + if (zebra_ifp && zebra_ifp->ptm_enable != ZEBRA_IF_PTM_ENABLE_UNSPEC) + ifp->ptm_enable = zebra_ifp->ptm_enable; +} + +void zebra_ptm_if_write(struct vty *vty, struct zebra_if *zebra_ifp) +{ + if 
(zebra_ifp->ptm_enable == ZEBRA_IF_PTM_ENABLE_OFF) + vty_out(vty, " no ptm-enable\n"); +} + +#else /* HAVE_BFDD */ + +/* + * Data structures. + */ +struct ptm_process { + struct zserv *pp_zs; + pid_t pp_pid; + + TAILQ_ENTRY(ptm_process) pp_entry; +}; +TAILQ_HEAD(ppqueue, ptm_process) ppqueue; + +DEFINE_MTYPE_STATIC(ZEBRA, ZEBRA_PTM_BFD_PROCESS, + "PTM BFD process registration table."); + +/* + * Prototypes. + */ +static struct ptm_process *pp_new(pid_t pid, struct zserv *zs); +static struct ptm_process *pp_lookup_byzs(struct zserv *zs); +static void pp_free(struct ptm_process *pp); +static void pp_free_all(void); + +static void zebra_ptm_send_bfdd(struct stream *msg); +static void zebra_ptm_send_clients(struct stream *msg); +static int _zebra_ptm_bfd_client_deregister(struct zserv *zs); +static void _zebra_ptm_reroute(struct zserv *zs, struct zebra_vrf *zvrf, + struct stream *msg, uint32_t command); + + +/* + * Process PID registration. + */ +static struct ptm_process *pp_new(pid_t pid, struct zserv *zs) +{ + struct ptm_process *pp; + +#ifdef PTM_DEBUG + /* Sanity check: more than one client can't have the same PID. */ + TAILQ_FOREACH(pp, &ppqueue, pp_entry) { + if (pp->pp_pid == pid && pp->pp_zs != zs) + zlog_err("%s:%d pid and client pointer doesn't match", + __FILE__, __LINE__); + } +#endif /* PTM_DEBUG */ + + /* Lookup for duplicates. */ + pp = pp_lookup_byzs(zs); + if (pp != NULL) + return pp; + + /* Allocate and register new process. 
*/ + pp = XCALLOC(MTYPE_ZEBRA_PTM_BFD_PROCESS, sizeof(*pp)); + + pp->pp_pid = pid; + pp->pp_zs = zs; + TAILQ_INSERT_HEAD(&ppqueue, pp, pp_entry); + + return pp; +} + +static struct ptm_process *pp_lookup_byzs(struct zserv *zs) +{ + struct ptm_process *pp; + + TAILQ_FOREACH(pp, &ppqueue, pp_entry) { + if (pp->pp_zs != zs) + continue; + + break; + } + + return pp; +} + +static void pp_free(struct ptm_process *pp) +{ + if (pp == NULL) + return; + + TAILQ_REMOVE(&ppqueue, pp, pp_entry); + XFREE(MTYPE_ZEBRA_PTM_BFD_PROCESS, pp); +} + +static void pp_free_all(void) +{ + struct ptm_process *pp; + + while (!TAILQ_EMPTY(&ppqueue)) { + pp = TAILQ_FIRST(&ppqueue); + pp_free(pp); + } +} + + +/* + * Use the FRR's internal daemon implementation. + */ +static void zebra_ptm_send_bfdd(struct stream *msg) +{ + struct listnode *node; + struct zserv *client; + struct stream *msgc; + + /* Create copy for replication. */ + msgc = stream_dup(msg); + + /* Send message to all running BFDd daemons. */ + for (ALL_LIST_ELEMENTS_RO(zrouter.client_list, node, client)) { + if (client->proto != ZEBRA_ROUTE_BFD) + continue; + + zserv_send_message(client, msg); + + /* Allocate more messages. */ + msg = stream_dup(msgc); + } + + stream_free(msgc); + stream_free(msg); +} + +static void zebra_ptm_send_clients(struct stream *msg) +{ + struct listnode *node; + struct zserv *client; + struct stream *msgc; + + /* Create copy for replication. */ + msgc = stream_dup(msg); + + /* Send message to all running client daemons. */ + for (ALL_LIST_ELEMENTS_RO(zrouter.client_list, node, client)) { + if (!IS_BFD_ENABLED_PROTOCOL(client->proto)) + continue; + + zserv_send_message(client, msg); + + /* Allocate more messages. 
*/ + msg = stream_dup(msgc); + } + + stream_free(msgc); + stream_free(msg); +} + +static int _zebra_ptm_bfd_client_deregister(struct zserv *zs) +{ + struct stream *msg; + struct ptm_process *pp; + + if (!IS_BFD_ENABLED_PROTOCOL(zs->proto)) + return 0; + + /* Find daemon pid by zebra connection pointer. */ + pp = pp_lookup_byzs(zs); + if (pp == NULL) { + zlog_err("%s:%d failed to find process pid registration", + __FILE__, __LINE__); + return -1; + } + + /* Generate, send message and free() daemon related data. */ + msg = stream_new(ZEBRA_MAX_PACKET_SIZ); + if (msg == NULL) { + zlog_debug("%s: not enough memory", __func__); + return 0; + } + + /* + * The message type will be ZEBRA_BFD_DEST_REPLAY so we can use only + * one callback at the `bfdd` side, however the real command + * number will be included right after the zebra header. + */ + zclient_create_header(msg, ZEBRA_BFD_DEST_REPLAY, 0); + stream_putl(msg, ZEBRA_BFD_CLIENT_DEREGISTER); + + /* Put process PID. */ + stream_putl(msg, pp->pp_pid); + + /* Update the data pointers. */ + stream_putw_at(msg, 0, stream_get_endp(msg)); + + zebra_ptm_send_bfdd(msg); + + pp_free(pp); + + return 0; +} + +void zebra_ptm_init(void) +{ + /* Initialize the ptm process information list. */ + TAILQ_INIT(&ppqueue); + + /* + * Send deregistration messages to BFD daemon when some other + * daemon closes. This will help avoid sending daemons + * unnecessary notification messages. + */ + hook_register(zserv_client_close, _zebra_ptm_bfd_client_deregister); +} + +void zebra_ptm_finish(void) +{ + /* Remove the client disconnect hook and free all memory. */ + hook_unregister(zserv_client_close, _zebra_ptm_bfd_client_deregister); + pp_free_all(); +} + + +/* + * Message handling. 
+ */ +static void _zebra_ptm_reroute(struct zserv *zs, struct zebra_vrf *zvrf, + struct stream *msg, uint32_t command) +{ + struct stream *msgc; + char buf[ZEBRA_MAX_PACKET_SIZ]; + pid_t ppid; + + /* Create BFD header */ + msgc = stream_new(ZEBRA_MAX_PACKET_SIZ); + zclient_create_header(msgc, ZEBRA_BFD_DEST_REPLAY, zvrf->vrf->vrf_id); + stream_putl(msgc, command); + + if (STREAM_READABLE(msg) > STREAM_WRITEABLE(msgc)) { + zlog_warn("Cannot fit extended BFD header plus original message contents into ZAPI packet; dropping message"); + goto stream_failure; + } + + /* Copy original message, excluding header, into new message */ + stream_get_from(buf, msg, stream_get_getp(msg), STREAM_READABLE(msg)); + stream_put(msgc, buf, STREAM_READABLE(msg)); + + /* Update length field */ + stream_putw_at(msgc, 0, STREAM_READABLE(msgc)); + + zebra_ptm_send_bfdd(msgc); + msgc = NULL; + + /* Registrate process PID for shutdown hook. */ + STREAM_GETL(msg, ppid); + pp_new(ppid, zs); + + return; + +stream_failure: + if (msgc) + stream_free(msgc); + zlog_err("%s:%d failed to registrate client pid", __FILE__, __LINE__); +} + +void zebra_ptm_bfd_dst_register(ZAPI_HANDLER_ARGS) +{ + if (IS_ZEBRA_DEBUG_EVENT) + zlog_debug("bfd_dst_register msg from client %s: length=%d", + zebra_route_string(client->proto), hdr->length); + + _zebra_ptm_reroute(client, zvrf, msg, ZEBRA_BFD_DEST_REGISTER); +} + +void zebra_ptm_bfd_dst_deregister(ZAPI_HANDLER_ARGS) +{ + if (IS_ZEBRA_DEBUG_EVENT) + zlog_debug("bfd_dst_deregister msg from client %s: length=%d", + zebra_route_string(client->proto), hdr->length); + + _zebra_ptm_reroute(client, zvrf, msg, ZEBRA_BFD_DEST_DEREGISTER); +} + +void zebra_ptm_bfd_client_register(ZAPI_HANDLER_ARGS) +{ + if (IS_ZEBRA_DEBUG_EVENT) + zlog_debug("bfd_client_register msg from client %s: length=%d", + zebra_route_string(client->proto), hdr->length); + + _zebra_ptm_reroute(client, zvrf, msg, ZEBRA_BFD_CLIENT_REGISTER); +} + +void zebra_ptm_bfd_dst_replay(ZAPI_HANDLER_ARGS) +{ + 
struct stream *msgc; + size_t zmsglen, zhdrlen; + uint32_t cmd; + + /* + * NOTE: + * Replay messages with HAVE_BFDD are meant to be replayed to + * the client daemons. These messages are composed and + * originated from the `bfdd` daemon. + */ + if (IS_ZEBRA_DEBUG_EVENT) + zlog_debug("bfd_dst_update msg from client %s: length=%d", + zebra_route_string(client->proto), hdr->length); + + /* + * Client messages must be re-routed, otherwise do the `bfdd` + * special treatment. + */ + if (client->proto != ZEBRA_ROUTE_BFD) { + _zebra_ptm_reroute(client, zvrf, msg, ZEBRA_BFD_DEST_REPLAY); + return; + } + + /* Figure out if this is an DEST_UPDATE or DEST_REPLAY. */ + if (stream_getl2(msg, &cmd) == false) { + zlog_err("%s: expected at least 4 bytes (command)", __func__); + return; + } + + /* + * Don't modify message in the zebra API. In order to do that we + * need to allocate a new message stream and copy the message + * provided by zebra. + */ + msgc = stream_new(ZEBRA_MAX_PACKET_SIZ); + if (msgc == NULL) { + zlog_debug("%s: not enough memory", __func__); + return; + } + + /* Calculate our header size plus the message contents. */ + if (cmd != ZEBRA_BFD_DEST_REPLAY) { + zhdrlen = ZEBRA_HEADER_SIZE; + zmsglen = msg->endp - msg->getp; + memcpy(msgc->data + zhdrlen, msg->data + msg->getp, zmsglen); + + zclient_create_header(msgc, cmd, zvrf_id(zvrf)); + + msgc->getp = 0; + msgc->endp = zhdrlen + zmsglen; + } else + zclient_create_header(msgc, cmd, zvrf_id(zvrf)); + + /* Update the data pointers. */ + stream_putw_at(msgc, 0, stream_get_endp(msgc)); + + zebra_ptm_send_clients(msgc); +} + +/* + * Unused functions. 
+ */ +void zebra_ptm_if_init(struct zebra_if *zifp __attribute__((__unused__))) +{ + /* NOTHING */ +} + +int zebra_ptm_get_enable_state(void) +{ + return 0; +} + +void zebra_ptm_show_status(struct vty *vty __attribute__((__unused__)), + json_object *json __attribute__((__unused__)), + struct interface *ifp __attribute__((__unused__))) +{ + /* NOTHING */ +} + +void zebra_ptm_write(struct vty *vty __attribute__((__unused__))) +{ + /* NOTHING */ +} + +void zebra_ptm_if_write(struct vty *vty __attribute__((__unused__)), + struct zebra_if *zifp __attribute__((__unused__))) +{ + /* NOTHING */ +} +void zebra_ptm_if_set_ptm_state(struct interface *i __attribute__((__unused__)), + struct zebra_if *zi __attribute__((__unused__))) +{ + /* NOTHING */ +} + +#endif /* HAVE_BFDD */ diff --git a/zebra/zebra_ptm.h b/zebra/zebra_ptm.h new file mode 100644 index 0000000..f8e843c --- /dev/null +++ b/zebra/zebra_ptm.h @@ -0,0 +1,100 @@ +/* + * Definitions for prescriptive topology module (PTM). + * Copyright (C) 1998, 99, 2000 Kunihiro Ishiguro, Toshiaki Takada + * + * This file is part of GNU Zebra. + * + * GNU Zebra is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License as published by the + * Free Software Foundation; either version 2, or (at your option) any + * later version. + * + * GNU Zebra is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. 
+ * + * You should have received a copy of the GNU General Public License along + * with this program; see the file COPYING; if not, write to the Free Software + * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA + */ + +#ifndef _ZEBRA_PTM_H +#define _ZEBRA_PTM_H + +extern const char ZEBRA_PTM_SOCK_NAME[]; +#define ZEBRA_PTM_MAX_SOCKBUF 3200 /* 25B *128 ports */ +#define ZEBRA_PTM_SEND_MAX_SOCKBUF 512 + +#define ZEBRA_PTM_BFD_CLIENT_FLAG_REG (1 << 1) /* client registered with BFD */ + +#include "zebra/zserv.h" +#include "zebra/interface.h" + +#ifdef __cplusplus +extern "C" { +#endif + +/* Zebra ptm context block */ +struct zebra_ptm_cb { + int ptm_sock; /* ptm file descriptor. */ + + struct buffer *wb; /* Buffer of data waiting to be written to ptm. */ + + struct thread *t_read; /* Thread for read */ + struct thread *t_write; /* Thread for write */ + struct thread *t_timer; /* Thread for timer */ + + char *out_data; + char *in_data; + int reconnect_time; + + int ptm_enable; + int pid; + uint8_t client_flags[ZEBRA_ROUTE_MAX]; +}; + +#define ZEBRA_PTM_STATUS_DOWN 0 +#define ZEBRA_PTM_STATUS_UP 1 +#define ZEBRA_PTM_STATUS_UNKNOWN 2 + +/* For interface ptm-enable configuration. 
*/ +#define ZEBRA_IF_PTM_ENABLE_OFF 0 +#define ZEBRA_IF_PTM_ENABLE_ON 1 +#define ZEBRA_IF_PTM_ENABLE_UNSPEC 2 + +#define IS_BFD_ENABLED_PROTOCOL(protocol) ( \ + (protocol) == ZEBRA_ROUTE_BGP || \ + (protocol) == ZEBRA_ROUTE_OSPF || \ + (protocol) == ZEBRA_ROUTE_OSPF6 || \ + (protocol) == ZEBRA_ROUTE_ISIS || \ + (protocol) == ZEBRA_ROUTE_PIM || \ + (protocol) == ZEBRA_ROUTE_OPENFABRIC \ +) + +void zebra_ptm_init(void); +void zebra_ptm_finish(void); +void zebra_ptm_connect(struct thread *t); +void zebra_ptm_write(struct vty *vty); +int zebra_ptm_get_enable_state(void); + +/* ZAPI message handlers */ +void zebra_ptm_bfd_dst_register(ZAPI_HANDLER_ARGS); +void zebra_ptm_bfd_dst_deregister(ZAPI_HANDLER_ARGS); +void zebra_ptm_bfd_client_register(ZAPI_HANDLER_ARGS); +#if HAVE_BFDD > 0 +void zebra_ptm_bfd_dst_replay(ZAPI_HANDLER_ARGS); +#endif /* HAVE_BFDD */ + +void zebra_ptm_show_status(struct vty *vty, json_object *json, + struct interface *ifp); +void zebra_ptm_if_init(struct zebra_if *zebra_ifp); +void zebra_ptm_if_set_ptm_state(struct interface *ifp, + struct zebra_if *zebra_ifp); +void zebra_ptm_if_write(struct vty *vty, struct zebra_if *zebra_ifp); + +#ifdef __cplusplus +} +#endif + +#endif diff --git a/zebra/zebra_ptm_redistribute.c b/zebra/zebra_ptm_redistribute.c new file mode 100644 index 0000000..537d69f --- /dev/null +++ b/zebra/zebra_ptm_redistribute.c @@ -0,0 +1,114 @@ +/** + * @copyright Copyright (C) 2015 Cumulus Networks, Inc. + * + * This file is part of GNU Zebra. + * + * GNU Zebra is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License as published by the + * Free Software Foundation; either version 2, or (at your option) any + * later version. + * + * GNU Zebra is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. 
+ * + * You should have received a copy of the GNU General Public License along + * with this program; see the file COPYING; if not, write to the Free Software + * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA + */ + +#include <zebra.h> +#include "prefix.h" +#include "vty.h" +#include "stream.h" +#include "zebra/zebra_router.h" +#include "zebra/zapi_msg.h" +#include "zebra/zebra_ptm.h" +#include "zebra/zebra_ptm_redistribute.h" + +static int zsend_interface_bfd_update(int cmd, struct zserv *client, + struct interface *ifp, struct prefix *dp, + struct prefix *sp, int status, + vrf_id_t vrf_id) +{ + int blen; + struct stream *s; + + s = stream_new(ZEBRA_MAX_PACKET_SIZ); + + zclient_create_header(s, cmd, vrf_id); + if (ifp) + stream_putl(s, ifp->ifindex); + else + stream_putl(s, 0); + + /* BFD destination prefix information. */ + stream_putc(s, dp->family); + blen = prefix_blen(dp); + stream_put(s, &dp->u.prefix, blen); + stream_putc(s, dp->prefixlen); + + /* BFD status */ + stream_putl(s, status); + + /* BFD source prefix information. */ + stream_putc(s, sp->family); + blen = prefix_blen(sp); + stream_put(s, &sp->u.prefix, blen); + stream_putc(s, sp->prefixlen); + + /* c-bit bullshit */ + stream_putc(s, 0); + + /* Write packet size. */ + stream_putw_at(s, 0, stream_get_endp(s)); + + client->if_bfd_cnt++; + return zserv_send_message(client, s); +} + +void zebra_interface_bfd_update(struct interface *ifp, struct prefix *dp, + struct prefix *sp, int status, vrf_id_t vrf_id) +{ + struct listnode *node, *nnode; + struct zserv *client; + + for (ALL_LIST_ELEMENTS(zrouter.client_list, node, nnode, client)) { + if (!IS_BFD_ENABLED_PROTOCOL(client->proto)) + continue; + + /* Notify to the protocol daemons. 
*/ + zsend_interface_bfd_update(ZEBRA_INTERFACE_BFD_DEST_UPDATE, + client, ifp, dp, sp, status, vrf_id); + } +} + +static int zsend_bfd_peer_replay(int cmd, struct zserv *client) +{ + struct stream *s; + + s = stream_new(ZEBRA_MAX_PACKET_SIZ); + + zclient_create_header(s, cmd, VRF_DEFAULT); + + /* Write packet size. */ + stream_putw_at(s, 0, stream_get_endp(s)); + + client->bfd_peer_replay_cnt++; + return zserv_send_message(client, s); +} + +void zebra_bfd_peer_replay_req(void) +{ + struct listnode *node, *nnode; + struct zserv *client; + + for (ALL_LIST_ELEMENTS(zrouter.client_list, node, nnode, client)) { + if (!IS_BFD_ENABLED_PROTOCOL(client->proto)) + continue; + + /* Notify to the protocol daemons. */ + zsend_bfd_peer_replay(ZEBRA_BFD_DEST_REPLAY, client); + } +} diff --git a/zebra/zebra_ptm_redistribute.h b/zebra/zebra_ptm_redistribute.h new file mode 100644 index 0000000..4daf405 --- /dev/null +++ b/zebra/zebra_ptm_redistribute.h @@ -0,0 +1,37 @@ +/** + * @copyright Copyright (C) 2015 Cumulus Networks, Inc. + * + * This file is part of GNU Zebra. + * + * GNU Zebra is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License as published by the + * Free Software Foundation; either version 2, or (at your option) any + * later version. + * + * GNU Zebra is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. 
+ * + * You should have received a copy of the GNU General Public License along + * with this program; see the file COPYING; if not, write to the Free Software + * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA + */ + + +#ifndef _ZEBRA_PTM_REDISTRIBUTE_H +#define _ZEBRA_PTM_REDISTRIBUTE_H + +#ifdef __cplusplus +extern "C" { +#endif + +extern void zebra_interface_bfd_update(struct interface *, struct prefix *, + struct prefix *, int, vrf_id_t); +extern void zebra_bfd_peer_replay_req(void); + +#ifdef __cplusplus +} +#endif + +#endif /* _ZEBRA_PTM_REDISTRIBUTE_H */ diff --git a/zebra/zebra_pw.c b/zebra/zebra_pw.c new file mode 100644 index 0000000..be089fc --- /dev/null +++ b/zebra/zebra_pw.c @@ -0,0 +1,866 @@ +/* Zebra PW code + * Copyright (C) 2016 Volta Networks, Inc. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. 
+ * + * You should have received a copy of the GNU General Public License + * along with this program; see the file COPYING; if not, write to the + * Free Software Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, + * MA 02110-1301 USA + */ + +#include <zebra.h> + +#include "log.h" +#include "memory.h" +#include "thread.h" +#include "command.h" +#include "vrf.h" +#include "lib/json.h" +#include "printfrr.h" + +#include "zebra/debug.h" +#include "zebra/rib.h" +#include "zebra/zebra_router.h" +#include "zebra/zapi_msg.h" +#include "zebra/zebra_rnh.h" +#include "zebra/zebra_vrf.h" +#include "zebra/zebra_pw.h" + +DEFINE_MTYPE_STATIC(LIB, PW, "Pseudowire"); + +DEFINE_QOBJ_TYPE(zebra_pw); + +DEFINE_HOOK(pw_install, (struct zebra_pw * pw), (pw)); +DEFINE_HOOK(pw_uninstall, (struct zebra_pw * pw), (pw)); + +#define MPLS_NO_LABEL MPLS_INVALID_LABEL + +static int zebra_pw_enabled(struct zebra_pw *); +static void zebra_pw_install(struct zebra_pw *); +static void zebra_pw_uninstall(struct zebra_pw *); +static void zebra_pw_install_retry(struct thread *thread); +static int zebra_pw_check_reachability(const struct zebra_pw *); +static void zebra_pw_update_status(struct zebra_pw *, int); + +static inline int zebra_pw_compare(const struct zebra_pw *a, + const struct zebra_pw *b) +{ + return (strcmp(a->ifname, b->ifname)); +} + +RB_GENERATE(zebra_pw_head, zebra_pw, pw_entry, zebra_pw_compare) +RB_GENERATE(zebra_static_pw_head, zebra_pw, static_pw_entry, zebra_pw_compare) + +struct zebra_pw *zebra_pw_add(struct zebra_vrf *zvrf, const char *ifname, + uint8_t protocol, struct zserv *client) +{ + struct zebra_pw *pw; + + if (IS_ZEBRA_DEBUG_PW) + zlog_debug("%u: adding pseudowire %s protocol %s", + zvrf_id(zvrf), ifname, zebra_route_string(protocol)); + + pw = XCALLOC(MTYPE_PW, sizeof(*pw)); + strlcpy(pw->ifname, ifname, sizeof(pw->ifname)); + pw->protocol = protocol; + pw->vrf_id = zvrf_id(zvrf); + pw->client = client; + pw->status = PW_NOT_FORWARDING; + pw->local_label = 
MPLS_NO_LABEL; + pw->remote_label = MPLS_NO_LABEL; + pw->flags = F_PSEUDOWIRE_CWORD; + + RB_INSERT(zebra_pw_head, &zvrf->pseudowires, pw); + if (pw->protocol == ZEBRA_ROUTE_STATIC) { + RB_INSERT(zebra_static_pw_head, &zvrf->static_pseudowires, pw); + QOBJ_REG(pw, zebra_pw); + } + + return pw; +} + +void zebra_pw_del(struct zebra_vrf *zvrf, struct zebra_pw *pw) +{ + if (IS_ZEBRA_DEBUG_PW) + zlog_debug("%u: deleting pseudowire %s protocol %s", pw->vrf_id, + pw->ifname, zebra_route_string(pw->protocol)); + + /* remove nexthop tracking */ + zebra_deregister_rnh_pseudowire(pw->vrf_id, pw); + + /* uninstall */ + if (pw->status == PW_FORWARDING) { + hook_call(pw_uninstall, pw); + dplane_pw_uninstall(pw); + } + + THREAD_OFF(pw->install_retry_timer); + + /* unlink and release memory */ + RB_REMOVE(zebra_pw_head, &zvrf->pseudowires, pw); + if (pw->protocol == ZEBRA_ROUTE_STATIC) + RB_REMOVE(zebra_static_pw_head, &zvrf->static_pseudowires, pw); + + XFREE(MTYPE_PW, pw); +} + +void zebra_pw_change(struct zebra_pw *pw, ifindex_t ifindex, int type, int af, + union g_addr *nexthop, uint32_t local_label, + uint32_t remote_label, uint8_t flags, + union pw_protocol_fields *data) +{ + pw->ifindex = ifindex; + pw->type = type; + pw->af = af; + pw->nexthop = *nexthop; + pw->local_label = local_label; + pw->remote_label = remote_label; + pw->flags = flags; + pw->data = *data; + + if (zebra_pw_enabled(pw)) { + bool nht_exists; + zebra_register_rnh_pseudowire(pw->vrf_id, pw, &nht_exists); + if (nht_exists) + zebra_pw_update(pw); + } else { + if (pw->protocol == ZEBRA_ROUTE_STATIC) + zebra_deregister_rnh_pseudowire(pw->vrf_id, pw); + zebra_pw_uninstall(pw); + } +} + +struct zebra_pw *zebra_pw_find(struct zebra_vrf *zvrf, const char *ifname) +{ + struct zebra_pw pw; + strlcpy(pw.ifname, ifname, sizeof(pw.ifname)); + return (RB_FIND(zebra_pw_head, &zvrf->pseudowires, &pw)); +} + +static int zebra_pw_enabled(struct zebra_pw *pw) +{ + if (pw->protocol == ZEBRA_ROUTE_STATIC) { + if 
(pw->local_label == MPLS_NO_LABEL + || pw->remote_label == MPLS_NO_LABEL || pw->af == AF_UNSPEC) + return 0; + return 1; + } else + return pw->enabled; +} + +void zebra_pw_update(struct zebra_pw *pw) +{ + if (zebra_pw_check_reachability(pw) < 0) { + zebra_pw_uninstall(pw); + zebra_pw_install_failure(pw, PW_NOT_FORWARDING); + /* wait for NHT and try again later */ + } else { + /* + * Install or reinstall the pseudowire (e.g. to update + * parameters like the nexthop or the use of the control word). + */ + zebra_pw_install(pw); + } +} + +static void zebra_pw_install(struct zebra_pw *pw) +{ + if (IS_ZEBRA_DEBUG_PW) + zlog_debug("%u: installing pseudowire %s protocol %s", + pw->vrf_id, pw->ifname, + zebra_route_string(pw->protocol)); + + hook_call(pw_install, pw); + if (dplane_pw_install(pw) == ZEBRA_DPLANE_REQUEST_FAILURE) { + zebra_pw_install_failure(pw, PW_NOT_FORWARDING); + return; + } + + if (pw->status != PW_FORWARDING) + zebra_pw_update_status(pw, PW_FORWARDING); +} + +static void zebra_pw_uninstall(struct zebra_pw *pw) +{ + if (pw->status != PW_FORWARDING) + return; + + if (IS_ZEBRA_DEBUG_PW) + zlog_debug("%u: uninstalling pseudowire %s protocol %s", + pw->vrf_id, pw->ifname, + zebra_route_string(pw->protocol)); + + /* ignore any possible error */ + hook_call(pw_uninstall, pw); + dplane_pw_uninstall(pw); + + if (zebra_pw_enabled(pw)) + zebra_pw_update_status(pw, PW_NOT_FORWARDING); +} + +/* + * Installation of the pseudowire in the kernel or hardware has failed. This + * function will notify the pseudowire client about the failure and schedule + * to retry the installation later. This function can be called by an external + * agent that performs the pseudowire installation in an asynchronous way. 
+ */ +void zebra_pw_install_failure(struct zebra_pw *pw, int pwstatus) +{ + if (IS_ZEBRA_DEBUG_PW) + zlog_debug( + "%u: failed installing pseudowire %s, scheduling retry in %u seconds", + pw->vrf_id, pw->ifname, PW_INSTALL_RETRY_INTERVAL); + + /* schedule to retry later */ + THREAD_OFF(pw->install_retry_timer); + thread_add_timer(zrouter.master, zebra_pw_install_retry, pw, + PW_INSTALL_RETRY_INTERVAL, &pw->install_retry_timer); + + zebra_pw_update_status(pw, pwstatus); +} + +static void zebra_pw_install_retry(struct thread *thread) +{ + struct zebra_pw *pw = THREAD_ARG(thread); + + zebra_pw_install(pw); +} + +static void zebra_pw_update_status(struct zebra_pw *pw, int status) +{ + pw->status = status; + if (pw->client) + zsend_pw_update(pw->client, pw); +} + +static int zebra_pw_check_reachability_strict(const struct zebra_pw *pw, + struct route_entry *re) +{ + const struct nexthop *nexthop; + const struct nexthop_group *nhg; + bool found_p = false; + bool fail_p = false; + + /* TODO: consider GRE/L2TPv3 tunnels in addition to MPLS LSPs */ + + /* All active nexthops must be labelled; look at + * primary and backup fib lists, in case there's been + * a backup nexthop activation. 
+ */ + nhg = rib_get_fib_nhg(re); + if (nhg && nhg->nexthop) { + for (ALL_NEXTHOPS_PTR(nhg, nexthop)) { + if (CHECK_FLAG(nexthop->flags, NEXTHOP_FLAG_RECURSIVE)) + continue; + + if (CHECK_FLAG(nexthop->flags, NEXTHOP_FLAG_ACTIVE)) { + if (nexthop->nh_label != NULL) + found_p = true; + else { + fail_p = true; + break; + } + } + } + } + + if (fail_p) + goto done; + + nhg = rib_get_fib_backup_nhg(re); + if (nhg && nhg->nexthop) { + for (ALL_NEXTHOPS_PTR(nhg, nexthop)) { + if (CHECK_FLAG(nexthop->flags, NEXTHOP_FLAG_RECURSIVE)) + continue; + + if (CHECK_FLAG(nexthop->flags, NEXTHOP_FLAG_ACTIVE)) { + if (nexthop->nh_label != NULL) + found_p = true; + else { + fail_p = true; + break; + } + } + } + } + +done: + + if (fail_p || !found_p) { + if (IS_ZEBRA_DEBUG_PW) + zlog_debug("%s: unlabeled route for %s", + __func__, pw->ifname); + return -1; + } + + return 0; +} + +static int zebra_pw_check_reachability(const struct zebra_pw *pw) +{ + struct route_entry *re; + const struct nexthop *nexthop; + const struct nexthop_group *nhg; + bool found_p = false; + + /* TODO: consider GRE/L2TPv3 tunnels in addition to MPLS LSPs */ + + /* Find route to the remote end of the pseudowire */ + re = rib_match(family2afi(pw->af), SAFI_UNICAST, pw->vrf_id, + &pw->nexthop, NULL); + if (!re) { + if (IS_ZEBRA_DEBUG_PW) + zlog_debug("%s: no route found for %s", __func__, + pw->ifname); + return -1; + } + + /* Stricter checking for some OSes (OBSD, e.g.) */ + if (mpls_pw_reach_strict) + return zebra_pw_check_reachability_strict(pw, re); + + /* There must be at least one installed labelled nexthop; + * look at primary and backup fib lists, in case there's been + * a backup nexthop activation. 
+ */ + nhg = rib_get_fib_nhg(re); + if (nhg && nhg->nexthop) { + for (ALL_NEXTHOPS_PTR(nhg, nexthop)) { + if (CHECK_FLAG(nexthop->flags, NEXTHOP_FLAG_RECURSIVE)) + continue; + + if (CHECK_FLAG(nexthop->flags, NEXTHOP_FLAG_ACTIVE) && + nexthop->nh_label != NULL) { + found_p = true; + break; + } + } + } + + if (found_p) + return 0; + + nhg = rib_get_fib_backup_nhg(re); + if (nhg && nhg->nexthop) { + for (ALL_NEXTHOPS_PTR(nhg, nexthop)) { + if (CHECK_FLAG(nexthop->flags, NEXTHOP_FLAG_RECURSIVE)) + continue; + + if (CHECK_FLAG(nexthop->flags, NEXTHOP_FLAG_ACTIVE) && + nexthop->nh_label != NULL) { + found_p = true; + break; + } + } + } + + if (!found_p) { + if (IS_ZEBRA_DEBUG_PW) + zlog_debug("%s: unlabeled route for %s", + __func__, pw->ifname); + return -1; + } + + return 0; +} + +static int zebra_pw_client_close(struct zserv *client) +{ + struct vrf *vrf; + struct zebra_vrf *zvrf; + struct zebra_pw *pw, *tmp; + + RB_FOREACH (vrf, vrf_id_head, &vrfs_by_id) { + zvrf = vrf->info; + RB_FOREACH_SAFE (pw, zebra_pw_head, &zvrf->pseudowires, tmp) { + if (pw->client != client) + continue; + zebra_pw_del(zvrf, pw); + } + } + + return 0; +} + +void zebra_pw_init(struct zebra_vrf *zvrf) +{ + RB_INIT(zebra_pw_head, &zvrf->pseudowires); + RB_INIT(zebra_static_pw_head, &zvrf->static_pseudowires); + + hook_register(zserv_client_close, zebra_pw_client_close); +} + +void zebra_pw_exit(struct zebra_vrf *zvrf) +{ + struct zebra_pw *pw; + + while (!RB_EMPTY(zebra_pw_head, &zvrf->pseudowires)) { + pw = RB_ROOT(zebra_pw_head, &zvrf->pseudowires); + + zebra_pw_del(zvrf, pw); + } +} + +DEFUN_NOSH (pseudowire_if, + pseudowire_if_cmd, + "pseudowire IFNAME", + "Static pseudowire configuration\n" + "Pseudowire name\n") +{ + struct zebra_vrf *zvrf; + struct zebra_pw *pw; + const char *ifname; + int idx = 0; + + zvrf = vrf_info_lookup(VRF_DEFAULT); + if (!zvrf) + return CMD_WARNING; + + argv_find(argv, argc, "IFNAME", &idx); + ifname = argv[idx]->arg; + + pw = zebra_pw_find(zvrf, ifname); + if (pw 
&& pw->protocol != ZEBRA_ROUTE_STATIC) { + vty_out(vty, "%% Pseudowire is not static\n"); + return CMD_WARNING; + } + + if (!pw) + pw = zebra_pw_add(zvrf, ifname, ZEBRA_ROUTE_STATIC, NULL); + VTY_PUSH_CONTEXT(PW_NODE, pw); + + return CMD_SUCCESS; +} + +DEFUN (no_pseudowire_if, + no_pseudowire_if_cmd, + "no pseudowire IFNAME", + NO_STR + "Static pseudowire configuration\n" + "Pseudowire name\n") +{ + struct zebra_vrf *zvrf; + struct zebra_pw *pw; + const char *ifname; + int idx = 0; + + zvrf = vrf_info_lookup(VRF_DEFAULT); + if (!zvrf) + return CMD_WARNING; + + argv_find(argv, argc, "IFNAME", &idx); + ifname = argv[idx]->arg; + + pw = zebra_pw_find(zvrf, ifname); + if (pw) { + if (pw->protocol != ZEBRA_ROUTE_STATIC) { + vty_out(vty, "%% Pseudowire is not static\n"); + return CMD_WARNING; + } + zebra_pw_del(zvrf, pw); + } + + return CMD_SUCCESS; +} + +DEFUN (pseudowire_labels, + pseudowire_labels_cmd, + "[no] mpls label local (16-1048575) remote (16-1048575)", + NO_STR + "MPLS L2VPN PW command\n" + "MPLS L2VPN static labels\n" + "Local pseudowire label\n" + "Local pseudowire label\n" + "Remote pseudowire label\n" + "Remote pseudowire label\n") +{ + VTY_DECLVAR_CONTEXT(zebra_pw, pw); + int idx = 0; + mpls_label_t local_label, remote_label; + + if (argv_find(argv, argc, "no", &idx)) { + local_label = MPLS_NO_LABEL; + remote_label = MPLS_NO_LABEL; + } else { + argv_find(argv, argc, "local", &idx); + local_label = atoi(argv[idx + 1]->arg); + argv_find(argv, argc, "remote", &idx); + remote_label = atoi(argv[idx + 1]->arg); + } + + zebra_pw_change(pw, pw->ifindex, pw->type, pw->af, &pw->nexthop, + local_label, remote_label, pw->flags, &pw->data); + + return CMD_SUCCESS; +} + +DEFUN (pseudowire_neighbor, + pseudowire_neighbor_cmd, + "[no] neighbor <A.B.C.D|X:X::X:X>", + NO_STR + "Specify the IPv4 or IPv6 address of the remote endpoint\n" + "IPv4 address\n" + "IPv6 address\n") +{ + VTY_DECLVAR_CONTEXT(zebra_pw, pw); + int idx = 0; + const char *address; + int af; + union 
g_addr nexthop; + + af = AF_UNSPEC; + memset(&nexthop, 0, sizeof(nexthop)); + + if (!argv_find(argv, argc, "no", &idx)) { + argv_find(argv, argc, "neighbor", &idx); + address = argv[idx + 1]->arg; + + if (inet_pton(AF_INET, address, &nexthop.ipv4) == 1) + af = AF_INET; + else if (inet_pton(AF_INET6, address, &nexthop.ipv6) == 1) + af = AF_INET6; + else { + vty_out(vty, "%% Malformed address\n"); + return CMD_WARNING; + } + } + + zebra_pw_change(pw, pw->ifindex, pw->type, af, &nexthop, + pw->local_label, pw->remote_label, pw->flags, + &pw->data); + + return CMD_SUCCESS; +} + +DEFUN (pseudowire_control_word, + pseudowire_control_word_cmd, + "[no] control-word <exclude|include>", + NO_STR + "Control-word options\n" + "Exclude control-word in pseudowire packets\n" + "Include control-word in pseudowire packets\n") +{ + VTY_DECLVAR_CONTEXT(zebra_pw, pw); + int idx = 0; + uint8_t flags = 0; + + if (argv_find(argv, argc, "no", &idx)) + flags = F_PSEUDOWIRE_CWORD; + else { + argv_find(argv, argc, "control-word", &idx); + if (argv[idx + 1]->text[0] == 'i') + flags = F_PSEUDOWIRE_CWORD; + } + + zebra_pw_change(pw, pw->ifindex, pw->type, pw->af, &pw->nexthop, + pw->local_label, pw->remote_label, flags, &pw->data); + + return CMD_SUCCESS; +} + +DEFUN (show_pseudowires, + show_pseudowires_cmd, + "show mpls pseudowires", + SHOW_STR + MPLS_STR + "Pseudowires\n") +{ + struct zebra_vrf *zvrf; + struct zebra_pw *pw; + + zvrf = vrf_info_lookup(VRF_DEFAULT); + if (!zvrf) + return 0; + + vty_out(vty, "%-16s %-24s %-12s %-8s %-10s\n", "Interface", "Neighbor", + "Labels", "Protocol", "Status"); + + RB_FOREACH (pw, zebra_pw_head, &zvrf->pseudowires) { + char buf_nbr[INET6_ADDRSTRLEN]; + char buf_labels[64]; + + inet_ntop(pw->af, &pw->nexthop, buf_nbr, sizeof(buf_nbr)); + + if (pw->local_label != MPLS_NO_LABEL + && pw->remote_label != MPLS_NO_LABEL) + snprintf(buf_labels, sizeof(buf_labels), "%u/%u", + pw->local_label, pw->remote_label); + else + snprintf(buf_labels, sizeof(buf_labels), 
"-"); + + vty_out(vty, "%-16s %-24s %-12s %-8s %-10s\n", pw->ifname, + (pw->af != AF_UNSPEC) ? buf_nbr : "-", buf_labels, + zebra_route_string(pw->protocol), + (zebra_pw_enabled(pw) && pw->status == PW_FORWARDING) + ? "UP" + : "DOWN"); + } + + return CMD_SUCCESS; +} + +static void vty_show_mpls_pseudowire_detail(struct vty *vty) +{ + struct zebra_vrf *zvrf; + struct zebra_pw *pw; + struct route_entry *re; + struct nexthop *nexthop; + struct nexthop_group *nhg; + + zvrf = vrf_info_lookup(VRF_DEFAULT); + if (!zvrf) + return; + + RB_FOREACH (pw, zebra_pw_head, &zvrf->pseudowires) { + char buf_nbr[INET6_ADDRSTRLEN]; + char buf_nh[100]; + + vty_out(vty, "Interface: %s\n", pw->ifname); + inet_ntop(pw->af, &pw->nexthop, buf_nbr, sizeof(buf_nbr)); + vty_out(vty, " Neighbor: %s\n", + (pw->af != AF_UNSPEC) ? buf_nbr : "-"); + if (pw->local_label != MPLS_NO_LABEL) + vty_out(vty, " Local Label: %u\n", pw->local_label); + else + vty_out(vty, " Local Label: %s\n", "-"); + if (pw->remote_label != MPLS_NO_LABEL) + vty_out(vty, " Remote Label: %u\n", pw->remote_label); + else + vty_out(vty, " Remote Label: %s\n", "-"); + vty_out(vty, " Protocol: %s\n", + zebra_route_string(pw->protocol)); + if (pw->protocol == ZEBRA_ROUTE_LDP) + vty_out(vty, " VC-ID: %u\n", pw->data.ldp.pwid); + vty_out(vty, " Status: %s \n", + (zebra_pw_enabled(pw) && pw->status == PW_FORWARDING) + ? 
"Up" + : "Down"); + re = rib_match(family2afi(pw->af), SAFI_UNICAST, pw->vrf_id, + &pw->nexthop, NULL); + if (re == NULL) + continue; + + nhg = rib_get_fib_nhg(re); + for (ALL_NEXTHOPS_PTR(nhg, nexthop)) { + snprintfrr(buf_nh, sizeof(buf_nh), "%pNHv", + nexthop); + vty_out(vty, " Next Hop: %s\n", buf_nh); + if (nexthop->nh_label) + vty_out(vty, " Next Hop label: %u\n", + nexthop->nh_label->label[0]); + else + vty_out(vty, " Next Hop label: %s\n", + "-"); + } + + /* Include any installed backups */ + nhg = rib_get_fib_backup_nhg(re); + if (nhg == NULL) + continue; + + for (ALL_NEXTHOPS_PTR(nhg, nexthop)) { + snprintfrr(buf_nh, sizeof(buf_nh), "%pNHv", + nexthop); + vty_out(vty, " Next Hop: %s\n", buf_nh); + if (nexthop->nh_label) + vty_out(vty, " Next Hop label: %u\n", + nexthop->nh_label->label[0]); + else + vty_out(vty, " Next Hop label: %s\n", + "-"); + } + } +} + +static void vty_show_mpls_pseudowire(struct zebra_pw *pw, json_object *json_pws) +{ + struct route_entry *re; + struct nexthop *nexthop; + struct nexthop_group *nhg; + char buf_nbr[INET6_ADDRSTRLEN]; + char buf_nh[100]; + json_object *json_pw = NULL; + json_object *json_nexthop = NULL; + json_object *json_nexthops = NULL; + + json_nexthops = json_object_new_array(); + json_pw = json_object_new_object(); + + json_object_string_add(json_pw, "interface", pw->ifname); + if (pw->af == AF_UNSPEC) + json_object_string_add(json_pw, "neighbor", "-"); + else { + inet_ntop(pw->af, &pw->nexthop, buf_nbr, sizeof(buf_nbr)); + json_object_string_add(json_pw, "neighbor", buf_nbr); + } + if (pw->local_label != MPLS_NO_LABEL) + json_object_int_add(json_pw, "localLabel", pw->local_label); + else + json_object_string_add(json_pw, "localLabel", "-"); + if (pw->remote_label != MPLS_NO_LABEL) + json_object_int_add(json_pw, "remoteLabel", pw->remote_label); + else + json_object_string_add(json_pw, "remoteLabel", "-"); + json_object_string_add(json_pw, "protocol", + zebra_route_string(pw->protocol)); + if (pw->protocol == 
ZEBRA_ROUTE_LDP) + json_object_int_add(json_pw, "vcId", pw->data.ldp.pwid); + json_object_string_add( + json_pw, "Status", + (zebra_pw_enabled(pw) && pw->status == PW_FORWARDING) ? "Up" + : "Down"); + re = rib_match(family2afi(pw->af), SAFI_UNICAST, pw->vrf_id, + &pw->nexthop, NULL); + if (re == NULL) + goto done; + + nhg = rib_get_fib_nhg(re); + for (ALL_NEXTHOPS_PTR(nhg, nexthop)) { + json_nexthop = json_object_new_object(); + snprintfrr(buf_nh, sizeof(buf_nh), "%pNHv", nexthop); + json_object_string_add(json_nexthop, "nexthop", buf_nh); + if (nexthop->nh_label) + json_object_int_add( + json_nexthop, "nhLabel", + nexthop->nh_label->label[0]); + else + json_object_string_add(json_nexthop, "nhLabel", + "-"); + + json_object_array_add(json_nexthops, json_nexthop); + } + + /* Include installed backup nexthops also */ + nhg = rib_get_fib_backup_nhg(re); + if (nhg == NULL) + goto done; + + for (ALL_NEXTHOPS_PTR(nhg, nexthop)) { + json_nexthop = json_object_new_object(); + snprintfrr(buf_nh, sizeof(buf_nh), "%pNHv", nexthop); + json_object_string_add(json_nexthop, "nexthop", buf_nh); + if (nexthop->nh_label) + json_object_int_add( + json_nexthop, "nhLabel", + nexthop->nh_label->label[0]); + else + json_object_string_add(json_nexthop, "nhLabel", + "-"); + + json_object_array_add(json_nexthops, json_nexthop); + } + +done: + + json_object_object_add(json_pw, "nexthops", json_nexthops); + json_object_array_add(json_pws, json_pw); +} + +static void vty_show_mpls_pseudowire_detail_json(struct vty *vty) +{ + json_object *json = NULL; + json_object *json_pws = NULL; + struct zebra_vrf *zvrf; + struct zebra_pw *pw; + + zvrf = vrf_info_lookup(VRF_DEFAULT); + if (!zvrf) + return; + + json = json_object_new_object(); + json_pws = json_object_new_array(); + RB_FOREACH (pw, zebra_pw_head, &zvrf->pseudowires) { + vty_show_mpls_pseudowire(pw, json_pws); + } + json_object_object_add(json, "pw", json_pws); + vty_json(vty, json); +} + +DEFUN(show_pseudowires_detail, 
show_pseudowires_detail_cmd, + "show mpls pseudowires detail [json]$json", + SHOW_STR MPLS_STR + "Pseudowires\n" + "Detailed output\n" JSON_STR) +{ + bool uj = use_json(argc, argv); + + if (uj) + vty_show_mpls_pseudowire_detail_json(vty); + else + vty_show_mpls_pseudowire_detail(vty); + + return CMD_SUCCESS; +} + +/* Pseudowire configuration write function. */ +static int zebra_pw_config(struct vty *vty) +{ + int write = 0; + struct zebra_vrf *zvrf; + struct zebra_pw *pw; + + zvrf = vrf_info_lookup(VRF_DEFAULT); + if (!zvrf) + return 0; + + RB_FOREACH (pw, zebra_static_pw_head, &zvrf->static_pseudowires) { + vty_out(vty, "pseudowire %s\n", pw->ifname); + if (pw->local_label != MPLS_NO_LABEL + && pw->remote_label != MPLS_NO_LABEL) + vty_out(vty, " mpls label local %u remote %u\n", + pw->local_label, pw->remote_label); + else + vty_out(vty, + " ! Incomplete config, specify the static MPLS labels\n"); + + if (pw->af != AF_UNSPEC) { + char buf[INET6_ADDRSTRLEN]; + inet_ntop(pw->af, &pw->nexthop, buf, sizeof(buf)); + vty_out(vty, " neighbor %s\n", buf); + } else + vty_out(vty, + " ! 
Incomplete config, specify a neighbor address\n"); + + if (!(pw->flags & F_PSEUDOWIRE_CWORD)) + vty_out(vty, " control-word exclude\n"); + + vty_out(vty, "exit\n"); + vty_out(vty, "!\n"); + write = 1; + } + + return write; +} + +static int zebra_pw_config(struct vty *vty); +static struct cmd_node pw_node = { + .name = "pw", + .node = PW_NODE, + .parent_node = CONFIG_NODE, + .prompt = "%s(config-pw)# ", + .config_write = zebra_pw_config, +}; + +void zebra_pw_vty_init(void) +{ + install_node(&pw_node); + install_default(PW_NODE); + + install_element(CONFIG_NODE, &pseudowire_if_cmd); + install_element(CONFIG_NODE, &no_pseudowire_if_cmd); + install_element(PW_NODE, &pseudowire_labels_cmd); + install_element(PW_NODE, &pseudowire_neighbor_cmd); + install_element(PW_NODE, &pseudowire_control_word_cmd); + + install_element(VIEW_NODE, &show_pseudowires_cmd); + install_element(VIEW_NODE, &show_pseudowires_detail_cmd); +} diff --git a/zebra/zebra_pw.h b/zebra/zebra_pw.h new file mode 100644 index 0000000..9800c58 --- /dev/null +++ b/zebra/zebra_pw.h @@ -0,0 +1,85 @@ +/* Zebra PW code + * Copyright (C) 2016 Volta Networks, Inc. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. 
+ * + * You should have received a copy of the GNU General Public License + * along with this program; see the file COPYING; if not, write to the + * Free Software Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, + * MA 02110-1301 USA + */ + +#ifndef ZEBRA_PW_H_ +#define ZEBRA_PW_H_ + +#include <net/if.h> +#include <netinet/in.h> + +#include "lib/hook.h" +#include "lib/qobj.h" +#include "lib/pw.h" + +#include "zebra/zebra_vrf.h" + +#ifdef __cplusplus +extern "C" { +#endif + +#define PW_INSTALL_RETRY_INTERVAL 30 + +struct zebra_pw { + RB_ENTRY(zebra_pw) pw_entry, static_pw_entry; + vrf_id_t vrf_id; + char ifname[INTERFACE_NAMSIZ]; + ifindex_t ifindex; + int type; + int af; + union g_addr nexthop; + uint32_t local_label; + uint32_t remote_label; + uint8_t flags; + union pw_protocol_fields data; + int enabled; + int status; + uint8_t protocol; + struct zserv *client; + struct rnh *rnh; + struct thread *install_retry_timer; + QOBJ_FIELDS; +}; +DECLARE_QOBJ_TYPE(zebra_pw); + +RB_HEAD(zebra_pw_head, zebra_pw); +RB_PROTOTYPE(zebra_pw_head, zebra_pw, pw_entry, zebra_pw_compare); + +RB_HEAD(zebra_static_pw_head, zebra_pw); +RB_PROTOTYPE(zebra_static_pw_head, zebra_pw, static_pw_entry, zebra_pw_compare); + +DECLARE_HOOK(pw_install, (struct zebra_pw * pw), (pw)); +DECLARE_HOOK(pw_uninstall, (struct zebra_pw * pw), (pw)); + +struct zebra_pw *zebra_pw_add(struct zebra_vrf *zvrf, const char *ifname, + uint8_t protocol, struct zserv *client); +void zebra_pw_del(struct zebra_vrf *, struct zebra_pw *); +void zebra_pw_change(struct zebra_pw *, ifindex_t, int, int, union g_addr *, + uint32_t, uint32_t, uint8_t, union pw_protocol_fields *); +struct zebra_pw *zebra_pw_find(struct zebra_vrf *, const char *); +void zebra_pw_update(struct zebra_pw *); +void zebra_pw_install_failure(struct zebra_pw *pw, int pwstatus); +void zebra_pw_init(struct zebra_vrf *); +void zebra_pw_exit(struct zebra_vrf *); +void zebra_pw_vty_init(void); + +#ifdef __cplusplus +} +#endif + +#endif /* 
ZEBRA_PW_H_ */ diff --git a/zebra/zebra_rib.c b/zebra/zebra_rib.c new file mode 100644 index 0000000..1718376 --- /dev/null +++ b/zebra/zebra_rib.c @@ -0,0 +1,4909 @@ +/* Routing Information Base. + * Copyright (C) 1997, 98, 99, 2001 Kunihiro Ishiguro + * + * This file is part of GNU Zebra. + * + * GNU Zebra is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License as published by the + * Free Software Foundation; either version 2, or (at your option) any + * later version. + * + * GNU Zebra is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. + * + * You should have received a copy of the GNU General Public License along + * with this program; see the file COPYING; if not, write to the Free Software + * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA + */ + +#include <zebra.h> + +#include "command.h" +#include "if.h" +#include "linklist.h" +#include "log.h" +#include "memory.h" +#include "mpls.h" +#include "nexthop.h" +#include "prefix.h" +#include "prefix.h" +#include "routemap.h" +#include "sockunion.h" +#include "srcdest_table.h" +#include "table.h" +#include "thread.h" +#include "vrf.h" +#include "workqueue.h" +#include "nexthop_group_private.h" +#include "frr_pthread.h" +#include "printfrr.h" +#include "frrscript.h" + +#include "zebra/zebra_router.h" +#include "zebra/connected.h" +#include "zebra/debug.h" +#include "zebra/interface.h" +#include "zebra/redistribute.h" +#include "zebra/rib.h" +#include "zebra/rt.h" +#include "zebra/zapi_msg.h" +#include "zebra/zebra_errors.h" +#include "zebra/zebra_ns.h" +#include "zebra/zebra_rnh.h" +#include "zebra/zebra_routemap.h" +#include "zebra/zebra_vrf.h" +#include "zebra/zebra_vxlan.h" +#include "zebra/zapi_msg.h" +#include "zebra/zebra_dplane.h" +#include 
"zebra/zebra_evpn_mh.h" +#include "zebra/zebra_script.h" + +DEFINE_MGROUP(ZEBRA, "zebra"); + +DEFINE_MTYPE(ZEBRA, RE, "Route Entry"); +DEFINE_MTYPE_STATIC(ZEBRA, RIB_DEST, "RIB destination"); +DEFINE_MTYPE_STATIC(ZEBRA, RIB_UPDATE_CTX, "Rib update context object"); +DEFINE_MTYPE_STATIC(ZEBRA, WQ_WRAPPER, "WQ wrapper"); + +/* + * Event, list, and mutex for delivery of dataplane results + */ +static pthread_mutex_t dplane_mutex; +static struct thread *t_dplane; +static struct dplane_ctx_q rib_dplane_q; + +DEFINE_HOOK(rib_update, (struct route_node * rn, const char *reason), + (rn, reason)); +DEFINE_HOOK(rib_shutdown, (struct route_node * rn), (rn)); + + +/* Meta Q's specific names */ +enum meta_queue_indexes { + META_QUEUE_NHG, + META_QUEUE_EVPN, + META_QUEUE_EARLY_ROUTE, + META_QUEUE_EARLY_LABEL, + META_QUEUE_CONNECTED, + META_QUEUE_KERNEL, + META_QUEUE_STATIC, + META_QUEUE_NOTBGP, + META_QUEUE_BGP, + META_QUEUE_OTHER, +}; + +/* Each route type's string and default distance value. */ +static const struct { + int key; + uint8_t distance; + enum meta_queue_indexes meta_q_map; +} route_info[ZEBRA_ROUTE_MAX] = { + [ZEBRA_ROUTE_NHG] = {ZEBRA_ROUTE_NHG, 255 /* Unneeded for nhg's */, + META_QUEUE_NHG}, + [ZEBRA_ROUTE_SYSTEM] = {ZEBRA_ROUTE_SYSTEM, 0, META_QUEUE_KERNEL}, + [ZEBRA_ROUTE_KERNEL] = {ZEBRA_ROUTE_KERNEL, 0, META_QUEUE_KERNEL}, + [ZEBRA_ROUTE_CONNECT] = {ZEBRA_ROUTE_CONNECT, 0, META_QUEUE_CONNECTED}, + [ZEBRA_ROUTE_STATIC] = {ZEBRA_ROUTE_STATIC, 1, META_QUEUE_STATIC}, + [ZEBRA_ROUTE_RIP] = {ZEBRA_ROUTE_RIP, 120, META_QUEUE_NOTBGP}, + [ZEBRA_ROUTE_RIPNG] = {ZEBRA_ROUTE_RIPNG, 120, META_QUEUE_NOTBGP}, + [ZEBRA_ROUTE_OSPF] = {ZEBRA_ROUTE_OSPF, 110, META_QUEUE_NOTBGP}, + [ZEBRA_ROUTE_OSPF6] = {ZEBRA_ROUTE_OSPF6, 110, META_QUEUE_NOTBGP}, + [ZEBRA_ROUTE_ISIS] = {ZEBRA_ROUTE_ISIS, 115, META_QUEUE_NOTBGP}, + [ZEBRA_ROUTE_BGP] = {ZEBRA_ROUTE_BGP, 20 /* IBGP is 200. 
*/, + META_QUEUE_BGP}, + [ZEBRA_ROUTE_PIM] = {ZEBRA_ROUTE_PIM, 255, META_QUEUE_OTHER}, + [ZEBRA_ROUTE_EIGRP] = {ZEBRA_ROUTE_EIGRP, 90, META_QUEUE_NOTBGP}, + [ZEBRA_ROUTE_NHRP] = {ZEBRA_ROUTE_NHRP, 10, META_QUEUE_NOTBGP}, + [ZEBRA_ROUTE_HSLS] = {ZEBRA_ROUTE_HSLS, 255, META_QUEUE_OTHER}, + [ZEBRA_ROUTE_OLSR] = {ZEBRA_ROUTE_OLSR, 255, META_QUEUE_OTHER}, + [ZEBRA_ROUTE_TABLE] = {ZEBRA_ROUTE_TABLE, 150, META_QUEUE_STATIC}, + [ZEBRA_ROUTE_LDP] = {ZEBRA_ROUTE_LDP, 150, META_QUEUE_OTHER}, + [ZEBRA_ROUTE_VNC] = {ZEBRA_ROUTE_VNC, 20, META_QUEUE_BGP}, + [ZEBRA_ROUTE_VNC_DIRECT] = {ZEBRA_ROUTE_VNC_DIRECT, 20, META_QUEUE_BGP}, + [ZEBRA_ROUTE_VNC_DIRECT_RH] = {ZEBRA_ROUTE_VNC_DIRECT_RH, 20, + META_QUEUE_BGP}, + [ZEBRA_ROUTE_BGP_DIRECT] = {ZEBRA_ROUTE_BGP_DIRECT, 20, META_QUEUE_BGP}, + [ZEBRA_ROUTE_BGP_DIRECT_EXT] = {ZEBRA_ROUTE_BGP_DIRECT_EXT, 20, + META_QUEUE_BGP}, + [ZEBRA_ROUTE_BABEL] = {ZEBRA_ROUTE_BABEL, 100, META_QUEUE_NOTBGP}, + [ZEBRA_ROUTE_SHARP] = {ZEBRA_ROUTE_SHARP, 150, META_QUEUE_OTHER}, + [ZEBRA_ROUTE_PBR] = {ZEBRA_ROUTE_PBR, 200, META_QUEUE_OTHER}, + [ZEBRA_ROUTE_BFD] = {ZEBRA_ROUTE_BFD, 255, META_QUEUE_OTHER}, + [ZEBRA_ROUTE_OPENFABRIC] = {ZEBRA_ROUTE_OPENFABRIC, 115, + META_QUEUE_NOTBGP}, + [ZEBRA_ROUTE_VRRP] = {ZEBRA_ROUTE_VRRP, 255, META_QUEUE_OTHER}, + [ZEBRA_ROUTE_SRTE] = {ZEBRA_ROUTE_SRTE, 255, META_QUEUE_OTHER}, + [ZEBRA_ROUTE_ALL] = {ZEBRA_ROUTE_ALL, 255, META_QUEUE_OTHER}, + /* Any new route type added to zebra, should be mirrored here */ + + /* no entry/default: 150 */ +}; + +/* Wrapper struct for nhg workqueue items; a 'ctx' is an incoming update + * from the OS, and an 'nhe' is a nhe update. + */ +struct wq_nhg_wrapper { + int type; + union { + struct nhg_ctx *ctx; + struct nhg_hash_entry *nhe; + } u; +}; + +#define WQ_NHG_WRAPPER_TYPE_CTX 0x01 +#define WQ_NHG_WRAPPER_TYPE_NHG 0x02 + +/* Wrapper structs for evpn/vxlan workqueue items. 
*/ +struct wq_evpn_wrapper { + int type; + bool add_p; + vrf_id_t vrf_id; + bool esr_rxed; + uint8_t df_alg; + uint16_t df_pref; + uint32_t flags; + uint32_t seq; + esi_t esi; + vni_t vni; + struct ipaddr ip; + struct ethaddr macaddr; + struct prefix prefix; + struct in_addr vtep_ip; +}; + +#define WQ_EVPN_WRAPPER_TYPE_VRFROUTE 0x01 +#define WQ_EVPN_WRAPPER_TYPE_REM_ES 0x02 +#define WQ_EVPN_WRAPPER_TYPE_REM_MACIP 0x03 +#define WQ_EVPN_WRAPPER_TYPE_REM_VTEP 0x04 + +enum wq_label_types { + WQ_LABEL_FTN_UNINSTALL, + WQ_LABEL_LABELS_PROCESS, +}; + +struct wq_label_wrapper { + enum wq_label_types type; + vrf_id_t vrf_id; + + struct prefix p; + enum lsp_types_t ltype; + uint8_t route_type; + uint8_t route_instance; + + bool add_p; + struct zapi_labels zl; + + int afi; +}; + +static void rib_addnode(struct route_node *rn, struct route_entry *re, + int process); + +/* %pRN is already a printer for route_nodes that just prints the prefix */ +#ifdef _FRR_ATTRIBUTE_PRINTFRR +#pragma FRR printfrr_ext "%pZN" (struct route_node *) +#endif + +static const char *subqueue2str(enum meta_queue_indexes index) +{ + switch (index) { + case META_QUEUE_NHG: + return "NHG Objects"; + case META_QUEUE_EVPN: + return "EVPN/VxLan Objects"; + case META_QUEUE_EARLY_ROUTE: + return "Early Route Processing"; + case META_QUEUE_EARLY_LABEL: + return "Early Label Handling"; + case META_QUEUE_CONNECTED: + return "Connected Routes"; + case META_QUEUE_KERNEL: + return "Kernel Routes"; + case META_QUEUE_STATIC: + return "Static Routes"; + case META_QUEUE_NOTBGP: + return "RIP/OSPF/ISIS/EIGRP/NHRP Routes"; + case META_QUEUE_BGP: + return "BGP Routes"; + case META_QUEUE_OTHER: + return "Other Routes"; + } + + return "Unknown"; +} + +printfrr_ext_autoreg_p("ZN", printfrr_zebra_node); +static ssize_t printfrr_zebra_node(struct fbuf *buf, struct printfrr_eargs *ea, + const void *ptr) +{ + struct route_node *rn = (struct route_node *)ptr; + ssize_t rv = 0; + + /* just the table number? 
*/ + if (ea->fmt[0] == 't') { + rib_dest_t *dest; + struct route_entry *re = NULL; + + ea->fmt++; + + if (!rn) + return bputch(buf, '!'); + + dest = rib_dest_from_rnode(rn); + if (dest) + re = re_list_first(&dest->routes); + if (re) + rv += bprintfrr(buf, "%u", re->table); + else + rv += bputch(buf, '?'); + + } else { + char cbuf[PREFIX_STRLEN * 2 + 6]; + struct rib_table_info *info; + + if (!rn) + return bputs(buf, "{(route_node *) NULL}"); + + srcdest_rnode2str(rn, cbuf, sizeof(cbuf)); + rv += bputs(buf, cbuf); + + info = srcdest_rnode_table_info(rn); + if (info->safi == SAFI_MULTICAST) + rv += bputs(buf, " (MRIB)"); + } + return rv; +} + +#define rnode_debug(node, vrf_id, msg, ...) \ + zlog_debug("%s: (%u:%pZNt):%pZN: " msg, __func__, vrf_id, node, node, \ + ##__VA_ARGS__) + +#define rnode_info(node, vrf_id, msg, ...) \ + zlog_info("%s: (%u:%pZNt):%pZN: " msg, __func__, vrf_id, node, node, \ + ##__VA_ARGS__) + +static char *_dump_re_status(const struct route_entry *re, char *buf, + size_t len) +{ + if (re->status == 0) { + snprintfrr(buf, len, "None "); + return buf; + } + + snprintfrr( + buf, len, "%s%s%s%s%s%s%s", + CHECK_FLAG(re->status, ROUTE_ENTRY_REMOVED) ? "Removed " : "", + CHECK_FLAG(re->status, ROUTE_ENTRY_CHANGED) ? "Changed " : "", + CHECK_FLAG(re->status, ROUTE_ENTRY_LABELS_CHANGED) + ? "Label Changed " + : "", + CHECK_FLAG(re->status, ROUTE_ENTRY_QUEUED) ? "Queued " : "", + CHECK_FLAG(re->status, ROUTE_ENTRY_INSTALLED) ? "Installed " + : "", + CHECK_FLAG(re->status, ROUTE_ENTRY_FAILED) ? "Failed " : "", + CHECK_FLAG(re->status, ROUTE_ENTRY_USE_FIB_NHG) ? 
"Fib NHG " + : ""); + return buf; +} + +uint8_t route_distance(int type) +{ + uint8_t distance; + + if ((unsigned)type >= array_size(route_info)) + distance = 150; + else + distance = route_info[type].distance; + + return distance; +} + +int is_zebra_valid_kernel_table(uint32_t table_id) +{ +#ifdef linux + if ((table_id == RT_TABLE_UNSPEC) || (table_id == RT_TABLE_LOCAL) + || (table_id == RT_TABLE_COMPAT)) + return 0; +#endif + + return 1; +} + +int is_zebra_main_routing_table(uint32_t table_id) +{ + if (table_id == RT_TABLE_MAIN) + return 1; + return 0; +} + +int zebra_check_addr(const struct prefix *p) +{ + if (p->family == AF_INET) { + uint32_t addr; + + addr = p->u.prefix4.s_addr; + addr = ntohl(addr); + + if (IPV4_NET127(addr) || IN_CLASSD(addr) + || IPV4_LINKLOCAL(addr)) + return 0; + } + if (p->family == AF_INET6) { + if (IN6_IS_ADDR_LOOPBACK(&p->u.prefix6)) + return 0; + if (IN6_IS_ADDR_LINKLOCAL(&p->u.prefix6)) + return 0; + } + return 1; +} + +static void route_entry_attach_ref(struct route_entry *re, + struct nhg_hash_entry *new) +{ + re->nhe = new; + re->nhe_id = new->id; + re->nhe_installed_id = 0; + + zebra_nhg_increment_ref(new); +} + +/* Replace (if 'new_nhghe') or clear (if that's NULL) an re's nhe. 
*/ +int route_entry_update_nhe(struct route_entry *re, + struct nhg_hash_entry *new_nhghe) +{ + int ret = 0; + struct nhg_hash_entry *old_nhg = NULL; + + if (new_nhghe == NULL) { + old_nhg = re->nhe; + + re->nhe_id = 0; + re->nhe_installed_id = 0; + re->nhe = NULL; + goto done; + } + + if ((re->nhe_id != 0) && re->nhe && (re->nhe != new_nhghe)) { + /* Capture previous nhg, if any */ + old_nhg = re->nhe; + + route_entry_attach_ref(re, new_nhghe); + } else if (!re->nhe) + /* This is the first time it's being attached */ + route_entry_attach_ref(re, new_nhghe); + +done: + /* Detach / deref previous nhg */ + if (old_nhg) + zebra_nhg_decrement_ref(old_nhg); + + return ret; +} + +void rib_handle_nhg_replace(struct nhg_hash_entry *old_entry, + struct nhg_hash_entry *new_entry) +{ + struct zebra_router_table *zrt; + struct route_node *rn; + struct route_entry *re, *next; + + if (IS_ZEBRA_DEBUG_RIB_DETAILED || IS_ZEBRA_DEBUG_NHG_DETAIL) + zlog_debug("%s: replacing routes nhe (%u) OLD %p NEW %p", + __func__, new_entry->id, new_entry, old_entry); + + /* We have to do them ALL */ + RB_FOREACH (zrt, zebra_router_table_head, &zrouter.tables) { + for (rn = route_top(zrt->table); rn; + rn = srcdest_route_next(rn)) { + RNODE_FOREACH_RE_SAFE (rn, re, next) { + if (re->nhe && re->nhe == old_entry) + route_entry_update_nhe(re, new_entry); + } + } + } +} + +struct route_entry *rib_match(afi_t afi, safi_t safi, vrf_id_t vrf_id, + const union g_addr *addr, + struct route_node **rn_out) +{ + struct prefix p; + struct route_table *table; + struct route_node *rn; + struct route_entry *match = NULL; + + /* Lookup table. 
 */
	table = zebra_vrf_table(afi, safi, vrf_id);
	if (!table)
		return 0;

	/* Build a host-length prefix for the lookup address */
	memset(&p, 0, sizeof(p));
	p.family = afi;
	if (afi == AFI_IP) {
		p.u.prefix4 = addr->ipv4;
		p.prefixlen = IPV4_MAX_BITLEN;
	} else {
		p.u.prefix6 = addr->ipv6;
		p.prefixlen = IPV6_MAX_BITLEN;
	}

	rn = route_node_match(table, &p);

	while (rn) {
		rib_dest_t *dest;

		/* Drop the lock taken by route_node_match()/route_lock_node();
		 * re-locked below if we walk up to a parent node.
		 */
		route_unlock_node(rn);

		dest = rib_dest_from_rnode(rn);
		if (dest && dest->selected_fib
		    && !CHECK_FLAG(dest->selected_fib->status,
				   ROUTE_ENTRY_REMOVED))
			match = dest->selected_fib;

		/* If there is no selected route or matched route is EGP, go up
		   tree. */
		if (!match) {
			do {
				rn = rn->parent;
			} while (rn && rn->info == NULL);
			if (rn)
				route_lock_node(rn);
		} else {
			/* Non-connected matches must actually be installed
			 * in the fib to be usable here.
			 */
			if (match->type != ZEBRA_ROUTE_CONNECT) {
				if (!CHECK_FLAG(match->status,
						ROUTE_ENTRY_INSTALLED))
					return NULL;
			}

			if (rn_out)
				*rn_out = rn;
			return match;
		}
	}
	return NULL;
}

/*
 * IPv4 multicast RPF lookup: consult MRIB and/or URIB according to the
 * configured multicast lookup mode and pick the winner.
 */
struct route_entry *rib_match_ipv4_multicast(vrf_id_t vrf_id,
					     struct in_addr addr,
					     struct route_node **rn_out)
{
	struct route_entry *re = NULL, *mre = NULL, *ure = NULL;
	struct route_node *m_rn = NULL, *u_rn = NULL;
	union g_addr gaddr = {.ipv4 = addr};

	switch (zrouter.ipv4_multicast_mode) {
	case MCAST_MRIB_ONLY:
		return rib_match(AFI_IP, SAFI_MULTICAST, vrf_id, &gaddr,
				 rn_out);
	case MCAST_URIB_ONLY:
		return rib_match(AFI_IP, SAFI_UNICAST, vrf_id, &gaddr, rn_out);
	case MCAST_NO_CONFIG:
	case MCAST_MIX_MRIB_FIRST:
		/* Default behavior: MRIB wins if present, else fall back */
		re = mre = rib_match(AFI_IP, SAFI_MULTICAST, vrf_id, &gaddr,
				     &m_rn);
		if (!mre)
			re = ure = rib_match(AFI_IP, SAFI_UNICAST, vrf_id,
					     &gaddr, &u_rn);
		break;
	case MCAST_MIX_DISTANCE:
		/* Lower admin distance wins the tie between MRIB and URIB */
		mre = rib_match(AFI_IP, SAFI_MULTICAST, vrf_id, &gaddr, &m_rn);
		ure = rib_match(AFI_IP, SAFI_UNICAST, vrf_id, &gaddr, &u_rn);
		if (mre && ure)
			re = ure->distance < mre->distance ? ure : mre;
		else if (mre)
			re = mre;
		else if (ure)
			re = ure;
		break;
	case MCAST_MIX_PFXLEN:
		/* Longer (more specific) prefix wins */
		mre = rib_match(AFI_IP, SAFI_MULTICAST, vrf_id, &gaddr, &m_rn);
		ure = rib_match(AFI_IP, SAFI_UNICAST, vrf_id, &gaddr, &u_rn);
		if (mre && ure)
			re = u_rn->p.prefixlen > m_rn->p.prefixlen ? ure : mre;
		else if (mre)
			re = mre;
		else if (ure)
			re = ure;
		break;
	}

	if (rn_out)
		*rn_out = (re == mre) ? m_rn : u_rn;

	if (IS_ZEBRA_DEBUG_RIB) {
		char buf[BUFSIZ];
		inet_ntop(AF_INET, &addr, buf, BUFSIZ);

		zlog_debug("%s: %s: vrf: %s(%u) found %s, using %s", __func__,
			   buf, vrf_id_to_name(vrf_id), vrf_id,
			   mre ? (ure ? "MRIB+URIB" : "MRIB")
			       : ure ? "URIB" : "nothing",
			   re == ure ? "URIB" : re == mre ? "MRIB" : "none");
	}
	return re;
}

/*
 * IPv6 multicast RPF lookup; mirrors the v4 version above.
 * NOTE(review): this intentionally(?) switches on the v4 multicast mode
 * knob (zrouter.ipv4_multicast_mode) — no separate v6 mode is visible
 * here; confirm this is the shared configuration setting.
 */
struct route_entry *rib_match_ipv6_multicast(vrf_id_t vrf_id,
					     struct in6_addr addr,
					     struct route_node **rn_out)
{
	struct route_entry *re = NULL, *mre = NULL, *ure = NULL;
	struct route_node *m_rn = NULL, *u_rn = NULL;
	union g_addr gaddr = {.ipv6 = addr};

	switch (zrouter.ipv4_multicast_mode) {
	case MCAST_MRIB_ONLY:
		return rib_match(AFI_IP6, SAFI_MULTICAST, vrf_id, &gaddr,
				 rn_out);
	case MCAST_URIB_ONLY:
		return rib_match(AFI_IP6, SAFI_UNICAST, vrf_id, &gaddr, rn_out);
	case MCAST_NO_CONFIG:
	case MCAST_MIX_MRIB_FIRST:
		re = mre = rib_match(AFI_IP6, SAFI_MULTICAST, vrf_id, &gaddr,
				     &m_rn);
		if (!mre)
			re = ure = rib_match(AFI_IP6, SAFI_UNICAST, vrf_id,
					     &gaddr, &u_rn);
		break;
	case MCAST_MIX_DISTANCE:
		mre = rib_match(AFI_IP6, SAFI_MULTICAST, vrf_id, &gaddr, &m_rn);
		ure = rib_match(AFI_IP6, SAFI_UNICAST, vrf_id, &gaddr, &u_rn);
		if (mre && ure)
			re = ure->distance < mre->distance ? ure : mre;
		else if (mre)
			re = mre;
		else if (ure)
			re = ure;
		break;
	case MCAST_MIX_PFXLEN:
		mre = rib_match(AFI_IP6, SAFI_MULTICAST, vrf_id, &gaddr, &m_rn);
		ure = rib_match(AFI_IP6, SAFI_UNICAST, vrf_id, &gaddr, &u_rn);
		if (mre && ure)
			re = u_rn->p.prefixlen > m_rn->p.prefixlen ? ure : mre;
		else if (mre)
			re = mre;
		else if (ure)
			re = ure;
		break;
	}

	if (rn_out)
		*rn_out = (re == mre) ? m_rn : u_rn;

	if (IS_ZEBRA_DEBUG_RIB)
		zlog_debug("%s: %pI6: vrf: %s(%u) found %s, using %s", __func__,
			   &addr, vrf_id_to_name(vrf_id), vrf_id,
			   mre ? (ure ? "MRIB+URIB" : "MRIB")
			       : ure ? "URIB" : "nothing",
			   re == ure ? "URIB" : re == mre ? "MRIB" : "none");
	return re;
}

/*
 * Exact-prefix lookup in the IPv4 unicast table; returns the selected
 * fib entry if it is connected or installed, NULL otherwise.
 */
struct route_entry *rib_lookup_ipv4(struct prefix_ipv4 *p, vrf_id_t vrf_id)
{
	struct route_table *table;
	struct route_node *rn;
	struct route_entry *match = NULL;
	rib_dest_t *dest;

	/* Lookup table. */
	table = zebra_vrf_table(AFI_IP, SAFI_UNICAST, vrf_id);
	if (!table)
		return 0;

	rn = route_node_lookup(table, (struct prefix *)p);

	/* No route for this prefix. */
	if (!rn)
		return NULL;

	/* Unlock node. */
	route_unlock_node(rn);
	dest = rib_dest_from_rnode(rn);

	if (dest && dest->selected_fib
	    && !CHECK_FLAG(dest->selected_fib->status, ROUTE_ENTRY_REMOVED))
		match = dest->selected_fib;

	if (!match)
		return NULL;

	if (match->type == ZEBRA_ROUTE_CONNECT)
		return match;

	if (CHECK_FLAG(match->status, ROUTE_ENTRY_INSTALLED))
		return match;

	return NULL;
}

/*
 * Is this RIB labeled-unicast? It must be of type BGP and all paths
 * (nexthops) must have a label.
 */
int zebra_rib_labeled_unicast(struct route_entry *re)
{
	struct nexthop *nexthop = NULL;

	if (re->type != ZEBRA_ROUTE_BGP)
		return 0;

	for (ALL_NEXTHOPS(re->nhe->nhg, nexthop))
		if (!nexthop->nh_label || !nexthop->nh_label->num_labels)
			return 0;

	return 1;
}

/* Update flag indicates whether this is a "replace" or not. Currently, this
 * is only used for IPv4.
 */
void rib_install_kernel(struct route_node *rn, struct route_entry *re,
			struct route_entry *old)
{
	struct nexthop *nexthop;
	struct rib_table_info *info = srcdest_rnode_table_info(rn);
	struct zebra_vrf *zvrf = vrf_info_lookup(re->vrf_id);
	const struct prefix *p, *src_p;
	enum zebra_dplane_result ret;

	rib_dest_t *dest = rib_dest_from_rnode(rn);

	srcdest_rnode_prefixes(rn, &p, &src_p);

	/* Non-unicast tables are not sent to the dataplane here; just mark
	 * the nexthops as fib-installed and return.
	 */
	if (info->safi != SAFI_UNICAST) {
		for (ALL_NEXTHOPS(re->nhe->nhg, nexthop))
			SET_FLAG(nexthop->flags, NEXTHOP_FLAG_FIB);
		return;
	}


	/*
	 * Install the resolved nexthop object first.
	 */
	zebra_nhg_install_kernel(re->nhe);

	/*
	 * If this is a replace to a new RE let the originator of the RE
	 * know that they've lost
	 */
	if (old && (old != re) && (old->type != re->type))
		zsend_route_notify_owner(rn, old, ZAPI_ROUTE_BETTER_ADMIN_WON,
					 info->afi, info->safi);

	/* Update fib selection */
	dest->selected_fib = re;

	/*
	 * Make sure we update the FPM any time we send new information to
	 * the kernel.
	 */
	hook_call(rib_update, rn, "installing in kernel");

	/* Send add or update */
	if (old)
		ret = dplane_route_update(rn, re, old);
	else
		ret = dplane_route_add(rn, re);

	switch (ret) {
	case ZEBRA_DPLANE_REQUEST_QUEUED:
		SET_FLAG(re->status, ROUTE_ENTRY_QUEUED);

		if (old) {
			SET_FLAG(old->status, ROUTE_ENTRY_QUEUED);

			/* Free old FIB nexthop group */
			UNSET_FLAG(old->status, ROUTE_ENTRY_USE_FIB_NHG);
			if (old->fib_ng.nexthop) {
				nexthops_free(old->fib_ng.nexthop);
				old->fib_ng.nexthop = NULL;
			}
		}

		if (zvrf)
			zvrf->installs_queued++;
		break;
	case ZEBRA_DPLANE_REQUEST_FAILURE:
	{
		flog_err(EC_ZEBRA_DP_INSTALL_FAIL,
			 "%u:%u:%pRN: Failed to enqueue dataplane install",
			 re->vrf_id, re->table, rn);
		break;
	}
	case ZEBRA_DPLANE_REQUEST_SUCCESS:
		/* Synchronous success (no queueing) */
		if (zvrf)
			zvrf->installs++;
		break;
	}

	return;
}

/* Uninstall the route from kernel.
 */
void rib_uninstall_kernel(struct route_node *rn, struct route_entry *re)
{
	struct nexthop *nexthop;
	struct rib_table_info *info = srcdest_rnode_table_info(rn);
	struct zebra_vrf *zvrf = vrf_info_lookup(re->vrf_id);

	/* Non-unicast tables were never pushed to the dataplane; just clear
	 * the installed flags locally.
	 */
	if (info->safi != SAFI_UNICAST) {
		UNSET_FLAG(re->status, ROUTE_ENTRY_INSTALLED);
		for (ALL_NEXTHOPS(re->nhe->nhg, nexthop))
			UNSET_FLAG(nexthop->flags, NEXTHOP_FLAG_FIB);
		return;
	}

	/*
	 * Make sure we update the FPM any time we send new information to
	 * the dataplane.
	 */
	hook_call(rib_update, rn, "uninstalling from kernel");

	switch (dplane_route_delete(rn, re)) {
	case ZEBRA_DPLANE_REQUEST_QUEUED:
		if (zvrf)
			zvrf->removals_queued++;
		break;
	case ZEBRA_DPLANE_REQUEST_FAILURE:
		flog_err(EC_ZEBRA_DP_INSTALL_FAIL,
			 "%u:%pRN: Failed to enqueue dataplane uninstall",
			 re->vrf_id, rn);
		break;
	case ZEBRA_DPLANE_REQUEST_SUCCESS:
		if (zvrf)
			zvrf->removals++;
		break;
	}

	return;
}

/*
 * rib_can_delete_dest
 *
 * Returns true if the given dest can be deleted from the table.
 */
static int rib_can_delete_dest(rib_dest_t *dest)
{
	/* Still holds route entries: cannot delete */
	if (re_list_first(&dest->routes)) {
		return 0;
	}

	/*
	 * Unresolved rnh's are stored on the default route's list
	 *
	 * dest->rnode can also be the source prefix node in an
	 * ipv6 sourcedest table. Fortunately the prefix of a
	 * source prefix node can never be the default prefix.
	 */
	if (is_default_prefix(&dest->rnode->p))
		return 0;

	/*
	 * Don't delete the dest if we have to update the FPM about this
	 * prefix.
	 */
	if (CHECK_FLAG(dest->flags, RIB_DEST_UPDATE_FPM)
	    || CHECK_FLAG(dest->flags, RIB_DEST_SENT_TO_FPM))
		return 0;

	return 1;
}

/*
 * Walk from 'rn' up toward the root, re-evaluating any registered nexthop
 * trackers (rnh) that might now resolve to a different (more/less specific)
 * route.  'seq' de-duplicates work across one evaluation pass; 'rt_delete'
 * bails out early when there is nothing tracked at a node.
 */
void zebra_rib_evaluate_rn_nexthops(struct route_node *rn, uint32_t seq,
				    bool rt_delete)
{
	rib_dest_t *dest = rib_dest_from_rnode(rn);
	struct rnh *rnh;

	/*
	 * We are storing the rnh's associated with
	 * the tracked nexthop as a list of the rn's.
	 * Unresolved rnh's are placed at the top
	 * of the tree list.( 0.0.0.0/0 for v4 and 0::0/0 for v6 )
	 * As such for each rn we need to walk up the tree
	 * and see if any rnh's need to see if they
	 * would match a more specific route
	 */
	while (rn) {
		if (IS_ZEBRA_DEBUG_NHT_DETAILED)
			zlog_debug(
				"%s: %pRN Being examined for Nexthop Tracking Count: %zd",
				__func__, rn,
				dest ? rnh_list_count(&dest->nht) : 0);

		if (rt_delete && (!dest || !rnh_list_count(&dest->nht))) {
			if (IS_ZEBRA_DEBUG_NHT_DETAILED)
				zlog_debug("%pRN has no tracking NHTs. Bailing",
					   rn);
			break;
		}
		if (!dest) {
			rn = rn->parent;
			if (rn)
				dest = rib_dest_from_rnode(rn);
			continue;
		}
		/*
		 * If we have any rnh's stored in the nht list
		 * then we know that this route node was used for
		 * nht resolution and as such we need to call the
		 * nexthop tracking evaluation code
		 */
		frr_each_safe(rnh_list, &dest->nht, rnh) {
			struct zebra_vrf *zvrf =
				zebra_vrf_lookup_by_id(rnh->vrf_id);
			struct prefix *p = &rnh->node->p;

			if (IS_ZEBRA_DEBUG_NHT_DETAILED)
				zlog_debug(
					"%s(%u):%pRN has Nexthop(%pRN) depending on it, evaluating %u:%u",
					zvrf_name(zvrf), zvrf_id(zvrf), rn,
					rnh->node, seq, rnh->seqno);

			/*
			 * If we have evaluated this node on this pass
			 * already, due to following the tree up
			 * then we know that we can move onto the next
			 * rnh to process.
			 *
			 * Additionally we call zebra_evaluate_rnh
			 * when we gc the dest. In this case we know
			 * that there must be no other re's where
			 * we were originally as such we know that
			 * that sequence number is ok to respect.
			 */
			if (rnh->seqno == seq) {
				if (IS_ZEBRA_DEBUG_NHT_DETAILED)
					zlog_debug(
						"    Node processed and moved already");
				continue;
			}

			rnh->seqno = seq;
			zebra_evaluate_rnh(zvrf, family2afi(p->family), 0, p,
					   rnh->safi);
		}

		rn = rn->parent;
		if (rn)
			dest = rib_dest_from_rnode(rn);
	}
}

/*
 * rib_gc_dest
 *
 * Garbage collect the rib dest corresponding to the given route node
 * if appropriate.
 *
 * Returns true if the dest was deleted, false otherwise.
 */
int rib_gc_dest(struct route_node *rn)
{
	rib_dest_t *dest;

	dest = rib_dest_from_rnode(rn);
	if (!dest)
		return 0;

	if (!rib_can_delete_dest(dest))
		return 0;

	if (IS_ZEBRA_DEBUG_RIB) {
		struct zebra_vrf *zvrf;

		zvrf = rib_dest_vrf(dest);
		rnode_debug(rn, zvrf_id(zvrf), "removing dest from table");
	}

	/* Give tracked nexthops a final chance to re-resolve elsewhere */
	zebra_rib_evaluate_rn_nexthops(rn, zebra_router_get_next_sequence(),
				       true);

	dest->rnode = NULL;
	rnh_list_fini(&dest->nht);
	XFREE(MTYPE_RIB_DEST, dest);
	rn->info = NULL;

	/*
	 * Release the one reference that we keep on the route node.
	 */
	route_unlock_node(rn);
	return 1;
}

/*
 * Table teardown hook: unlink every route entry from the node and free the
 * node's dest structure (after notifying the FPM shutdown hook).
 */
void zebra_rtable_node_cleanup(struct route_table *table,
			       struct route_node *node)
{
	struct route_entry *re, *next;

	RNODE_FOREACH_RE_SAFE (node, re, next) {
		rib_unlink(node, re);
	}

	if (node->info) {
		rib_dest_t *dest = node->info;

		/* Remove from update queue of FPM module */
		hook_call(rib_shutdown, node);

		rnh_list_fini(&dest->nht);
		XFREE(MTYPE_RIB_DEST, node->info);
	}
}

/*
 * Install a newly-selected route entry into the fib (no previous entry).
 */
static void rib_process_add_fib(struct zebra_vrf *zvrf, struct route_node *rn,
				struct route_entry *new)
{
	hook_call(rib_update, rn, "new route selected");

	/* Update real nexthop. This may actually determine if nexthop is active
	 * or not.
 */
	if (!nexthop_group_active_nexthop_num(&(new->nhe->nhg))) {
		UNSET_FLAG(new->status, ROUTE_ENTRY_CHANGED);
		return;
	}

	if (IS_ZEBRA_DEBUG_RIB)
		zlog_debug("%s(%u:%u):%pRN: Adding route rn %p, re %p (%s)",
			   zvrf_name(zvrf), zvrf_id(zvrf), new->table, rn, rn,
			   new, zebra_route_string(new->type));

	/* If labeled-unicast route, install transit LSP. */
	if (zebra_rib_labeled_unicast(new))
		zebra_mpls_lsp_install(zvrf, rn, new);

	rib_install_kernel(rn, new, NULL);

	UNSET_FLAG(new->status, ROUTE_ENTRY_CHANGED);
}

/*
 * Remove the currently-selected fib entry from the kernel (no replacement).
 */
static void rib_process_del_fib(struct zebra_vrf *zvrf, struct route_node *rn,
				struct route_entry *old)
{
	hook_call(rib_update, rn, "removing existing route");

	/* Uninstall from kernel. */
	if (IS_ZEBRA_DEBUG_RIB)
		zlog_debug("%s(%u:%u):%pRN: Deleting route rn %p, re %p (%s)",
			   zvrf_name(zvrf), zvrf_id(zvrf), old->table, rn, rn,
			   old, zebra_route_string(old->type));

	/* If labeled-unicast route, uninstall transit LSP. */
	if (zebra_rib_labeled_unicast(old))
		zebra_mpls_lsp_uninstall(zvrf, rn, old);

	rib_uninstall_kernel(rn, old);

	/* Update nexthop for route, reset changed flag. */
	/* Note: this code also handles the Linux case when an interface goes
	 * down, causing the kernel to delete routes without sending DELROUTE
	 * notifications
	 */
	if (RIB_KERNEL_ROUTE(old))
		SET_FLAG(old->status, ROUTE_ENTRY_REMOVED);
	else
		UNSET_FLAG(old->status, ROUTE_ENTRY_CHANGED);
}

/*
 * Replace 'old' with 'new' in the fib (they may be the same entry if only
 * the entry's contents changed).  Handles the nexthop-inactive case by
 * uninstalling, and re-installs unchanged routes that fell out of the fib.
 */
static void rib_process_update_fib(struct zebra_vrf *zvrf,
				   struct route_node *rn,
				   struct route_entry *old,
				   struct route_entry *new)
{
	int nh_active = 0;

	/*
	 * We have to install or update if a new route has been selected or
	 * something has changed.
	 */
	if (new != old || CHECK_FLAG(new->status, ROUTE_ENTRY_CHANGED)) {
		hook_call(rib_update, rn, "updating existing route");

		/* Update the nexthop; we could determine here that nexthop is
		 * inactive.
		 */
		if (nexthop_group_active_nexthop_num(&(new->nhe->nhg)))
			nh_active = 1;

		/* If nexthop is active, install the selected route, if
		 * appropriate. If
		 * the install succeeds, cleanup flags for prior route, if
		 * different from
		 * newly selected.
		 */
		if (nh_active) {
			if (IS_ZEBRA_DEBUG_RIB) {
				if (new != old)
					zlog_debug(
						"%s(%u:%u):%pRN: Updating route rn %p, re %p (%s) old %p (%s)",
						zvrf_name(zvrf), zvrf_id(zvrf),
						new->table, rn, rn, new,
						zebra_route_string(new->type),
						old,
						zebra_route_string(old->type));
				else
					zlog_debug(
						"%s(%u:%u):%pRN: Updating route rn %p, re %p (%s)",
						zvrf_name(zvrf), zvrf_id(zvrf),
						new->table, rn, rn, new,
						zebra_route_string(new->type));
			}

			/* If labeled-unicast route, uninstall transit LSP. */
			if (zebra_rib_labeled_unicast(old))
				zebra_mpls_lsp_uninstall(zvrf, rn, old);

			/*
			 * Non-system route should be installed.
			 * If labeled-unicast route, install transit
			 * LSP.
			 */
			if (zebra_rib_labeled_unicast(new))
				zebra_mpls_lsp_install(zvrf, rn, new);

			rib_install_kernel(rn, new, old);
		}

		/*
		 * If nexthop for selected route is not active or install
		 * failed, we
		 * may need to uninstall and delete for redistribution.
		 */
		if (!nh_active) {
			if (IS_ZEBRA_DEBUG_RIB) {
				if (new != old)
					zlog_debug(
						"%s(%u:%u):%pRN: Deleting route rn %p, re %p (%s) old %p (%s) - nexthop inactive",
						zvrf_name(zvrf), zvrf_id(zvrf),
						new->table, rn, rn, new,
						zebra_route_string(new->type),
						old,
						zebra_route_string(old->type));
				else
					zlog_debug(
						"%s(%u:%u):%pRN: Deleting route rn %p, re %p (%s) - nexthop inactive",
						zvrf_name(zvrf), zvrf_id(zvrf),
						new->table, rn, rn, new,
						zebra_route_string(new->type));
			}

			/*
			 * When we have gotten to this point
			 * the new route entry has no nexthops
			 * that are usable and as such we need
			 * to remove the old route, but only
			 * if we were the one who installed
			 * the old route
			 */
			if (!RIB_SYSTEM_ROUTE(old)) {
				/* If labeled-unicast route, uninstall transit
				 * LSP. */
				if (zebra_rib_labeled_unicast(old))
					zebra_mpls_lsp_uninstall(zvrf, rn, old);

				rib_uninstall_kernel(rn, old);
			}
		}
	} else {
		/*
		 * Same route selected; check if in the FIB and if not,
		 * re-install. This is housekeeping code to deal with
		 * race conditions in kernel with linux netlink reporting
		 * interface up before IPv4 or IPv6 protocol is ready
		 * to add routes.
		 */
		if (!CHECK_FLAG(new->status, ROUTE_ENTRY_INSTALLED) ||
		    RIB_SYSTEM_ROUTE(new))
			rib_install_kernel(rn, new, NULL);
	}

	/* Update prior route. */
	if (new != old)
		UNSET_FLAG(old->status, ROUTE_ENTRY_CHANGED);

	/* Clear changed flag. */
	UNSET_FLAG(new->status, ROUTE_ENTRY_CHANGED);
}

/* Check if 'alternate' RIB entry is better than 'current'. */
static struct route_entry *rib_choose_best(struct route_entry *current,
					   struct route_entry *alternate)
{
	if (current == NULL)
		return alternate;

	/* filter route selection in following order:
	 * - connected beats other types
	 * - if both connected, loopback or vrf wins
	 * - lower distance beats higher
	 * - lower metric beats higher for equal distance
	 * - last, hence oldest, route wins tie break.
	 */

	/* Connected routes. Check to see if either are a vrf
	 * or loopback interface. If not, pick the last connected
	 * route of the set of lowest metric connected routes.
	 */
	if (alternate->type == ZEBRA_ROUTE_CONNECT) {
		if (current->type != ZEBRA_ROUTE_CONNECT)
			return alternate;

		/* both are connected. are either loop or vrf? */
		struct nexthop *nexthop = NULL;

		for (ALL_NEXTHOPS(alternate->nhe->nhg, nexthop)) {
			struct interface *ifp = if_lookup_by_index(
				nexthop->ifindex, alternate->vrf_id);

			if (ifp && if_is_loopback(ifp))
				return alternate;
		}

		for (ALL_NEXTHOPS(current->nhe->nhg, nexthop)) {
			struct interface *ifp = if_lookup_by_index(
				nexthop->ifindex, current->vrf_id);

			if (ifp && if_is_loopback(ifp))
				return current;
		}

		/* Neither are loop or vrf so pick best metric  */
		if (alternate->metric <= current->metric)
			return alternate;

		return current;
	}

	if (current->type == ZEBRA_ROUTE_CONNECT)
		return current;

	/* higher distance loses */
	if (alternate->distance < current->distance)
		return alternate;
	if (current->distance < alternate->distance)
		return current;

	/* metric tie-breaks equal distance */
	if (alternate->metric <= current->metric)
		return alternate;

	return current;
}

/* Core function for processing routing information base.
*/ +static void rib_process(struct route_node *rn) +{ + struct route_entry *re; + struct route_entry *next; + struct route_entry *old_selected = NULL; + struct route_entry *new_selected = NULL; + struct route_entry *old_fib = NULL; + struct route_entry *new_fib = NULL; + struct route_entry *best = NULL; + rib_dest_t *dest; + struct zebra_vrf *zvrf = NULL; + struct vrf *vrf; + + vrf_id_t vrf_id = VRF_UNKNOWN; + + assert(rn); + + dest = rib_dest_from_rnode(rn); + /* + * We have an enqueued node with nothing to process here + * let's just finish up and return; + */ + if (!dest) + return; + + zvrf = rib_dest_vrf(dest); + vrf_id = zvrf_id(zvrf); + + vrf = vrf_lookup_by_id(vrf_id); + + /* + * we can have rn's that have a NULL info pointer + * (dest). As such let's not let the deref happen + * additionally we know RNODE_FOREACH_RE_SAFE + * will not iterate so we are ok. + */ + if (IS_ZEBRA_DEBUG_RIB_DETAILED) { + struct route_entry *re = re_list_first(&dest->routes); + + zlog_debug("%s(%u:%u):%pRN: Processing rn %p", + VRF_LOGNAME(vrf), vrf_id, re->table, rn, + rn); + } + + old_fib = dest->selected_fib; + + RNODE_FOREACH_RE_SAFE (rn, re, next) { + if (IS_ZEBRA_DEBUG_RIB_DETAILED) { + char flags_buf[128]; + char status_buf[128]; + + zlog_debug( + "%s(%u:%u):%pRN: Examine re %p (%s) status: %sflags: %sdist %d metric %d", + VRF_LOGNAME(vrf), vrf_id, re->table, rn, re, + zebra_route_string(re->type), + _dump_re_status(re, status_buf, + sizeof(status_buf)), + zclient_dump_route_flags(re->flags, flags_buf, + sizeof(flags_buf)), + re->distance, re->metric); + } + + /* Currently selected re. */ + if (CHECK_FLAG(re->flags, ZEBRA_FLAG_SELECTED)) { + assert(old_selected == NULL); + old_selected = re; + } + + /* Skip deleted entries from selection */ + if (CHECK_FLAG(re->status, ROUTE_ENTRY_REMOVED)) + continue; + + /* + * If the route entry has changed, verify/resolve + * the nexthops associated with the entry. 
+ * + * In any event if we have nexthops that are not active + * then we cannot use this particular route entry so + * skip it. + */ + if (CHECK_FLAG(re->status, ROUTE_ENTRY_CHANGED)) { + if (!nexthop_active_update(rn, re)) { + const struct prefix *p; + struct rib_table_info *info; + + if (re->type == ZEBRA_ROUTE_TABLE) { + /* XXX: HERE BE DRAGONS!!!!! + * In all honesty, I have not yet + * figured out what this part does or + * why the ROUTE_ENTRY_CHANGED test + * above is correct or why we need to + * delete a route here, and also not + * whether this concerns both selected + * and fib route, or only selected + * or only fib + * + * This entry was denied by the 'ip + * protocol + * table' route-map, we need to delete + * it */ + if (re != old_selected) { + if (IS_ZEBRA_DEBUG_RIB) + zlog_debug( + "%s: %s(%u):%pRN: imported via import-table but denied by the ip protocol table route-map", + __func__, + VRF_LOGNAME( + vrf), + vrf_id, rn); + rib_unlink(rn, re); + } else + SET_FLAG(re->status, + ROUTE_ENTRY_REMOVED); + } + + info = srcdest_rnode_table_info(rn); + srcdest_rnode_prefixes(rn, &p, NULL); + zsend_route_notify_owner( + rn, re, ZAPI_ROUTE_FAIL_INSTALL, + info->afi, info->safi); + continue; + } + } else { + /* + * If the re has not changed and the nhg we have is + * not usable, then we cannot use this route entry + * for consideration, as that the route will just + * not install if it is selected. + */ + if (!nexthop_group_active_nexthop_num(&re->nhe->nhg)) + continue; + } + + /* Infinite distance. 
*/ + if (re->distance == DISTANCE_INFINITY && + re->type != ZEBRA_ROUTE_KERNEL) { + UNSET_FLAG(re->status, ROUTE_ENTRY_CHANGED); + continue; + } + + if (CHECK_FLAG(re->flags, ZEBRA_FLAG_FIB_OVERRIDE)) { + best = rib_choose_best(new_fib, re); + if (new_fib && best != new_fib) + UNSET_FLAG(new_fib->status, + ROUTE_ENTRY_CHANGED); + new_fib = best; + } else { + best = rib_choose_best(new_selected, re); + if (new_selected && best != new_selected) + UNSET_FLAG(new_selected->status, + ROUTE_ENTRY_CHANGED); + new_selected = best; + } + if (best != re) + UNSET_FLAG(re->status, ROUTE_ENTRY_CHANGED); + } /* RNODE_FOREACH_RE */ + + /* If no FIB override route, use the selected route also for FIB */ + if (new_fib == NULL) + new_fib = new_selected; + + /* After the cycle is finished, the following pointers will be set: + * old_selected --- RE entry currently having SELECTED + * new_selected --- RE entry that is newly SELECTED + * old_fib --- RE entry currently in kernel FIB + * new_fib --- RE entry that is newly to be in kernel FIB + * + * new_selected will get SELECTED flag, and is going to be redistributed + * the zclients. new_fib (which can be new_selected) will be installed + * in kernel. + */ + + if (IS_ZEBRA_DEBUG_RIB_DETAILED) { + struct route_entry *entry; + + entry = old_selected + ? old_selected + : new_selected + ? new_selected + : old_fib ? old_fib + : new_fib ? new_fib : NULL; + + zlog_debug( + "%s(%u:%u):%pRN: After processing: old_selected %p new_selected %p old_fib %p new_fib %p", + VRF_LOGNAME(vrf), vrf_id, entry ? 
entry->table : 0, rn, + (void *)old_selected, (void *)new_selected, + (void *)old_fib, (void *)new_fib); + } + + /* Buffer ROUTE_ENTRY_CHANGED here, because it will get cleared if + * fib == selected */ + bool selected_changed = new_selected && CHECK_FLAG(new_selected->status, + ROUTE_ENTRY_CHANGED); + + /* Update SELECTED entry */ + if (old_selected != new_selected || selected_changed) { + + if (new_selected && new_selected != new_fib) + UNSET_FLAG(new_selected->status, ROUTE_ENTRY_CHANGED); + + if (new_selected) + SET_FLAG(new_selected->flags, ZEBRA_FLAG_SELECTED); + + if (old_selected) { + /* + * If we're removing the old entry, we should tell + * redist subscribers about that *if* they aren't + * going to see a redist for the new entry. + */ + if (!new_selected || CHECK_FLAG(old_selected->status, + ROUTE_ENTRY_REMOVED)) + redistribute_delete(rn, old_selected, + new_selected); + + if (old_selected != new_selected) + UNSET_FLAG(old_selected->flags, + ZEBRA_FLAG_SELECTED); + } + } + + /* Update fib according to selection results */ + if (new_fib && old_fib) + rib_process_update_fib(zvrf, rn, old_fib, new_fib); + else if (new_fib) + rib_process_add_fib(zvrf, rn, new_fib); + else if (old_fib) + rib_process_del_fib(zvrf, rn, old_fib); + + /* Remove all RE entries queued for removal */ + RNODE_FOREACH_RE_SAFE (rn, re, next) { + if (CHECK_FLAG(re->status, ROUTE_ENTRY_REMOVED)) { + if (IS_ZEBRA_DEBUG_RIB) { + rnode_debug(rn, vrf_id, "rn %p, removing re %p", + (void *)rn, (void *)re); + } + rib_unlink(rn, re); + } + } + + /* + * Check if the dest can be deleted now. 
+ */ + rib_gc_dest(rn); +} + +static void zebra_rib_evaluate_mpls(struct route_node *rn) +{ + rib_dest_t *dest = rib_dest_from_rnode(rn); + struct zebra_vrf *zvrf = vrf_info_lookup(VRF_DEFAULT); + + if (!dest) + return; + + if (CHECK_FLAG(dest->flags, RIB_DEST_UPDATE_LSPS)) { + if (IS_ZEBRA_DEBUG_MPLS) + zlog_debug( + "%s(%u): Scheduling all LSPs upon RIB completion", + zvrf_name(zvrf), zvrf_id(zvrf)); + zebra_mpls_lsp_schedule(zvrf); + mpls_unmark_lsps_for_processing(rn); + } +} + +/* + * Utility to match route with dplane context data + */ +static bool rib_route_match_ctx(const struct route_entry *re, + const struct zebra_dplane_ctx *ctx, + bool is_update) +{ + bool result = false; + + if (is_update) { + /* + * In 'update' case, we test info about the 'previous' or + * 'old' route + */ + if ((re->type == dplane_ctx_get_old_type(ctx)) && + (re->instance == dplane_ctx_get_old_instance(ctx))) { + result = true; + + /* We use an extra test for statics, and another for + * kernel routes. + */ + if (re->type == ZEBRA_ROUTE_STATIC && + (re->distance != dplane_ctx_get_old_distance(ctx) || + re->tag != dplane_ctx_get_old_tag(ctx))) { + result = false; + } else if (re->type == ZEBRA_ROUTE_KERNEL && + re->metric != + dplane_ctx_get_old_metric(ctx)) { + result = false; + } + } + + } else { + /* + * Ordinary, single-route case using primary context info + */ + if ((dplane_ctx_get_op(ctx) != DPLANE_OP_ROUTE_DELETE) && + CHECK_FLAG(re->status, ROUTE_ENTRY_REMOVED)) { + /* Skip route that's been deleted */ + goto done; + } + + if ((re->type == dplane_ctx_get_type(ctx)) && + (re->instance == dplane_ctx_get_instance(ctx))) { + result = true; + + /* We use an extra test for statics, and another for + * kernel routes. 
			 */
			if (re->type == ZEBRA_ROUTE_STATIC &&
			    (re->distance != dplane_ctx_get_distance(ctx) ||
			     re->tag != dplane_ctx_get_tag(ctx))) {
				result = false;
			} else if (re->type == ZEBRA_ROUTE_KERNEL &&
				   re->metric != dplane_ctx_get_metric(ctx)) {
				result = false;
			} else if (re->type == ZEBRA_ROUTE_CONNECT) {
				/* Connected routes additionally require a
				 * (non-recursive) nexthop-group match.
				 */
				result = nexthop_group_equal_no_recurse(
					&re->nhe->nhg, dplane_ctx_get_ng(ctx));
			}
		}
	}

done:

	return (result);
}

/*
 * Mark all system (kernel-owned) routes at this node as installed; zebra
 * does not push these to the dataplane, so their status is fixed up here.
 */
static void zebra_rib_fixup_system(struct route_node *rn)
{
	struct route_entry *re;

	RNODE_FOREACH_RE(rn, re) {
		struct nexthop *nhop;

		if (!RIB_SYSTEM_ROUTE(re))
			continue;

		if (CHECK_FLAG(re->status, ROUTE_ENTRY_REMOVED))
			continue;

		SET_FLAG(re->status, ROUTE_ENTRY_INSTALLED);
		UNSET_FLAG(re->status, ROUTE_ENTRY_QUEUED);

		for (ALL_NEXTHOPS(re->nhe->nhg, nhop)) {
			if (CHECK_FLAG(nhop->flags, NEXTHOP_FLAG_RECURSIVE))
				continue;

			SET_FLAG(nhop->flags, NEXTHOP_FLAG_FIB);
		}
	}
}

/* Route comparison logic, with various special cases. */
static bool rib_compare_routes(const struct route_entry *re1,
			       const struct route_entry *re2)
{
	if (re1->type != re2->type)
		return false;

	if (re1->instance != re2->instance)
		return false;

	if (re1->type == ZEBRA_ROUTE_KERNEL && re1->metric != re2->metric)
		return false;

	if (CHECK_FLAG(re1->flags, ZEBRA_FLAG_RR_USE_DISTANCE) &&
	    re1->distance != re2->distance)
		return false;

	/* We support multiple connected routes: this supports multiple
	 * v6 link-locals, and we also support multiple addresses in the same
	 * subnet on a single interface.
	 */
	if (re1->type != ZEBRA_ROUTE_CONNECT)
		return true;

	return false;
}

/*
 * Compare nexthop lists from a route and a dplane context; test whether
 * the list installed in the FIB matches the route's list.
 * Set 'changed_p' to 'true' if there were changes to the route's
 * installed nexthops.
 *
 * Return 'false' if any ACTIVE route nexthops are not mentioned in the FIB
 * list.
 */
static bool rib_update_nhg_from_ctx(struct nexthop_group *re_nhg,
				    const struct nexthop_group *ctx_nhg,
				    bool *changed_p)
{
	bool matched_p = true;
	struct nexthop *nexthop, *ctx_nexthop;

	/* Get the first `installed` one to check against.
	 * If the dataplane doesn't set these to be what was actually installed,
	 * it will just be whatever was in re->nhe->nhg?
	 */
	ctx_nexthop = ctx_nhg->nexthop;

	if (CHECK_FLAG(ctx_nexthop->flags, NEXTHOP_FLAG_RECURSIVE)
	    || !CHECK_FLAG(ctx_nexthop->flags, NEXTHOP_FLAG_ACTIVE))
		ctx_nexthop = nexthop_next_active_resolved(ctx_nexthop);

	/* Walk the RIB group in lockstep with the ctx group; both lists are
	 * assumed to be in the same order (see caller's comment).
	 */
	for (ALL_NEXTHOPS_PTR(re_nhg, nexthop)) {

		if (CHECK_FLAG(nexthop->flags, NEXTHOP_FLAG_RECURSIVE))
			continue;

		if (!CHECK_FLAG(nexthop->flags, NEXTHOP_FLAG_ACTIVE))
			continue;

		/* Check for a FIB nexthop corresponding to the RIB nexthop */
		if (!nexthop_same(ctx_nexthop, nexthop)) {
			/* If the FIB doesn't know about the nexthop,
			 * it's not installed
			 */
			if (IS_ZEBRA_DEBUG_RIB_DETAILED ||
			    IS_ZEBRA_DEBUG_NHG_DETAIL) {
				zlog_debug("%s: no ctx match for rib nh %pNHv %s",
					   __func__, nexthop,
					   (CHECK_FLAG(nexthop->flags,
						       NEXTHOP_FLAG_FIB) ?
					    "(FIB)":""));
			}
			matched_p = false;

			if (CHECK_FLAG(nexthop->flags, NEXTHOP_FLAG_FIB))
				*changed_p = true;

			UNSET_FLAG(nexthop->flags, NEXTHOP_FLAG_FIB);

			/* Keep checking nexthops */
			continue;
		}

		if (CHECK_FLAG(ctx_nexthop->flags, NEXTHOP_FLAG_FIB)) {
			if (!CHECK_FLAG(nexthop->flags, NEXTHOP_FLAG_FIB)) {
				if (IS_ZEBRA_DEBUG_NHG_DETAIL)
					zlog_debug("%s: rib nh %pNHv -> installed",
						   __func__, nexthop);

				*changed_p = true;
			}

			SET_FLAG(nexthop->flags, NEXTHOP_FLAG_FIB);
		} else {
			if (CHECK_FLAG(nexthop->flags, NEXTHOP_FLAG_FIB)) {
				if (IS_ZEBRA_DEBUG_NHG_DETAIL)
					zlog_debug("%s: rib nh %pNHv -> uninstalled",
						   __func__, nexthop);

				*changed_p = true;
			}

			UNSET_FLAG(nexthop->flags, NEXTHOP_FLAG_FIB);
		}

		ctx_nexthop = nexthop_next_active_resolved(ctx_nexthop);
	}

	return matched_p;
}

/*
 * Update a route from a dplane context. This consolidates common code
 * that can be used in processing of results from FIB updates, and in
 * async notification processing.
 * The return is 'true' if the installed nexthops changed; 'false' otherwise.
 */
static bool rib_update_re_from_ctx(struct route_entry *re,
				   struct route_node *rn,
				   struct zebra_dplane_ctx *ctx)
{
	struct nexthop *nexthop;
	bool matched;
	const struct nexthop_group *ctxnhg;
	struct nexthop_group *re_nhg;
	bool is_selected = false; /* Is 're' currently the selected re? */
	bool changed_p = false; /* Change to nexthops? */
	rib_dest_t *dest;
	struct vrf *vrf;

	vrf = vrf_lookup_by_id(re->vrf_id);

	dest = rib_dest_from_rnode(rn);
	if (dest)
		is_selected = (re == dest->selected_fib);

	if (IS_ZEBRA_DEBUG_RIB_DETAILED)
		zlog_debug("update_from_ctx: %s(%u:%u):%pRN: %sSELECTED, re %p",
			   VRF_LOGNAME(vrf), re->vrf_id, re->table, rn,
			   (is_selected ? "" : "NOT "), re);

	/* Update zebra's nexthop FIB flag for each nexthop that was installed.
	 * If the installed set differs from the set requested by the rib/owner,
	 * we use the fib-specific nexthop-group to record the actual FIB
	 * status.
	 */
	matched = false;
	ctxnhg = dplane_ctx_get_ng(ctx);

	/* Check route's fib group and incoming notif group for equivalence.
	 *
	 * Let's assume the nexthops are ordered here to save time.
	 */
	/* TODO -- this isn't testing or comparing the FIB flags; we should
	 * do a more explicit loop, checking the incoming notification's flags.
	 */
	if (re->fib_ng.nexthop && ctxnhg->nexthop &&
	    nexthop_group_equal(&re->fib_ng, ctxnhg))
		matched = true;

	/* If the new FIB set matches the existing FIB set, we're done. */
	if (matched) {
		if (IS_ZEBRA_DEBUG_RIB)
			zlog_debug(
				"%s(%u:%u):%pRN update_from_ctx(): existing fib nhg, no change",
				VRF_LOGNAME(vrf), re->vrf_id, re->table, rn);
		goto check_backups;

	} else if (CHECK_FLAG(re->status, ROUTE_ENTRY_USE_FIB_NHG)) {
		/*
		 * Free stale fib list and move on to check the rib nhg.
		 */
		if (IS_ZEBRA_DEBUG_RIB)
			zlog_debug(
				"%s(%u:%u):%pRN update_from_ctx(): replacing fib nhg",
				VRF_LOGNAME(vrf), re->vrf_id, re->table, rn);
		nexthops_free(re->fib_ng.nexthop);
		re->fib_ng.nexthop = NULL;

		UNSET_FLAG(re->status, ROUTE_ENTRY_USE_FIB_NHG);

		/* Note that the installed nexthops have changed */
		changed_p = true;
	} else {
		if (IS_ZEBRA_DEBUG_RIB)
			zlog_debug(
				"%s(%u:%u):%pRN update_from_ctx(): no fib nhg",
				VRF_LOGNAME(vrf), re->vrf_id, re->table, rn);
	}

	/*
	 * Compare with the rib nexthop group. The comparison here is different:
	 * the RIB group may be a superset of the list installed in the FIB. We
	 * walk the RIB group, looking for the 'installable' candidate
	 * nexthops, and then check those against the set
	 * that is actually installed.
	 *
	 * Assume nexthops are ordered here as well.
	 */

	/* If nothing is installed, we can skip some of the checking/comparison
	 * of nexthops.
	 */
	if (ctxnhg->nexthop == NULL) {
		changed_p = true;
		goto no_nexthops;
	}

	matched = rib_update_nhg_from_ctx(&(re->nhe->nhg), ctxnhg, &changed_p);

	/* If all nexthops were processed, we're done */
	if (matched) {
		if (IS_ZEBRA_DEBUG_RIB)
			zlog_debug(
				"%s(%u:%u):%pRN update_from_ctx(): rib nhg matched, changed '%s'",
				VRF_LOGNAME(vrf), re->vrf_id, re->table, rn,
				(changed_p ? "true" : "false"));
		goto check_backups;
	}

no_nexthops:

	/* FIB nexthop set differs from the RIB set:
	 * create a fib-specific nexthop-group
	 */
	if (IS_ZEBRA_DEBUG_RIB)
		zlog_debug(
			"%s(%u:%u):%pRN update_from_ctx(): changed %s, adding new fib nhg%s",
			VRF_LOGNAME(vrf), re->vrf_id, re->table, rn,
			(changed_p ? "true" : "false"),
			ctxnhg->nexthop != NULL ? "" : " (empty)");

	/* Set the flag about the dedicated fib list */
	SET_FLAG(re->status, ROUTE_ENTRY_USE_FIB_NHG);
	if (ctxnhg->nexthop)
		copy_nexthops(&(re->fib_ng.nexthop), ctxnhg->nexthop, NULL);

check_backups:

	/*
	 * Check the status of the route's backup nexthops, if any.
	 * The logic for backups is somewhat different: if any backup is
	 * installed, a new fib nhg will be attached to the route.
	 */
	re_nhg = zebra_nhg_get_backup_nhg(re->nhe);
	if (re_nhg == NULL)
		goto done;	/* No backup nexthops */

	/* First check the route's 'fib' list of backups, if it's present
	 * from some previous event.
	 */
	re_nhg = &re->fib_backup_ng;
	ctxnhg = dplane_ctx_get_backup_ng(ctx);

	matched = false;
	if (re_nhg->nexthop && ctxnhg && nexthop_group_equal(re_nhg, ctxnhg))
		matched = true;

	/* If the new FIB set matches an existing FIB set, we're done. */
	if (matched) {
		if (IS_ZEBRA_DEBUG_RIB)
			zlog_debug(
				"%s(%u):%pRN update_from_ctx(): existing fib backup nhg, no change",
				VRF_LOGNAME(vrf), re->vrf_id, rn);
		goto done;

	} else if (re->fib_backup_ng.nexthop) {
		/*
		 * Free stale fib backup list and move on to check
		 * the route's backups.
		 */
		if (IS_ZEBRA_DEBUG_RIB)
			zlog_debug(
				"%s(%u):%pRN update_from_ctx(): replacing fib backup nhg",
				VRF_LOGNAME(vrf), re->vrf_id, rn);
		nexthops_free(re->fib_backup_ng.nexthop);
		re->fib_backup_ng.nexthop = NULL;

		/* Note that the installed nexthops have changed */
		changed_p = true;
	} else {
		if (IS_ZEBRA_DEBUG_RIB)
			zlog_debug(
				"%s(%u):%pRN update_from_ctx(): no fib backup nhg",
				VRF_LOGNAME(vrf), re->vrf_id, rn);
	}

	/*
	 * If a FIB backup nexthop set exists, attach a copy
	 * to the route if any backup is installed
	 */
	if (ctxnhg && ctxnhg->nexthop) {

		for (ALL_NEXTHOPS_PTR(ctxnhg, nexthop)) {
			if (CHECK_FLAG(nexthop->flags, NEXTHOP_FLAG_FIB))
				break;
		}

		/* If no installed backups, we're done */
		if (nexthop == NULL)
			goto done;

		if (IS_ZEBRA_DEBUG_RIB)
			zlog_debug(
				"%s(%u):%pRN update_from_ctx(): changed %s, adding new backup fib nhg",
				VRF_LOGNAME(vrf), re->vrf_id, rn,
				(changed_p ? "true" : "false"));

		copy_nexthops(&(re->fib_backup_ng.nexthop), ctxnhg->nexthop,
			      NULL);
	}

done:

	return changed_p;
}

/*
 * Helper to locate a zebra route-node from a dplane context. This is used
 * when processing dplane results, e.g. Note well: the route-node is returned
 * with a ref held - route_unlock_node() must be called eventually.
+ */ +struct route_node *rib_find_rn_from_ctx(const struct zebra_dplane_ctx *ctx) +{ + struct route_table *table = NULL; + struct route_node *rn = NULL; + const struct prefix *dest_pfx, *src_pfx; + + /* Locate rn and re(s) from ctx */ + + table = zebra_vrf_lookup_table_with_table_id( + dplane_ctx_get_afi(ctx), dplane_ctx_get_safi(ctx), + dplane_ctx_get_vrf(ctx), dplane_ctx_get_table(ctx)); + if (table == NULL) { + if (IS_ZEBRA_DEBUG_DPLANE) { + zlog_debug( + "Failed to find route for ctx: no table for afi %d, safi %d, vrf %s(%u)", + dplane_ctx_get_afi(ctx), + dplane_ctx_get_safi(ctx), + vrf_id_to_name(dplane_ctx_get_vrf(ctx)), + dplane_ctx_get_vrf(ctx)); + } + goto done; + } + + dest_pfx = dplane_ctx_get_dest(ctx); + src_pfx = dplane_ctx_get_src(ctx); + + rn = srcdest_rnode_get(table, dest_pfx, + src_pfx ? (struct prefix_ipv6 *)src_pfx : NULL); + +done: + return rn; +} + + + +/* + * Route-update results processing after async dataplane update. + */ +static void rib_process_result(struct zebra_dplane_ctx *ctx) +{ + struct zebra_vrf *zvrf = NULL; + struct vrf *vrf; + struct route_node *rn = NULL; + struct route_entry *re = NULL, *old_re = NULL, *rib; + bool is_update = false; + enum dplane_op_e op; + enum zebra_dplane_result status; + uint32_t seq; + rib_dest_t *dest; + bool fib_changed = false; + struct rib_table_info *info; + bool rt_delete = false; + + zvrf = vrf_info_lookup(dplane_ctx_get_vrf(ctx)); + vrf = vrf_lookup_by_id(dplane_ctx_get_vrf(ctx)); + + /* Locate rn and re(s) from ctx */ + rn = rib_find_rn_from_ctx(ctx); + if (rn == NULL) { + if (IS_ZEBRA_DEBUG_DPLANE) { + zlog_debug( + "Failed to process dplane results: no route for %s(%u):%pRN", + VRF_LOGNAME(vrf), dplane_ctx_get_vrf(ctx), rn); + } + goto done; + } + + dest = rib_dest_from_rnode(rn); + info = srcdest_rnode_table_info(rn); + + op = dplane_ctx_get_op(ctx); + status = dplane_ctx_get_status(ctx); + + if (IS_ZEBRA_DEBUG_DPLANE_DETAIL) + zlog_debug( + "%s(%u:%u):%pRN Processing dplane result ctx %p, 
op %s result %s", + VRF_LOGNAME(vrf), dplane_ctx_get_vrf(ctx), + dplane_ctx_get_table(ctx), rn, ctx, dplane_op2str(op), + dplane_res2str(status)); + + /* + * Update is a bit of a special case, where we may have both old and new + * routes to post-process. + */ + is_update = dplane_ctx_is_update(ctx); + + /* + * Take a pass through the routes, look for matches with the context + * info. + */ + RNODE_FOREACH_RE(rn, rib) { + + if (re == NULL) { + if (rib_route_match_ctx(rib, ctx, false)) + re = rib; + } + + /* Check for old route match */ + if (is_update && (old_re == NULL)) { + if (rib_route_match_ctx(rib, ctx, true /*is_update*/)) + old_re = rib; + } + + /* Have we found the routes we need to work on? */ + if (re && ((!is_update || old_re))) + break; + } + + seq = dplane_ctx_get_seq(ctx); + + /* + * Check sequence number(s) to detect stale results before continuing + */ + if (re) { + if (re->dplane_sequence != seq) { + if (IS_ZEBRA_DEBUG_DPLANE_DETAIL) + zlog_debug( + "%s(%u):%pRN Stale dplane result for re %p", + VRF_LOGNAME(vrf), + dplane_ctx_get_vrf(ctx), rn, re); + } else { + if (!zrouter.asic_offloaded || + (CHECK_FLAG(re->flags, ZEBRA_FLAG_OFFLOADED) || + CHECK_FLAG(re->flags, ZEBRA_FLAG_OFFLOAD_FAILED))) + UNSET_FLAG(re->status, ROUTE_ENTRY_QUEUED); + } + } + + if (old_re) { + if (old_re->dplane_sequence != dplane_ctx_get_old_seq(ctx)) { + if (IS_ZEBRA_DEBUG_DPLANE_DETAIL) + zlog_debug( + "%s(%u:%u):%pRN Stale dplane result for old_re %p", + VRF_LOGNAME(vrf), + dplane_ctx_get_vrf(ctx), old_re->table, + rn, old_re); + } else + UNSET_FLAG(old_re->status, ROUTE_ENTRY_QUEUED); + } + + switch (op) { + case DPLANE_OP_ROUTE_INSTALL: + case DPLANE_OP_ROUTE_UPDATE: + if (status == ZEBRA_DPLANE_REQUEST_SUCCESS) { + if (re) { + UNSET_FLAG(re->status, ROUTE_ENTRY_FAILED); + SET_FLAG(re->status, ROUTE_ENTRY_INSTALLED); + } + /* + * On an update operation from the same route type + * context retrieval currently has no way to know + * which was the old and which was the 
new. + * So don't unset our flags that we just set. + * We know redistribution is ok because the + * old_re in this case is used for nothing + * more than knowing whom to contact if necessary. + */ + if (old_re && old_re != re) { + UNSET_FLAG(old_re->status, ROUTE_ENTRY_FAILED); + UNSET_FLAG(old_re->status, + ROUTE_ENTRY_INSTALLED); + } + + /* Update zebra route based on the results in + * the context struct. + */ + if (re) { + fib_changed = + rib_update_re_from_ctx(re, rn, ctx); + + if (!fib_changed) { + if (IS_ZEBRA_DEBUG_DPLANE_DETAIL) + zlog_debug( + "%s(%u:%u):%pRN no fib change for re", + VRF_LOGNAME(vrf), + dplane_ctx_get_vrf(ctx), + dplane_ctx_get_table( + ctx), + rn); + } + + /* Redistribute if this is the selected re */ + if (dest && re == dest->selected_fib) + redistribute_update(rn, re, old_re); + } + + /* + * System routes are weird in that they + * allow multiple to be installed that match + * to the same prefix, so after we get the + * result we need to clean them up so that + * we can actually use them. 
+ */ + if ((re && RIB_SYSTEM_ROUTE(re)) || + (old_re && RIB_SYSTEM_ROUTE(old_re))) + zebra_rib_fixup_system(rn); + + if (zvrf) + zvrf->installs++; + + /* Notify route owner */ + if (zebra_router_notify_on_ack()) + zsend_route_notify_owner_ctx(ctx, ZAPI_ROUTE_INSTALLED); + else { + if (re) { + if (CHECK_FLAG(re->flags, + ZEBRA_FLAG_OFFLOADED)) + zsend_route_notify_owner_ctx( + ctx, + ZAPI_ROUTE_INSTALLED); + if (CHECK_FLAG( + re->flags, + ZEBRA_FLAG_OFFLOAD_FAILED)) + zsend_route_notify_owner_ctx( + ctx, + ZAPI_ROUTE_FAIL_INSTALL); + } + } + } else { + if (re) { + SET_FLAG(re->status, ROUTE_ENTRY_FAILED); + UNSET_FLAG(re->status, ROUTE_ENTRY_INSTALLED); + } if (old_re) + SET_FLAG(old_re->status, ROUTE_ENTRY_FAILED); + if (re) + zsend_route_notify_owner( + rn, re, ZAPI_ROUTE_FAIL_INSTALL, + info->afi, info->safi); + + zlog_warn("%s(%u:%u):%pRN: Route install failed", + VRF_LOGNAME(vrf), dplane_ctx_get_vrf(ctx), + dplane_ctx_get_table(ctx), rn); + } + break; + case DPLANE_OP_ROUTE_DELETE: + rt_delete = true; + if (re) + SET_FLAG(re->status, ROUTE_ENTRY_FAILED); + /* + * In the delete case, the zebra core datastructs were + * updated (or removed) at the time the delete was issued, + * so we're just notifying the route owner. + */ + if (status == ZEBRA_DPLANE_REQUEST_SUCCESS) { + if (re) { + UNSET_FLAG(re->status, ROUTE_ENTRY_INSTALLED); + UNSET_FLAG(re->status, ROUTE_ENTRY_FAILED); + } + zsend_route_notify_owner_ctx(ctx, ZAPI_ROUTE_REMOVED); + + if (zvrf) + zvrf->removals++; + } else { + if (re) + SET_FLAG(re->status, ROUTE_ENTRY_FAILED); + zsend_route_notify_owner_ctx(ctx, + ZAPI_ROUTE_REMOVE_FAIL); + + zlog_warn("%s(%u:%u):%pRN: Route Deletion failure", + VRF_LOGNAME(vrf), dplane_ctx_get_vrf(ctx), + dplane_ctx_get_table(ctx), rn); + } + + /* + * System routes are weird in that they + * allow multiple to be installed that match + * to the same prefix, so after we get the + * result we need to clean them up so that + * we can actually use them. 
+ */ + if ((re && RIB_SYSTEM_ROUTE(re)) || + (old_re && RIB_SYSTEM_ROUTE(old_re))) + zebra_rib_fixup_system(rn); + break; + default: + break; + } + + zebra_rib_evaluate_rn_nexthops(rn, seq, rt_delete); + zebra_rib_evaluate_mpls(rn); +done: + + if (rn) + route_unlock_node(rn); +} + +/* + * Count installed/FIB nexthops + */ +static int rib_count_installed_nh(struct route_entry *re) +{ + int count = 0; + struct nexthop *nexthop; + struct nexthop_group *nhg; + + nhg = rib_get_fib_nhg(re); + + for (ALL_NEXTHOPS_PTR(nhg, nexthop)) { + /* The meaningful flag depends on where the installed + * nexthops reside. + */ + if (nhg == &(re->fib_ng)) { + if (CHECK_FLAG(nexthop->flags, NEXTHOP_FLAG_FIB)) + count++; + } else { + if (CHECK_FLAG(nexthop->flags, NEXTHOP_FLAG_ACTIVE)) + count++; + } + } + + nhg = rib_get_fib_backup_nhg(re); + if (nhg) { + for (ALL_NEXTHOPS_PTR(nhg, nexthop)) { + if (CHECK_FLAG(nexthop->flags, NEXTHOP_FLAG_FIB)) + count++; + } + } + + return count; +} + +/* + * Handle notification from async dataplane: the dataplane has detected + * some change to a route, and notifies zebra so that the control plane + * can reflect that change. 
+ */ +static void rib_process_dplane_notify(struct zebra_dplane_ctx *ctx) +{ + struct route_node *rn = NULL; + struct route_entry *re = NULL; + struct vrf *vrf; + struct nexthop *nexthop; + rib_dest_t *dest; + bool fib_changed = false; + bool debug_p = IS_ZEBRA_DEBUG_DPLANE | IS_ZEBRA_DEBUG_RIB; + int start_count, end_count; + + vrf = vrf_lookup_by_id(dplane_ctx_get_vrf(ctx)); + + /* Locate rn and re(s) from ctx */ + rn = rib_find_rn_from_ctx(ctx); + if (rn == NULL) { + if (debug_p) { + zlog_debug( + "Failed to process dplane notification: no routes for %s(%u:%u):%pRN", + VRF_LOGNAME(vrf), dplane_ctx_get_vrf(ctx), + dplane_ctx_get_table(ctx), rn); + } + goto done; + } + + dest = rib_dest_from_rnode(rn); + + if (debug_p) + zlog_debug("%s(%u:%u):%pRN Processing dplane notif ctx %p", + VRF_LOGNAME(vrf), dplane_ctx_get_vrf(ctx), + dplane_ctx_get_table(ctx), rn, ctx); + + /* + * Take a pass through the routes, look for matches with the context + * info. + */ + RNODE_FOREACH_RE(rn, re) { + if (rib_route_match_ctx(re, ctx, false /*!update*/)) + break; + } + + /* No match? Nothing we can do */ + if (re == NULL) { + if (debug_p) + zlog_debug( + "%s(%u:%u):%pRN Unable to process dplane notification: no entry for type %s", + VRF_LOGNAME(vrf), dplane_ctx_get_vrf(ctx), + dplane_ctx_get_table(ctx), rn, + zebra_route_string(dplane_ctx_get_type(ctx))); + + goto done; + } + + /* Ensure we clear the QUEUED flag */ + if (!zrouter.asic_offloaded) + UNSET_FLAG(re->status, ROUTE_ENTRY_QUEUED); + + /* Is this a notification that ... matters? We mostly care about + * the route that is currently selected for installation; we may also + * get an un-install notification, and handle that too. + */ + if (re != dest->selected_fib) { + /* + * If we need to, clean up after a delete that was part of + * an update operation. 
+ */ + end_count = 0; + for (ALL_NEXTHOPS_PTR(dplane_ctx_get_ng(ctx), nexthop)) { + if (CHECK_FLAG(nexthop->flags, NEXTHOP_FLAG_FIB)) + end_count++; + } + + /* If no nexthops or none installed, ensure that this re + * gets its 'installed' flag cleared. + */ + if (end_count == 0) { + if (CHECK_FLAG(re->status, ROUTE_ENTRY_INSTALLED)) + UNSET_FLAG(re->status, ROUTE_ENTRY_INSTALLED); + if (debug_p) + zlog_debug( + "%s(%u:%u):%pRN dplane notif, uninstalled type %s route", + VRF_LOGNAME(vrf), + dplane_ctx_get_vrf(ctx), + dplane_ctx_get_table(ctx), rn, + zebra_route_string( + dplane_ctx_get_type(ctx))); + } else { + /* At least report on the event. */ + if (debug_p) + zlog_debug( + "%s(%u:%u):%pRN dplane notif, but type %s not selected_fib", + VRF_LOGNAME(vrf), + dplane_ctx_get_vrf(ctx), + dplane_ctx_get_table(ctx), rn, + zebra_route_string( + dplane_ctx_get_type(ctx))); + } + goto done; + } + + /* We'll want to determine whether the installation status of the + * route has changed: we'll check the status before processing, + * and then again if there's been a change. + */ + start_count = 0; + + if (CHECK_FLAG(re->status, ROUTE_ENTRY_INSTALLED)) + start_count = rib_count_installed_nh(re); + + /* Update zebra's nexthop FIB flags based on the context struct's + * nexthops. + */ + fib_changed = rib_update_re_from_ctx(re, rn, ctx); + + if (!fib_changed) { + if (debug_p) + zlog_debug( + "%s(%u:%u):%pRN dplane notification: rib_update returns FALSE", + VRF_LOGNAME(vrf), dplane_ctx_get_vrf(ctx), + dplane_ctx_get_table(ctx), rn); + } + + /* + * Perform follow-up work if the actual status of the prefix + * changed. + */ + end_count = rib_count_installed_nh(re); + + /* Various fib transitions: changed nexthops; from installed to + * not-installed; or not-installed to installed. 
+ */
+ if (start_count > 0 && end_count > 0) {
+ if (debug_p)
+ zlog_debug(
+ "%s(%u:%u):%pRN applied nexthop changes from dplane notification",
+ VRF_LOGNAME(vrf), dplane_ctx_get_vrf(ctx),
+ dplane_ctx_get_table(ctx), rn);
+
+ /* Changed nexthops - update kernel/others */
+ dplane_route_notif_update(rn, re,
+ DPLANE_OP_ROUTE_UPDATE, ctx);
+
+ } else if (start_count == 0 && end_count > 0) {
+ if (debug_p)
+ zlog_debug(
+ "%s(%u:%u):%pRN installed transition from dplane notification",
+ VRF_LOGNAME(vrf), dplane_ctx_get_vrf(ctx),
+ dplane_ctx_get_table(ctx), rn);
+
+ /* We expect this to be the selected route, so we want
+ * to tell others about this transition.
+ */
+ SET_FLAG(re->status, ROUTE_ENTRY_INSTALLED);
+
+ /* Changed nexthops - update kernel/others */
+ dplane_route_notif_update(rn, re, DPLANE_OP_ROUTE_UPDATE, ctx);
+
+ /* Redistribute, lsp, and nht update */
+ redistribute_update(rn, re, NULL);
+
+ } else if (start_count > 0 && end_count == 0) {
+ if (debug_p)
+ zlog_debug(
+ "%s(%u:%u):%pRN un-installed transition from dplane notification",
+ VRF_LOGNAME(vrf), dplane_ctx_get_vrf(ctx),
+ dplane_ctx_get_table(ctx), rn);
+
+ /* Transition from _something_ installed to _nothing_
+ * installed.
+ */
+ /* We expect this to be the selected route, so we want
+ * to tell others about this transition.
+ */
+ UNSET_FLAG(re->status, ROUTE_ENTRY_INSTALLED);
+
+ /* Changed nexthops - update kernel/others */
+ dplane_route_notif_update(rn, re, DPLANE_OP_ROUTE_DELETE, ctx);
+
+ /* Redistribute, lsp, and nht update */
+ redistribute_delete(rn, re, NULL);
+ }
+
+ /* Make any changes visible for lsp and nexthop-tracking processing */
+ zebra_rib_evaluate_rn_nexthops(rn, zebra_router_get_next_sequence(),
+ false);
+
+ zebra_rib_evaluate_mpls(rn);
+
+done:
+ if (rn)
+ route_unlock_node(rn);
+}
+
+/*
+ * Process a node from the EVPN/VXLAN subqueue. 
+ */ +static void process_subq_evpn(struct listnode *lnode) +{ + struct wq_evpn_wrapper *w; + + /* In general, the list node points to a wrapper object + * holding the info necessary to make some update. + */ + w = listgetdata(lnode); + if (!w) + return; + + if (w->type == WQ_EVPN_WRAPPER_TYPE_VRFROUTE) { + if (w->add_p) + zebra_vxlan_evpn_vrf_route_add(w->vrf_id, &w->macaddr, + &w->ip, &w->prefix); + else + zebra_vxlan_evpn_vrf_route_del(w->vrf_id, &w->ip, + &w->prefix); + } else if (w->type == WQ_EVPN_WRAPPER_TYPE_REM_ES) { + if (w->add_p) + zebra_evpn_remote_es_add(&w->esi, w->ip.ipaddr_v4, + w->esr_rxed, w->df_alg, + w->df_pref); + else + zebra_evpn_remote_es_del(&w->esi, w->ip.ipaddr_v4); + } else if (w->type == WQ_EVPN_WRAPPER_TYPE_REM_MACIP) { + uint16_t ipa_len = 0; + + if (w->ip.ipa_type == IPADDR_V4) + ipa_len = IPV4_MAX_BYTELEN; + else if (w->ip.ipa_type == IPADDR_V6) + ipa_len = IPV6_MAX_BYTELEN; + + if (w->add_p) + zebra_evpn_rem_macip_add(w->vni, &w->macaddr, ipa_len, + &w->ip, w->flags, w->seq, + w->vtep_ip, &w->esi); + else + zebra_evpn_rem_macip_del(w->vni, &w->macaddr, ipa_len, + &w->ip, w->vtep_ip); + } else if (w->type == WQ_EVPN_WRAPPER_TYPE_REM_VTEP) { + if (w->add_p) + zebra_vxlan_remote_vtep_add(w->vrf_id, w->vni, + w->vtep_ip, w->flags); + else + zebra_vxlan_remote_vtep_del(w->vrf_id, w->vni, + w->vtep_ip); + } + + + XFREE(MTYPE_WQ_WRAPPER, w); +} + +/* + * Process the nexthop-group workqueue subqueue + */ +static void process_subq_nhg(struct listnode *lnode) +{ + struct nhg_ctx *ctx; + struct nhg_hash_entry *nhe, *newnhe; + struct wq_nhg_wrapper *w; + uint8_t qindex = META_QUEUE_NHG; + + w = listgetdata(lnode); + + if (!w) + return; + + /* Two types of object - an update from the local kernel, or + * an nhg update from a daemon. 
+ */ + if (w->type == WQ_NHG_WRAPPER_TYPE_CTX) { + ctx = w->u.ctx; + + if (IS_ZEBRA_DEBUG_RIB_DETAILED) + zlog_debug( + "NHG Context id=%u dequeued from sub-queue %s", + ctx->id, subqueue2str(qindex)); + + + /* Process nexthop group updates coming 'up' from the OS */ + nhg_ctx_process(ctx); + + } else if (w->type == WQ_NHG_WRAPPER_TYPE_NHG) { + nhe = w->u.nhe; + + if (IS_ZEBRA_DEBUG_RIB_DETAILED) + zlog_debug("NHG %u dequeued from sub-queue %s", nhe->id, + subqueue2str(qindex)); + + /* Process incoming nhg update, probably from a proto daemon */ + newnhe = zebra_nhg_proto_add(nhe->id, nhe->type, + nhe->zapi_instance, + nhe->zapi_session, &nhe->nhg, 0); + + /* Report error to daemon via ZAPI */ + if (newnhe == NULL) + zsend_nhg_notify(nhe->type, nhe->zapi_instance, + nhe->zapi_session, nhe->id, + ZAPI_NHG_FAIL_INSTALL); + + /* Free temp nhe - we own that memory. */ + zebra_nhg_free(nhe); + } + + XFREE(MTYPE_WQ_WRAPPER, w); +} + +static void process_subq_early_label(struct listnode *lnode) +{ + struct wq_label_wrapper *w = listgetdata(lnode); + struct zebra_vrf *zvrf; + + if (!w) + return; + + zvrf = vrf_info_lookup(w->vrf_id); + if (!zvrf) { + XFREE(MTYPE_WQ_WRAPPER, w); + return; + } + + switch (w->type) { + case WQ_LABEL_FTN_UNINSTALL: + zebra_mpls_ftn_uninstall(zvrf, w->ltype, &w->p, w->route_type, + w->route_instance); + break; + case WQ_LABEL_LABELS_PROCESS: + zebra_mpls_zapi_labels_process(w->add_p, zvrf, &w->zl); + break; + } + + XFREE(MTYPE_WQ_WRAPPER, w); +} + +static void process_subq_route(struct listnode *lnode, uint8_t qindex) +{ + struct route_node *rnode = NULL; + rib_dest_t *dest = NULL; + struct zebra_vrf *zvrf = NULL; + + rnode = listgetdata(lnode); + dest = rib_dest_from_rnode(rnode); + assert(dest); + + zvrf = rib_dest_vrf(dest); + + rib_process(rnode); + + if (IS_ZEBRA_DEBUG_RIB_DETAILED) { + struct route_entry *re = NULL; + + /* + * rib_process may have freed the dest + * as part of the garbage collection. 
Let's + * prevent stupidity from happening. + */ + dest = rib_dest_from_rnode(rnode); + if (dest) + re = re_list_first(&dest->routes); + + zlog_debug("%s(%u:%u):%pRN rn %p dequeued from sub-queue %s", + zvrf_name(zvrf), zvrf_id(zvrf), re ? re->table : 0, + rnode, rnode, subqueue2str(qindex)); + } + + if (rnode->info) + UNSET_FLAG(rib_dest_from_rnode(rnode)->flags, + RIB_ROUTE_QUEUED(qindex)); + + route_unlock_node(rnode); +} + +static void rib_re_nhg_free(struct route_entry *re) +{ + if (re->nhe && re->nhe_id) { + assert(re->nhe->id == re->nhe_id); + route_entry_update_nhe(re, NULL); + } else if (re->nhe && re->nhe->nhg.nexthop) + nexthops_free(re->nhe->nhg.nexthop); + + nexthops_free(re->fib_ng.nexthop); +} + +struct zebra_early_route { + afi_t afi; + safi_t safi; + struct prefix p; + struct prefix_ipv6 src_p; + bool src_p_provided; + struct route_entry *re; + struct nhg_hash_entry *re_nhe; + bool startup; + bool deletion; + bool fromkernel; +}; + +static void early_route_memory_free(struct zebra_early_route *ere) +{ + if (ere->re_nhe) + zebra_nhg_free(ere->re_nhe); + + XFREE(MTYPE_RE, ere->re); + XFREE(MTYPE_WQ_WRAPPER, ere); +} + +static void process_subq_early_route_add(struct zebra_early_route *ere) +{ + struct route_entry *re = ere->re; + struct route_table *table; + struct nhg_hash_entry *nhe = NULL; + struct route_node *rn; + struct route_entry *same = NULL, *first_same = NULL; + int same_count = 0; + rib_dest_t *dest; + + /* Lookup table. */ + table = zebra_vrf_get_table_with_table_id(ere->afi, ere->safi, + re->vrf_id, re->table); + if (!table) { + early_route_memory_free(ere); + return; + } + + if (re->nhe_id > 0) { + nhe = zebra_nhg_lookup_id(re->nhe_id); + + if (!nhe) { + /* + * We've received from the kernel a nexthop id + * that we don't have saved yet. More than likely + * it has not been processed and is on the + * queue to be processed. Let's stop what we + * are doing and cause the meta q to be processed + * storing this for later. 
+ *
+ * This is being done this way because zebra
+ * runs with the assumption that it is processed in order
+ */
+ flog_err(
+ EC_ZEBRA_TABLE_LOOKUP_FAILED,
+ "Zebra failed to find the nexthop hash entry for id=%u in a route entry %pFX",
+ re->nhe_id, &ere->p);
+
+ early_route_memory_free(ere);
+ return;
+ }
+ } else {
+ /* Lookup nhe from route information */
+ nhe = zebra_nhg_rib_find_nhe(ere->re_nhe, ere->afi);
+ if (!nhe) {
+ char buf2[PREFIX_STRLEN] = "";
+
+ flog_err(
+ EC_ZEBRA_TABLE_LOOKUP_FAILED,
+ "Zebra failed to find or create a nexthop hash entry for %pFX%s%s",
+ &ere->p, ere->src_p_provided ? " from " : "",
+ ere->src_p_provided
+ ? prefix2str(&ere->src_p, buf2,
+ sizeof(buf2))
+ : "");
+
+ early_route_memory_free(ere);
+ return;
+ }
+ }
+
+ /*
+ * Attach the re to the nhe's nexthop group.
+ *
+ * TODO: This will need to change when we start getting IDs from upper
+ * level protocols, as the refcnt might be wrong, since it checks
+ * if old_id != new_id.
+ */
+ route_entry_update_nhe(re, nhe);
+
+ /* Make sure the prefixlen is applied to the prefix. */
+ apply_mask(&ere->p);
+ if (ere->src_p_provided)
+ apply_mask_ipv6(&ere->src_p);
+
+ /* Set default distance by route type. */
+ if (re->distance == 0)
+ re->distance = route_distance(re->type);
+
+ /* Lookup route node.*/
+ rn = srcdest_rnode_get(table, &ere->p,
+ ere->src_p_provided ? &ere->src_p : NULL);
+
+ /*
+ * If the same type of route is installed, treat it as an implicit
+ * withdraw. If the user has specified the No route replace semantics
+ * for the install don't do a route replace. 
+ */ + RNODE_FOREACH_RE (rn, same) { + if (CHECK_FLAG(same->status, ROUTE_ENTRY_REMOVED)) { + same_count++; + continue; + } + + /* Compare various route_entry properties */ + if (rib_compare_routes(re, same)) { + same_count++; + + if (first_same == NULL) + first_same = same; + } + } + + same = first_same; + + if (!ere->startup && (re->flags & ZEBRA_FLAG_SELFROUTE) && + zrouter.asic_offloaded) { + if (!same) { + if (IS_ZEBRA_DEBUG_RIB) + zlog_debug( + "prefix: %pRN is a self route where we do not have an entry for it. Dropping this update, it's useless", + rn); + /* + * We are not on startup, this is a self route + * and we have asic offload. Which means + * we are getting a callback for a entry + * that was already deleted to the kernel + * but an earlier response was just handed + * back. Drop it on the floor + */ + early_route_memory_free(ere); + return; + } + } + + /* If this route is kernel/connected route, notify the dataplane. */ + if (RIB_SYSTEM_ROUTE(re)) { + /* Notify dataplane */ + dplane_sys_route_add(rn, re); + } + + /* Link new re to node.*/ + if (IS_ZEBRA_DEBUG_RIB) { + rnode_debug( + rn, re->vrf_id, + "Inserting route rn %p, re %p (%s) existing %p, same_count %d", + rn, re, zebra_route_string(re->type), same, same_count); + + if (IS_ZEBRA_DEBUG_RIB_DETAILED) + route_entry_dump( + &ere->p, + ere->src_p_provided ? &ere->src_p : NULL, re); + } + + SET_FLAG(re->status, ROUTE_ENTRY_CHANGED); + rib_addnode(rn, re, 1); + + /* Free implicit route.*/ + if (same) + rib_delnode(rn, same); + + /* See if we can remove some RE entries that are queued for + * removal, but won't be considered in rib processing. + */ + dest = rib_dest_from_rnode(rn); + RNODE_FOREACH_RE_SAFE (rn, re, same) { + if (CHECK_FLAG(re->status, ROUTE_ENTRY_REMOVED)) { + /* If the route was used earlier, must retain it. 
*/ + if (dest && re == dest->selected_fib) + continue; + + if (IS_ZEBRA_DEBUG_RIB) + rnode_debug(rn, re->vrf_id, + "rn %p, removing unneeded re %p", + rn, re); + + rib_unlink(rn, re); + } + } + + route_unlock_node(rn); + if (ere->re_nhe) + zebra_nhg_free(ere->re_nhe); + XFREE(MTYPE_WQ_WRAPPER, ere); +} + +static void process_subq_early_route_delete(struct zebra_early_route *ere) +{ + struct route_table *table; + struct route_node *rn; + struct route_entry *re; + struct route_entry *fib = NULL; + struct route_entry *same = NULL; + struct nexthop *rtnh; + char buf2[INET6_ADDRSTRLEN]; + rib_dest_t *dest; + + if (ere->src_p_provided) + assert(!ere->src_p.prefixlen || ere->afi == AFI_IP6); + + /* Lookup table. */ + table = zebra_vrf_lookup_table_with_table_id( + ere->afi, ere->safi, ere->re->vrf_id, ere->re->table); + if (!table) { + early_route_memory_free(ere); + return; + } + + /* Apply mask. */ + apply_mask(&ere->p); + if (ere->src_p_provided) + apply_mask_ipv6(&ere->src_p); + + /* Lookup route node. */ + rn = srcdest_rnode_lookup(table, &ere->p, + ere->src_p_provided ? &ere->src_p : NULL); + if (!rn) { + if (IS_ZEBRA_DEBUG_RIB) { + char src_buf[PREFIX_STRLEN]; + struct vrf *vrf = vrf_lookup_by_id(ere->re->vrf_id); + + if (ere->src_p_provided && ere->src_p.prefixlen) + prefix2str(&ere->src_p, src_buf, + sizeof(src_buf)); + else + src_buf[0] = '\0'; + + zlog_debug("%s[%d]:%pRN%s%s doesn't exist in rib", + vrf->name, ere->re->table, rn, + (src_buf[0] != '\0') ? " from " : "", + src_buf); + } + early_route_memory_free(ere); + return; + } + + dest = rib_dest_from_rnode(rn); + fib = dest->selected_fib; + + struct nexthop *nh = NULL; + + if (ere->re->nhe) + nh = ere->re->nhe->nhg.nexthop; + + /* Lookup same type route. 
*/
+ RNODE_FOREACH_RE (rn, re) {
+ if (CHECK_FLAG(re->status, ROUTE_ENTRY_REMOVED))
+ continue;
+
+ if (re->type != ere->re->type)
+ continue;
+ if (re->instance != ere->re->instance)
+ continue;
+ if (CHECK_FLAG(re->flags, ZEBRA_FLAG_RR_USE_DISTANCE) &&
+ ere->re->distance != re->distance)
+ continue;
+
+ if (re->type == ZEBRA_ROUTE_KERNEL &&
+ re->metric != ere->re->metric)
+ continue;
+ if (re->type == ZEBRA_ROUTE_CONNECT && (rtnh = nh) &&
+ rtnh->type == NEXTHOP_TYPE_IFINDEX && nh) {
+ if (rtnh->ifindex != nh->ifindex)
+ continue;
+ same = re;
+ break;
+ }
+
+ /* Make sure that the route found has the same gateway. */
+ if (ere->re->nhe_id && re->nhe_id == ere->re->nhe_id) {
+ same = re;
+ break;
+ }
+
+ if (nh == NULL) {
+ same = re;
+ break;
+ }
+ for (ALL_NEXTHOPS(re->nhe->nhg, rtnh)) {
+ /*
+ * No guarantee all kernels send nh with labels
+ * on delete.
+ */
+ if (nexthop_same_no_labels(rtnh, nh)) {
+ same = re;
+ break;
+ }
+ }
+
+ if (same)
+ break;
+ }
+ /*
+ * If same type of route can't be found and this message is from
+ * kernel.
+ */
+ if (!same) {
+ /*
+ * In the past(HA!) we could get here because
+ * we were receiving a route delete from the
+ * kernel and we're not marking the proto
+ * as coming from its appropriate originator.
+ * Now that we are properly noticing the fact
+ * that the kernel has deleted our route we
+ * are not going to get called in this path
+ * I am going to leave this here because
+ * this might still work this way on non-linux
+ * platforms as well as some weird state I have
+ * not properly thought of yet.
+ * If we can show that this code path is
+ * dead then we can remove it. 
+ */ + if (fib && CHECK_FLAG(ere->re->flags, ZEBRA_FLAG_SELFROUTE)) { + if (IS_ZEBRA_DEBUG_RIB) { + rnode_debug( + rn, ere->re->vrf_id, + "rn %p, re %p (%s) was deleted from kernel, adding", + rn, fib, zebra_route_string(fib->type)); + } + if (zrouter.allow_delete || + CHECK_FLAG(dest->flags, RIB_ROUTE_ANY_QUEUED)) { + UNSET_FLAG(fib->status, ROUTE_ENTRY_INSTALLED); + /* Unset flags. */ + for (rtnh = fib->nhe->nhg.nexthop; rtnh; + rtnh = rtnh->next) + UNSET_FLAG(rtnh->flags, + NEXTHOP_FLAG_FIB); + + /* + * This is a non FRR route + * as such we should mark + * it as deleted + */ + dest->selected_fib = NULL; + } else { + /* + * This means someone else, other than Zebra, + * has deleted a Zebra router from the kernel. + * We will add it back + */ + rib_install_kernel(rn, fib, NULL); + } + } else { + if (IS_ZEBRA_DEBUG_RIB) { + if (nh) + rnode_debug( + rn, ere->re->vrf_id, + "via %s ifindex %d type %d doesn't exist in rib", + inet_ntop(afi2family(ere->afi), + &nh->gate, buf2, + sizeof(buf2)), + nh->ifindex, ere->re->type); + else + rnode_debug( + rn, ere->re->vrf_id, + "type %d doesn't exist in rib", + ere->re->type); + } + route_unlock_node(rn); + early_route_memory_free(ere); + return; + } + } + + if (same) { + struct nexthop *tmp_nh; + + if (ere->fromkernel && + CHECK_FLAG(ere->re->flags, ZEBRA_FLAG_SELFROUTE) && + !zrouter.allow_delete) { + rib_install_kernel(rn, same, NULL); + route_unlock_node(rn); + + early_route_memory_free(ere); + return; + } + + /* Special handling for IPv4 or IPv6 routes sourced from + * EVPN - the nexthop (and associated MAC) need to be + * uninstalled if no more refs. 
+ */ + for (ALL_NEXTHOPS(re->nhe->nhg, tmp_nh)) { + struct ipaddr vtep_ip; + + if (CHECK_FLAG(tmp_nh->flags, NEXTHOP_FLAG_EVPN)) { + memset(&vtep_ip, 0, sizeof(struct ipaddr)); + if (ere->afi == AFI_IP) { + vtep_ip.ipa_type = IPADDR_V4; + memcpy(&(vtep_ip.ipaddr_v4), + &(tmp_nh->gate.ipv4), + sizeof(struct in_addr)); + } else { + vtep_ip.ipa_type = IPADDR_V6; + memcpy(&(vtep_ip.ipaddr_v6), + &(tmp_nh->gate.ipv6), + sizeof(struct in6_addr)); + } + zebra_rib_queue_evpn_route_del( + re->vrf_id, &vtep_ip, &ere->p); + } + } + + /* Notify dplane if system route changes */ + if (RIB_SYSTEM_ROUTE(re)) + dplane_sys_route_del(rn, same); + + rib_delnode(rn, same); + } + + route_unlock_node(rn); + + early_route_memory_free(ere); +} + +/* + * When FRR receives a route we need to match the route up to + * nexthop groups. That we also may have just received + * place the data on this queue so that this work of finding + * the nexthop group entries for the route entry is always + * done after the nexthop group has had a chance to be processed + */ +static void process_subq_early_route(struct listnode *lnode) +{ + struct zebra_early_route *ere = listgetdata(lnode); + + if (ere->deletion) + process_subq_early_route_delete(ere); + else + process_subq_early_route_add(ere); +} + +/* + * Examine the specified subqueue; process one entry and return 1 if + * there is a node, return 0 otherwise. 
+ */ +static unsigned int process_subq(struct list *subq, + enum meta_queue_indexes qindex) +{ + struct listnode *lnode = listhead(subq); + + if (!lnode) + return 0; + + switch (qindex) { + case META_QUEUE_EVPN: + process_subq_evpn(lnode); + break; + case META_QUEUE_NHG: + process_subq_nhg(lnode); + break; + case META_QUEUE_EARLY_ROUTE: + process_subq_early_route(lnode); + break; + case META_QUEUE_EARLY_LABEL: + process_subq_early_label(lnode); + break; + case META_QUEUE_CONNECTED: + case META_QUEUE_KERNEL: + case META_QUEUE_STATIC: + case META_QUEUE_NOTBGP: + case META_QUEUE_BGP: + case META_QUEUE_OTHER: + process_subq_route(lnode, qindex); + break; + } + + list_delete_node(subq, lnode); + + return 1; +} + +/* Dispatch the meta queue by picking and processing the next node from + * a non-empty sub-queue with lowest priority. wq is equal to zebra->ribq and + * data is pointed to the meta queue structure. + */ +static wq_item_status meta_queue_process(struct work_queue *dummy, void *data) +{ + struct meta_queue *mq = data; + unsigned i; + uint32_t queue_len, queue_limit; + + /* Ensure there's room for more dataplane updates */ + queue_limit = dplane_get_in_queue_limit(); + queue_len = dplane_get_in_queue_len(); + if (queue_len > queue_limit) { + if (IS_ZEBRA_DEBUG_RIB_DETAILED) + zlog_debug( + "rib queue: dplane queue len %u, limit %u, retrying", + queue_len, queue_limit); + + /* Ensure that the meta-queue is actually enqueued */ + if (work_queue_empty(zrouter.ribq)) + work_queue_add(zrouter.ribq, zrouter.mq); + + return WQ_QUEUE_BLOCKED; + } + + for (i = 0; i < MQ_SIZE; i++) + if (process_subq(mq->subq[i], i)) { + mq->size--; + break; + } + return mq->size ? WQ_REQUEUE : WQ_SUCCESS; +} + + +/* + * Look into the RN and queue it into the highest priority queue + * at this point in time for processing. + * + * We will enqueue a route node only once per invocation. + * + * There are two possibilities here that should be kept in mind. 
+ * If the original invocation has not been pulled off for processing + * yet, A subsuquent invocation can have a route entry with a better + * meta queue index value and we can have a situation where + * we might have the same node enqueued 2 times. Not necessarily + * an optimal situation but it should be ok. + * + * The other possibility is that the original invocation has not + * been pulled off for processing yet, A subsusquent invocation + * doesn't have a route_entry with a better meta-queue and the + * original metaqueue index value will win and we'll end up with + * the route node enqueued once. + */ +static int rib_meta_queue_add(struct meta_queue *mq, void *data) +{ + struct route_node *rn = NULL; + struct route_entry *re = NULL, *curr_re = NULL; + uint8_t qindex = MQ_SIZE, curr_qindex = MQ_SIZE; + + rn = (struct route_node *)data; + + RNODE_FOREACH_RE (rn, curr_re) { + curr_qindex = route_info[curr_re->type].meta_q_map; + + if (curr_qindex <= qindex) { + re = curr_re; + qindex = curr_qindex; + } + } + + if (!re) + return -1; + + /* Invariant: at this point we always have rn->info set. 
*/ + if (CHECK_FLAG(rib_dest_from_rnode(rn)->flags, + RIB_ROUTE_QUEUED(qindex))) { + if (IS_ZEBRA_DEBUG_RIB_DETAILED) + rnode_debug(rn, re->vrf_id, + "rn %p is already queued in sub-queue %s", + (void *)rn, subqueue2str(qindex)); + return -1; + } + + SET_FLAG(rib_dest_from_rnode(rn)->flags, RIB_ROUTE_QUEUED(qindex)); + listnode_add(mq->subq[qindex], rn); + route_lock_node(rn); + mq->size++; + + if (IS_ZEBRA_DEBUG_RIB_DETAILED) + rnode_debug(rn, re->vrf_id, "queued rn %p into sub-queue %s", + (void *)rn, subqueue2str(qindex)); + + return 0; +} + +static int early_label_meta_queue_add(struct meta_queue *mq, void *data) +{ + listnode_add(mq->subq[META_QUEUE_EARLY_LABEL], data); + mq->size++; + return 0; +} + +static int rib_meta_queue_nhg_ctx_add(struct meta_queue *mq, void *data) +{ + struct nhg_ctx *ctx = NULL; + uint8_t qindex = META_QUEUE_NHG; + struct wq_nhg_wrapper *w; + + ctx = (struct nhg_ctx *)data; + + if (!ctx) + return -1; + + w = XCALLOC(MTYPE_WQ_WRAPPER, sizeof(struct wq_nhg_wrapper)); + + w->type = WQ_NHG_WRAPPER_TYPE_CTX; + w->u.ctx = ctx; + + listnode_add(mq->subq[qindex], w); + mq->size++; + + if (IS_ZEBRA_DEBUG_RIB_DETAILED) + zlog_debug("NHG Context id=%u queued into sub-queue %s", + ctx->id, subqueue2str(qindex)); + + return 0; +} + +static int rib_meta_queue_nhg_add(struct meta_queue *mq, void *data) +{ + struct nhg_hash_entry *nhe = NULL; + uint8_t qindex = META_QUEUE_NHG; + struct wq_nhg_wrapper *w; + + nhe = (struct nhg_hash_entry *)data; + + if (!nhe) + return -1; + + w = XCALLOC(MTYPE_WQ_WRAPPER, sizeof(struct wq_nhg_wrapper)); + + w->type = WQ_NHG_WRAPPER_TYPE_NHG; + w->u.nhe = nhe; + + listnode_add(mq->subq[qindex], w); + mq->size++; + + if (IS_ZEBRA_DEBUG_RIB_DETAILED) + zlog_debug("NHG id=%u queued into sub-queue %s", nhe->id, + subqueue2str(qindex)); + + return 0; +} + +static int rib_meta_queue_evpn_add(struct meta_queue *mq, void *data) +{ + listnode_add(mq->subq[META_QUEUE_EVPN], data); + mq->size++; + + return 0; +} + +static int 
mq_add_handler(void *data, + int (*mq_add_func)(struct meta_queue *mq, void *data)) +{ + if (zrouter.ribq == NULL) { + flog_err(EC_ZEBRA_WQ_NONEXISTENT, + "%s: work_queue does not exist!", __func__); + return -1; + } + + /* + * The RIB queue should normally be either empty or holding the only + * work_queue_item element. In the latter case this element would + * hold a pointer to the meta queue structure, which must be used to + * actually queue the route nodes to process. So create the MQ + * holder, if necessary, then push the work into it in any case. + * This semantics was introduced after 0.99.9 release. + */ + if (work_queue_empty(zrouter.ribq)) + work_queue_add(zrouter.ribq, zrouter.mq); + + return mq_add_func(zrouter.mq, data); +} + +void mpls_ftn_uninstall(struct zebra_vrf *zvrf, enum lsp_types_t type, + struct prefix *prefix, uint8_t route_type, + uint8_t route_instance) +{ + struct wq_label_wrapper *w; + + w = XCALLOC(MTYPE_WQ_WRAPPER, sizeof(struct wq_label_wrapper)); + + w->type = WQ_LABEL_FTN_UNINSTALL; + w->vrf_id = zvrf->vrf->vrf_id; + w->p = *prefix; + w->ltype = type; + w->route_type = route_type; + w->route_instance = route_instance; + + if (IS_ZEBRA_DEBUG_RIB_DETAILED) + zlog_debug("Early Label Handling for %pFX", prefix); + + mq_add_handler(w, early_label_meta_queue_add); +} + +void mpls_zapi_labels_process(bool add_p, struct zebra_vrf *zvrf, + const struct zapi_labels *zl) +{ + struct wq_label_wrapper *w; + + w = XCALLOC(MTYPE_WQ_WRAPPER, sizeof(struct wq_label_wrapper)); + w->type = WQ_LABEL_LABELS_PROCESS; + w->vrf_id = zvrf->vrf->vrf_id; + w->add_p = add_p; + w->zl = *zl; + + if (IS_ZEBRA_DEBUG_RIB_DETAILED) + zlog_debug("Early Label Handling: Labels Process"); + + mq_add_handler(w, early_label_meta_queue_add); +} + +/* Add route_node to work queue and schedule processing */ +int rib_queue_add(struct route_node *rn) +{ + assert(rn); + + /* Pointless to queue a route_node with no RIB entries to add or remove + */ + if (!rnode_to_ribs(rn)) { 
+ zlog_debug("%s: called for route_node (%p, %u) with no ribs", + __func__, (void *)rn, route_node_get_lock_count(rn)); + zlog_backtrace(LOG_DEBUG); + return -1; + } + + return mq_add_handler(rn, rib_meta_queue_add); +} + +/* + * Enqueue incoming nhg info from OS for processing + */ +int rib_queue_nhg_ctx_add(struct nhg_ctx *ctx) +{ + assert(ctx); + + return mq_add_handler(ctx, rib_meta_queue_nhg_ctx_add); +} + +/* + * Enqueue incoming nhg from proto daemon for processing + */ +int rib_queue_nhe_add(struct nhg_hash_entry *nhe) +{ + if (nhe == NULL) + return -1; + + return mq_add_handler(nhe, rib_meta_queue_nhg_add); +} + +/* + * Enqueue evpn route for processing + */ +int zebra_rib_queue_evpn_route_add(vrf_id_t vrf_id, const struct ethaddr *rmac, + const struct ipaddr *vtep_ip, + const struct prefix *host_prefix) +{ + struct wq_evpn_wrapper *w; + + w = XCALLOC(MTYPE_WQ_WRAPPER, sizeof(struct wq_evpn_wrapper)); + + w->type = WQ_EVPN_WRAPPER_TYPE_VRFROUTE; + w->add_p = true; + w->vrf_id = vrf_id; + w->macaddr = *rmac; + w->ip = *vtep_ip; + w->prefix = *host_prefix; + + if (IS_ZEBRA_DEBUG_RIB_DETAILED) + zlog_debug("%s: (%u)%pIA, host prefix %pFX enqueued", __func__, + vrf_id, vtep_ip, host_prefix); + + return mq_add_handler(w, rib_meta_queue_evpn_add); +} + +int zebra_rib_queue_evpn_route_del(vrf_id_t vrf_id, + const struct ipaddr *vtep_ip, + const struct prefix *host_prefix) +{ + struct wq_evpn_wrapper *w; + + w = XCALLOC(MTYPE_WQ_WRAPPER, sizeof(struct wq_evpn_wrapper)); + + w->type = WQ_EVPN_WRAPPER_TYPE_VRFROUTE; + w->add_p = false; + w->vrf_id = vrf_id; + w->ip = *vtep_ip; + w->prefix = *host_prefix; + + if (IS_ZEBRA_DEBUG_RIB_DETAILED) + zlog_debug("%s: (%u)%pIA, host prefix %pFX enqueued", __func__, + vrf_id, vtep_ip, host_prefix); + + return mq_add_handler(w, rib_meta_queue_evpn_add); +} + +/* Enqueue EVPN remote ES for processing */ +int zebra_rib_queue_evpn_rem_es_add(const esi_t *esi, + const struct in_addr *vtep_ip, + bool esr_rxed, uint8_t df_alg, + 
uint16_t df_pref) +{ + struct wq_evpn_wrapper *w; + char buf[ESI_STR_LEN]; + + w = XCALLOC(MTYPE_WQ_WRAPPER, sizeof(struct wq_evpn_wrapper)); + + w->type = WQ_EVPN_WRAPPER_TYPE_REM_ES; + w->add_p = true; + w->esi = *esi; + w->ip.ipa_type = IPADDR_V4; + w->ip.ipaddr_v4 = *vtep_ip; + w->esr_rxed = esr_rxed; + w->df_alg = df_alg; + w->df_pref = df_pref; + + if (IS_ZEBRA_DEBUG_RIB_DETAILED) + zlog_debug("%s: vtep %pI4, esi %s enqueued", __func__, vtep_ip, + esi_to_str(esi, buf, sizeof(buf))); + + return mq_add_handler(w, rib_meta_queue_evpn_add); +} + +int zebra_rib_queue_evpn_rem_es_del(const esi_t *esi, + const struct in_addr *vtep_ip) +{ + struct wq_evpn_wrapper *w; + char buf[ESI_STR_LEN]; + + w = XCALLOC(MTYPE_WQ_WRAPPER, sizeof(struct wq_evpn_wrapper)); + + w->type = WQ_EVPN_WRAPPER_TYPE_REM_ES; + w->add_p = false; + w->esi = *esi; + w->ip.ipa_type = IPADDR_V4; + w->ip.ipaddr_v4 = *vtep_ip; + + if (IS_ZEBRA_DEBUG_RIB_DETAILED) { + if (memcmp(esi, zero_esi, sizeof(esi_t)) != 0) + esi_to_str(esi, buf, sizeof(buf)); + else + strlcpy(buf, "-", sizeof(buf)); + + zlog_debug("%s: vtep %pI4, esi %s enqueued", __func__, vtep_ip, + buf); + } + + return mq_add_handler(w, rib_meta_queue_evpn_add); +} + +/* + * Enqueue EVPN remote macip update for processing + */ +int zebra_rib_queue_evpn_rem_macip_add(vni_t vni, const struct ethaddr *macaddr, + const struct ipaddr *ipaddr, + uint8_t flags, uint32_t seq, + struct in_addr vtep_ip, const esi_t *esi) +{ + struct wq_evpn_wrapper *w; + char buf[ESI_STR_LEN]; + + w = XCALLOC(MTYPE_WQ_WRAPPER, sizeof(struct wq_evpn_wrapper)); + + w->type = WQ_EVPN_WRAPPER_TYPE_REM_MACIP; + w->add_p = true; + w->vni = vni; + w->macaddr = *macaddr; + w->ip = *ipaddr; + w->flags = flags; + w->seq = seq; + w->vtep_ip = vtep_ip; + w->esi = *esi; + + if (IS_ZEBRA_DEBUG_RIB_DETAILED) { + if (memcmp(esi, zero_esi, sizeof(esi_t)) != 0) + esi_to_str(esi, buf, sizeof(buf)); + else + strlcpy(buf, "-", sizeof(buf)); + + zlog_debug("%s: mac %pEA, vtep %pI4, esi 
%s enqueued", __func__, + macaddr, &vtep_ip, buf); + } + + return mq_add_handler(w, rib_meta_queue_evpn_add); +} + +int zebra_rib_queue_evpn_rem_macip_del(vni_t vni, const struct ethaddr *macaddr, + const struct ipaddr *ip, + struct in_addr vtep_ip) +{ + struct wq_evpn_wrapper *w; + + w = XCALLOC(MTYPE_WQ_WRAPPER, sizeof(struct wq_evpn_wrapper)); + + w->type = WQ_EVPN_WRAPPER_TYPE_REM_MACIP; + w->add_p = false; + w->vni = vni; + w->macaddr = *macaddr; + w->ip = *ip; + w->vtep_ip = vtep_ip; + + if (IS_ZEBRA_DEBUG_RIB_DETAILED) + zlog_debug("%s: mac %pEA, vtep %pI4 enqueued", __func__, + macaddr, &vtep_ip); + + return mq_add_handler(w, rib_meta_queue_evpn_add); +} + +/* + * Enqueue remote VTEP address for processing + */ +int zebra_rib_queue_evpn_rem_vtep_add(vrf_id_t vrf_id, vni_t vni, + struct in_addr vtep_ip, int flood_control) +{ + struct wq_evpn_wrapper *w; + + w = XCALLOC(MTYPE_WQ_WRAPPER, sizeof(struct wq_evpn_wrapper)); + + w->type = WQ_EVPN_WRAPPER_TYPE_REM_VTEP; + w->add_p = true; + w->vrf_id = vrf_id; + w->vni = vni; + w->vtep_ip = vtep_ip; + w->flags = flood_control; + + if (IS_ZEBRA_DEBUG_RIB_DETAILED) + zlog_debug("%s: vrf %u, vtep %pI4 enqueued", __func__, vrf_id, + &vtep_ip); + + return mq_add_handler(w, rib_meta_queue_evpn_add); +} + +int zebra_rib_queue_evpn_rem_vtep_del(vrf_id_t vrf_id, vni_t vni, + struct in_addr vtep_ip) +{ + struct wq_evpn_wrapper *w; + + w = XCALLOC(MTYPE_WQ_WRAPPER, sizeof(struct wq_evpn_wrapper)); + + w->type = WQ_EVPN_WRAPPER_TYPE_REM_VTEP; + w->add_p = false; + w->vrf_id = vrf_id; + w->vni = vni; + w->vtep_ip = vtep_ip; + + if (IS_ZEBRA_DEBUG_RIB_DETAILED) + zlog_debug("%s: vrf %u, vtep %pI4 enqueued", __func__, vrf_id, + &vtep_ip); + + return mq_add_handler(w, rib_meta_queue_evpn_add); +} + +/* Create new meta queue. + A destructor function doesn't seem to be necessary here. 
+ */ +static struct meta_queue *meta_queue_new(void) +{ + struct meta_queue *new; + unsigned i; + + new = XCALLOC(MTYPE_WORK_QUEUE, sizeof(struct meta_queue)); + + for (i = 0; i < MQ_SIZE; i++) { + new->subq[i] = list_new(); + assert(new->subq[i]); + } + + return new; +} + +/* Clean up the EVPN meta-queue list */ +static void evpn_meta_queue_free(struct meta_queue *mq, struct list *l, + struct zebra_vrf *zvrf) +{ + struct listnode *node, *nnode; + struct wq_evpn_wrapper *w; + + /* Free the node wrapper object, and the struct it wraps */ + for (ALL_LIST_ELEMENTS(l, node, nnode, w)) { + if (zvrf) { + vrf_id_t vrf_id = zvrf->vrf->vrf_id; + + if (w->vrf_id != vrf_id) + continue; + } + + node->data = NULL; + + XFREE(MTYPE_WQ_WRAPPER, w); + + list_delete_node(l, node); + mq->size--; + } +} + +/* Clean up the nhg meta-queue list */ +static void nhg_meta_queue_free(struct meta_queue *mq, struct list *l, + struct zebra_vrf *zvrf) +{ + struct wq_nhg_wrapper *w; + struct listnode *node, *nnode; + + /* Free the node wrapper object, and the struct it wraps */ + for (ALL_LIST_ELEMENTS(l, node, nnode, w)) { + if (zvrf) { + vrf_id_t vrf_id = zvrf->vrf->vrf_id; + + if (w->type == WQ_NHG_WRAPPER_TYPE_CTX && + w->u.ctx->vrf_id != vrf_id) + continue; + else if (w->type == WQ_NHG_WRAPPER_TYPE_NHG && + w->u.nhe->vrf_id != vrf_id) + continue; + } + if (w->type == WQ_NHG_WRAPPER_TYPE_CTX) + nhg_ctx_free(&w->u.ctx); + else if (w->type == WQ_NHG_WRAPPER_TYPE_NHG) + zebra_nhg_free(w->u.nhe); + + node->data = NULL; + XFREE(MTYPE_WQ_WRAPPER, w); + + list_delete_node(l, node); + mq->size--; + } +} + +static void early_label_meta_queue_free(struct meta_queue *mq, struct list *l, + struct zebra_vrf *zvrf) +{ + struct wq_label_wrapper *w; + struct listnode *node, *nnode; + + for (ALL_LIST_ELEMENTS(l, node, nnode, w)) { + if (zvrf && zvrf->vrf->vrf_id != w->vrf_id) + continue; + + switch (w->type) { + case WQ_LABEL_FTN_UNINSTALL: + case WQ_LABEL_LABELS_PROCESS: + break; + } + + node->data = NULL; + 
XFREE(MTYPE_WQ_WRAPPER, w); + list_delete_node(l, node); + mq->size--; + } +} + +static void rib_meta_queue_free(struct meta_queue *mq, struct list *l, + struct zebra_vrf *zvrf) +{ + struct route_node *rnode; + struct listnode *node, *nnode; + + for (ALL_LIST_ELEMENTS(l, node, nnode, rnode)) { + rib_dest_t *dest = rib_dest_from_rnode(rnode); + + if (dest && rib_dest_vrf(dest) != zvrf) + continue; + + route_unlock_node(rnode); + node->data = NULL; + list_delete_node(l, node); + mq->size--; + } +} + +static void early_route_meta_queue_free(struct meta_queue *mq, struct list *l, + struct zebra_vrf *zvrf) +{ + struct zebra_early_route *zer; + struct listnode *node, *nnode; + + for (ALL_LIST_ELEMENTS(l, node, nnode, zer)) { + if (zvrf && zer->re->vrf_id != zvrf->vrf->vrf_id) + continue; + + XFREE(MTYPE_RE, zer); + node->data = NULL; + list_delete_node(l, node); + mq->size--; + } +} + +void meta_queue_free(struct meta_queue *mq, struct zebra_vrf *zvrf) +{ + enum meta_queue_indexes i; + + for (i = 0; i < MQ_SIZE; i++) { + /* Some subqueues may need cleanup - nhgs for example */ + switch (i) { + case META_QUEUE_NHG: + nhg_meta_queue_free(mq, mq->subq[i], zvrf); + break; + case META_QUEUE_EVPN: + evpn_meta_queue_free(mq, mq->subq[i], zvrf); + break; + case META_QUEUE_EARLY_ROUTE: + early_route_meta_queue_free(mq, mq->subq[i], zvrf); + break; + case META_QUEUE_EARLY_LABEL: + early_label_meta_queue_free(mq, mq->subq[i], zvrf); + break; + case META_QUEUE_CONNECTED: + case META_QUEUE_KERNEL: + case META_QUEUE_STATIC: + case META_QUEUE_NOTBGP: + case META_QUEUE_BGP: + case META_QUEUE_OTHER: + rib_meta_queue_free(mq, mq->subq[i], zvrf); + break; + } + if (!zvrf) + list_delete(&mq->subq[i]); + } + + if (!zvrf) + XFREE(MTYPE_WORK_QUEUE, mq); +} + +/* initialise zebra rib work queue */ +static void rib_queue_init(void) +{ + if (!(zrouter.ribq = work_queue_new(zrouter.master, + "route_node processing"))) { + flog_err(EC_ZEBRA_WQ_NONEXISTENT, + "%s: could not initialise work queue!", 
__func__); + return; + } + + /* fill in the work queue spec */ + zrouter.ribq->spec.workfunc = &meta_queue_process; + zrouter.ribq->spec.errorfunc = NULL; + zrouter.ribq->spec.completion_func = NULL; + /* XXX: TODO: These should be runtime configurable via vty */ + zrouter.ribq->spec.max_retries = 3; + zrouter.ribq->spec.hold = ZEBRA_RIB_PROCESS_HOLD_TIME; + zrouter.ribq->spec.retry = ZEBRA_RIB_PROCESS_RETRY_TIME; + + if (!(zrouter.mq = meta_queue_new())) { + flog_err(EC_ZEBRA_WQ_NONEXISTENT, + "%s: could not initialise meta queue!", __func__); + return; + } + return; +} + +rib_dest_t *zebra_rib_create_dest(struct route_node *rn) +{ + rib_dest_t *dest; + + dest = XCALLOC(MTYPE_RIB_DEST, sizeof(rib_dest_t)); + rnh_list_init(&dest->nht); + re_list_init(&dest->routes); + route_lock_node(rn); /* rn route table reference */ + rn->info = dest; + dest->rnode = rn; + + return dest; +} + +/* RIB updates are processed via a queue of pointers to route_nodes. + * + * The queue length is bounded by the maximal size of the routing table, + * as a route_node will not be requeued, if already queued. + * + * REs are submitted via rib_addnode or rib_delnode which set minimal + * state, or static_install_route (when an existing RE is updated) + * and then submit route_node to queue for best-path selection later. + * Order of add/delete state changes are preserved for any given RE. + * + * Deleted REs are reaped during best-path selection. + * + * rib_addnode + * |-> rib_link or unset ROUTE_ENTRY_REMOVE |->Update kernel with + * |-------->| | best RE, if required + * | | + * static_install->|->rib_addqueue...... -> rib_process + * | | + * |-------->| |-> rib_unlink + * |-> set ROUTE_ENTRY_REMOVE | + * rib_delnode (RE freed) + * + * The 'info' pointer of a route_node points to a rib_dest_t + * ('dest'). Queueing state for a route_node is kept on the dest. 
The + * dest is created on-demand by rib_link() and is kept around at least + * as long as there are ribs hanging off it (@see rib_gc_dest()). + * + * Refcounting (aka "locking" throughout the Zebra and FRR code): + * + * - route_nodes: refcounted by: + * - dest attached to route_node: + * - managed by: rib_link/rib_gc_dest + * - route_node processing queue + * - managed by: rib_addqueue, rib_process. + * + */ + +/* Add RE to head of the route node. */ +static void rib_link(struct route_node *rn, struct route_entry *re, int process) +{ + rib_dest_t *dest; + afi_t afi; + const char *rmap_name; + + assert(re && rn); + + dest = rib_dest_from_rnode(rn); + if (!dest) { + if (IS_ZEBRA_DEBUG_RIB_DETAILED) + rnode_debug(rn, re->vrf_id, "rn %p adding dest", rn); + + dest = zebra_rib_create_dest(rn); + } + + re_list_add_head(&dest->routes, re); + + afi = (rn->p.family == AF_INET) + ? AFI_IP + : (rn->p.family == AF_INET6) ? AFI_IP6 : AFI_MAX; + if (is_zebra_import_table_enabled(afi, re->vrf_id, re->table)) { + struct zebra_vrf *zvrf = zebra_vrf_lookup_by_id(re->vrf_id); + + rmap_name = zebra_get_import_table_route_map(afi, re->table); + zebra_add_import_table_entry(zvrf, rn, re, rmap_name); + } + + if (process) + rib_queue_add(rn); +} + +static void rib_addnode(struct route_node *rn, + struct route_entry *re, int process) +{ + /* RE node has been un-removed before route-node is processed. + * route_node must hence already be on the queue for processing.. + */ + if (CHECK_FLAG(re->status, ROUTE_ENTRY_REMOVED)) { + if (IS_ZEBRA_DEBUG_RIB) + rnode_debug(rn, re->vrf_id, "rn %p, un-removed re %p", + (void *)rn, (void *)re); + + UNSET_FLAG(re->status, ROUTE_ENTRY_REMOVED); + return; + } + rib_link(rn, re, process); +} + +/* + * rib_unlink + * + * Detach a rib structure from a route_node. + * + * Note that a call to rib_unlink() should be followed by a call to + * rib_gc_dest() at some point. This allows a rib_dest_t that is no + * longer required to be deleted. 
+ */ +void rib_unlink(struct route_node *rn, struct route_entry *re) +{ + rib_dest_t *dest; + + assert(rn && re); + + if (IS_ZEBRA_DEBUG_RIB) + rnode_debug(rn, re->vrf_id, "rn %p, re %p", (void *)rn, + (void *)re); + + dest = rib_dest_from_rnode(rn); + + re_list_del(&dest->routes, re); + + if (dest->selected_fib == re) + dest->selected_fib = NULL; + + rib_re_nhg_free(re); + + zapi_re_opaque_free(re->opaque); + + XFREE(MTYPE_RE, re); +} + +void rib_delnode(struct route_node *rn, struct route_entry *re) +{ + afi_t afi; + + if (IS_ZEBRA_DEBUG_RIB) + rnode_debug(rn, re->vrf_id, "rn %p, re %p, removing", + (void *)rn, (void *)re); + SET_FLAG(re->status, ROUTE_ENTRY_REMOVED); + + afi = (rn->p.family == AF_INET) + ? AFI_IP + : (rn->p.family == AF_INET6) ? AFI_IP6 : AFI_MAX; + if (is_zebra_import_table_enabled(afi, re->vrf_id, re->table)) { + struct zebra_vrf *zvrf = zebra_vrf_lookup_by_id(re->vrf_id); + + zebra_del_import_table_entry(zvrf, rn, re); + /* Just clean up if non main table */ + if (IS_ZEBRA_DEBUG_RIB) + zlog_debug("%s(%u):%pRN: Freeing route rn %p, re %p (%s)", + vrf_id_to_name(re->vrf_id), re->vrf_id, rn, + rn, re, zebra_route_string(re->type)); + } + + rib_queue_add(rn); +} + +/* + * Helper that debugs a single nexthop within a route-entry + */ +static void _route_entry_dump_nh(const struct route_entry *re, + const char *straddr, + const struct nexthop *nexthop) +{ + char nhname[PREFIX_STRLEN]; + char backup_str[50]; + char wgt_str[50]; + char temp_str[10]; + char label_str[MPLS_LABEL_STRLEN]; + int i; + struct interface *ifp; + struct vrf *vrf = vrf_lookup_by_id(nexthop->vrf_id); + + switch (nexthop->type) { + case NEXTHOP_TYPE_BLACKHOLE: + snprintf(nhname, sizeof(nhname), "Blackhole"); + break; + case NEXTHOP_TYPE_IFINDEX: + ifp = if_lookup_by_index(nexthop->ifindex, nexthop->vrf_id); + snprintf(nhname, sizeof(nhname), "%s", + ifp ? 
ifp->name : "Unknown"); + break; + case NEXTHOP_TYPE_IPV4: + /* fallthrough */ + case NEXTHOP_TYPE_IPV4_IFINDEX: + inet_ntop(AF_INET, &nexthop->gate, nhname, INET6_ADDRSTRLEN); + break; + case NEXTHOP_TYPE_IPV6: + case NEXTHOP_TYPE_IPV6_IFINDEX: + inet_ntop(AF_INET6, &nexthop->gate, nhname, INET6_ADDRSTRLEN); + break; + } + + /* Label stack */ + label_str[0] = '\0'; + if (nexthop->nh_label && nexthop->nh_label->num_labels > 0) { + mpls_label2str(nexthop->nh_label->num_labels, + nexthop->nh_label->label, label_str, + sizeof(label_str), 0 /*pretty*/); + strlcat(label_str, ", ", sizeof(label_str)); + } + + backup_str[0] = '\0'; + if (CHECK_FLAG(nexthop->flags, NEXTHOP_FLAG_HAS_BACKUP)) { + snprintf(backup_str, sizeof(backup_str), "backup "); + for (i = 0; i < nexthop->backup_num; i++) { + snprintf(temp_str, sizeof(temp_str), "%d, ", + nexthop->backup_idx[i]); + strlcat(backup_str, temp_str, sizeof(backup_str)); + } + } + + wgt_str[0] = '\0'; + if (nexthop->weight) + snprintf(wgt_str, sizeof(wgt_str), "wgt %d,", nexthop->weight); + + zlog_debug("%s: %s %s[%u] %svrf %s(%u) %s%s with flags %s%s%s%s%s%s%s%s%s", + straddr, (nexthop->rparent ? " NH" : "NH"), nhname, + nexthop->ifindex, label_str, vrf ? vrf->name : "Unknown", + nexthop->vrf_id, + wgt_str, backup_str, + (CHECK_FLAG(nexthop->flags, NEXTHOP_FLAG_ACTIVE) + ? "ACTIVE " + : ""), + (CHECK_FLAG(re->status, ROUTE_ENTRY_INSTALLED) + ? "FIB " + : ""), + (CHECK_FLAG(nexthop->flags, NEXTHOP_FLAG_RECURSIVE) + ? "RECURSIVE " + : ""), + (CHECK_FLAG(nexthop->flags, NEXTHOP_FLAG_ONLINK) + ? "ONLINK " + : ""), + (CHECK_FLAG(nexthop->flags, NEXTHOP_FLAG_DUPLICATE) + ? "DUPLICATE " + : ""), + (CHECK_FLAG(nexthop->flags, NEXTHOP_FLAG_RNH_FILTERED) + ? "FILTERED " : ""), + (CHECK_FLAG(nexthop->flags, NEXTHOP_FLAG_HAS_BACKUP) + ? "BACKUP " : ""), + (CHECK_FLAG(nexthop->flags, NEXTHOP_FLAG_SRTE) + ? "SRTE " : ""), + (CHECK_FLAG(nexthop->flags, NEXTHOP_FLAG_EVPN) + ? 
"EVPN " : "")); + +} + +/* This function dumps the contents of a given RE entry into + * standard debug log. Calling function name and IP prefix in + * question are passed as 1st and 2nd arguments. + */ +void _route_entry_dump(const char *func, union prefixconstptr pp, + union prefixconstptr src_pp, + const struct route_entry *re) +{ + const struct prefix *src_p = src_pp.p; + bool is_srcdst = src_p && src_p->prefixlen; + char straddr[PREFIX_STRLEN]; + char srcaddr[PREFIX_STRLEN]; + char flags_buf[128]; + char status_buf[128]; + struct nexthop *nexthop; + struct vrf *vrf = vrf_lookup_by_id(re->vrf_id); + struct nexthop_group *nhg; + + prefix2str(pp, straddr, sizeof(straddr)); + + zlog_debug("%s: dumping RE entry %p for %s%s%s vrf %s(%u)", func, + (const void *)re, straddr, + is_srcdst ? " from " : "", + is_srcdst ? prefix2str(src_pp, srcaddr, sizeof(srcaddr)) + : "", + VRF_LOGNAME(vrf), re->vrf_id); + zlog_debug("%s: uptime == %lu, type == %u, instance == %d, table == %d", + straddr, (unsigned long)re->uptime, re->type, re->instance, + re->table); + zlog_debug( + "%s: metric == %u, mtu == %u, distance == %u, flags == %sstatus == %s", + straddr, re->metric, re->mtu, re->distance, + zclient_dump_route_flags(re->flags, flags_buf, + sizeof(flags_buf)), + _dump_re_status(re, status_buf, sizeof(status_buf))); + zlog_debug("%s: nexthop_num == %u, nexthop_active_num == %u", straddr, + nexthop_group_nexthop_num(&(re->nhe->nhg)), + nexthop_group_active_nexthop_num(&(re->nhe->nhg))); + + /* Dump nexthops */ + for (ALL_NEXTHOPS(re->nhe->nhg, nexthop)) + _route_entry_dump_nh(re, straddr, nexthop); + + if (zebra_nhg_get_backup_nhg(re->nhe)) { + zlog_debug("%s: backup nexthops:", straddr); + + nhg = zebra_nhg_get_backup_nhg(re->nhe); + for (ALL_NEXTHOPS_PTR(nhg, nexthop)) + _route_entry_dump_nh(re, straddr, nexthop); + } + + zlog_debug("%s: dump complete", straddr); +} + +static int rib_meta_queue_early_route_add(struct meta_queue *mq, void *data) +{ + struct zebra_early_route 
*ere = data; + + listnode_add(mq->subq[META_QUEUE_EARLY_ROUTE], data); + mq->size++; + + if (IS_ZEBRA_DEBUG_RIB_DETAILED) + zlog_debug( + "Route %pFX(%u) queued for processing into sub-queue %s", + &ere->p, ere->re->vrf_id, + subqueue2str(META_QUEUE_EARLY_ROUTE)); + + return 0; +} + +struct route_entry *zebra_rib_route_entry_new(vrf_id_t vrf_id, int type, + uint8_t instance, uint32_t flags, + uint32_t nhe_id, + uint32_t table_id, + uint32_t metric, uint32_t mtu, + uint8_t distance, route_tag_t tag) +{ + struct route_entry *re; + + re = XCALLOC(MTYPE_RE, sizeof(struct route_entry)); + re->type = type; + re->instance = instance; + re->distance = distance; + re->flags = flags; + re->metric = metric; + re->mtu = mtu; + re->table = table_id; + re->vrf_id = vrf_id; + re->uptime = monotime(NULL); + re->tag = tag; + re->nhe_id = nhe_id; + + return re; +} +/* + * Internal route-add implementation; there are a couple of different public + * signatures. Callers in this path are responsible for the memory they + * allocate: if they allocate a nexthop_group or backup nexthop info, they + * must free those objects. If this returns < 0, an error has occurred and the + * route_entry 're' has not been captured; the caller should free that also. + * + * -1 -> error + * 0 -> Add + * 1 -> update + */ +int rib_add_multipath_nhe(afi_t afi, safi_t safi, struct prefix *p, + struct prefix_ipv6 *src_p, struct route_entry *re, + struct nhg_hash_entry *re_nhe, bool startup) +{ + struct zebra_early_route *ere; + + if (!re) + return -1; + + assert(!src_p || !src_p->prefixlen || afi == AFI_IP6); + + ere = XCALLOC(MTYPE_WQ_WRAPPER, sizeof(*ere)); + ere->afi = afi; + ere->safi = safi; + ere->p = *p; + if (src_p) + ere->src_p = *src_p; + ere->src_p_provided = !!src_p; + ere->re = re; + ere->re_nhe = re_nhe; + ere->startup = startup; + + return mq_add_handler(ere, rib_meta_queue_early_route_add); +} + +/* + * Add a single route. 
+ */ +int rib_add_multipath(afi_t afi, safi_t safi, struct prefix *p, + struct prefix_ipv6 *src_p, struct route_entry *re, + struct nexthop_group *ng, bool startup) +{ + int ret; + struct nhg_hash_entry nhe, *n; + + if (!re) + return -1; + + /* We either need nexthop(s) or an existing nexthop id */ + if (ng == NULL && re->nhe_id == 0) + return -1; + + /* + * Use a temporary nhe to convey info to the common/main api. + */ + zebra_nhe_init(&nhe, afi, (ng ? ng->nexthop : NULL)); + if (ng) + nhe.nhg.nexthop = ng->nexthop; + else if (re->nhe_id > 0) + nhe.id = re->nhe_id; + + n = zebra_nhe_copy(&nhe, 0); + ret = rib_add_multipath_nhe(afi, safi, p, src_p, re, n, startup); + + /* In error cases, free the route also */ + if (ret < 0) + XFREE(MTYPE_RE, re); + + return ret; +} + +void rib_delete(afi_t afi, safi_t safi, vrf_id_t vrf_id, int type, + unsigned short instance, uint32_t flags, struct prefix *p, + struct prefix_ipv6 *src_p, const struct nexthop *nh, + uint32_t nhe_id, uint32_t table_id, uint32_t metric, + uint8_t distance, bool fromkernel) +{ + struct zebra_early_route *ere; + struct route_entry *re = NULL; + struct nhg_hash_entry *nhe = NULL; + + re = zebra_rib_route_entry_new(vrf_id, type, instance, flags, nhe_id, + table_id, metric, 0, distance, 0); + + if (nh) { + nhe = zebra_nhg_alloc(); + nhe->nhg.nexthop = nexthop_dup(nh, NULL); + } + + ere = XCALLOC(MTYPE_WQ_WRAPPER, sizeof(*ere)); + ere->afi = afi; + ere->safi = safi; + ere->p = *p; + if (src_p) + ere->src_p = *src_p; + ere->src_p_provided = !!src_p; + ere->re = re; + ere->re_nhe = nhe; + ere->startup = false; + ere->deletion = true; + ere->fromkernel = fromkernel; + + mq_add_handler(ere, rib_meta_queue_early_route_add); +} + + +int rib_add(afi_t afi, safi_t safi, vrf_id_t vrf_id, int type, + unsigned short instance, uint32_t flags, struct prefix *p, + struct prefix_ipv6 *src_p, const struct nexthop *nh, + uint32_t nhe_id, uint32_t table_id, uint32_t metric, uint32_t mtu, + uint8_t distance, route_tag_t 
tag, bool startup) +{ + struct route_entry *re = NULL; + struct nexthop nexthop = {}; + struct nexthop_group ng = {}; + + /* Allocate new route_entry structure. */ + re = zebra_rib_route_entry_new(vrf_id, type, instance, flags, nhe_id, + table_id, metric, mtu, distance, tag); + + /* If the owner of the route supplies a shared nexthop-group id, + * we'll use that. Otherwise, pass the nexthop along directly. + */ + if (!nhe_id) { + /* Add nexthop. */ + nexthop = *nh; + nexthop_group_add_sorted(&ng, &nexthop); + } + + return rib_add_multipath(afi, safi, p, src_p, re, &ng, startup); +} + +static const char *rib_update_event2str(enum rib_update_event event) +{ + const char *ret = "UNKNOWN"; + + switch (event) { + case RIB_UPDATE_KERNEL: + ret = "RIB_UPDATE_KERNEL"; + break; + case RIB_UPDATE_RMAP_CHANGE: + ret = "RIB_UPDATE_RMAP_CHANGE"; + break; + case RIB_UPDATE_OTHER: + ret = "RIB_UPDATE_OTHER"; + break; + case RIB_UPDATE_MAX: + break; + } + + return ret; +} + + +/* Schedule route nodes to be processed if they match the type */ +static void rib_update_route_node(struct route_node *rn, int type) +{ + struct route_entry *re, *next; + bool re_changed = false; + + RNODE_FOREACH_RE_SAFE (rn, re, next) { + if (type == ZEBRA_ROUTE_ALL || type == re->type) { + SET_FLAG(re->status, ROUTE_ENTRY_CHANGED); + re_changed = true; + } + } + + if (re_changed) + rib_queue_add(rn); +} + +/* Schedule routes of a particular table (address-family) based on event. */ +void rib_update_table(struct route_table *table, enum rib_update_event event, + int rtype) +{ + struct route_node *rn; + + if (IS_ZEBRA_DEBUG_EVENT) { + struct zebra_vrf *zvrf; + struct vrf *vrf; + + zvrf = table->info + ? ((struct rib_table_info *)table->info)->zvrf + : NULL; + vrf = zvrf ? zvrf->vrf : NULL; + + zlog_debug("%s: %s VRF %s Table %u event %s Route type: %s", __func__, + table->info ? afi2str( + ((struct rib_table_info *)table->info)->afi) + : "Unknown", + VRF_LOGNAME(vrf), zvrf ? 
zvrf->table_id : 0, + rib_update_event2str(event), zebra_route_string(rtype)); + } + + /* Walk all routes and queue for processing, if appropriate for + * the trigger event. + */ + for (rn = route_top(table); rn; rn = srcdest_route_next(rn)) { + /* + * If we are looking at a route node and the node + * has already been queued we don't + * need to queue it up again + */ + if (rn->info + && CHECK_FLAG(rib_dest_from_rnode(rn)->flags, + RIB_ROUTE_ANY_QUEUED)) + continue; + + switch (event) { + case RIB_UPDATE_KERNEL: + rib_update_route_node(rn, ZEBRA_ROUTE_KERNEL); + break; + case RIB_UPDATE_RMAP_CHANGE: + case RIB_UPDATE_OTHER: + rib_update_route_node(rn, rtype); + break; + default: + break; + } + } +} + +static void rib_update_handle_vrf(vrf_id_t vrf_id, enum rib_update_event event, + int rtype) +{ + struct route_table *table; + + if (IS_ZEBRA_DEBUG_EVENT) + zlog_debug("%s: Handling VRF %s event %s", __func__, + vrf_id_to_name(vrf_id), rib_update_event2str(event)); + + /* Process routes of interested address-families. 
 */
	table = zebra_vrf_table(AFI_IP, SAFI_UNICAST, vrf_id);
	if (table)
		rib_update_table(table, event, rtype);

	table = zebra_vrf_table(AFI_IP6, SAFI_UNICAST, vrf_id);
	if (table)
		rib_update_table(table, event, rtype);
}

/* Re-evaluate every route table known to the router, regardless of VRF. */
static void rib_update_handle_vrf_all(enum rib_update_event event, int rtype)
{
	struct zebra_router_table *zrt;

	if (IS_ZEBRA_DEBUG_EVENT)
		zlog_debug("%s: Handling VRF (ALL) event %s", __func__,
			   rib_update_event2str(event));

	/* Just iterate over all the route tables, rather than vrf lookups */
	RB_FOREACH (zrt, zebra_router_table_head, &zrouter.tables)
		rib_update_table(zrt->table, event, rtype);
}

/* Context carried from rib_update() to the scheduled handler. */
struct rib_update_ctx {
	enum rib_update_event event;
	bool vrf_all;	/* true: walk all tables, ignore vrf_id */
	vrf_id_t vrf_id;
};

/* Allocate a context for a scheduled RIB update event. */
static struct rib_update_ctx *rib_update_ctx_init(vrf_id_t vrf_id,
						  enum rib_update_event event)
{
	struct rib_update_ctx *ctx;

	ctx = XCALLOC(MTYPE_RIB_UPDATE_CTX, sizeof(struct rib_update_ctx));

	ctx->event = event;
	ctx->vrf_id = vrf_id;

	return ctx;
}

/* Free an update context; NULLs the caller's pointer via XFREE. */
static void rib_update_ctx_fini(struct rib_update_ctx **ctx)
{
	XFREE(MTYPE_RIB_UPDATE_CTX, *ctx);
}

/* Deferred-event handler: perform the RIB update described by the ctx,
 * then release the ctx (the handler owns it once scheduled).
 */
static void rib_update_handler(struct thread *thread)
{
	struct rib_update_ctx *ctx;

	ctx = THREAD_ARG(thread);

	if (ctx->vrf_all)
		rib_update_handle_vrf_all(ctx->event, ZEBRA_ROUTE_ALL);
	else
		rib_update_handle_vrf(ctx->vrf_id, ctx->event, ZEBRA_ROUTE_ALL);

	rib_update_ctx_fini(&ctx);
}

/*
 * Thread list to ensure we don't schedule a ton of events
 * if interfaces are flapping for instance.
			 */
			if (zrouter.startup_time < re->uptime)
				continue;

			/*
			 * So we are starting up and have received
			 * routes from the kernel that we have installed
			 * from a previous run of zebra but not cleaned
			 * up ( say a kill -9 )
			 * But since we haven't actually installed
			 * them yet( we received them from the kernel )
			 * we don't think they are active.
			 * So let's pretend they are active to actually
			 * remove them.
			 * In all honesty I'm not sure if we should
			 * mark them as active when we receive them
			 * This is startup only so probably ok.
			 *
			 * If we ever decide to move rib_sweep_table
			 * to a different spot (ie startup )
			 * this decision needs to be revisited
			 */
			SET_FLAG(re->status, ROUTE_ENTRY_INSTALLED);
			for (ALL_NEXTHOPS(re->nhe->nhg, nexthop))
				SET_FLAG(nexthop->flags, NEXTHOP_FLAG_FIB);

			rib_uninstall_kernel(rn, re);
			rib_delnode(rn, re);
		}
	}

	if (IS_ZEBRA_DEBUG_RIB)
		zlog_debug("%s: ends", __func__);
}

/* Sweep all RIB tables: remove stale self-installed routes from every
 * VRF's unicast tables, then the router-level tables and nexthop groups.
 */
void rib_sweep_route(struct thread *t)
{
	struct vrf *vrf;
	struct zebra_vrf *zvrf;

	RB_FOREACH (vrf, vrf_id_head, &vrfs_by_id) {
		if ((zvrf = vrf->info) == NULL)
			continue;

		rib_sweep_table(zvrf->table[AFI_IP][SAFI_UNICAST]);
		rib_sweep_table(zvrf->table[AFI_IP6][SAFI_UNICAST]);
	}

	zebra_router_sweep_route();
	zebra_router_sweep_nhgs();
}

/* Remove specific by protocol routes from 'table'.
 * Returns the number of route entries deleted.
 */
unsigned long rib_score_proto_table(uint8_t proto, unsigned short instance,
				    struct route_table *table)
{
	struct route_node *rn;
	struct route_entry *re;
	struct route_entry *next;
	unsigned long n = 0;

	if (table)
		for (rn = route_top(table); rn; rn = srcdest_route_next(rn))
			RNODE_FOREACH_RE_SAFE (rn, re, next) {
				if (CHECK_FLAG(re->status,
					       ROUTE_ENTRY_REMOVED))
					continue;
				if (re->type == proto
				    && re->instance == instance) {
					rib_delnode(rn, re);
					n++;
				}
			}
	return n;
}

/* Remove specific by protocol routes.
 */
static void rib_process_dplane_results(struct thread *thread)
{
	struct zebra_dplane_ctx *ctx;
	struct dplane_ctx_q ctxlist;
	bool shut_p = false;

#ifdef HAVE_SCRIPTING
	/* Optionally load a user hook script that is invoked once per
	 * dequeued context below; ret == 0 means the load succeeded.
	 */
	char *script_name =
		frrscript_names_get_script_name(ZEBRA_ON_RIB_PROCESS_HOOK_CALL);

	int ret = 1;
	struct frrscript *fs = NULL;

	if (script_name) {
		fs = frrscript_new(script_name);
		if (fs)
			ret = frrscript_load(fs, ZEBRA_ON_RIB_PROCESS_HOOK_CALL,
					     NULL);
	}
#endif /* HAVE_SCRIPTING */

	/* Dequeue a list of completed updates with one lock/unlock cycle */

	do {
		TAILQ_INIT(&ctxlist);

		/* Take lock controlling queue of results */
		frr_with_mutex (&dplane_mutex) {
			/* Dequeue list of context structs */
			dplane_ctx_list_append(&ctxlist, &rib_dplane_q);
		}

		/* Dequeue context block */
		ctx = dplane_ctx_dequeue(&ctxlist);

		/* If we've emptied the results queue, we're done */
		if (ctx == NULL)
			break;

		/* If zebra is shutting down, avoid processing results,
		 * just drain the results queue.
		 */
		shut_p = atomic_load_explicit(&zrouter.in_shutdown,
					      memory_order_relaxed);
		if (shut_p) {
			while (ctx) {
				dplane_ctx_fini(&ctx);

				ctx = dplane_ctx_dequeue(&ctxlist);
			}

			continue;
		}

		while (ctx) {
#ifdef HAVE_SCRIPTING
			if (ret == 0)
				frrscript_call(fs,
					       ZEBRA_ON_RIB_PROCESS_HOOK_CALL,
					       ("ctx", ctx));
#endif /* HAVE_SCRIPTING */

			switch (dplane_ctx_get_op(ctx)) {
			case DPLANE_OP_ROUTE_INSTALL:
			case DPLANE_OP_ROUTE_UPDATE:
			case DPLANE_OP_ROUTE_DELETE:
				/* Bit of special case for route updates
				 * that were generated by async notifications:
				 * we don't want to continue processing these
				 * in the rib.
				 */
				if (dplane_ctx_get_notif_provider(ctx) == 0)
					rib_process_result(ctx);
				break;

			case DPLANE_OP_ROUTE_NOTIFY:
				rib_process_dplane_notify(ctx);
				break;

			case DPLANE_OP_NH_INSTALL:
			case DPLANE_OP_NH_UPDATE:
			case DPLANE_OP_NH_DELETE:
				zebra_nhg_dplane_result(ctx);
				break;

			case DPLANE_OP_LSP_INSTALL:
			case DPLANE_OP_LSP_UPDATE:
			case DPLANE_OP_LSP_DELETE:
				/* Bit of special case for LSP updates
				 * that were generated by async notifications:
				 * we don't want to continue processing these.
				 */
				if (dplane_ctx_get_notif_provider(ctx) == 0)
					zebra_mpls_lsp_dplane_result(ctx);
				break;

			case DPLANE_OP_LSP_NOTIFY:
				zebra_mpls_process_dplane_notify(ctx);
				break;

			case DPLANE_OP_PW_INSTALL:
			case DPLANE_OP_PW_UNINSTALL:
				handle_pw_result(ctx);
				break;

			case DPLANE_OP_SYS_ROUTE_ADD:
			case DPLANE_OP_SYS_ROUTE_DELETE:
				break;

			case DPLANE_OP_MAC_INSTALL:
			case DPLANE_OP_MAC_DELETE:
				zebra_vxlan_handle_result(ctx);
				break;

			case DPLANE_OP_RULE_ADD:
			case DPLANE_OP_RULE_DELETE:
			case DPLANE_OP_RULE_UPDATE:
			case DPLANE_OP_IPTABLE_ADD:
			case DPLANE_OP_IPTABLE_DELETE:
			case DPLANE_OP_IPSET_ADD:
			case DPLANE_OP_IPSET_DELETE:
			case DPLANE_OP_IPSET_ENTRY_ADD:
			case DPLANE_OP_IPSET_ENTRY_DELETE:
				zebra_pbr_dplane_result(ctx);
				break;

			case DPLANE_OP_INTF_ADDR_ADD:
			case DPLANE_OP_INTF_ADDR_DEL:
			case DPLANE_OP_INTF_INSTALL:
			case DPLANE_OP_INTF_UPDATE:
			case DPLANE_OP_INTF_DELETE:
			case DPLANE_OP_INTF_NETCONFIG:
				zebra_if_dplane_result(ctx);
				break;

			case DPLANE_OP_TC_INSTALL:
			case DPLANE_OP_TC_UPDATE:
			case DPLANE_OP_TC_DELETE:
				break;

			/* Some op codes not handled here */
			case DPLANE_OP_ADDR_INSTALL:
			case DPLANE_OP_ADDR_UNINSTALL:
			case DPLANE_OP_NEIGH_INSTALL:
			case DPLANE_OP_NEIGH_UPDATE:
			case DPLANE_OP_NEIGH_DELETE:
			case DPLANE_OP_NEIGH_IP_INSTALL:
			case DPLANE_OP_NEIGH_IP_DELETE:
			case DPLANE_OP_VTEP_ADD:
			case DPLANE_OP_VTEP_DELETE:
			case DPLANE_OP_NEIGH_DISCOVER:
			case DPLANE_OP_BR_PORT_UPDATE:
			case DPLANE_OP_NEIGH_TABLE_UPDATE:
			case DPLANE_OP_GRE_SET:
			case DPLANE_OP_NONE:
				break;

			} /* Dispatch by op code */

			dplane_ctx_fini(&ctx);
			ctx = dplane_ctx_dequeue(&ctxlist);
		}

	} while (1);

#ifdef HAVE_SCRIPTING
	if (fs)
		frrscript_delete(fs);
#endif
}

/*
 * Results are returned from the dataplane subsystem, in the context of
 * the dataplane pthread. We enqueue the results here for processing by
 * the main thread later.
 */
static int rib_dplane_results(struct dplane_ctx_q *ctxlist)
{
	/* Take lock controlling queue of results */
	frr_with_mutex (&dplane_mutex) {
		/* Enqueue context blocks */
		dplane_ctx_list_append(&rib_dplane_q, ctxlist);
	}

	/* Ensure event is signalled to zebra main pthread */
	thread_add_event(zrouter.master, rib_process_dplane_results, NULL, 0,
			 &t_dplane);

	return 0;
}

/*
 * Ensure there are no empty slots in the route_info array.
 * Every route type in zebra should be present there.
 */
static void check_route_info(void)
{
	int len = array_size(route_info);

	/*
	 * ZEBRA_ROUTE_SYSTEM is special cased since
	 * its key is 0 anyway.
	 *
	 * ZEBRA_ROUTE_ALL is also ignored.
	 */
	for (int i = 0; i < len; i++) {
		assert(route_info[i].key >= ZEBRA_ROUTE_SYSTEM &&
		       route_info[i].key < ZEBRA_ROUTE_MAX);
		assert(route_info[i].meta_q_map < MQ_SIZE);
	}
}

/* Routing information base initialize. */
void rib_init(void)
{
	check_route_info();

	rib_queue_init();

	/* Init dataplane, and register for results */
	pthread_mutex_init(&dplane_mutex, NULL);
	TAILQ_INIT(&rib_dplane_q);
	zebra_dplane_init(rib_dplane_results);
}

/*
 * vrf_id_get_next
 *
 * Get the first vrf id that is greater than the given vrf id if any.
 *
 * Returns true if a vrf id was found, false otherwise.
+ */ +static inline int vrf_id_get_next(vrf_id_t vrf_id, vrf_id_t *next_id_p) +{ + struct vrf *vrf; + + vrf = vrf_lookup_by_id(vrf_id); + if (vrf) { + vrf = RB_NEXT(vrf_id_head, vrf); + if (vrf) { + *next_id_p = vrf->vrf_id; + return 1; + } + } + + return 0; +} + +/* + * rib_tables_iter_next + * + * Returns the next table in the iteration. + */ +struct route_table *rib_tables_iter_next(rib_tables_iter_t *iter) +{ + struct route_table *table; + + /* + * Array that helps us go over all AFI/SAFI combinations via one + * index. + */ + static const struct { + afi_t afi; + safi_t safi; + } afi_safis[] = { + {AFI_IP, SAFI_UNICAST}, {AFI_IP, SAFI_MULTICAST}, + {AFI_IP, SAFI_LABELED_UNICAST}, {AFI_IP6, SAFI_UNICAST}, + {AFI_IP6, SAFI_MULTICAST}, {AFI_IP6, SAFI_LABELED_UNICAST}, + }; + + table = NULL; + + switch (iter->state) { + + case RIB_TABLES_ITER_S_INIT: + iter->vrf_id = VRF_DEFAULT; + iter->afi_safi_ix = -1; + + /* Fall through */ + + case RIB_TABLES_ITER_S_ITERATING: + iter->afi_safi_ix++; + while (1) { + + while (iter->afi_safi_ix + < (int)array_size(afi_safis)) { + table = zebra_vrf_table( + afi_safis[iter->afi_safi_ix].afi, + afi_safis[iter->afi_safi_ix].safi, + iter->vrf_id); + if (table) + break; + + iter->afi_safi_ix++; + } + + /* + * Found another table in this vrf. + */ + if (table) + break; + + /* + * Done with all tables in the current vrf, go to the + * next + * one. + */ + if (!vrf_id_get_next(iter->vrf_id, &iter->vrf_id)) + break; + + iter->afi_safi_ix = 0; + } + + break; + + case RIB_TABLES_ITER_S_DONE: + return NULL; + } + + if (table) + iter->state = RIB_TABLES_ITER_S_ITERATING; + else + iter->state = RIB_TABLES_ITER_S_DONE; + + return table; +} diff --git a/zebra/zebra_rnh.c b/zebra/zebra_rnh.c new file mode 100644 index 0000000..7934a9d --- /dev/null +++ b/zebra/zebra_rnh.c @@ -0,0 +1,1414 @@ +/* Zebra next hop tracking code + * Copyright (C) 2013 Cumulus Networks, Inc. + * + * This file is part of GNU Zebra. 
+ * + * GNU Zebra is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License as published by the + * Free Software Foundation; either version 2, or (at your option) any + * later version. + * + * GNU Zebra is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. + * + * You should have received a copy of the GNU General Public License along + * with this program; see the file COPYING; if not, write to the Free Software + * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA + */ + +#include <zebra.h> + +#include "prefix.h" +#include "table.h" +#include "memory.h" +#include "command.h" +#include "if.h" +#include "log.h" +#include "sockunion.h" +#include "linklist.h" +#include "thread.h" +#include "workqueue.h" +#include "prefix.h" +#include "routemap.h" +#include "stream.h" +#include "nexthop.h" +#include "vrf.h" + +#include "zebra/zebra_router.h" +#include "zebra/rib.h" +#include "zebra/rt.h" +#include "zebra/zserv.h" +#include "zebra/zebra_ns.h" +#include "zebra/zebra_vrf.h" +#include "zebra/redistribute.h" +#include "zebra/debug.h" +#include "zebra/zebra_rnh.h" +#include "zebra/zebra_routemap.h" +#include "zebra/zebra_srte.h" +#include "zebra/interface.h" +#include "zebra/zebra_errors.h" + +DEFINE_MTYPE_STATIC(ZEBRA, RNH, "Nexthop tracking object"); + +/* UI controls whether to notify about changes that only involve backup + * nexthops. Default is to notify all changes. 
 */
static bool rnh_hide_backups;

static void free_state(vrf_id_t vrf_id, struct route_entry *re,
		       struct route_node *rn);
static void copy_state(struct rnh *rnh, const struct route_entry *re,
		       struct route_node *rn);
static bool compare_state(struct route_entry *r1, struct route_entry *r2);
static void print_rnh(struct route_node *rn, struct vty *vty);
static int zebra_client_cleanup_rnh(struct zserv *client);

/* One-time init: clean up a client's RNH registrations when it closes. */
void zebra_rnh_init(void)
{
	hook_register(zserv_client_close, zebra_client_cleanup_rnh);
}

/* Map (vrf, afi, safi) to the proper NHT tracking table, or NULL if the
 * VRF is unknown or the SAFI is not tracked (only unicast/multicast are).
 */
static inline struct route_table *get_rnh_table(vrf_id_t vrfid, afi_t afi,
						safi_t safi)
{
	struct zebra_vrf *zvrf;
	struct route_table *t = NULL;

	zvrf = zebra_vrf_lookup_by_id(vrfid);
	if (zvrf) {
		if (safi == SAFI_UNICAST)
			t = zvrf->rnh_table[afi];
		else if (safi == SAFI_MULTICAST)
			t = zvrf->rnh_table_multicast[afi];
	}

	return t;
}

/* Detach 'rnh' from the nht list of the route node currently resolving
 * it (rnh->resolved_route) in the actual RIB table.
 */
static void zebra_rnh_remove_from_routing_table(struct rnh *rnh)
{
	struct zebra_vrf *zvrf = zebra_vrf_lookup_by_id(rnh->vrf_id);
	struct route_table *table = zvrf->table[rnh->afi][rnh->safi];
	struct route_node *rn;
	rib_dest_t *dest;

	if (!table)
		return;

	rn = route_node_match(table, &rnh->resolved_route);
	if (!rn)
		return;

	if (IS_ZEBRA_DEBUG_NHT_DETAILED)
		zlog_debug("%s: %s(%u):%pRN removed from tracking on %pRN",
			   __func__, VRF_LOGNAME(zvrf->vrf), rnh->vrf_id,
			   rnh->node, rn);

	dest = rib_dest_from_rnode(rn);
	rnh_list_del(&dest->nht, rnh);
	/* Drop the lock route_node_match() took */
	route_unlock_node(rn);
}

/* Attach 'rnh' to the nht list of the RIB node that currently resolves
 * it, so RIB changes on that node re-trigger evaluation of this rnh.
 */
static void zebra_rnh_store_in_routing_table(struct rnh *rnh)
{
	struct zebra_vrf *zvrf = zebra_vrf_lookup_by_id(rnh->vrf_id);
	struct route_table *table = zvrf->table[rnh->afi][rnh->safi];
	struct route_node *rn;
	rib_dest_t *dest;

	rn = route_node_match(table, &rnh->resolved_route);
	if (!rn)
		return;

	if (IS_ZEBRA_DEBUG_NHT_DETAILED)
		zlog_debug("%s: %s(%u):%pRN added for tracking on %pRN",
			   __func__, VRF_LOGNAME(zvrf->vrf), rnh->vrf_id,
			   rnh->node, rn);

	dest = rib_dest_from_rnode(rn);
	rnh_list_add_tail(&dest->nht, rnh);
	/* Drop the lock route_node_match() took */
	route_unlock_node(rn);
}

/* Find or create the tracking entry for prefix 'p'; *exists reports
 * whether it was already present.  Returns NULL if the NHT table for
 * (vrfid, afi, safi) does not exist.
 */
struct rnh *zebra_add_rnh(struct prefix *p, vrf_id_t vrfid, safi_t safi,
			  bool *exists)
{
	struct route_table *table;
	struct route_node *rn;
	struct rnh *rnh = NULL;
	afi_t afi = family2afi(p->family);

	if (IS_ZEBRA_DEBUG_NHT) {
		struct vrf *vrf = vrf_lookup_by_id(vrfid);

		zlog_debug("%s(%u): Add RNH %pFX for safi: %u",
			   VRF_LOGNAME(vrf), vrfid, p, safi);
	}

	table = get_rnh_table(vrfid, afi, safi);
	if (!table) {
		struct vrf *vrf = vrf_lookup_by_id(vrfid);

		flog_warn(EC_ZEBRA_RNH_NO_TABLE,
			  "%s(%u): Add RNH %pFX - table not found",
			  VRF_LOGNAME(vrf), vrfid, p);
		*exists = false;
		return NULL;
	}

	/* Make it sure prefixlen is applied to the prefix. */
	apply_mask(p);

	/* Lookup (or add) route node.*/
	rn = route_node_get(table, p);

	if (!rn->info) {
		rnh = XCALLOC(MTYPE_RNH, sizeof(struct rnh));

		/*
		 * The resolved route is already 0.0.0.0/0 or
		 * 0::0/0 due to the calloc right above, but
		 * we should set the family so that future
		 * comparisons can just be done
		 */
		rnh->resolved_route.family = p->family;
		rnh->client_list = list_new();
		rnh->vrf_id = vrfid;
		rnh->seqno = 0;
		rnh->afi = afi;
		rnh->safi = safi;
		rnh->zebra_pseudowire_list = list_new();
		/* The rnh holds one lock on its node for its lifetime */
		route_lock_node(rn);
		rn->info = rnh;
		rnh->node = rn;
		*exists = false;

		zebra_rnh_store_in_routing_table(rnh);
	} else
		*exists = true;

	/* Drop the lock route_node_get() took */
	route_unlock_node(rn);
	return (rn->info);
}

/* Look up an existing tracking entry for 'p'; NULL if absent. */
struct rnh *zebra_lookup_rnh(struct prefix *p, vrf_id_t vrfid, safi_t safi)
{
	struct route_table *table;
	struct route_node *rn;

	table = get_rnh_table(vrfid, family2afi(PREFIX_FAMILY(p)), safi);
	if (!table)
		return NULL;

	/* Make it sure prefixlen is applied to the prefix. */
	apply_mask(p);

	/* Lookup route node.*/
	rn = route_node_lookup(table, p);
	if (!rn)
		return NULL;

	/* Drop the lookup's lock; the rnh keeps its own node lock */
	route_unlock_node(rn);
	return (rn->info);
}

/* Tear down an rnh: unhook it from the resolving RIB node, release its
 * client/pseudowire lists and cached state, and free the object.
 */
void zebra_free_rnh(struct rnh *rnh)
{
	struct zebra_vrf *zvrf;
	struct route_table *table;

	zebra_rnh_remove_from_routing_table(rnh);
	rnh->flags |= ZEBRA_NHT_DELETED;
	list_delete(&rnh->client_list);
	list_delete(&rnh->zebra_pseudowire_list);

	zvrf = zebra_vrf_lookup_by_id(rnh->vrf_id);
	table = zvrf->table[family2afi(rnh->resolved_route.family)][rnh->safi];

	if (table) {
		struct route_node *rern;

		rern = route_node_match(table, &rnh->resolved_route);
		if (rern) {
			rib_dest_t *dest;

			route_unlock_node(rern);

			dest = rib_dest_from_rnode(rern);
			rnh_list_del(&dest->nht, rnh);
		}
	}
	free_state(rnh->vrf_id, rnh->state, rnh->node);
	XFREE(MTYPE_RNH, rnh);
}

/* Delete the rnh once no clients or pseudowires reference it. */
static void zebra_delete_rnh(struct rnh *rnh)
{
	struct route_node *rn;

	if (!list_isempty(rnh->client_list)
	    || !list_isempty(rnh->zebra_pseudowire_list))
		return;

	if ((rnh->flags & ZEBRA_NHT_DELETED) || !(rn = rnh->node))
		return;

	if (IS_ZEBRA_DEBUG_NHT) {
		struct vrf *vrf = vrf_lookup_by_id(rnh->vrf_id);

		zlog_debug("%s(%u): Del RNH %pRN", VRF_LOGNAME(vrf),
			   rnh->vrf_id, rnh->node);
	}

	zebra_free_rnh(rnh);
	rn->info = NULL;
	/* Release the lifetime lock taken in zebra_add_rnh() */
	route_unlock_node(rn);
}

/*
 * This code will send to the registering client
 * the looked up rnh.
 * For a rnh that was created, there is no data
 * so it will send an empty nexthop group
 * If rnh exists then we know it has been evaluated
 * and as such it will have a resolved rnh.
+ */ +void zebra_add_rnh_client(struct rnh *rnh, struct zserv *client, + vrf_id_t vrf_id) +{ + if (IS_ZEBRA_DEBUG_NHT) { + struct vrf *vrf = vrf_lookup_by_id(vrf_id); + + zlog_debug("%s(%u): Client %s registers for RNH %pRN", + VRF_LOGNAME(vrf), vrf_id, + zebra_route_string(client->proto), rnh->node); + } + if (!listnode_lookup(rnh->client_list, client)) + listnode_add(rnh->client_list, client); + + /* + * We always need to respond with known information, + * currently multiple daemons expect this behavior + */ + zebra_send_rnh_update(rnh, client, vrf_id, 0); +} + +void zebra_remove_rnh_client(struct rnh *rnh, struct zserv *client) +{ + if (IS_ZEBRA_DEBUG_NHT) { + struct vrf *vrf = vrf_lookup_by_id(rnh->vrf_id); + + zlog_debug("Client %s unregisters for RNH %s(%u)%pRN", + zebra_route_string(client->proto), VRF_LOGNAME(vrf), + vrf->vrf_id, rnh->node); + } + listnode_delete(rnh->client_list, client); + zebra_delete_rnh(rnh); +} + +/* XXX move this utility function elsewhere? */ +static void addr2hostprefix(int af, const union g_addr *addr, + struct prefix *prefix) +{ + switch (af) { + case AF_INET: + prefix->family = AF_INET; + prefix->prefixlen = IPV4_MAX_BITLEN; + prefix->u.prefix4 = addr->ipv4; + break; + case AF_INET6: + prefix->family = AF_INET6; + prefix->prefixlen = IPV6_MAX_BITLEN; + prefix->u.prefix6 = addr->ipv6; + break; + default: + memset(prefix, 0, sizeof(*prefix)); + zlog_warn("%s: unknown address family %d", __func__, af); + break; + } +} + +void zebra_register_rnh_pseudowire(vrf_id_t vrf_id, struct zebra_pw *pw, + bool *nht_exists) +{ + struct prefix nh; + struct rnh *rnh; + bool exists; + struct zebra_vrf *zvrf; + + *nht_exists = false; + + zvrf = vrf_info_lookup(vrf_id); + if (!zvrf) + return; + + addr2hostprefix(pw->af, &pw->nexthop, &nh); + rnh = zebra_add_rnh(&nh, vrf_id, SAFI_UNICAST, &exists); + if (!rnh) + return; + + if (!listnode_lookup(rnh->zebra_pseudowire_list, pw)) { + listnode_add(rnh->zebra_pseudowire_list, pw); + pw->rnh = rnh; + 
zebra_evaluate_rnh(zvrf, family2afi(pw->af), 1, &nh, + SAFI_UNICAST); + } else + *nht_exists = true; +} + +void zebra_deregister_rnh_pseudowire(vrf_id_t vrf_id, struct zebra_pw *pw) +{ + struct rnh *rnh; + + rnh = pw->rnh; + if (!rnh) + return; + + listnode_delete(rnh->zebra_pseudowire_list, pw); + pw->rnh = NULL; + + zebra_delete_rnh(rnh); +} + +/* Clear the NEXTHOP_FLAG_RNH_FILTERED flags on all nexthops + */ +static void zebra_rnh_clear_nexthop_rnh_filters(struct route_entry *re) +{ + struct nexthop *nexthop; + + if (re) { + for (nexthop = re->nhe->nhg.nexthop; nexthop; + nexthop = nexthop->next) { + UNSET_FLAG(nexthop->flags, NEXTHOP_FLAG_RNH_FILTERED); + } + } +} + +/* Apply the NHT route-map for a client to the route (and nexthops) + * resolving a NH. + */ +static int zebra_rnh_apply_nht_rmap(afi_t afi, struct zebra_vrf *zvrf, + struct route_node *prn, + struct route_entry *re, int proto) +{ + int at_least_one = 0; + struct nexthop *nexthop; + route_map_result_t ret; + + if (prn && re) { + for (nexthop = re->nhe->nhg.nexthop; nexthop; + nexthop = nexthop->next) { + ret = zebra_nht_route_map_check( + afi, proto, &prn->p, zvrf, re, nexthop); + if (ret != RMAP_DENYMATCH) + at_least_one++; /* at least one valid NH */ + else { + SET_FLAG(nexthop->flags, + NEXTHOP_FLAG_RNH_FILTERED); + } + } + } + return (at_least_one); +} + +/* + * Notify clients registered for this nexthop about a change. 
 */
static void zebra_rnh_notify_protocol_clients(struct zebra_vrf *zvrf, afi_t afi,
					      struct route_node *nrn,
					      struct rnh *rnh,
					      struct route_node *prn,
					      struct route_entry *re)
{
	struct listnode *node;
	struct zserv *client;
	int num_resolving_nh;

	if (IS_ZEBRA_DEBUG_NHT) {
		if (prn && re) {
			zlog_debug("%s(%u):%pRN: NH resolved over route %pRN",
				   VRF_LOGNAME(zvrf->vrf), zvrf->vrf->vrf_id,
				   nrn, prn);
		} else
			zlog_debug("%s(%u):%pRN: NH has become unresolved",
				   VRF_LOGNAME(zvrf->vrf), zvrf->vrf->vrf_id,
				   nrn);
	}

	for (ALL_LIST_ELEMENTS_RO(rnh->client_list, node, client)) {
		if (prn && re) {
			/* Apply route-map for this client to route resolving
			 * this
			 * nexthop to see if it is filtered or not.
			 */
			zebra_rnh_clear_nexthop_rnh_filters(re);
			num_resolving_nh = zebra_rnh_apply_nht_rmap(
				afi, zvrf, prn, re, client->proto);
			if (num_resolving_nh)
				rnh->filtered[client->proto] = 0;
			else
				rnh->filtered[client->proto] = 1;

			if (IS_ZEBRA_DEBUG_NHT)
				zlog_debug(
					"%s(%u):%pRN: Notifying client %s about NH %s",
					VRF_LOGNAME(zvrf->vrf),
					zvrf->vrf->vrf_id, nrn,
					zebra_route_string(client->proto),
					num_resolving_nh
						? ""
						: "(filtered by route-map)");
		} else {
			rnh->filtered[client->proto] = 0;
			if (IS_ZEBRA_DEBUG_NHT)
				zlog_debug(
					"%s(%u):%pRN: Notifying client %s about NH (unreachable)",
					VRF_LOGNAME(zvrf->vrf),
					zvrf->vrf->vrf_id, nrn,
					zebra_route_string(client->proto));
		}

		zebra_send_rnh_update(rnh, client, zvrf->vrf->vrf_id, 0);
	}

	/* Leave the route's filter flags clean for the next evaluation */
	if (re)
		zebra_rnh_clear_nexthop_rnh_filters(re);
}

/*
 * Utility to determine whether a candidate nexthop is useable. We make this
 * check in a couple of places, so this is a single home for the logic we
 * use.
 */

static const int RNH_INVALID_NH_FLAGS = (NEXTHOP_FLAG_RECURSIVE |
					 NEXTHOP_FLAG_DUPLICATE |
					 NEXTHOP_FLAG_RNH_FILTERED);

/* A nexthop is usable for NHT only when the route is installed, the
 * nexthop is active, and none of the invalid flags are set.
 */
bool rnh_nexthop_valid(const struct route_entry *re, const struct nexthop *nh)
{
	return (CHECK_FLAG(re->status, ROUTE_ENTRY_INSTALLED)
		&& CHECK_FLAG(nh->flags, NEXTHOP_FLAG_ACTIVE)
		&& !CHECK_FLAG(nh->flags, RNH_INVALID_NH_FLAGS));
}

/*
 * Determine whether an re's nexthops are valid for tracking.
 */
static bool rnh_check_re_nexthops(const struct route_entry *re,
				  const struct rnh *rnh)
{
	bool ret = false;
	const struct nexthop *nexthop = NULL;

	/* Check route's nexthops */
	for (ALL_NEXTHOPS(re->nhe->nhg, nexthop)) {
		if (rnh_nexthop_valid(re, nexthop))
			break;
	}

	/* Check backup nexthops, if any. */
	if (nexthop == NULL && re->nhe->backup_info &&
	    re->nhe->backup_info->nhe) {
		for (ALL_NEXTHOPS(re->nhe->backup_info->nhe->nhg, nexthop)) {
			if (rnh_nexthop_valid(re, nexthop))
				break;
		}
	}

	if (nexthop == NULL) {
		if (IS_ZEBRA_DEBUG_NHT_DETAILED)
			zlog_debug(
				"        Route Entry %s no nexthops",
				zebra_route_string(re->type));

		goto done;
	}

	/* Some special checks if registration asked for them. */
	if (CHECK_FLAG(rnh->flags, ZEBRA_NHT_CONNECTED)) {
		if ((re->type == ZEBRA_ROUTE_CONNECT)
		    || (re->type == ZEBRA_ROUTE_STATIC))
			ret = true;
		if (re->type == ZEBRA_ROUTE_NHRP) {

			/* NHRP routes qualify only via an ifindex nexthop */
			for (nexthop = re->nhe->nhg.nexthop;
			     nexthop;
			     nexthop = nexthop->next)
				if (nexthop->type == NEXTHOP_TYPE_IFINDEX)
					break;
			if (nexthop)
				ret = true;
		}
	} else {
		ret = true;
	}

done:
	return ret;
}

/*
 * Determine appropriate route (route entry) resolving a tracked
 * nexthop.
 */
static struct route_entry *
zebra_rnh_resolve_nexthop_entry(struct zebra_vrf *zvrf, afi_t afi,
				struct route_node *nrn, const struct rnh *rnh,
				struct route_node **prn)
{
	struct route_table *route_table;
	struct route_node *rn;
	struct route_entry *re;

	*prn = NULL;

	route_table = zvrf->table[afi][rnh->safi];
	if (!route_table)
		return NULL;

	rn = route_node_match(route_table, &nrn->p);
	if (!rn)
		return NULL;

	/* Unlock route node - we don't need to lock when walking the tree. */
	route_unlock_node(rn);

	/* While resolving nexthops, we may need to walk up the tree from the
	 * most-specific match. Do similar logic as in zebra_rib.c
	 */
	while (rn) {
		if (IS_ZEBRA_DEBUG_NHT_DETAILED)
			zlog_debug("%s: %s(%u):%pRN Possible Match to %pRN",
				   __func__, VRF_LOGNAME(zvrf->vrf),
				   rnh->vrf_id, rnh->node, rn);

		/* Do not resolve over default route unless allowed &&
		 * match route to be exact if so specified
		 */
		if (is_default_prefix(&rn->p)
		    && (!CHECK_FLAG(rnh->flags, ZEBRA_NHT_RESOLVE_VIA_DEFAULT)
			&& !rnh_resolve_via_default(zvrf, rn->p.family))) {
			if (IS_ZEBRA_DEBUG_NHT_DETAILED)
				zlog_debug(
					"        Not allowed to resolve through default prefix: rnh->resolve_via_default: %u",
					CHECK_FLAG(
						rnh->flags,
						ZEBRA_NHT_RESOLVE_VIA_DEFAULT));
			return NULL;
		}

		/* Identify appropriate route entry.
		 */
		RNODE_FOREACH_RE (rn, re) {
			if (CHECK_FLAG(re->status, ROUTE_ENTRY_REMOVED)) {
				if (IS_ZEBRA_DEBUG_NHT_DETAILED)
					zlog_debug(
						"        Route Entry %s removed",
						zebra_route_string(re->type));
				continue;
			}
			if (!CHECK_FLAG(re->flags, ZEBRA_FLAG_SELECTED) &&
			    !CHECK_FLAG(re->flags, ZEBRA_FLAG_FIB_OVERRIDE)) {
				if (IS_ZEBRA_DEBUG_NHT_DETAILED)
					zlog_debug(
						"        Route Entry %s !selected",
						zebra_route_string(re->type));
				continue;
			}

			if (CHECK_FLAG(re->status, ROUTE_ENTRY_QUEUED)) {
				if (IS_ZEBRA_DEBUG_NHT_DETAILED)
					zlog_debug(
						"        Route Entry %s queued",
						zebra_route_string(re->type));
				continue;
			}

			/* Just being SELECTED isn't quite enough - must
			 * have an installed nexthop to be useful.
			 */
			if (rnh_check_re_nexthops(re, rnh))
				break;
		}

		/* Route entry found, we're done; else, walk up the tree.
		 * (re is NULL here when the loop above exhausted the node.)
		 */
		if (re) {
			*prn = rn;
			return re;
		} else {
			/* Resolve the nexthop recursively by finding matching
			 * route with lower prefix length
			 */
			rn = rn->parent;
		}
	}

	return NULL;
}

/* Re-run pseudowire processing for every pw tracking this rnh. */
static void zebra_rnh_process_pseudowires(vrf_id_t vrfid, struct rnh *rnh)
{
	struct zebra_pw *pw;
	struct listnode *node;

	for (ALL_LIST_ELEMENTS_RO(rnh->zebra_pseudowire_list, node, pw))
		zebra_pw_update(pw);
}

/*
 * See if a tracked nexthop entry has undergone any change, and if so,
 * take appropriate action; this involves notifying any clients and/or
 * scheduling dependent static routes for processing.
 */
static void zebra_rnh_eval_nexthop_entry(struct zebra_vrf *zvrf, afi_t afi,
					 int force, struct route_node *nrn,
					 struct rnh *rnh,
					 struct route_node *prn,
					 struct route_entry *re)
{
	int state_changed = 0;

	/* If we're resolving over a different route, resolution has changed or
	 * the resolving route has some change (e.g., metric), there is a state
	 * change.
	 */
	zebra_rnh_remove_from_routing_table(rnh);
	if (!prefix_same(&rnh->resolved_route, prn ? &prn->p : NULL)) {
		if (prn)
			prefix_copy(&rnh->resolved_route, &prn->p);
		else {
			/*
			 * Just quickly store the family of the resolved
			 * route so that we can reset it in a second here
			 */
			int family = rnh->resolved_route.family;

			memset(&rnh->resolved_route, 0, sizeof(struct prefix));
			rnh->resolved_route.family = family;
		}

		copy_state(rnh, re, nrn);
		state_changed = 1;
	} else if (compare_state(re, rnh->state)) {
		copy_state(rnh, re, nrn);
		state_changed = 1;
	}
	zebra_rnh_store_in_routing_table(rnh);

	if (state_changed || force) {
		/* NOTE: Use the "copy" of resolving route stored in 'rnh' i.e.,
		 * rnh->state.
		 */
		/* Notify registered protocol clients. */
		zebra_rnh_notify_protocol_clients(zvrf, afi, nrn, rnh, prn,
						  rnh->state);

		/* Process pseudowires attached to this nexthop */
		zebra_rnh_process_pseudowires(zvrf->vrf->vrf_id, rnh);
	}
}

/* Evaluate one tracked entry */
static void zebra_rnh_evaluate_entry(struct zebra_vrf *zvrf, afi_t afi,
				     int force, struct route_node *nrn)
{
	struct rnh *rnh;
	struct route_entry *re;
	struct route_node *prn;

	if (IS_ZEBRA_DEBUG_NHT) {
		zlog_debug("%s(%u):%pRN: Evaluate RNH, %s",
			   VRF_LOGNAME(zvrf->vrf), zvrf->vrf->vrf_id, nrn,
			   force ? "(force)" : "");
	}

	rnh = nrn->info;

	/* Identify route entry (RE) resolving this tracked entry. */
	re = zebra_rnh_resolve_nexthop_entry(zvrf, afi, nrn, rnh, &prn);

	/* If the entry cannot be resolved and that is also the existing state,
	 * there is nothing further to do.
	 */
	if (!re && rnh->state == NULL && !force)
		return;

	/* Process based on type of entry. */
	zebra_rnh_eval_nexthop_entry(zvrf, afi, force, nrn, rnh, prn, re);
}

/*
 * Clear the ROUTE_ENTRY_NEXTHOPS_CHANGED flag
 * from the re entries.
 *
 * Please note we are doing this *after* we have
 * notified the world about each nexthop as that
 * we can have a situation where one re entry
 * covers multiple nexthops we are interested in.
 */
static void zebra_rnh_clear_nhc_flag(struct zebra_vrf *zvrf, afi_t afi,
				     struct route_node *nrn)
{
	struct rnh *rnh;
	struct route_entry *re;
	struct route_node *prn;

	rnh = nrn->info;

	/* Identify route entry (RIB) resolving this tracked entry. */
	re = zebra_rnh_resolve_nexthop_entry(zvrf, afi, nrn, rnh, &prn);

	if (re)
		/* NOTE(review): the block comment above this function says
		 * ROUTE_ENTRY_NEXTHOPS_CHANGED, but the code clears
		 * ROUTE_ENTRY_LABELS_CHANGED - confirm which is intended.
		 */
		UNSET_FLAG(re->status, ROUTE_ENTRY_LABELS_CHANGED);
}

/* Evaluate all tracked entries (nexthops or routes for import into BGP)
 * of a particular VRF and address-family or a specific prefix.
 */
void zebra_evaluate_rnh(struct zebra_vrf *zvrf, afi_t afi, int force,
			const struct prefix *p, safi_t safi)
{
	struct route_table *rnh_table;
	struct route_node *nrn;

	rnh_table = get_rnh_table(zvrf->vrf->vrf_id, afi, safi);
	if (!rnh_table) // unexpected: table should exist for every afi/safi
		return;

	if (p) {
		/* Evaluating a specific entry, make sure it exists. */
		nrn = route_node_lookup(rnh_table, p);
		if (nrn && nrn->info)
			zebra_rnh_evaluate_entry(zvrf, afi, force, nrn);

		/* lookup took a lock even on a miss-with-node; release it */
		if (nrn)
			route_unlock_node(nrn);
	} else {
		/* Evaluate entire table: first pass evaluates and notifies,
		 * second pass clears the changed flag (see the comment on
		 * zebra_rnh_clear_nhc_flag for why this is a separate walk).
		 */
		nrn = route_top(rnh_table);
		while (nrn) {
			if (nrn->info)
				zebra_rnh_evaluate_entry(zvrf, afi, force, nrn);
			nrn = route_next(nrn); /* this will also unlock nrn */
		}
		nrn = route_top(rnh_table);
		while (nrn) {
			if (nrn->info)
				zebra_rnh_clear_nhc_flag(zvrf, afi, nrn);
			nrn = route_next(nrn); /* this will also unlock nrn */
		}
	}
}

/* Dump the RNH table for a vrf/afi/safi to the vty; if 'p' is given,
 * only entries covered by that prefix are shown.
 */
void zebra_print_rnh_table(vrf_id_t vrfid, afi_t afi, safi_t safi,
			   struct vty *vty, const struct prefix *p)
{
	struct route_table *table;
	struct route_node *rn;

	table = get_rnh_table(vrfid, afi, safi);
	if (!table) {
		if (IS_ZEBRA_DEBUG_NHT)
			zlog_debug("print_rnhs: rnh table not found");
		return;
	}

	for (rn = route_top(table); rn; rn = route_next(rn)) {
		if (p && !prefix_match(&rn->p, p))
			continue;

		if (rn->info)
			print_rnh(rn, vty);
	}
}

/**
 * free_state - free up the re structure associated with the rnh.
+ */ +static void free_state(vrf_id_t vrf_id, struct route_entry *re, + struct route_node *rn) +{ + if (!re) + return; + + /* free RE and nexthops */ + zebra_nhg_free(re->nhe); + XFREE(MTYPE_RE, re); +} + +static void copy_state(struct rnh *rnh, const struct route_entry *re, + struct route_node *rn) +{ + struct route_entry *state; + + if (rnh->state) { + free_state(rnh->vrf_id, rnh->state, rn); + rnh->state = NULL; + } + + if (!re) + return; + + state = XCALLOC(MTYPE_RE, sizeof(struct route_entry)); + state->type = re->type; + state->distance = re->distance; + state->metric = re->metric; + state->vrf_id = re->vrf_id; + state->status = re->status; + + state->nhe = zebra_nhe_copy(re->nhe, 0); + + /* Copy the 'fib' nexthops also, if present - we want to capture + * the true installed nexthops. + */ + if (re->fib_ng.nexthop) + nexthop_group_copy(&state->fib_ng, &re->fib_ng); + if (re->fib_backup_ng.nexthop) + nexthop_group_copy(&state->fib_backup_ng, &re->fib_backup_ng); + + rnh->state = state; +} + +/* + * Locate the next primary nexthop, used when comparing current rnh info with + * an updated route. + */ +static struct nexthop *next_valid_primary_nh(struct route_entry *re, + struct nexthop *nh) +{ + struct nexthop_group *nhg; + struct nexthop *bnh; + int i, idx; + bool default_path = true; + + /* Fib backup ng present: some backups are installed, + * and we're configured for special handling if there are backups. + */ + if (rnh_hide_backups && (re->fib_backup_ng.nexthop != NULL)) + default_path = false; + + /* Default path: no special handling, just using the 'installed' + * primary nexthops and the common validity test. + */ + if (default_path) { + if (nh == NULL) { + nhg = rib_get_fib_nhg(re); + nh = nhg->nexthop; + } else + nh = nexthop_next(nh); + + while (nh) { + if (rnh_nexthop_valid(re, nh)) + break; + else + nh = nexthop_next(nh); + } + + return nh; + } + + /* Hide backup activation/switchover events. 
+ * + * If we've had a switchover, an inactive primary won't be in + * the fib list at all - the 'fib' list could even be empty + * in the case where no primary is installed. But we want to consider + * those primaries "valid" if they have an activated backup nh. + * + * The logic is something like: + * if (!fib_nhg) + * // then all primaries are installed + * else + * for each primary in re nhg + * if in fib_nhg + * primary is installed + * else if a backup is installed + * primary counts as installed + * else + * primary !installed + */ + + /* Start with the first primary */ + if (nh == NULL) + nh = re->nhe->nhg.nexthop; + else + nh = nexthop_next(nh); + + while (nh) { + + if (IS_ZEBRA_DEBUG_NHT_DETAILED) + zlog_debug("%s: checking primary NH %pNHv", + __func__, nh); + + /* If this nexthop is in the fib list, it's installed */ + nhg = rib_get_fib_nhg(re); + + for (bnh = nhg->nexthop; bnh; bnh = nexthop_next(bnh)) { + if (nexthop_cmp(nh, bnh) == 0) + break; + } + + if (bnh != NULL) { + /* Found the match */ + if (IS_ZEBRA_DEBUG_NHT_DETAILED) + zlog_debug("%s: NH in fib list", __func__); + break; + } + + /* Else if this nexthop's backup is installed, it counts */ + nhg = rib_get_fib_backup_nhg(re); + bnh = nhg->nexthop; + + for (idx = 0; bnh != NULL; idx++) { + /* If we find an active backup nh for this + * primary, we're done; + */ + if (IS_ZEBRA_DEBUG_NHT_DETAILED) + zlog_debug("%s: checking backup %pNHv [%d]", + __func__, bnh, idx); + + if (!CHECK_FLAG(bnh->flags, NEXTHOP_FLAG_ACTIVE)) + continue; + + for (i = 0; i < nh->backup_num; i++) { + /* Found a matching activated backup nh */ + if (nh->backup_idx[i] == idx) { + if (IS_ZEBRA_DEBUG_NHT_DETAILED) + zlog_debug("%s: backup %d activated", + __func__, i); + + goto done; + } + } + + /* Note that we're not recursing here if the + * backups are recursive: the primary's index is + * only valid in the top-level backup list. 
+ */ + bnh = bnh->next; + } + + /* Try the next primary nexthop */ + nh = nexthop_next(nh); + } + +done: + + return nh; +} + +/* + * Compare two route_entries' nexthops. Account for backup nexthops + * and for the 'fib' nexthop lists, if present. + */ +static bool compare_valid_nexthops(struct route_entry *r1, + struct route_entry *r2) +{ + bool matched_p = false; + struct nexthop_group *nhg1, *nhg2; + struct nexthop *nh1, *nh2; + + /* Start with the primary nexthops */ + + nh1 = next_valid_primary_nh(r1, NULL); + nh2 = next_valid_primary_nh(r2, NULL); + + while (1) { + /* Find any differences in the nexthop lists */ + + if (nh1 && nh2) { + /* Any difference is a no-match */ + if (nexthop_cmp(nh1, nh2) != 0) { + if (IS_ZEBRA_DEBUG_NHT_DETAILED) + zlog_debug("%s: nh1: %pNHv, nh2: %pNHv differ", + __func__, nh1, nh2); + goto done; + } + + } else if (nh1 || nh2) { + /* One list has more valid nexthops than the other */ + if (IS_ZEBRA_DEBUG_NHT_DETAILED) + zlog_debug("%s: nh1 %s, nh2 %s", __func__, + nh1 ? "non-NULL" : "NULL", + nh2 ? "non-NULL" : "NULL"); + goto done; + } else + break; /* Done with both lists */ + + nh1 = next_valid_primary_nh(r1, nh1); + nh2 = next_valid_primary_nh(r2, nh2); + } + + /* If configured, don't compare installed backup state - we've + * accounted for that with the primaries above. + * + * But we do want to compare the routes' backup info, + * in case the owning route has changed the backups - + * that change we do want to report. 
+ */ + if (rnh_hide_backups) { + uint32_t hash1 = 0, hash2 = 0; + + if (r1->nhe->backup_info) + hash1 = nexthop_group_hash( + &r1->nhe->backup_info->nhe->nhg); + + if (r2->nhe->backup_info) + hash2 = nexthop_group_hash( + &r2->nhe->backup_info->nhe->nhg); + + if (IS_ZEBRA_DEBUG_NHT_DETAILED) + zlog_debug("%s: backup hash1 %#x, hash2 %#x", + __func__, hash1, hash2); + + if (hash1 != hash2) + goto done; + else + goto finished; + } + + /* The test for the backups is slightly different: the only installed + * backups will be in the 'fib' list. + */ + nhg1 = rib_get_fib_backup_nhg(r1); + nhg2 = rib_get_fib_backup_nhg(r2); + + nh1 = nhg1->nexthop; + nh2 = nhg2->nexthop; + + while (1) { + /* Find each backup list's next valid nexthop */ + while ((nh1 != NULL) && !rnh_nexthop_valid(r1, nh1)) + nh1 = nexthop_next(nh1); + + while ((nh2 != NULL) && !rnh_nexthop_valid(r2, nh2)) + nh2 = nexthop_next(nh2); + + if (nh1 && nh2) { + /* Any difference is a no-match */ + if (nexthop_cmp(nh1, nh2) != 0) { + if (IS_ZEBRA_DEBUG_NHT_DETAILED) + zlog_debug("%s: backup nh1: %pNHv, nh2: %pNHv differ", + __func__, nh1, nh2); + goto done; + } + + nh1 = nexthop_next(nh1); + nh2 = nexthop_next(nh2); + } else if (nh1 || nh2) { + /* One list has more valid nexthops than the other */ + if (IS_ZEBRA_DEBUG_NHT_DETAILED) + zlog_debug("%s: backup nh1 %s, nh2 %s", + __func__, + nh1 ? "non-NULL" : "NULL", + nh2 ? "non-NULL" : "NULL"); + goto done; + } else + break; /* Done with both lists */ + } + +finished: + + /* Well, it's a match */ + matched_p = true; + +done: + + if (IS_ZEBRA_DEBUG_NHT_DETAILED) + zlog_debug("%s: %smatched", + __func__, (matched_p ? "" : "NOT ")); + + return matched_p; +} + +/* Returns 'false' if no difference. 
*/ +static bool compare_state(struct route_entry *r1, + struct route_entry *r2) +{ + if (!r1 && !r2) + return false; + + if ((!r1 && r2) || (r1 && !r2)) + return true; + + if (r1->distance != r2->distance) + return true; + + if (r1->metric != r2->metric) + return true; + + if (!compare_valid_nexthops(r1, r2)) + return true; + + return false; +} + +int zebra_send_rnh_update(struct rnh *rnh, struct zserv *client, + vrf_id_t vrf_id, uint32_t srte_color) +{ + struct stream *s = NULL; + struct route_entry *re; + unsigned long nump; + uint8_t num; + struct nexthop *nh; + struct route_node *rn; + int ret; + uint32_t message = 0; + + rn = rnh->node; + re = rnh->state; + + /* Get output stream. */ + s = stream_new(ZEBRA_MAX_PACKET_SIZ); + + zclient_create_header(s, ZEBRA_NEXTHOP_UPDATE, vrf_id); + + /* Message flags. */ + if (srte_color) + SET_FLAG(message, ZAPI_MESSAGE_SRTE); + stream_putl(s, message); + + /* + * Put what we were told to match against + */ + stream_putw(s, rnh->safi); + stream_putw(s, rn->p.family); + stream_putc(s, rn->p.prefixlen); + switch (rn->p.family) { + case AF_INET: + stream_put_in_addr(s, &rn->p.u.prefix4); + break; + case AF_INET6: + stream_put(s, &rn->p.u.prefix6, IPV6_MAX_BYTELEN); + break; + default: + flog_err(EC_ZEBRA_RNH_UNKNOWN_FAMILY, + "%s: Unknown family (%d) notification attempted", + __func__, rn->p.family); + goto failure; + } + + /* + * What we matched against + */ + stream_putw(s, rnh->resolved_route.family); + stream_putc(s, rnh->resolved_route.prefixlen); + switch (rnh->resolved_route.family) { + case AF_INET: + stream_put_in_addr(s, &rnh->resolved_route.u.prefix4); + break; + case AF_INET6: + stream_put(s, &rnh->resolved_route.u.prefix6, IPV6_MAX_BYTELEN); + break; + default: + flog_err(EC_ZEBRA_RNH_UNKNOWN_FAMILY, + "%s: Unknown family (%d) notification attempted", + __func__, rn->p.family); + goto failure; + } + + if (srte_color) + stream_putl(s, srte_color); + + if (re) { + struct zapi_nexthop znh; + struct nexthop_group 
*nhg; + + stream_putc(s, re->type); + stream_putw(s, re->instance); + stream_putc(s, re->distance); + stream_putl(s, re->metric); + num = 0; + nump = stream_get_endp(s); + stream_putc(s, 0); + + nhg = rib_get_fib_nhg(re); + for (ALL_NEXTHOPS_PTR(nhg, nh)) + if (rnh_nexthop_valid(re, nh)) { + zapi_nexthop_from_nexthop(&znh, nh); + ret = zapi_nexthop_encode(s, &znh, 0, message); + if (ret < 0) + goto failure; + + num++; + } + + nhg = rib_get_fib_backup_nhg(re); + if (nhg) { + for (ALL_NEXTHOPS_PTR(nhg, nh)) + if (rnh_nexthop_valid(re, nh)) { + zapi_nexthop_from_nexthop(&znh, nh); + ret = zapi_nexthop_encode( + s, &znh, 0 /* flags */, + 0 /* message */); + if (ret < 0) + goto failure; + + num++; + } + } + + stream_putc_at(s, nump, num); + } else { + stream_putc(s, 0); // type + stream_putw(s, 0); // instance + stream_putc(s, 0); // distance + stream_putl(s, 0); // metric + stream_putc(s, 0); // nexthops + } + stream_putw_at(s, 0, stream_get_endp(s)); + + client->nh_last_upd_time = monotime(NULL); + return zserv_send_message(client, s); + +failure: + + stream_free(s); + return -1; +} + +static void print_nh(struct nexthop *nexthop, struct vty *vty) +{ + char buf[BUFSIZ]; + struct zebra_ns *zns = zebra_ns_lookup(nexthop->vrf_id); + + switch (nexthop->type) { + case NEXTHOP_TYPE_IPV4: + case NEXTHOP_TYPE_IPV4_IFINDEX: + vty_out(vty, " via %pI4", &nexthop->gate.ipv4); + if (nexthop->ifindex) + vty_out(vty, ", %s", + ifindex2ifname_per_ns(zns, nexthop->ifindex)); + break; + case NEXTHOP_TYPE_IPV6: + case NEXTHOP_TYPE_IPV6_IFINDEX: + vty_out(vty, " %s", + inet_ntop(AF_INET6, &nexthop->gate.ipv6, buf, BUFSIZ)); + if (nexthop->ifindex) + vty_out(vty, ", via %s", + ifindex2ifname_per_ns(zns, nexthop->ifindex)); + break; + case NEXTHOP_TYPE_IFINDEX: + vty_out(vty, " is directly connected, %s", + ifindex2ifname_per_ns(zns, nexthop->ifindex)); + break; + case NEXTHOP_TYPE_BLACKHOLE: + vty_out(vty, " is directly connected, Null0"); + break; + default: + break; + } + vty_out(vty, 
"\n"); +} + +static void print_rnh(struct route_node *rn, struct vty *vty) +{ + struct rnh *rnh; + struct nexthop *nexthop; + struct listnode *node; + struct zserv *client; + char buf[BUFSIZ]; + + rnh = rn->info; + vty_out(vty, "%s%s\n", + inet_ntop(rn->p.family, &rn->p.u.prefix, buf, BUFSIZ), + CHECK_FLAG(rnh->flags, ZEBRA_NHT_CONNECTED) ? "(Connected)" + : ""); + if (rnh->state) { + vty_out(vty, " resolved via %s\n", + zebra_route_string(rnh->state->type)); + for (nexthop = rnh->state->nhe->nhg.nexthop; nexthop; + nexthop = nexthop->next) + print_nh(nexthop, vty); + } else + vty_out(vty, " unresolved%s\n", + CHECK_FLAG(rnh->flags, ZEBRA_NHT_CONNECTED) + ? "(Connected)" + : ""); + + vty_out(vty, " Client list:"); + for (ALL_LIST_ELEMENTS_RO(rnh->client_list, node, client)) + vty_out(vty, " %s(fd %d)%s", zebra_route_string(client->proto), + client->sock, + rnh->filtered[client->proto] ? "(filtered)" : ""); + if (!list_isempty(rnh->zebra_pseudowire_list)) + vty_out(vty, " zebra[pseudowires]"); + vty_out(vty, "\n"); +} + +static int zebra_cleanup_rnh_client(vrf_id_t vrf_id, afi_t afi, safi_t safi, + struct zserv *client) +{ + struct route_table *ntable; + struct route_node *nrn; + struct rnh *rnh; + + if (IS_ZEBRA_DEBUG_NHT) { + struct vrf *vrf = vrf_lookup_by_id(vrf_id); + + zlog_debug("%s(%u): Client %s RNH cleanup for family %s", + VRF_LOGNAME(vrf), vrf_id, + zebra_route_string(client->proto), afi2str(afi)); + } + + ntable = get_rnh_table(vrf_id, afi, safi); + if (!ntable) { + zlog_debug("cleanup_rnh_client: rnh table not found"); + return -1; + } + + for (nrn = route_top(ntable); nrn; nrn = route_next(nrn)) { + if (!nrn->info) + continue; + + rnh = nrn->info; + zebra_remove_rnh_client(rnh, client); + } + return 1; +} + +/* Cleanup registered nexthops (across VRFs) upon client disconnect. 
 */
static int zebra_client_cleanup_rnh(struct zserv *client)
{
	struct vrf *vrf;
	struct zebra_vrf *zvrf;

	/* Walk every VRF and purge this client's registrations from all
	 * four RNH tables (IPv4/IPv6 x unicast/multicast).
	 */
	RB_FOREACH (vrf, vrf_id_head, &vrfs_by_id) {
		zvrf = vrf->info;
		if (zvrf) {
			zebra_cleanup_rnh_client(zvrf_id(zvrf), AFI_IP,
						 SAFI_UNICAST, client);
			zebra_cleanup_rnh_client(zvrf_id(zvrf), AFI_IP,
						 SAFI_MULTICAST, client);
			zebra_cleanup_rnh_client(zvrf_id(zvrf), AFI_IP6,
						 SAFI_UNICAST, client);
			zebra_cleanup_rnh_client(zvrf_id(zvrf), AFI_IP6,
						 SAFI_MULTICAST, client);
		}
	}

	return 0;
}

/* Returns 1 when NHT for 'family' is allowed to resolve via a default
 * route in this VRF (per-VRF per-family configuration), else 0.
 */
int rnh_resolve_via_default(struct zebra_vrf *zvrf, int family)
{
	if (((family == AF_INET) && zvrf->zebra_rnh_ip_default_route)
	    || ((family == AF_INET6) && zvrf->zebra_rnh_ipv6_default_route))
		return 1;
	else
		return 0;
}

/*
 * UI control to avoid notifications if backup nexthop status changes
 */
void rnh_set_hide_backups(bool hide_p)
{
	rnh_hide_backups = hide_p;
}

bool rnh_get_hide_backups(void)
{
	return rnh_hide_backups;
}
diff --git a/zebra/zebra_rnh.h b/zebra/zebra_rnh.h
new file mode 100644
index 0000000..70eda72
--- /dev/null
+++ b/zebra/zebra_rnh.h
@@ -0,0 +1,64 @@
/*
 * Zebra next hop tracking header
 * Copyright (C) 2013 Cumulus Networks, Inc.
 *
 * This file is part of GNU Zebra.
 *
 * GNU Zebra is free software; you can redistribute it and/or modify it
 * under the terms of the GNU General Public License as published by the
 * Free Software Foundation; either version 2, or (at your option) any
 * later version.
 *
 * GNU Zebra is distributed in the hope that it will be useful, but
 * WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
 * General Public License for more details.
+ * + * You should have received a copy of the GNU General Public License along + * with this program; see the file COPYING; if not, write to the Free Software + * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA + */ + +#ifndef _ZEBRA_RNH_H +#define _ZEBRA_RNH_H + +#include "prefix.h" +#include "vty.h" + +#ifdef __cplusplus +extern "C" { +#endif + +extern void zebra_rnh_init(void); + +extern struct rnh *zebra_add_rnh(struct prefix *p, vrf_id_t vrfid, safi_t safi, + bool *exists); +extern struct rnh *zebra_lookup_rnh(struct prefix *p, vrf_id_t vrfid, + safi_t safi); +extern void zebra_free_rnh(struct rnh *rnh); +extern void zebra_add_rnh_client(struct rnh *rnh, struct zserv *client, + vrf_id_t vrfid); +extern int zebra_send_rnh_update(struct rnh *rnh, struct zserv *client, + vrf_id_t vrf_id, uint32_t srte_color); +extern void zebra_register_rnh_pseudowire(vrf_id_t, struct zebra_pw *, bool *); +extern void zebra_deregister_rnh_pseudowire(vrf_id_t, struct zebra_pw *); +extern void zebra_remove_rnh_client(struct rnh *rnh, struct zserv *client); +extern void zebra_evaluate_rnh(struct zebra_vrf *zvrf, afi_t afi, int force, + const struct prefix *p, safi_t safi); +extern void zebra_print_rnh_table(vrf_id_t vrfid, afi_t afi, safi_t safi, + struct vty *vty, const struct prefix *p); + +extern int rnh_resolve_via_default(struct zebra_vrf *zvrf, int family); + +extern bool rnh_nexthop_valid(const struct route_entry *re, + const struct nexthop *nh); + +/* UI control to avoid notifications if backup nexthop status changes */ +void rnh_set_hide_backups(bool hide_p); +bool rnh_get_hide_backups(void); + +#ifdef __cplusplus +} +#endif + +#endif /*_ZEBRA_RNH_H */ diff --git a/zebra/zebra_routemap.c b/zebra/zebra_routemap.c new file mode 100644 index 0000000..4fc49e4 --- /dev/null +++ b/zebra/zebra_routemap.c @@ -0,0 +1,1990 @@ +/* zebra routemap. + * Copyright (C) 2006 IBM Corporation + * + * This file is part of GNU Zebra. 
 *
 * GNU Zebra is free software; you can redistribute it and/or modify it
 * under the terms of the GNU General Public License as published by the
 * Free Software Foundation; either version 2, or (at your option) any
 * later version.
 *
 * GNU Zebra is distributed in the hope that it will be useful, but
 * WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
 * General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License along
 * with this program; see the file COPYING; if not, write to the Free Software
 * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
 */

#include <zebra.h>

#include "memory.h"
#include "prefix.h"
#include "rib.h"
#include "vty.h"
#include "routemap.h"
#include "command.h"
#include "filter.h"
#include "plist.h"
#include "nexthop.h"
#include "northbound_cli.h"
#include "lib/route_types.h"
#include "vrf.h"
#include "frrstr.h"

#include "zebra/zebra_router.h"
#include "zebra/redistribute.h"
#include "zebra/debug.h"
#include "zebra/zebra_rnh.h"
#include "zebra/zebra_routemap.h"

#ifndef VTYSH_EXTRACT_PL
#include "zebra/zebra_routemap_clippy.c"
#endif

/* Delay (seconds) between a route-map change and RIB reprocessing. */
static uint32_t zebra_rmap_update_timer = ZEBRA_RMAP_DEFAULT_UPDATE_TIMER;
/* Pending route-map update timer thread, if scheduled. */
static struct thread *zebra_t_rmap_update = NULL;
char *zebra_import_table_routemap[AFI_MAX][ZEBRA_KERNEL_TABLE_MAX];

/* Object handed to route-map match/set hooks as the opaque 'object'. */
struct nh_rmap_obj {
	struct nexthop *nexthop;
	vrf_id_t vrf_id;
	uint32_t source_protocol;
	uint8_t instance;
	int metric;
	route_tag_t tag;
};

static void zebra_route_map_set_delay_timer(uint32_t value);

/* 'match tag TAG'
 * Match function return 1 if match is success else return 0
 */
static enum route_map_cmd_result_t
route_match_tag(void *rule, const struct prefix *prefix, void *object)
{
	route_tag_t *tag;
	struct nh_rmap_obj *nh_data;

	tag = rule;
	nh_data = object;

	if (nh_data->tag == *tag)
		return RMAP_MATCH;

	return RMAP_NOMATCH;
}

/* Route map commands for tag matching */
static const struct route_map_rule_cmd route_match_tag_cmd = {
	"tag",
	route_match_tag,
	route_map_rule_tag_compile,
	route_map_rule_tag_free,
};


/* `match interface IFNAME' */
/* Match function return 1 if match is success else return zero. */
static enum route_map_cmd_result_t
route_match_interface(void *rule, const struct prefix *prefix, void *object)
{
	struct nh_rmap_obj *nh_data;
	char *ifname = rule;
	ifindex_t ifindex;

	/* The literal name "any" matches every interface. */
	if (strcasecmp(ifname, "any") == 0)
		return RMAP_MATCH;
	nh_data = object;
	if (!nh_data || !nh_data->nexthop)
		return RMAP_NOMATCH;
	/* presumably 0 here means "no such interface in this vrf" -
	 * TODO confirm ifname2ifindex's failure value.
	 */
	ifindex = ifname2ifindex(ifname, nh_data->vrf_id);
	if (ifindex == 0)
		return RMAP_NOMATCH;
	if (nh_data->nexthop->ifindex == ifindex)
		return RMAP_MATCH;

	return RMAP_NOMATCH;
}

/* Route map `match interface' match statement. `arg' is IFNAME value */
static void *route_match_interface_compile(const char *arg)
{
	return XSTRDUP(MTYPE_ROUTE_MAP_COMPILED, arg);
}

/* Free route map's compiled `match interface' value.
*/ +static void route_match_interface_free(void *rule) +{ + XFREE(MTYPE_ROUTE_MAP_COMPILED, rule); +} + +static void show_vrf_proto_rm(struct vty *vty, struct zebra_vrf *zvrf, + int af_type) +{ + int i; + + vty_out(vty, "Protocol : route-map\n"); + vty_out(vty, "-------------------------------------\n"); + + for (i = 0; i < ZEBRA_ROUTE_MAX; i++) { + if (PROTO_RM_NAME(zvrf, af_type, i)) + vty_out(vty, "%-24s : %-10s\n", zebra_route_string(i), + PROTO_RM_NAME(zvrf, af_type, i)); + else + vty_out(vty, "%-24s : none\n", zebra_route_string(i)); + } + + if (PROTO_RM_NAME(zvrf, af_type, i)) + vty_out(vty, "%-24s : %-10s\n", "any", + PROTO_RM_NAME(zvrf, af_type, i)); + else + vty_out(vty, "%-24s : none\n", "any"); +} + +static void show_vrf_nht_rm(struct vty *vty, struct zebra_vrf *zvrf, + int af_type) +{ + int i; + + vty_out(vty, "Protocol : route-map\n"); + vty_out(vty, "-------------------------------------\n"); + + for (i = 0; i < ZEBRA_ROUTE_MAX; i++) { + if (NHT_RM_NAME(zvrf, af_type, i)) + vty_out(vty, "%-24s : %-10s\n", zebra_route_string(i), + NHT_RM_NAME(zvrf, af_type, i)); + else + vty_out(vty, "%-24s : none\n", zebra_route_string(i)); + } + + if (NHT_RM_NAME(zvrf, af_type, i)) + vty_out(vty, "%-24s : %-10s\n", "any", + NHT_RM_NAME(zvrf, af_type, i)); + else + vty_out(vty, "%-24s : none\n", "any"); +} + +static int show_proto_rm(struct vty *vty, int af_type, const char *vrf_all, + const char *vrf_name) +{ + struct zebra_vrf *zvrf; + + if (vrf_all) { + struct vrf *vrf; + + RB_FOREACH (vrf, vrf_name_head, &vrfs_by_name) { + zvrf = (struct zebra_vrf *)vrf->info; + if (zvrf == NULL) + continue; + vty_out(vty, "VRF: %s\n", zvrf->vrf->name); + show_vrf_proto_rm(vty, zvrf, af_type); + } + } else { + vrf_id_t vrf_id = VRF_DEFAULT; + + if (vrf_name) + VRF_GET_ID(vrf_id, vrf_name, false); + + zvrf = zebra_vrf_lookup_by_id(vrf_id); + if (!zvrf) + return CMD_SUCCESS; + + vty_out(vty, "VRF: %s\n", zvrf->vrf->name); + show_vrf_proto_rm(vty, zvrf, af_type); + } + + return 
CMD_SUCCESS; +} + +static int show_nht_rm(struct vty *vty, int af_type, const char *vrf_all, + const char *vrf_name) +{ + struct zebra_vrf *zvrf; + + if (vrf_all) { + struct vrf *vrf; + + RB_FOREACH (vrf, vrf_name_head, &vrfs_by_name) { + zvrf = (struct zebra_vrf *)vrf->info; + if (zvrf == NULL) + continue; + + vty_out(vty, "VRF: %s\n", zvrf->vrf->name); + show_vrf_nht_rm(vty, zvrf, af_type); + } + } else { + vrf_id_t vrf_id = VRF_DEFAULT; + + if (vrf_name) + VRF_GET_ID(vrf_id, vrf_name, false); + + zvrf = zebra_vrf_lookup_by_id(vrf_id); + if (!zvrf) + return CMD_SUCCESS; + + vty_out(vty, "VRF: %s\n", zvrf->vrf->name); + show_vrf_nht_rm(vty, zvrf, af_type); + } + + return CMD_SUCCESS; +} + +/* Route map commands for interface matching */ +static const struct route_map_rule_cmd route_match_interface_cmd = { + "interface", + route_match_interface, + route_match_interface_compile, + route_match_interface_free +}; + +static int ip_protocol_rm_add(struct zebra_vrf *zvrf, const char *rmap, + int rtype, afi_t afi, safi_t safi) +{ + struct route_table *table; + + if (PROTO_RM_NAME(zvrf, afi, rtype)) { + if (strcmp(PROTO_RM_NAME(zvrf, afi, rtype), rmap) == 0) + return CMD_SUCCESS; + + XFREE(MTYPE_ROUTE_MAP_NAME, PROTO_RM_NAME(zvrf, afi, rtype)); + } + route_map_counter_decrement(PROTO_RM_MAP(zvrf, afi, rtype)); + PROTO_RM_NAME(zvrf, afi, rtype) = XSTRDUP(MTYPE_ROUTE_MAP_NAME, rmap); + PROTO_RM_MAP(zvrf, afi, rtype) = + route_map_lookup_by_name(PROTO_RM_NAME(zvrf, afi, rtype)); + route_map_counter_increment(PROTO_RM_MAP(zvrf, afi, rtype)); + + if (PROTO_RM_MAP(zvrf, afi, rtype)) { + + if (IS_ZEBRA_DEBUG_RIB_DETAILED) + zlog_debug( + "%u: IPv4 Routemap config for protocol %d scheduling RIB processing", + zvrf->vrf->vrf_id, rtype); + /* Process routes of interested address-families. 
*/ + table = zebra_vrf_table(afi, safi, zvrf->vrf->vrf_id); + if (table) + rib_update_table(table, RIB_UPDATE_RMAP_CHANGE, + rtype); + } + + return CMD_SUCCESS; +} + +static int ip_protocol_rm_del(struct zebra_vrf *zvrf, const char *rmap, + int rtype, afi_t afi, safi_t safi) +{ + struct route_table *table; + + if (!PROTO_RM_NAME(zvrf, afi, rtype)) + return CMD_SUCCESS; + + if (!rmap || strcmp(rmap, PROTO_RM_NAME(zvrf, afi, rtype)) == 0) { + + route_map_counter_decrement(PROTO_RM_MAP(zvrf, afi, rtype)); + if (PROTO_RM_MAP(zvrf, afi, rtype)) { + if (IS_ZEBRA_DEBUG_RIB_DETAILED) + zlog_debug( + "%u: IPv4 Routemap unconfig for protocol %d, scheduling RIB processing", + zvrf->vrf->vrf_id, rtype); + PROTO_RM_MAP(zvrf, afi, rtype) = NULL; + + /* Process routes of interested address-families. */ + table = zebra_vrf_table(afi, safi, zvrf->vrf->vrf_id); + if (table) + rib_update_table(table, RIB_UPDATE_RMAP_CHANGE, + rtype); + } + XFREE(MTYPE_ROUTE_MAP_NAME, PROTO_RM_NAME(zvrf, afi, rtype)); + } + return CMD_SUCCESS; +} + +static int ip_nht_rm_add(struct zebra_vrf *zvrf, const char *rmap, int rtype, + int afi) +{ + + if (NHT_RM_NAME(zvrf, afi, rtype)) { + if (strcmp(NHT_RM_NAME(zvrf, afi, rtype), rmap) == 0) + return CMD_SUCCESS; + + XFREE(MTYPE_ROUTE_MAP_NAME, NHT_RM_NAME(zvrf, afi, rtype)); + } + route_map_counter_decrement(NHT_RM_MAP(zvrf, afi, rtype)); + NHT_RM_NAME(zvrf, afi, rtype) = XSTRDUP(MTYPE_ROUTE_MAP_NAME, rmap); + NHT_RM_MAP(zvrf, afi, rtype) = + route_map_lookup_by_name(NHT_RM_NAME(zvrf, afi, rtype)); + route_map_counter_increment(NHT_RM_MAP(zvrf, afi, rtype)); + + if (NHT_RM_MAP(zvrf, afi, rtype)) + zebra_evaluate_rnh(zvrf, AFI_IP, 1, NULL, SAFI_UNICAST); + + return CMD_SUCCESS; +} + +static int ip_nht_rm_del(struct zebra_vrf *zvrf, const char *rmap, int rtype, + int afi) +{ + + if (!NHT_RM_NAME(zvrf, afi, rtype)) + return CMD_SUCCESS; + + if (!rmap || strcmp(rmap, NHT_RM_NAME(zvrf, afi, rtype)) == 0) { + route_map_counter_decrement(NHT_RM_MAP(zvrf, afi, 
rtype)); + if (NHT_RM_MAP(zvrf, afi, rtype)) { + if (IS_ZEBRA_DEBUG_RIB_DETAILED) + zlog_debug( + "%u: IPv4 Routemap unconfig for protocol %d, scheduling RIB processing", + zvrf->vrf->vrf_id, rtype); + NHT_RM_MAP(zvrf, afi, rtype) = NULL; + + zebra_evaluate_rnh(zvrf, AFI_IP, 1, NULL, SAFI_UNICAST); + } + XFREE(MTYPE_ROUTE_MAP_NAME, NHT_RM_NAME(zvrf, afi, rtype)); + } + return CMD_SUCCESS; +} + +DEFPY_YANG( + match_ip_address_prefix_len, match_ip_address_prefix_len_cmd, + "match ip address prefix-len (0-32)$length", + MATCH_STR + IP_STR + "Match prefix length of IP address\n" + "Match prefix length of IP address\n" + "Prefix length\n") +{ + const char *xpath = + "./match-condition[condition='frr-zebra-route-map:ipv4-prefix-length']"; + char xpath_value[XPATH_MAXLEN]; + + nb_cli_enqueue_change(vty, xpath, NB_OP_CREATE, NULL); + snprintf( + xpath_value, sizeof(xpath_value), + "%s/rmap-match-condition/frr-zebra-route-map:ipv4-prefix-length", + xpath); + nb_cli_enqueue_change(vty, xpath_value, NB_OP_MODIFY, length_str); + + return nb_cli_apply_changes(vty, NULL); +} + +DEFPY_YANG( + no_match_ip_address_prefix_len, no_match_ip_address_prefix_len_cmd, + "no match ip address prefix-len [(0-32)]", + NO_STR + MATCH_STR + IP_STR + "Match prefix length of IP address\n" + "Match prefix length of IP address\n" + "Prefix length\n") +{ + const char *xpath = + "./match-condition[condition='frr-zebra-route-map:ipv4-prefix-length']"; + + nb_cli_enqueue_change(vty, xpath, NB_OP_DESTROY, NULL); + + return nb_cli_apply_changes(vty, NULL); +} + +DEFPY_YANG( + match_ipv6_address_prefix_len, match_ipv6_address_prefix_len_cmd, + "match ipv6 address prefix-len (0-128)$length", + MATCH_STR + IPV6_STR + "Match prefix length of IPv6 address\n" + "Match prefix length of IPv6 address\n" + "Prefix length\n") +{ + const char *xpath = + "./match-condition[condition='frr-zebra-route-map:ipv6-prefix-length']"; + char xpath_value[XPATH_MAXLEN]; + + nb_cli_enqueue_change(vty, xpath, NB_OP_CREATE, NULL); 
+ snprintf( + xpath_value, sizeof(xpath_value), + "%s/rmap-match-condition/frr-zebra-route-map:ipv6-prefix-length", + xpath); + nb_cli_enqueue_change(vty, xpath_value, NB_OP_MODIFY, length_str); + + return nb_cli_apply_changes(vty, NULL); +} + +DEFPY_YANG( + no_match_ipv6_address_prefix_len, no_match_ipv6_address_prefix_len_cmd, + "no match ipv6 address prefix-len [(0-128)]", + NO_STR + MATCH_STR + IPV6_STR + "Match prefix length of IPv6 address\n" + "Match prefix length of IPv6 address\n" + "Prefix length\n") +{ + const char *xpath = + "./match-condition[condition='frr-zebra-route-map:ipv6-prefix-length']"; + + nb_cli_enqueue_change(vty, xpath, NB_OP_DESTROY, NULL); + + return nb_cli_apply_changes(vty, NULL); +} + +DEFPY_YANG( + match_ip_nexthop_prefix_len, match_ip_nexthop_prefix_len_cmd, + "match ip next-hop prefix-len (0-32)$length", + MATCH_STR + IP_STR + "Match prefixlen of nexthop IP address\n" + "Match prefixlen of given nexthop\n" + "Prefix length\n") +{ + const char *xpath = + "./match-condition[condition='frr-zebra-route-map:ipv4-next-hop-prefix-length']"; + char xpath_value[XPATH_MAXLEN]; + + nb_cli_enqueue_change(vty, xpath, NB_OP_CREATE, NULL); + snprintf( + xpath_value, sizeof(xpath_value), + "%s/rmap-match-condition/frr-zebra-route-map:ipv4-prefix-length", + xpath); + nb_cli_enqueue_change(vty, xpath_value, NB_OP_MODIFY, length_str); + + return nb_cli_apply_changes(vty, NULL); +} + +DEFPY_YANG( + no_match_ip_nexthop_prefix_len, no_match_ip_nexthop_prefix_len_cmd, + "no match ip next-hop prefix-len [(0-32)]", + NO_STR + MATCH_STR + IP_STR + "Match prefixlen of nexthop IP address\n" + "Match prefix length of nexthop\n" + "Prefix length\n") +{ + const char *xpath = + "./match-condition[condition='frr-zebra-route-map:ipv4-next-hop-prefix-length']"; + + nb_cli_enqueue_change(vty, xpath, NB_OP_DESTROY, NULL); + + return nb_cli_apply_changes(vty, NULL); +} + +DEFPY_YANG( + match_source_protocol, match_source_protocol_cmd, + "match source-protocol " 
FRR_REDIST_STR_ZEBRA "$proto", + MATCH_STR + "Match protocol via which the route was learnt\n" + FRR_REDIST_HELP_STR_ZEBRA) +{ + const char *xpath = + "./match-condition[condition='frr-zebra-route-map:source-protocol']"; + char xpath_value[XPATH_MAXLEN]; + + nb_cli_enqueue_change(vty, xpath, NB_OP_CREATE, NULL); + snprintf(xpath_value, sizeof(xpath_value), + "%s/rmap-match-condition/frr-zebra-route-map:source-protocol", + xpath); + nb_cli_enqueue_change(vty, xpath_value, NB_OP_MODIFY, proto); + + return nb_cli_apply_changes(vty, NULL); +} + +DEFPY_YANG( + no_match_source_protocol, no_match_source_protocol_cmd, + "no match source-protocol [" FRR_REDIST_STR_ZEBRA "]", + NO_STR + MATCH_STR + "Match protocol via which the route was learnt\n" + FRR_REDIST_HELP_STR_ZEBRA) +{ + const char *xpath = + "./match-condition[condition='frr-zebra-route-map:source-protocol']"; + + nb_cli_enqueue_change(vty, xpath, NB_OP_DESTROY, NULL); + + return nb_cli_apply_changes(vty, NULL); +} + +DEFPY_YANG( + match_source_instance, match_source_instance_cmd, + "match source-instance (0-255)$instance", + MATCH_STR + "Match the protocol's instance number\n" + "The instance number\n") +{ + const char *xpath = + "./match-condition[condition='frr-zebra-route-map:source-instance']"; + char xpath_value[XPATH_MAXLEN]; + + nb_cli_enqueue_change(vty, xpath, NB_OP_CREATE, NULL); + snprintf(xpath_value, sizeof(xpath_value), + "%s/rmap-match-condition/frr-zebra-route-map:source-instance", + xpath); + nb_cli_enqueue_change(vty, xpath_value, NB_OP_MODIFY, instance_str); + + return nb_cli_apply_changes(vty, NULL); +} + +DEFPY_YANG( + no_match_source_instance, no_match_source_instance_cmd, + "no match source-instance [(0-255)]", + NO_STR MATCH_STR + "Match the protocol's instance number\n" + "The instance number\n") +{ + const char *xpath = + "./match-condition[condition='frr-zebra-route-map:source-instance']"; + + nb_cli_enqueue_change(vty, xpath, NB_OP_DESTROY, NULL); + + return nb_cli_apply_changes(vty, 
NULL); +} + +/* set functions */ + +DEFPY_YANG( + set_src, set_src_cmd, + "set src <A.B.C.D$addrv4|X:X::X:X$addrv6>", + SET_STR + "src address for route\n" + "IPv4 src address\n" + "IPv6 src address\n") +{ + const char *xpath = + "./set-action[action='frr-zebra-route-map:src-address']"; + char xpath_value[XPATH_MAXLEN]; + + nb_cli_enqueue_change(vty, xpath, NB_OP_CREATE, NULL); + if (addrv4_str) { + snprintf( + xpath_value, sizeof(xpath_value), + "%s/rmap-set-action/frr-zebra-route-map:ipv4-src-address", + xpath); + nb_cli_enqueue_change(vty, xpath_value, NB_OP_MODIFY, + addrv4_str); + } else { + snprintf( + xpath_value, sizeof(xpath_value), + "%s/rmap-set-action/frr-zebra-route-map:ipv6-src-address", + xpath); + nb_cli_enqueue_change(vty, xpath_value, NB_OP_MODIFY, + addrv6_str); + } + + return nb_cli_apply_changes(vty, NULL); +} + +DEFPY_YANG( + no_set_src, no_set_src_cmd, + "no set src [<A.B.C.D|X:X::X:X>]", + NO_STR + SET_STR + "Source address for route\n" + "IPv4 address\n" + "IPv6 address\n") +{ + const char *xpath = + "./set-action[action='frr-zebra-route-map:src-address']"; + + nb_cli_enqueue_change(vty, xpath, NB_OP_DESTROY, NULL); + + return nb_cli_apply_changes(vty, NULL); +} + +DEFUN_YANG (zebra_route_map_timer, + zebra_route_map_timer_cmd, + "zebra route-map delay-timer (0-600)", + ZEBRA_STR + "Set route-map parameters\n" + "Time to wait before route-map updates are processed\n" + "0 means route-map changes are run immediately instead of delaying\n") +{ + int idx_number = 3; + uint32_t rmap_delay_timer; + + rmap_delay_timer = strtoul(argv[idx_number]->arg, NULL, 10); + zebra_route_map_set_delay_timer(rmap_delay_timer); + + return (CMD_SUCCESS); +} + +DEFUN_YANG (no_zebra_route_map_timer, + no_zebra_route_map_timer_cmd, + "no zebra route-map delay-timer [(0-600)]", + NO_STR + ZEBRA_STR + "Set route-map parameters\n" + "Reset delay-timer to default value, 30 secs\n" + "0 means route-map changes are run immediately instead of delaying\n") +{ + 
zebra_route_map_set_delay_timer(ZEBRA_RMAP_DEFAULT_UPDATE_TIMER); + + return (CMD_SUCCESS); +} + +DEFPY_YANG (ip_protocol, + ip_protocol_cmd, + "ip protocol " FRR_IP_PROTOCOL_MAP_STR_ZEBRA + " $proto route-map ROUTE-MAP$rmap", + IP_STR + "Filter routing info exchanged between zebra and protocol\n" + FRR_IP_PROTOCOL_MAP_HELP_STR_ZEBRA + "Specify route-map\n" + "Route map name\n") +{ + int ret, rtype; + + assert(proto); + assert(rmap); + + ZEBRA_DECLVAR_CONTEXT_VRF(vrf, zvrf); + + if (!zvrf) + return CMD_WARNING; + + if (strcasecmp(proto, "any") == 0) + rtype = ZEBRA_ROUTE_MAX; + else + rtype = proto_name2num(proto); + if (rtype < 0) { + vty_out(vty, "invalid protocol name \"%s\"\n", proto); + return CMD_WARNING_CONFIG_FAILED; + } + + ret = ip_protocol_rm_add(zvrf, rmap, rtype, AFI_IP, SAFI_UNICAST); + + return ret; +} + +DEFPY_YANG (no_ip_protocol, + no_ip_protocol_cmd, + "no ip protocol " FRR_IP_PROTOCOL_MAP_STR_ZEBRA + " $proto [route-map ROUTE-MAP$rmap]", + NO_STR + IP_STR + "Stop filtering routing info between zebra and protocol\n" + FRR_IP_PROTOCOL_MAP_HELP_STR_ZEBRA + "Specify route-map\n" + "Route map name\n") +{ + int ret, rtype; + + assert(proto); + + ZEBRA_DECLVAR_CONTEXT_VRF(vrf, zvrf); + + if (!zvrf) + return CMD_WARNING; + + if (strcasecmp(proto, "any") == 0) + rtype = ZEBRA_ROUTE_MAX; + else + rtype = proto_name2num(proto); + if (rtype < 0) { + vty_out(vty, "invalid protocol name \"%s\"\n", proto); + return CMD_WARNING_CONFIG_FAILED; + } + + ret = ip_protocol_rm_del(zvrf, rmap, rtype, AFI_IP, SAFI_UNICAST); + + return ret; +} + +DEFPY_YANG (show_ip_protocol, + show_ip_protocol_cmd, + "show ip protocol [vrf <NAME$vrf_name|all$vrf_all>]", + SHOW_STR + IP_STR + "IP protocol filtering status\n" + VRF_FULL_CMD_HELP_STR) +{ + int ret = show_proto_rm(vty, AFI_IP, vrf_all, vrf_name); + + return ret; +} + +DEFPY_YANG (ipv6_protocol, + ipv6_protocol_cmd, + "ipv6 protocol " FRR_IP6_PROTOCOL_MAP_STR_ZEBRA + " $proto route-map ROUTE-MAP$rmap", + IP6_STR + "Filter 
IPv6 routing info exchanged between zebra and protocol\n" + FRR_IP6_PROTOCOL_MAP_HELP_STR_ZEBRA + "Specify route-map\n" + "Route map name\n") +{ + int ret, rtype; + + assert(rmap); + assert(proto); + + ZEBRA_DECLVAR_CONTEXT_VRF(vrf, zvrf); + + if (!zvrf) + return CMD_WARNING; + + if (strcasecmp(proto, "any") == 0) + rtype = ZEBRA_ROUTE_MAX; + else + rtype = proto_name2num(proto); + if (rtype < 0) { + vty_out(vty, "invalid protocol name \"%s\"\n", proto); + return CMD_WARNING_CONFIG_FAILED; + } + + ret = ip_protocol_rm_add(zvrf, rmap, rtype, AFI_IP6, SAFI_UNICAST); + + return ret; +} + +DEFPY_YANG (no_ipv6_protocol, + no_ipv6_protocol_cmd, + "no ipv6 protocol " FRR_IP6_PROTOCOL_MAP_STR_ZEBRA + " $proto [route-map ROUTE-MAP$rmap]", + NO_STR + IP6_STR + "Stop filtering IPv6 routing info between zebra and protocol\n" + FRR_IP6_PROTOCOL_MAP_HELP_STR_ZEBRA + "Specify route-map\n" + "Route map name\n") +{ + int ret, rtype; + + assert(proto); + + ZEBRA_DECLVAR_CONTEXT_VRF(vrf, zvrf); + + if (!zvrf) + return CMD_WARNING; + + if (strcasecmp(proto, "any") == 0) + rtype = ZEBRA_ROUTE_MAX; + else + rtype = proto_name2num(proto); + if (rtype < 0) { + vty_out(vty, "invalid protocol name \"%s\"\n", proto); + return CMD_WARNING_CONFIG_FAILED; + } + + ret = ip_protocol_rm_del(zvrf, rmap, rtype, AFI_IP6, SAFI_UNICAST); + + return ret; +} + +DEFPY_YANG (show_ipv6_protocol, + show_ipv6_protocol_cmd, + "show ipv6 protocol [vrf <NAME$vrf_name|all$vrf_all>]", + SHOW_STR + IP6_STR + "IPv6 protocol filtering status\n" + VRF_FULL_CMD_HELP_STR) +{ + int ret = show_proto_rm(vty, AFI_IP6, vrf_all, vrf_name); + + return ret; +} + +DEFPY_YANG (ip_protocol_nht_rmap, + ip_protocol_nht_rmap_cmd, + "ip nht " FRR_IP_PROTOCOL_MAP_STR_ZEBRA + " $proto route-map ROUTE-MAP$rmap", + IP_STR + "Filter Next Hop tracking route resolution\n" + FRR_IP_PROTOCOL_MAP_HELP_STR_ZEBRA + "Specify route map\n" + "Route map name\n") +{ + + int ret, rtype; + + assert(proto); + assert(rmap); + + 
ZEBRA_DECLVAR_CONTEXT_VRF(vrf, zvrf); + + if (!zvrf) + return CMD_WARNING; + + if (strcasecmp(proto, "any") == 0) + rtype = ZEBRA_ROUTE_MAX; + else + rtype = proto_name2num(proto); + if (rtype < 0) { + vty_out(vty, "invalid protocol name \"%s\"\n", proto); + return CMD_WARNING_CONFIG_FAILED; + } + + ret = ip_nht_rm_add(zvrf, rmap, rtype, AFI_IP); + + return ret; +} + +DEFPY_YANG (no_ip_protocol_nht_rmap, + no_ip_protocol_nht_rmap_cmd, + "no ip nht " FRR_IP_PROTOCOL_MAP_STR_ZEBRA + " $proto route-map [ROUTE-MAP$rmap]", + NO_STR + IP_STR + "Filter Next Hop tracking route resolution\n" + FRR_IP_PROTOCOL_MAP_HELP_STR_ZEBRA + "Specify route map\n" + "Route map name\n") +{ + int ret, rtype; + + assert(proto); + + ZEBRA_DECLVAR_CONTEXT_VRF(vrf, zvrf); + + if (!zvrf) + return CMD_WARNING; + + if (strcasecmp(proto, "any") == 0) + rtype = ZEBRA_ROUTE_MAX; + else + rtype = proto_name2num(proto); + if (rtype < 0) { + vty_out(vty, "invalid protocol name \"%s\"\n", proto); + return CMD_WARNING_CONFIG_FAILED; + } + + ret = ip_nht_rm_del(zvrf, rmap, rtype, AFI_IP); + + return ret; +} + +DEFPY_YANG (show_ip_protocol_nht, + show_ip_protocol_nht_cmd, + "show ip nht route-map [vrf <NAME$vrf_name|all$vrf_all>]", + SHOW_STR + IP_STR + "IP nexthop tracking table\n" + "IP Next Hop tracking filtering status\n" + VRF_FULL_CMD_HELP_STR) +{ + int ret = show_nht_rm(vty, AFI_IP, vrf_all, vrf_name); + + return ret; +} + +DEFPY_YANG (ipv6_protocol_nht_rmap, + ipv6_protocol_nht_rmap_cmd, + "ipv6 nht " FRR_IP6_PROTOCOL_MAP_STR_ZEBRA + " $proto route-map ROUTE-MAP$rmap", + IP6_STR + "Filter Next Hop tracking route resolution\n" + FRR_IP6_PROTOCOL_MAP_HELP_STR_ZEBRA + "Specify route map\n" + "Route map name\n") +{ + int ret, rtype; + + assert(rmap); + assert(proto); + + ZEBRA_DECLVAR_CONTEXT_VRF(vrf, zvrf); + + if (!zvrf) + return CMD_WARNING; + + if (strcasecmp(proto, "any") == 0) + rtype = ZEBRA_ROUTE_MAX; + else + rtype = proto_name2num(proto); + if (rtype < 0) { + vty_out(vty, "invalid protocol 
name \"%s\"\n", proto); + return CMD_WARNING_CONFIG_FAILED; + } + + ret = ip_nht_rm_add(zvrf, rmap, rtype, AFI_IP6); + + return ret; +} + +DEFPY_YANG (no_ipv6_protocol_nht_rmap, + no_ipv6_protocol_nht_rmap_cmd, + "no ipv6 nht " FRR_IP6_PROTOCOL_MAP_STR_ZEBRA + " $proto [route-map ROUTE-MAP$rmap]", + NO_STR + IP6_STR + "Filter Next Hop tracking route resolution\n" + FRR_IP6_PROTOCOL_MAP_HELP_STR_ZEBRA + "Specify route map\n" + "Route map name\n") +{ + int ret, rtype; + + assert(proto); + + ZEBRA_DECLVAR_CONTEXT_VRF(vrf, zvrf); + + if (!zvrf) + return CMD_WARNING; + + if (strcasecmp(proto, "any") == 0) + rtype = ZEBRA_ROUTE_MAX; + else + rtype = proto_name2num(proto); + if (rtype < 0) { + vty_out(vty, "invalid protocol name \"%s\"\n", proto); + return CMD_WARNING_CONFIG_FAILED; + } + + ret = ip_nht_rm_del(zvrf, rmap, rtype, AFI_IP6); + + return ret; +} + +DEFPY_YANG (show_ipv6_protocol_nht, + show_ipv6_protocol_nht_cmd, + "show ipv6 nht route-map [vrf <NAME$vrf_name|all$vrf_all>]", + SHOW_STR + IP6_STR + "Next Hop filtering status\n" + "Route-map\n" + VRF_FULL_CMD_HELP_STR) +{ + int ret = show_nht_rm(vty, AFI_IP6, vrf_all, vrf_name); + + return ret; +} + +/*XXXXXXXXXXXXXXXXXXXXXXXXXXXX*/ + +/* `match ip next-hop IP_ACCESS_LIST' */ + +/* Match function return 1 if match is success else return zero. 
*/ +static enum route_map_cmd_result_t +route_match_ip_next_hop(void *rule, const struct prefix *prefix, void *object) +{ + struct access_list *alist; + struct nh_rmap_obj *nh_data; + struct prefix_ipv4 p; + + nh_data = object; + if (!nh_data) + return RMAP_NOMATCH; + + switch (nh_data->nexthop->type) { + case NEXTHOP_TYPE_IFINDEX: + /* Interface routes can't match ip next-hop */ + return RMAP_NOMATCH; + case NEXTHOP_TYPE_IPV4_IFINDEX: + case NEXTHOP_TYPE_IPV4: + p.family = AF_INET; + p.prefix = nh_data->nexthop->gate.ipv4; + p.prefixlen = IPV4_MAX_BITLEN; + break; + default: + return RMAP_NOMATCH; + } + alist = access_list_lookup(AFI_IP, (char *)rule); + if (alist == NULL) + return RMAP_NOMATCH; + + return (access_list_apply(alist, &p) == FILTER_DENY ? RMAP_NOMATCH + : RMAP_MATCH); +} + +/* Route map `ip next-hop' match statement. `arg' should be + access-list name. */ +static void *route_match_ip_next_hop_compile(const char *arg) +{ + return XSTRDUP(MTYPE_ROUTE_MAP_COMPILED, arg); +} + +/* Free route map's compiled `. */ +static void route_match_ip_next_hop_free(void *rule) +{ + XFREE(MTYPE_ROUTE_MAP_COMPILED, rule); +} + +/* Route map commands for ip next-hop matching. 
*/ +static const struct route_map_rule_cmd route_match_ip_next_hop_cmd = { + "ip next-hop", + route_match_ip_next_hop, + route_match_ip_next_hop_compile, + route_match_ip_next_hop_free +}; + +/* `match ip next-hop prefix-list PREFIX_LIST' */ + +static enum route_map_cmd_result_t +route_match_ip_next_hop_prefix_list(void *rule, const struct prefix *prefix, + void *object) +{ + struct prefix_list *plist; + struct nh_rmap_obj *nh_data; + struct prefix_ipv4 p; + + nh_data = (struct nh_rmap_obj *)object; + if (!nh_data) + return RMAP_NOMATCH; + + switch (nh_data->nexthop->type) { + case NEXTHOP_TYPE_IFINDEX: + /* Interface routes can't match ip next-hop */ + return RMAP_NOMATCH; + case NEXTHOP_TYPE_IPV4_IFINDEX: + case NEXTHOP_TYPE_IPV4: + p.family = AF_INET; + p.prefix = nh_data->nexthop->gate.ipv4; + p.prefixlen = IPV4_MAX_BITLEN; + break; + default: + return RMAP_NOMATCH; + } + plist = prefix_list_lookup(AFI_IP, (char *)rule); + if (plist == NULL) + return RMAP_NOMATCH; + + return (prefix_list_apply(plist, &p) == PREFIX_DENY ? RMAP_NOMATCH + : RMAP_MATCH); +} + +static void *route_match_ip_next_hop_prefix_list_compile(const char *arg) +{ + return XSTRDUP(MTYPE_ROUTE_MAP_COMPILED, arg); +} + +static void route_match_ip_next_hop_prefix_list_free(void *rule) +{ + XFREE(MTYPE_ROUTE_MAP_COMPILED, rule); +} + +static const struct route_map_rule_cmd + route_match_ip_next_hop_prefix_list_cmd = { + "ip next-hop prefix-list", + route_match_ip_next_hop_prefix_list, + route_match_ip_next_hop_prefix_list_compile, + route_match_ip_next_hop_prefix_list_free +}; + +/* `match ip address IP_ACCESS_LIST' */ + +/* Match function should return 1 if match is success else return + zero. 
*/ +static enum route_map_cmd_result_t +route_match_address(afi_t afi, void *rule, const struct prefix *prefix, + void *object) +{ + struct access_list *alist; + + alist = access_list_lookup(afi, (char *)rule); + if (alist == NULL) + return RMAP_NOMATCH; + + return (access_list_apply(alist, prefix) == FILTER_DENY ? RMAP_NOMATCH + : RMAP_MATCH); +} + +static enum route_map_cmd_result_t +route_match_ip_address(void *rule, const struct prefix *prefix, void *object) +{ + return route_match_address(AFI_IP, rule, prefix, object); +} + +static enum route_map_cmd_result_t +route_match_ipv6_address(void *rule, const struct prefix *prefix, void *object) +{ + return route_match_address(AFI_IP6, rule, prefix, object); +} + +/* Route map `ip address' match statement. `arg' should be + access-list name. */ +static void *route_match_address_compile(const char *arg) +{ + return XSTRDUP(MTYPE_ROUTE_MAP_COMPILED, arg); +} + +/* Free route map's compiled `ip address' value. */ +static void route_match_address_free(void *rule) +{ + XFREE(MTYPE_ROUTE_MAP_COMPILED, rule); +} + +/* Route map commands for ip address matching. */ +static const struct route_map_rule_cmd route_match_ip_address_cmd = { + "ip address", + route_match_ip_address, + route_match_address_compile, + route_match_address_free +}; + +/* Route map commands for ipv6 address matching. */ +static const struct route_map_rule_cmd route_match_ipv6_address_cmd = { + "ipv6 address", + route_match_ipv6_address, + route_match_address_compile, + route_match_address_free +}; + +/* `match ip address prefix-list PREFIX_LIST' */ + +static enum route_map_cmd_result_t +route_match_address_prefix_list(void *rule, const struct prefix *prefix, + void *object, afi_t afi) +{ + struct prefix_list *plist; + + plist = prefix_list_lookup(afi, (char *)rule); + if (plist == NULL) + return RMAP_NOMATCH; + + return (prefix_list_apply(plist, prefix) == PREFIX_DENY ? 
RMAP_NOMATCH + : RMAP_MATCH); +} + +static enum route_map_cmd_result_t +route_match_ip_address_prefix_list(void *rule, const struct prefix *prefix, + void *object) +{ + return (route_match_address_prefix_list(rule, prefix, object, AFI_IP)); +} + +static void *route_match_address_prefix_list_compile(const char *arg) +{ + return XSTRDUP(MTYPE_ROUTE_MAP_COMPILED, arg); +} + +static void route_match_address_prefix_list_free(void *rule) +{ + XFREE(MTYPE_ROUTE_MAP_COMPILED, rule); +} + +static const struct route_map_rule_cmd + route_match_ip_address_prefix_list_cmd = { + "ip address prefix-list", + route_match_ip_address_prefix_list, + route_match_address_prefix_list_compile, + route_match_address_prefix_list_free +}; + +static enum route_map_cmd_result_t +route_match_ipv6_address_prefix_list(void *rule, const struct prefix *prefix, + void *object) +{ + return (route_match_address_prefix_list(rule, prefix, object, AFI_IP6)); +} + +static const struct route_map_rule_cmd + route_match_ipv6_address_prefix_list_cmd = { + "ipv6 address prefix-list", + route_match_ipv6_address_prefix_list, + route_match_address_prefix_list_compile, + route_match_address_prefix_list_free +}; + +/* `match ipv6 next-hop type <TYPE>' */ + +static enum route_map_cmd_result_t +route_match_ipv6_next_hop_type(void *rule, const struct prefix *prefix, + void *object) +{ + struct nh_rmap_obj *nh_data; + + if (prefix->family == AF_INET6) { + nh_data = (struct nh_rmap_obj *)object; + if (!nh_data) + return RMAP_NOMATCH; + + if (nh_data->nexthop->type == NEXTHOP_TYPE_BLACKHOLE) + return RMAP_MATCH; + } + + return RMAP_NOMATCH; +} + +static void *route_match_ipv6_next_hop_type_compile(const char *arg) +{ + return XSTRDUP(MTYPE_ROUTE_MAP_COMPILED, arg); +} + +static void route_match_ipv6_next_hop_type_free(void *rule) +{ + XFREE(MTYPE_ROUTE_MAP_COMPILED, rule); +} + +static const struct route_map_rule_cmd + route_match_ipv6_next_hop_type_cmd = { + "ipv6 next-hop type", + route_match_ipv6_next_hop_type, + 
route_match_ipv6_next_hop_type_compile, + route_match_ipv6_next_hop_type_free +}; + +/* `match ip address prefix-len PREFIXLEN' */ + +static enum route_map_cmd_result_t +route_match_address_prefix_len(void *rule, const struct prefix *prefix, + void *object) +{ + uint32_t *prefixlen = (uint32_t *)rule; + + return ((prefix->prefixlen == *prefixlen) ? RMAP_MATCH : RMAP_NOMATCH); +} + +static void *route_match_address_prefix_len_compile(const char *arg) +{ + uint32_t *prefix_len; + char *endptr = NULL; + unsigned long tmpval; + + /* prefix len value shoud be integer. */ + if (!all_digit(arg)) + return NULL; + + errno = 0; + tmpval = strtoul(arg, &endptr, 10); + if (*endptr != '\0' || errno || tmpval > UINT32_MAX) + return NULL; + + prefix_len = XMALLOC(MTYPE_ROUTE_MAP_COMPILED, sizeof(uint32_t)); + + *prefix_len = tmpval; + return prefix_len; +} + +static void route_match_address_prefix_len_free(void *rule) +{ + XFREE(MTYPE_ROUTE_MAP_COMPILED, rule); +} + +static const struct route_map_rule_cmd + route_match_ip_address_prefix_len_cmd = { + "ip address prefix-len", + route_match_address_prefix_len, + route_match_address_prefix_len_compile, + route_match_address_prefix_len_free +}; + +static const struct route_map_rule_cmd + route_match_ipv6_address_prefix_len_cmd = { + "ipv6 address prefix-len", + route_match_address_prefix_len, + route_match_address_prefix_len_compile, + route_match_address_prefix_len_free +}; + +/* `match ip nexthop prefix-len PREFIXLEN' */ + +static enum route_map_cmd_result_t +route_match_ip_nexthop_prefix_len(void *rule, const struct prefix *prefix, + void *object) +{ + uint32_t *prefixlen = (uint32_t *)rule; + struct nh_rmap_obj *nh_data; + struct prefix_ipv4 p; + + nh_data = (struct nh_rmap_obj *)object; + if (!nh_data || !nh_data->nexthop) + return RMAP_NOMATCH; + + switch (nh_data->nexthop->type) { + case NEXTHOP_TYPE_IFINDEX: + /* Interface routes can't match ip next-hop */ + return RMAP_NOMATCH; + case NEXTHOP_TYPE_IPV4_IFINDEX: + case 
NEXTHOP_TYPE_IPV4: + p.family = AF_INET; + p.prefix = nh_data->nexthop->gate.ipv4; + p.prefixlen = IPV4_MAX_BITLEN; + break; + default: + return RMAP_NOMATCH; + } + return ((p.prefixlen == *prefixlen) ? RMAP_MATCH : RMAP_NOMATCH); +} + +static const struct route_map_rule_cmd + route_match_ip_nexthop_prefix_len_cmd = { + "ip next-hop prefix-len", + route_match_ip_nexthop_prefix_len, + route_match_address_prefix_len_compile, /* reuse */ + route_match_address_prefix_len_free /* reuse */ +}; + +/* `match ip next-hop type <blackhole>' */ + +static enum route_map_cmd_result_t +route_match_ip_next_hop_type(void *rule, const struct prefix *prefix, + void *object) +{ + struct nh_rmap_obj *nh_data; + + if (prefix->family == AF_INET) { + nh_data = (struct nh_rmap_obj *)object; + if (!nh_data) + return RMAP_NOMATCH; + + if (nh_data->nexthop->type == NEXTHOP_TYPE_BLACKHOLE) + return RMAP_MATCH; + } + + return RMAP_NOMATCH; +} + +static void *route_match_ip_next_hop_type_compile(const char *arg) +{ + return XSTRDUP(MTYPE_ROUTE_MAP_COMPILED, arg); +} + +static void route_match_ip_next_hop_type_free(void *rule) +{ + XFREE(MTYPE_ROUTE_MAP_COMPILED, rule); +} + +static const struct route_map_rule_cmd + route_match_ip_next_hop_type_cmd = { + "ip next-hop type", + route_match_ip_next_hop_type, + route_match_ip_next_hop_type_compile, + route_match_ip_next_hop_type_free +}; + +/* `match source-protocol PROTOCOL' */ + +static enum route_map_cmd_result_t +route_match_source_protocol(void *rule, const struct prefix *p, void *object) +{ + uint32_t *rib_type = (uint32_t *)rule; + struct nh_rmap_obj *nh_data; + + nh_data = (struct nh_rmap_obj *)object; + if (!nh_data) + return RMAP_NOMATCH; + + return ((nh_data->source_protocol == *rib_type) ? 
RMAP_MATCH + : RMAP_NOMATCH); +} + +static void *route_match_source_protocol_compile(const char *arg) +{ + uint32_t *rib_type; + int i; + + i = proto_name2num(arg); + rib_type = XMALLOC(MTYPE_ROUTE_MAP_COMPILED, sizeof(uint32_t)); + + *rib_type = i; + + return rib_type; +} + +static void route_match_source_protocol_free(void *rule) +{ + XFREE(MTYPE_ROUTE_MAP_COMPILED, rule); +} + +static const struct route_map_rule_cmd route_match_source_protocol_cmd = { + "source-protocol", + route_match_source_protocol, + route_match_source_protocol_compile, + route_match_source_protocol_free +}; + +/* `source-instance` */ +static enum route_map_cmd_result_t +route_match_source_instance(void *rule, const struct prefix *p, void *object) +{ + uint8_t *instance = (uint8_t *)rule; + struct nh_rmap_obj *nh_data; + + nh_data = (struct nh_rmap_obj *)object; + if (!nh_data) + return RMAP_NOMATCH; + + return (nh_data->instance == *instance) ? RMAP_MATCH : RMAP_NOMATCH; +} + +static void *route_match_source_instance_compile(const char *arg) +{ + uint8_t *instance; + int i; + + i = atoi(arg); + instance = XMALLOC(MTYPE_ROUTE_MAP_COMPILED, sizeof(uint8_t)); + + *instance = i; + + return instance; +} + +static void route_match_source_instance_free(void *rule) +{ + XFREE(MTYPE_ROUTE_MAP_COMPILED, rule); +} + +static const struct route_map_rule_cmd route_match_source_instance_cmd = { + "source-instance", + route_match_source_instance, + route_match_source_instance_compile, + route_match_source_instance_free +}; + +/* `set src A.B.C.D' */ + +/* Set src. */ +static enum route_map_cmd_result_t +route_set_src(void *rule, const struct prefix *prefix, void *object) +{ + struct nh_rmap_obj *nh_data; + + nh_data = (struct nh_rmap_obj *)object; + nh_data->nexthop->rmap_src = *(union g_addr *)rule; + + return RMAP_OKAY; +} + +/* set src compilation. 
*/
/* Parse a "set src" argument: accepts either an IPv4 or an IPv6
 * address and returns a heap-allocated union g_addr, or NULL when the
 * string parses as neither.
 */
static void *route_set_src_compile(const char *arg)
{
	union g_addr src, *psrc;

	if ((inet_pton(AF_INET6, arg, &src.ipv6) == 1)
	    || (inet_pton(AF_INET, arg, &src.ipv4) == 1)) {
		psrc = XMALLOC(MTYPE_ROUTE_MAP_COMPILED, sizeof(union g_addr));
		*psrc = src;
		return psrc;
	}
	return NULL;
}

/* Free route map's compiled `set src' value. */
static void route_set_src_free(void *rule)
{
	XFREE(MTYPE_ROUTE_MAP_COMPILED, rule);
}

/* Set src rule structure. */
static const struct route_map_rule_cmd route_set_src_cmd = {
	"src",
	route_set_src,
	route_set_src_compile,
	route_set_src_free,
};

/* The function checks if the changed routemap specified by parameter rmap
 * matches the configured protocol routemaps in proto_rm table. If there is
 * a match then rib_update_table() to process the routes.
 */
static void zebra_rib_table_rm_update(const char *rmap)
{
	int i = 0;
	struct route_table *table;
	struct vrf *vrf = NULL;
	struct zebra_vrf *zvrf = NULL;
	char *rmap_name;
	struct route_map *old = NULL;

	/* Scan every VRF; the loop runs through i == ZEBRA_ROUTE_MAX,
	 * which is the slot used by the "any"-protocol route-map.
	 */
	RB_FOREACH (vrf, vrf_name_head, &vrfs_by_name) {
		zvrf = vrf->info;
		if (!zvrf)
			continue;
		for (i = 0; i <= ZEBRA_ROUTE_MAX; i++) {
			rmap_name = PROTO_RM_NAME(zvrf, AFI_IP, i);
			if (rmap_name && (strcmp(rmap_name, rmap) == 0)) {
				if (IS_ZEBRA_DEBUG_EVENT)
					zlog_debug(
						"%s : AFI_IP rmap %s, route type %s",
						__func__, rmap,
						zebra_route_string(i));

				old = PROTO_RM_MAP(zvrf, AFI_IP, i);

				PROTO_RM_MAP(zvrf, AFI_IP, i) =
					route_map_lookup_by_name(rmap_name);
				/* old is NULL, i.e. a route-map creation
				 * event, so bump the applied counter.
				 * If old is non-NULL this is a route-map
				 * update or deletion and the counter must
				 * not change.
				 */
				if (!old)
					route_map_counter_increment(
						PROTO_RM_MAP(zvrf, AFI_IP, i));
				/* There is single rib table for all protocols
				 */
				table = zvrf->table[AFI_IP][SAFI_UNICAST];
				if (table) {
					rib_update_table(
						table,
						RIB_UPDATE_RMAP_CHANGE,
						i);
				}
			}
			/* Same handling for the IPv6 table. */
			rmap_name = PROTO_RM_NAME(zvrf, AFI_IP6, i);
			if (rmap_name && (strcmp(rmap_name, rmap) == 0)) {
				if (IS_ZEBRA_DEBUG_EVENT)
					zlog_debug(
						"%s : AFI_IP6 rmap %s, route type %s",
						__func__, rmap,
						zebra_route_string(i));

				old = PROTO_RM_MAP(zvrf, AFI_IP6, i);

				PROTO_RM_MAP(zvrf, AFI_IP6, i) =
					route_map_lookup_by_name(rmap_name);
				if (!old)
					route_map_counter_increment(
						PROTO_RM_MAP(zvrf, AFI_IP6, i));
				/* There is single rib table for all protocols
				 */
				table = zvrf->table[AFI_IP6][SAFI_UNICAST];
				if (table) {
					rib_update_table(
						table,
						RIB_UPDATE_RMAP_CHANGE,
						i);
				}
			}
		}
	}
}

/* The function checks if the changed routemap specified by parameter rmap
 * matches the configured protocol routemaps in nht_rm table. If there is
 * a match then zebra_evaluate_rnh() to process the nexthops.
+ */ +static void zebra_nht_rm_update(const char *rmap) +{ + int i = 0; + struct route_table *table; + struct vrf *vrf = NULL; + struct zebra_vrf *zvrf = NULL; + char *rmap_name; + char afi_ip = 0; + char afi_ipv6 = 0; + struct route_map *old = NULL; + + RB_FOREACH (vrf, vrf_name_head, &vrfs_by_name) { + zvrf = vrf->info; + if (!zvrf) + continue; + for (i = 0; i <= ZEBRA_ROUTE_MAX; i++) { + rmap_name = NHT_RM_NAME(zvrf, AFI_IP, i); + if (rmap_name && (strcmp(rmap_name, rmap) == 0)) { + if (IS_ZEBRA_DEBUG_EVENT) + zlog_debug( + "%s : AFI_IP rmap %s, route type %s", + __func__, rmap, + zebra_route_string(i)); + + old = NHT_RM_MAP(zvrf, AFI_IP, i); + + NHT_RM_MAP(zvrf, AFI_IP, i) = + route_map_lookup_by_name(rmap_name); + if (!old) + route_map_counter_increment( + NHT_RM_MAP(zvrf, AFI_IP, i)); + /* There is single rib table for all protocols + */ + if (afi_ip == 0) { + table = zvrf->table[AFI_IP] + [SAFI_UNICAST]; + if (table) { + + afi_ip = 1; + + zebra_evaluate_rnh( + zvrf, AFI_IP, 1, NULL, + SAFI_UNICAST); + } + } + } + + rmap_name = NHT_RM_NAME(zvrf, AFI_IP6, i); + if (rmap_name && (strcmp(rmap_name, rmap) == 0)) { + if (IS_ZEBRA_DEBUG_EVENT) + zlog_debug( + "%s : AFI_IP6 rmap %s, route type %s", + __func__, rmap, + zebra_route_string(i)); + + old = NHT_RM_MAP(zvrf, AFI_IP6, i); + + NHT_RM_MAP(zvrf, AFI_IP6, i) = + route_map_lookup_by_name(rmap_name); + if (!old) + route_map_counter_increment( + NHT_RM_MAP(zvrf, AFI_IP6, i)); + /* There is single rib table for all protocols + */ + if (afi_ipv6 == 0) { + table = zvrf->table[AFI_IP6] + [SAFI_UNICAST]; + if (table) { + + afi_ipv6 = 1; + + zebra_evaluate_rnh( + zvrf, AFI_IP, 1, NULL, + SAFI_UNICAST); + } + } + } + } + } +} + +static void zebra_route_map_process_update_cb(char *rmap_name) +{ + if (IS_ZEBRA_DEBUG_EVENT) + zlog_debug("Event handler for route-map: %s", + rmap_name); + zebra_import_table_rm_update(rmap_name); + zebra_rib_table_rm_update(rmap_name); + zebra_nht_rm_update(rmap_name); +} + +static void 
/* Delay-timer callback: walk the list of route-maps marked updated
 * and push each change to the import-table/RIB/NHT consumers.
 */
zebra_route_map_update_timer(struct thread *thread)
{
	if (IS_ZEBRA_DEBUG_EVENT)
		zlog_debug("Event driven route-map update triggered");

	if (IS_ZEBRA_DEBUG_RIB_DETAILED)
		zlog_debug(
			"%u: Routemap update-timer fired, scheduling RIB processing",
			VRF_DEFAULT);

	route_map_walk_update_list(zebra_route_map_process_update_cb);

	/*
	 * This code needs to be updated to be:
	 * 1) VRF Aware <sigh>
	 * 2) Route-map aware
	 */
}

/* Set the delay (seconds) between a route-map change and its
 * processing; 0 disables batching and processes pending work now.
 */
static void zebra_route_map_set_delay_timer(uint32_t value)
{
	zebra_rmap_update_timer = value;
	if (!value && zebra_t_rmap_update) {
		/* Event driven route map updates is being disabled */
		/* But there's a pending timer. Fire it off now */
		THREAD_OFF(zebra_t_rmap_update);
		zebra_route_map_update_timer(NULL);
	}
}

/* Shutdown hook: cancel any pending update timer (and prevent it from
 * being rescheduled) before tearing down the route-map library.
 */
void zebra_routemap_finish(void)
{
	/* Set zebra_rmap_update_timer to 0 so that it wont schedule again */
	zebra_rmap_update_timer = 0;
	/* Thread off if any scheduled already */
	THREAD_OFF(zebra_t_rmap_update);
	route_map_finish();
}

/* Apply the per-protocol (or, failing that, the "any"-protocol) filter
 * route-map of `zvrf'/`family' to a route.  Returns RMAP_DENYMATCH when
 * a route-map name is configured but the map itself does not exist;
 * RMAP_PERMITMATCH when nothing is configured.
 */
route_map_result_t
zebra_route_map_check(afi_t family, int rib_type, uint8_t instance,
		      const struct prefix *p, struct nexthop *nexthop,
		      struct zebra_vrf *zvrf, route_tag_t tag)
{
	struct route_map *rmap = NULL;
	char *rm_name;
	route_map_result_t ret = RMAP_PERMITMATCH;
	struct nh_rmap_obj nh_obj;

	nh_obj.nexthop = nexthop;
	nh_obj.vrf_id = nexthop->vrf_id;
	nh_obj.source_protocol = rib_type;
	nh_obj.instance = instance;
	nh_obj.metric = 0;
	nh_obj.tag = tag;

	/* Per-protocol map takes precedence ... */
	if (rib_type >= 0 && rib_type < ZEBRA_ROUTE_MAX) {
		rm_name = PROTO_RM_NAME(zvrf, family, rib_type);
		rmap = PROTO_RM_MAP(zvrf, family, rib_type);

		/* Configured name without a matching map means deny. */
		if (rm_name && !rmap)
			return RMAP_DENYMATCH;
	}
	/* ... falling back to the "any"-protocol slot. */
	if (!rmap) {
		rm_name = PROTO_RM_NAME(zvrf, family, ZEBRA_ROUTE_MAX);
		rmap = PROTO_RM_MAP(zvrf, family, ZEBRA_ROUTE_MAX);

		if (rm_name && !rmap)
			return RMAP_DENYMATCH;
	}
	if (rmap) {
		ret = route_map_apply(rmap, p, &nh_obj);
	}

	return (ret);
}

char
*zebra_get_import_table_route_map(afi_t afi, uint32_t table) +{ + return zebra_import_table_routemap[afi][table]; +} + +void zebra_add_import_table_route_map(afi_t afi, const char *rmap_name, + uint32_t table) +{ + zebra_import_table_routemap[afi][table] = + XSTRDUP(MTYPE_ROUTE_MAP_NAME, rmap_name); +} + +void zebra_del_import_table_route_map(afi_t afi, uint32_t table) +{ + XFREE(MTYPE_ROUTE_MAP_NAME, zebra_import_table_routemap[afi][table]); +} + +route_map_result_t +zebra_import_table_route_map_check(int family, int re_type, uint8_t instance, + const struct prefix *p, + struct nexthop *nexthop, + vrf_id_t vrf_id, route_tag_t tag, + const char *rmap_name) +{ + struct route_map *rmap = NULL; + route_map_result_t ret = RMAP_DENYMATCH; + struct nh_rmap_obj nh_obj; + + nh_obj.nexthop = nexthop; + nh_obj.vrf_id = vrf_id; + nh_obj.source_protocol = re_type; + nh_obj.instance = instance; + nh_obj.metric = 0; + nh_obj.tag = tag; + + if (re_type >= 0 && re_type < ZEBRA_ROUTE_MAX) + rmap = route_map_lookup_by_name(rmap_name); + if (rmap) { + ret = route_map_apply(rmap, p, &nh_obj); + } + + return (ret); +} + +route_map_result_t zebra_nht_route_map_check(afi_t afi, int client_proto, + const struct prefix *p, + struct zebra_vrf *zvrf, + struct route_entry *re, + struct nexthop *nexthop) +{ + struct route_map *rmap = NULL; + route_map_result_t ret = RMAP_PERMITMATCH; + struct nh_rmap_obj nh_obj; + + nh_obj.nexthop = nexthop; + nh_obj.vrf_id = nexthop->vrf_id; + nh_obj.source_protocol = re->type; + nh_obj.instance = re->instance; + nh_obj.metric = re->metric; + nh_obj.tag = re->tag; + + if (client_proto >= 0 && client_proto < ZEBRA_ROUTE_MAX) + rmap = NHT_RM_MAP(zvrf, afi, client_proto); + if (!rmap && NHT_RM_MAP(zvrf, afi, ZEBRA_ROUTE_MAX)) + rmap = NHT_RM_MAP(zvrf, afi, ZEBRA_ROUTE_MAX); + if (rmap) + ret = route_map_apply(rmap, p, &nh_obj); + + return ret; +} + +static void zebra_route_map_mark_update(const char *rmap_name) +{ + /* rmap_update_timer of 0 means don't do route 
updates */ + if (zebra_rmap_update_timer) + THREAD_OFF(zebra_t_rmap_update); + + thread_add_timer(zrouter.master, zebra_route_map_update_timer, + NULL, zebra_rmap_update_timer, &zebra_t_rmap_update); +} + +static void zebra_route_map_add(const char *rmap_name) +{ + if (route_map_mark_updated(rmap_name) == 0) + zebra_route_map_mark_update(rmap_name); + + route_map_notify_dependencies(rmap_name, RMAP_EVENT_MATCH_ADDED); +} + +static void zebra_route_map_delete(const char *rmap_name) +{ + if (route_map_mark_updated(rmap_name) == 0) + zebra_route_map_mark_update(rmap_name); + + route_map_notify_dependencies(rmap_name, RMAP_EVENT_MATCH_DELETED); +} + +static void zebra_route_map_event(const char *rmap_name) +{ + if (route_map_mark_updated(rmap_name) == 0) + zebra_route_map_mark_update(rmap_name); + + route_map_notify_dependencies(rmap_name, RMAP_EVENT_MATCH_ADDED); +} + +void zebra_routemap_vrf_delete(struct zebra_vrf *zvrf) +{ + afi_t afi; + uint8_t type; + + for (afi = AFI_IP; afi < AFI_MAX; afi++) { + for (type = 0; type <= ZEBRA_ROUTE_MAX; type++) { + if (PROTO_RM_NAME(zvrf, afi, type)) + XFREE(MTYPE_ROUTE_MAP_NAME, + PROTO_RM_NAME(zvrf, afi, type)); + if (NHT_RM_NAME(zvrf, afi, type)) + XFREE(MTYPE_ROUTE_MAP_NAME, + NHT_RM_NAME(zvrf, afi, type)); + } + } +} + +/* ip protocol configuration write function */ +void zebra_routemap_config_write_protocol(struct vty *vty, + struct zebra_vrf *zvrf) +{ + int i; + char space[2]; + + memset(space, 0, sizeof(space)); + + if (zvrf_id(zvrf) != VRF_DEFAULT) + snprintf(space, sizeof(space), "%s", " "); + + for (i = 0; i < ZEBRA_ROUTE_MAX; i++) { + if (PROTO_RM_NAME(zvrf, AFI_IP, i)) + vty_out(vty, "%sip protocol %s route-map %s\n", space, + zebra_route_string(i), + PROTO_RM_NAME(zvrf, AFI_IP, i)); + + if (PROTO_RM_NAME(zvrf, AFI_IP6, i)) + vty_out(vty, "%sipv6 protocol %s route-map %s\n", space, + zebra_route_string(i), + PROTO_RM_NAME(zvrf, AFI_IP6, i)); + + if (NHT_RM_NAME(zvrf, AFI_IP, i)) + vty_out(vty, "%sip nht %s route-map 
%s\n", space, + zebra_route_string(i), + NHT_RM_NAME(zvrf, AFI_IP, i)); + + if (NHT_RM_NAME(zvrf, AFI_IP6, i)) + vty_out(vty, "%sipv6 nht %s route-map %s\n", space, + zebra_route_string(i), + NHT_RM_NAME(zvrf, AFI_IP6, i)); + } + + if (PROTO_RM_NAME(zvrf, AFI_IP, ZEBRA_ROUTE_MAX)) + vty_out(vty, "%sip protocol %s route-map %s\n", space, "any", + PROTO_RM_NAME(zvrf, AFI_IP, ZEBRA_ROUTE_MAX)); + + if (PROTO_RM_NAME(zvrf, AFI_IP6, ZEBRA_ROUTE_MAX)) + vty_out(vty, "%sipv6 protocol %s route-map %s\n", space, "any", + PROTO_RM_NAME(zvrf, AFI_IP6, ZEBRA_ROUTE_MAX)); + + if (NHT_RM_NAME(zvrf, AFI_IP, ZEBRA_ROUTE_MAX)) + vty_out(vty, "%sip nht %s route-map %s\n", space, "any", + NHT_RM_NAME(zvrf, AFI_IP, ZEBRA_ROUTE_MAX)); + + if (NHT_RM_NAME(zvrf, AFI_IP6, ZEBRA_ROUTE_MAX)) + vty_out(vty, "%sipv6 nht %s route-map %s\n", space, "any", + NHT_RM_NAME(zvrf, AFI_IP6, ZEBRA_ROUTE_MAX)); + + if (zvrf_id(zvrf) == VRF_DEFAULT + && zebra_rmap_update_timer != ZEBRA_RMAP_DEFAULT_UPDATE_TIMER) + vty_out(vty, "zebra route-map delay-timer %d\n", + zebra_rmap_update_timer); +} + +void zebra_route_map_init(void) +{ + install_element(CONFIG_NODE, &ip_protocol_cmd); + install_element(CONFIG_NODE, &no_ip_protocol_cmd); + install_element(VRF_NODE, &ip_protocol_cmd); + install_element(VRF_NODE, &no_ip_protocol_cmd); + install_element(VIEW_NODE, &show_ip_protocol_cmd); + install_element(CONFIG_NODE, &ipv6_protocol_cmd); + install_element(CONFIG_NODE, &no_ipv6_protocol_cmd); + install_element(VRF_NODE, &ipv6_protocol_cmd); + install_element(VRF_NODE, &no_ipv6_protocol_cmd); + install_element(VIEW_NODE, &show_ipv6_protocol_cmd); + install_element(CONFIG_NODE, &ip_protocol_nht_rmap_cmd); + install_element(CONFIG_NODE, &no_ip_protocol_nht_rmap_cmd); + install_element(VRF_NODE, &ip_protocol_nht_rmap_cmd); + install_element(VRF_NODE, &no_ip_protocol_nht_rmap_cmd); + install_element(VIEW_NODE, &show_ip_protocol_nht_cmd); + install_element(CONFIG_NODE, &ipv6_protocol_nht_rmap_cmd); + 
install_element(CONFIG_NODE, &no_ipv6_protocol_nht_rmap_cmd); + install_element(VRF_NODE, &ipv6_protocol_nht_rmap_cmd); + install_element(VRF_NODE, &no_ipv6_protocol_nht_rmap_cmd); + install_element(VIEW_NODE, &show_ipv6_protocol_nht_cmd); + install_element(CONFIG_NODE, &zebra_route_map_timer_cmd); + install_element(CONFIG_NODE, &no_zebra_route_map_timer_cmd); + + route_map_init(); + + route_map_add_hook(zebra_route_map_add); + route_map_delete_hook(zebra_route_map_delete); + route_map_event_hook(zebra_route_map_event); + + route_map_match_interface_hook(generic_match_add); + route_map_no_match_interface_hook(generic_match_delete); + + route_map_match_ip_address_hook(generic_match_add); + route_map_no_match_ip_address_hook(generic_match_delete); + + route_map_match_ip_address_prefix_list_hook(generic_match_add); + route_map_no_match_ip_address_prefix_list_hook(generic_match_delete); + + route_map_match_ip_next_hop_hook(generic_match_add); + route_map_no_match_ip_next_hop_hook(generic_match_delete); + + route_map_match_ip_next_hop_prefix_list_hook(generic_match_add); + route_map_no_match_ip_next_hop_prefix_list_hook(generic_match_delete); + + route_map_match_ip_next_hop_type_hook(generic_match_add); + route_map_no_match_ip_next_hop_type_hook(generic_match_delete); + + route_map_match_tag_hook(generic_match_add); + route_map_no_match_tag_hook(generic_match_delete); + + route_map_match_ipv6_address_hook(generic_match_add); + route_map_no_match_ipv6_address_hook(generic_match_delete); + + route_map_match_ipv6_address_prefix_list_hook(generic_match_add); + route_map_no_match_ipv6_address_prefix_list_hook(generic_match_delete); + + route_map_match_ipv6_next_hop_type_hook(generic_match_add); + route_map_no_match_ipv6_next_hop_type_hook(generic_match_delete); + + route_map_install_match(&route_match_tag_cmd); + route_map_install_match(&route_match_interface_cmd); + route_map_install_match(&route_match_ip_next_hop_cmd); + 
route_map_install_match(&route_match_ip_next_hop_prefix_list_cmd); + route_map_install_match(&route_match_ip_address_cmd); + route_map_install_match(&route_match_ipv6_address_cmd); + route_map_install_match(&route_match_ip_address_prefix_list_cmd); + route_map_install_match(&route_match_ipv6_address_prefix_list_cmd); + route_map_install_match(&route_match_ip_address_prefix_len_cmd); + route_map_install_match(&route_match_ipv6_address_prefix_len_cmd); + route_map_install_match(&route_match_ip_nexthop_prefix_len_cmd); + route_map_install_match(&route_match_ip_next_hop_type_cmd); + route_map_install_match(&route_match_ipv6_next_hop_type_cmd); + route_map_install_match(&route_match_source_protocol_cmd); + route_map_install_match(&route_match_source_instance_cmd); + + /* */ + route_map_install_set(&route_set_src_cmd); + /* */ + install_element(RMAP_NODE, &match_ip_nexthop_prefix_len_cmd); + install_element(RMAP_NODE, &no_match_ip_nexthop_prefix_len_cmd); + install_element(RMAP_NODE, &match_ip_address_prefix_len_cmd); + install_element(RMAP_NODE, &match_ipv6_address_prefix_len_cmd); + install_element(RMAP_NODE, &no_match_ipv6_address_prefix_len_cmd); + install_element(RMAP_NODE, &no_match_ip_address_prefix_len_cmd); + install_element(RMAP_NODE, &match_source_protocol_cmd); + install_element(RMAP_NODE, &no_match_source_protocol_cmd); + install_element(RMAP_NODE, &match_source_instance_cmd); + install_element(RMAP_NODE, &no_match_source_instance_cmd); + + /* */ + install_element(RMAP_NODE, &set_src_cmd); + install_element(RMAP_NODE, &no_set_src_cmd); +} diff --git a/zebra/zebra_routemap.h b/zebra/zebra_routemap.h new file mode 100644 index 0000000..02b01fa --- /dev/null +++ b/zebra/zebra_routemap.h @@ -0,0 +1,62 @@ +/* + * Zebra routemap header + * Copyright (C) 2015 Cumulus Networks, Inc. + * + * This file is part of GNU Zebra. 
+ * + * GNU Zebra is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License as published by the + * Free Software Foundation; either version 2, or (at your option) any + * later version. + * + * GNU Zebra is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. + * + * You should have received a copy of the GNU General Public License along + * with this program; see the file COPYING; if not, write to the Free Software + * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA + */ + +#ifndef __ZEBRA_ROUTEMAP_H__ +#define __ZEBRA_ROUTEMAP_H__ + +#include "lib/routemap.h" + +#ifdef __cplusplus +extern "C" { +#endif + +extern void zebra_route_map_init(void); +extern void zebra_routemap_config_write_protocol(struct vty *vty, + struct zebra_vrf *vrf); +extern char *zebra_get_import_table_route_map(afi_t afi, uint32_t table); +extern void zebra_add_import_table_route_map(afi_t afi, const char *rmap_name, + uint32_t table); +extern void zebra_del_import_table_route_map(afi_t afi, uint32_t table); + +extern route_map_result_t +zebra_import_table_route_map_check(int family, int rib_type, uint8_t instance, + const struct prefix *p, + struct nexthop *nexthop, vrf_id_t vrf_id, + route_tag_t tag, const char *rmap_name); +extern route_map_result_t +zebra_route_map_check(afi_t family, int rib_type, uint8_t instance, + const struct prefix *p, struct nexthop *nexthop, + struct zebra_vrf *zvrf, route_tag_t tag); +extern route_map_result_t +zebra_nht_route_map_check(afi_t afi, int client_proto, const struct prefix *p, + struct zebra_vrf *zvrf, struct route_entry *, + struct nexthop *nexthop); + +extern void zebra_routemap_vrf_delete(struct zebra_vrf *zvrf); + +#ifdef __cplusplus +} +#endif + +extern void zebra_routemap_finish(void); + 
+extern const struct frr_yang_module_info frr_zebra_route_map_info; +#endif diff --git a/zebra/zebra_routemap_nb.c b/zebra/zebra_routemap_nb.c new file mode 100644 index 0000000..9da4589 --- /dev/null +++ b/zebra/zebra_routemap_nb.c @@ -0,0 +1,76 @@ +/* + * Copyright (C) 2020 Vmware + * Sarita Patra + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License as published by the Free + * Software Foundation; either version 2 of the License, or (at your option) + * any later version. + * + * This program is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for + * more details. + * + * You should have received a copy of the GNU General Public License along + * with this program; see the file COPYING; if not, write to the Free Software + * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA + */ + +#include <zebra.h> + +#include "northbound.h" +#include "libfrr.h" +#include "zebra_routemap_nb.h" + +/* clang-format off */ +const struct frr_yang_module_info frr_zebra_route_map_info = { + .name = "frr-zebra-route-map", + .nodes = { + { + .xpath = "/frr-route-map:lib/route-map/entry/match-condition/rmap-match-condition/frr-zebra-route-map:ipv4-prefix-length", + .cbs = { + .modify = lib_route_map_entry_match_condition_rmap_match_condition_ipv4_prefix_length_modify, + .destroy = lib_route_map_entry_match_condition_rmap_match_condition_ipv4_prefix_length_destroy, + } + }, + { + .xpath = "/frr-route-map:lib/route-map/entry/match-condition/rmap-match-condition/frr-zebra-route-map:ipv6-prefix-length", + .cbs = { + .modify = lib_route_map_entry_match_condition_rmap_match_condition_ipv6_prefix_length_modify, + .destroy = lib_route_map_entry_match_condition_rmap_match_condition_ipv6_prefix_length_destroy, + } + }, + { + .xpath = 
"/frr-route-map:lib/route-map/entry/match-condition/rmap-match-condition/frr-zebra-route-map:source-instance", + .cbs = { + .modify = lib_route_map_entry_match_condition_rmap_match_condition_source_instance_modify, + .destroy = lib_route_map_entry_match_condition_rmap_match_condition_source_instance_destroy, + } + }, + { + .xpath = "/frr-route-map:lib/route-map/entry/match-condition/rmap-match-condition/frr-zebra-route-map:source-protocol", + .cbs = { + .modify = lib_route_map_entry_match_condition_rmap_match_condition_source_protocol_modify, + .destroy = lib_route_map_entry_match_condition_rmap_match_condition_source_protocol_destroy, + } + }, + { + .xpath = "/frr-route-map:lib/route-map/entry/set-action/rmap-set-action/frr-zebra-route-map:ipv4-src-address", + .cbs = { + .modify = lib_route_map_entry_set_action_rmap_set_action_ipv4_src_address_modify, + .destroy = lib_route_map_entry_set_action_rmap_set_action_ipv4_src_address_destroy, + } + }, + { + .xpath = "/frr-route-map:lib/route-map/entry/set-action/rmap-set-action/frr-zebra-route-map:ipv6-src-address", + .cbs = { + .modify = lib_route_map_entry_set_action_rmap_set_action_ipv6_src_address_modify, + .destroy = lib_route_map_entry_set_action_rmap_set_action_ipv6_src_address_destroy, + } + }, + { + .xpath = NULL, + }, + } +}; diff --git a/zebra/zebra_routemap_nb.h b/zebra/zebra_routemap_nb.h new file mode 100644 index 0000000..a43f2b2 --- /dev/null +++ b/zebra/zebra_routemap_nb.h @@ -0,0 +1,45 @@ +/* + * Copyright (C) 2020 Vmware + * Sarita Patra + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License as published by the Free + * Software Foundation; either version 2 of the License, or (at your option) + * any later version. + * + * This program is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. 
See the GNU General Public License for + * more details. + * + * You should have received a copy of the GNU General Public License along + * with this program; see the file COPYING; if not, write to the Free Software + * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA + */ + +#ifndef _FRR_ZEBRA_ROUTEMAP_NB_H_ +#define _FRR_ZEBRA_ROUTEMAP_NB_H_ + +#ifdef __cplusplus +extern "C" { +#endif + +/* prototypes */ +int lib_route_map_entry_match_condition_rmap_match_condition_ipv4_prefix_length_modify(struct nb_cb_modify_args *args); +int lib_route_map_entry_match_condition_rmap_match_condition_ipv4_prefix_length_destroy(struct nb_cb_destroy_args *args); +int lib_route_map_entry_match_condition_rmap_match_condition_ipv6_prefix_length_modify(struct nb_cb_modify_args *args); +int lib_route_map_entry_match_condition_rmap_match_condition_ipv6_prefix_length_destroy(struct nb_cb_destroy_args *args); +int lib_route_map_entry_match_condition_rmap_match_condition_source_instance_modify(struct nb_cb_modify_args *args); +int lib_route_map_entry_match_condition_rmap_match_condition_source_instance_destroy(struct nb_cb_destroy_args *args); +int lib_route_map_entry_match_condition_rmap_match_condition_source_protocol_modify(struct nb_cb_modify_args *args); +int lib_route_map_entry_match_condition_rmap_match_condition_source_protocol_destroy(struct nb_cb_destroy_args *args); +int lib_route_map_entry_set_action_rmap_set_action_ipv4_src_address_modify(struct nb_cb_modify_args *args); +int lib_route_map_entry_set_action_rmap_set_action_ipv4_src_address_destroy(struct nb_cb_destroy_args *args); +int lib_route_map_entry_set_action_rmap_set_action_ipv6_src_address_modify(struct nb_cb_modify_args *args); +int lib_route_map_entry_set_action_rmap_set_action_ipv6_src_address_destroy(struct nb_cb_destroy_args *args); + +#ifdef __cplusplus +} +#endif + +#endif diff --git a/zebra/zebra_routemap_nb_config.c b/zebra/zebra_routemap_nb_config.c new file mode 100644 index 
0000000..5bcfb72 --- /dev/null +++ b/zebra/zebra_routemap_nb_config.c @@ -0,0 +1,368 @@ +#include <zebra.h> + +#include "lib/command.h" +#include "lib/log.h" +#include "lib/northbound.h" +#include "lib/routemap.h" +#include "zebra/rib.h" +#include "zebra/zebra_routemap_nb.h" + +/* + * XPath: /frr-route-map:lib/route-map/entry/match-condition/rmap-match-condition/frr-zebra-route-map:ipv4-prefix-length + */ +int +lib_route_map_entry_match_condition_rmap_match_condition_ipv4_prefix_length_modify( + struct nb_cb_modify_args *args) +{ + struct routemap_hook_context *rhc; + const char *length; + int rv; + const char *condition; + + switch (args->event) { + case NB_EV_VALIDATE: + case NB_EV_PREPARE: + case NB_EV_ABORT: + return NB_OK; + case NB_EV_APPLY: + /* Add configuration. */ + rhc = nb_running_get_entry(args->dnode, NULL, true); + length = yang_dnode_get_string(args->dnode, NULL); + condition = yang_dnode_get_string(args->dnode, + "../../frr-route-map:condition"); + + if (IS_MATCH_IPv4_PREFIX_LEN(condition)) + rhc->rhc_rule = "ip address prefix-len"; + else if (IS_MATCH_IPv4_NH_PREFIX_LEN(condition)) + rhc->rhc_rule = "ip next-hop prefix-len"; + + rhc->rhc_mhook = generic_match_delete; + rhc->rhc_event = RMAP_EVENT_MATCH_DELETED; + + rv = generic_match_add(rhc->rhc_rmi, rhc->rhc_rule, + length, RMAP_EVENT_MATCH_ADDED, + args->errmsg, args->errmsg_len); + if (rv != CMD_SUCCESS) { + rhc->rhc_mhook = NULL; + return NB_ERR_INCONSISTENCY; + } + } + + return NB_OK; +} + +int +lib_route_map_entry_match_condition_rmap_match_condition_ipv4_prefix_length_destroy( + struct nb_cb_destroy_args *args) +{ + switch (args->event) { + case NB_EV_VALIDATE: + case NB_EV_PREPARE: + case NB_EV_ABORT: + break; + case NB_EV_APPLY: + return lib_route_map_entry_match_destroy(args); + } + + return NB_OK; +} + +/* + * XPath: /frr-route-map:lib/route-map/entry/match-condition/rmap-match-condition/frr-zebra-route-map:ipv6-prefix-length + */ +int 
+lib_route_map_entry_match_condition_rmap_match_condition_ipv6_prefix_length_modify( + struct nb_cb_modify_args *args) +{ + struct routemap_hook_context *rhc; + const char *length; + int rv; + + switch (args->event) { + case NB_EV_VALIDATE: + case NB_EV_PREPARE: + case NB_EV_ABORT: + return NB_OK; + case NB_EV_APPLY: + /* Add configuration. */ + rhc = nb_running_get_entry(args->dnode, NULL, true); + length = yang_dnode_get_string(args->dnode, NULL); + + /* Set destroy information. */ + rhc->rhc_mhook = generic_match_delete; + rhc->rhc_rule = "ipv6 address prefix-len"; + rhc->rhc_event = RMAP_EVENT_MATCH_DELETED; + + rv = generic_match_add(rhc->rhc_rmi, "ipv6 address prefix-len", + length, RMAP_EVENT_MATCH_ADDED, + args->errmsg, args->errmsg_len); + if (rv != CMD_SUCCESS) { + rhc->rhc_mhook = NULL; + return NB_ERR_INCONSISTENCY; + } + } + + return NB_OK; +} + +int +lib_route_map_entry_match_condition_rmap_match_condition_ipv6_prefix_length_destroy( + struct nb_cb_destroy_args *args) +{ + switch (args->event) { + case NB_EV_VALIDATE: + case NB_EV_PREPARE: + case NB_EV_ABORT: + break; + case NB_EV_APPLY: + return lib_route_map_entry_match_destroy(args); + } + + return NB_OK; + +} + +/* + * XPath: /frr-route-map:lib/route-map/entry/match-condition/rmap-match-condition/frr-zebra-route-map:source-instance + */ +int +lib_route_map_entry_match_condition_rmap_match_condition_source_instance_modify( + struct nb_cb_modify_args *args) +{ + struct routemap_hook_context *rhc; + const char *type; + int rv; + + switch (args->event) { + case NB_EV_VALIDATE: + case NB_EV_PREPARE: + case NB_EV_ABORT: + break; + case NB_EV_APPLY: + /* Add configuration. */ + rhc = nb_running_get_entry(args->dnode, NULL, true); + type = yang_dnode_get_string(args->dnode, NULL); + + /* Set destroy information. 
*/ + rhc->rhc_mhook = generic_match_delete; + rhc->rhc_rule = "source-instance"; + rhc->rhc_event = RMAP_EVENT_MATCH_DELETED; + + rv = generic_match_add(rhc->rhc_rmi, "source-instance", + type, RMAP_EVENT_MATCH_ADDED, + args->errmsg, args->errmsg_len); + if (rv != CMD_SUCCESS) { + rhc->rhc_mhook = NULL; + return NB_ERR_INCONSISTENCY; + } + } + + return NB_OK; +} + +int +lib_route_map_entry_match_condition_rmap_match_condition_source_instance_destroy( + struct nb_cb_destroy_args *args) +{ + switch (args->event) { + case NB_EV_VALIDATE: + case NB_EV_PREPARE: + case NB_EV_ABORT: + break; + case NB_EV_APPLY: + return lib_route_map_entry_match_destroy(args); + } + + return NB_OK; +} + +/* + * XPath: /frr-route-map:lib/route-map/entry/match-condition/rmap-match-condition/frr-zebra-route-map:source-protocol + */ +int +lib_route_map_entry_match_condition_rmap_match_condition_source_protocol_modify( + struct nb_cb_modify_args *args) +{ + struct routemap_hook_context *rhc; + const char *type; + int rv; + + switch (args->event) { + case NB_EV_VALIDATE: + type = yang_dnode_get_string(args->dnode, NULL); + if (proto_name2num(type) == -1) { + zlog_warn("%s: invalid protocol: %s", __func__, type); + return NB_ERR_VALIDATION; + } + return NB_OK; + case NB_EV_PREPARE: + case NB_EV_ABORT: + return NB_OK; + case NB_EV_APPLY: + /* NOTHING */ + break; + } + + /* Add configuration. */ + rhc = nb_running_get_entry(args->dnode, NULL, true); + type = yang_dnode_get_string(args->dnode, NULL); + + /* Set destroy information. 
*/ + rhc->rhc_mhook = generic_match_delete; + rhc->rhc_rule = "source-protocol"; + rhc->rhc_event = RMAP_EVENT_MATCH_DELETED; + + rv = generic_match_add(rhc->rhc_rmi, "source-protocol", type, + RMAP_EVENT_MATCH_ADDED, + args->errmsg, args->errmsg_len); + if (rv != CMD_SUCCESS) { + rhc->rhc_mhook = NULL; + return NB_ERR_INCONSISTENCY; + } + + return NB_OK; +} + +int +lib_route_map_entry_match_condition_rmap_match_condition_source_protocol_destroy( + struct nb_cb_destroy_args *args) +{ + switch (args->event) { + case NB_EV_VALIDATE: + case NB_EV_PREPARE: + case NB_EV_ABORT: + break; + case NB_EV_APPLY: + return lib_route_map_entry_match_destroy(args); + } + + return NB_OK; +} + +/* + * XPath: /frr-route-map:lib/route-map/entry/set-action/rmap-set-action/frr-zebra-route-map:ipv4-src-address + */ +int +lib_route_map_entry_set_action_rmap_set_action_ipv4_src_address_modify( + struct nb_cb_modify_args *args) +{ + struct routemap_hook_context *rhc; + const char *source; + struct prefix p; + int rv; + + switch (args->event) { + case NB_EV_VALIDATE: + memset(&p, 0, sizeof(p)); + yang_dnode_get_ipv4p(&p, args->dnode, NULL); + if (zebra_check_addr(&p) == 0) { + zlog_warn("%s: invalid IPv4 address: %s", __func__, + yang_dnode_get_string(args->dnode, NULL)); + return NB_ERR_VALIDATION; + } + return NB_OK; + case NB_EV_PREPARE: + case NB_EV_ABORT: + return NB_OK; + case NB_EV_APPLY: + /* NOTHING */ + break; + } + + /* Add configuration. */ + rhc = nb_running_get_entry(args->dnode, NULL, true); + source = yang_dnode_get_string(args->dnode, NULL); + + /* Set destroy information. 
*/ + rhc->rhc_shook = generic_set_delete; + rhc->rhc_rule = "src"; + + rv = generic_set_add(rhc->rhc_rmi, "src", source, + args->errmsg, args->errmsg_len); + if (rv != CMD_SUCCESS) { + rhc->rhc_shook = NULL; + return NB_ERR_INCONSISTENCY; + } + + return NB_OK; +} + +int +lib_route_map_entry_set_action_rmap_set_action_ipv4_src_address_destroy( + struct nb_cb_destroy_args *args) +{ + switch (args->event) { + case NB_EV_VALIDATE: + case NB_EV_PREPARE: + case NB_EV_ABORT: + break; + case NB_EV_APPLY: + return lib_route_map_entry_set_destroy(args); + } + + return NB_OK; +} + +/* + * XPath: /frr-route-map:lib/route-map/entry/set-action/rmap-set-action/frr-zebra-route-map:ipv6-src-address + */ +int +lib_route_map_entry_set_action_rmap_set_action_ipv6_src_address_modify( + struct nb_cb_modify_args *args) +{ + struct routemap_hook_context *rhc; + const char *source; + struct prefix p; + int rv; + + switch (args->event) { + case NB_EV_VALIDATE: + memset(&p, 0, sizeof(p)); + yang_dnode_get_ipv6p(&p, args->dnode, NULL); + if (zebra_check_addr(&p) == 0) { + zlog_warn("%s: invalid IPv6 address: %s", __func__, + yang_dnode_get_string(args->dnode, NULL)); + return NB_ERR_VALIDATION; + } + return NB_OK; + case NB_EV_PREPARE: + case NB_EV_ABORT: + return NB_OK; + case NB_EV_APPLY: + /* NOTHING */ + break; + } + + /* Add configuration. */ + rhc = nb_running_get_entry(args->dnode, NULL, true); + source = yang_dnode_get_string(args->dnode, NULL); + + /* Set destroy information. 
*/ + rhc->rhc_shook = generic_set_delete; + rhc->rhc_rule = "src"; + + rv = generic_set_add(rhc->rhc_rmi, "src", source, + args->errmsg, args->errmsg_len); + if (rv != CMD_SUCCESS) { + rhc->rhc_shook = NULL; + return NB_ERR_INCONSISTENCY; + } + + return NB_OK; +} + +int +lib_route_map_entry_set_action_rmap_set_action_ipv6_src_address_destroy( + struct nb_cb_destroy_args *args) +{ + switch (args->event) { + case NB_EV_VALIDATE: + case NB_EV_PREPARE: + case NB_EV_ABORT: + break; + case NB_EV_APPLY: + return lib_route_map_entry_set_destroy(args); + } + + return NB_OK; +} diff --git a/zebra/zebra_router.c b/zebra/zebra_router.c new file mode 100644 index 0000000..c668498 --- /dev/null +++ b/zebra/zebra_router.c @@ -0,0 +1,324 @@ +/* Zebra Router Code. + * Copyright (C) 2018 Cumulus Networks, Inc. + * Donald Sharp + * + * This file is part of FRR. + * + * FRR is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License as published by the + * Free Software Foundation; either version 2, or (at your option) any + * later version. + * + * FRR is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with FRR; see the file COPYING. If not, write to the Free + * Software Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA + * 02111-1307, USA. 
+ */ +#include "zebra.h" + +#include <pthread.h> +#include "lib/frratomic.h" + +#include "zebra_router.h" +#include "zebra_pbr.h" +#include "zebra_vxlan.h" +#include "zebra_mlag.h" +#include "zebra_nhg.h" +#include "zebra_neigh.h" +#include "debug.h" +#include "zebra_script.h" + +DEFINE_MTYPE_STATIC(ZEBRA, RIB_TABLE_INFO, "RIB table info"); +DEFINE_MTYPE_STATIC(ZEBRA, ZEBRA_RT_TABLE, "Zebra VRF table"); + +struct zebra_router zrouter = { + .multipath_num = MULTIPATH_NUM, + .ipv4_multicast_mode = MCAST_NO_CONFIG, +}; + +static inline int +zebra_router_table_entry_compare(const struct zebra_router_table *e1, + const struct zebra_router_table *e2); + +RB_GENERATE(zebra_router_table_head, zebra_router_table, + zebra_router_table_entry, zebra_router_table_entry_compare); + + +static inline int +zebra_router_table_entry_compare(const struct zebra_router_table *e1, + const struct zebra_router_table *e2) +{ + if (e1->tableid < e2->tableid) + return -1; + if (e1->tableid > e2->tableid) + return 1; + if (e1->ns_id < e2->ns_id) + return -1; + if (e1->ns_id > e2->ns_id) + return 1; + if (e1->afi < e2->afi) + return -1; + if (e1->afi > e2->afi) + return 1; + return (e1->safi - e2->safi); +} + +struct zebra_router_table *zebra_router_find_zrt(struct zebra_vrf *zvrf, + uint32_t tableid, afi_t afi, + safi_t safi) +{ + struct zebra_router_table finder; + struct zebra_router_table *zrt; + + memset(&finder, 0, sizeof(finder)); + finder.afi = afi; + finder.safi = safi; + finder.tableid = tableid; + finder.ns_id = zvrf->zns->ns_id; + zrt = RB_FIND(zebra_router_table_head, &zrouter.tables, &finder); + + return zrt; +} + +struct route_table *zebra_router_find_table(struct zebra_vrf *zvrf, + uint32_t tableid, afi_t afi, + safi_t safi) +{ + struct zebra_router_table finder; + struct zebra_router_table *zrt; + + memset(&finder, 0, sizeof(finder)); + finder.afi = afi; + finder.safi = safi; + finder.tableid = tableid; + finder.ns_id = zvrf->zns->ns_id; + zrt = RB_FIND(zebra_router_table_head, 
&zrouter.tables, &finder); + + if (zrt) + return zrt->table; + else + return NULL; +} + +struct route_table *zebra_router_get_table(struct zebra_vrf *zvrf, + uint32_t tableid, afi_t afi, + safi_t safi) +{ + struct zebra_router_table finder; + struct zebra_router_table *zrt; + struct rib_table_info *info; + + memset(&finder, 0, sizeof(finder)); + finder.afi = afi; + finder.safi = safi; + finder.tableid = tableid; + finder.ns_id = zvrf->zns->ns_id; + zrt = RB_FIND(zebra_router_table_head, &zrouter.tables, &finder); + + if (zrt) + return zrt->table; + + zrt = XCALLOC(MTYPE_ZEBRA_RT_TABLE, sizeof(*zrt)); + zrt->tableid = tableid; + zrt->afi = afi; + zrt->safi = safi; + zrt->ns_id = zvrf->zns->ns_id; + zrt->table = + (afi == AFI_IP6) ? srcdest_table_init() : route_table_init(); + + info = XCALLOC(MTYPE_RIB_TABLE_INFO, sizeof(*info)); + info->zvrf = zvrf; + info->afi = afi; + info->safi = safi; + info->table_id = tableid; + route_table_set_info(zrt->table, info); + zrt->table->cleanup = zebra_rtable_node_cleanup; + + RB_INSERT(zebra_router_table_head, &zrouter.tables, zrt); + return zrt->table; +} + +void zebra_router_show_table_summary(struct vty *vty) +{ + struct zebra_router_table *zrt; + + vty_out(vty, + "VRF NS ID VRF ID AFI SAFI Table Count\n"); + vty_out(vty, + "---------------------------------------------------------------------------\n"); + RB_FOREACH (zrt, zebra_router_table_head, &zrouter.tables) { + struct rib_table_info *info = route_table_get_info(zrt->table); + + vty_out(vty, "%-16s%5d %9d %7s %15s %8d %10lu\n", info->zvrf->vrf->name, + zrt->ns_id, info->zvrf->vrf->vrf_id, + afi2str(zrt->afi), safi2str(zrt->safi), + zrt->tableid, + zrt->table->count); + } +} + +void zebra_router_sweep_route(void) +{ + struct zebra_router_table *zrt; + + RB_FOREACH (zrt, zebra_router_table_head, &zrouter.tables) { + if (zrt->ns_id != NS_DEFAULT) + continue; + rib_sweep_table(zrt->table); + } +} + +void zebra_router_sweep_nhgs(void) +{ + 
zebra_nhg_sweep_table(zrouter.nhgs_id); +} + +static void zebra_router_free_table(struct zebra_router_table *zrt) +{ + void *table_info; + + table_info = route_table_get_info(zrt->table); + route_table_finish(zrt->table); + RB_REMOVE(zebra_router_table_head, &zrouter.tables, zrt); + + XFREE(MTYPE_RIB_TABLE_INFO, table_info); + XFREE(MTYPE_ZEBRA_RT_TABLE, zrt); +} + +void zebra_router_release_table(struct zebra_vrf *zvrf, uint32_t tableid, + afi_t afi, safi_t safi) +{ + struct zebra_router_table finder; + struct zebra_router_table *zrt; + + memset(&finder, 0, sizeof(finder)); + finder.afi = afi; + finder.safi = safi; + finder.tableid = tableid; + finder.ns_id = zvrf->zns->ns_id; + zrt = RB_FIND(zebra_router_table_head, &zrouter.tables, &finder); + + if (!zrt) + return; + + zebra_router_free_table(zrt); +} + +uint32_t zebra_router_get_next_sequence(void) +{ + return 1 + + atomic_fetch_add_explicit(&zrouter.sequence_num, 1, + memory_order_relaxed); +} + +void multicast_mode_ipv4_set(enum multicast_mode mode) +{ + if (IS_ZEBRA_DEBUG_RIB) + zlog_debug("%s: multicast lookup mode set (%d)", __func__, + mode); + zrouter.ipv4_multicast_mode = mode; +} + +enum multicast_mode multicast_mode_ipv4_get(void) +{ + return zrouter.ipv4_multicast_mode; +} + +void zebra_router_terminate(void) +{ + struct zebra_router_table *zrt, *tmp; + + THREAD_OFF(zrouter.sweeper); + + RB_FOREACH_SAFE (zrt, zebra_router_table_head, &zrouter.tables, tmp) + zebra_router_free_table(zrt); + + work_queue_free_and_null(&zrouter.ribq); + meta_queue_free(zrouter.mq, NULL); + + zebra_vxlan_disable(); + zebra_mlag_terminate(); + zebra_neigh_terminate(); + + /* Free NHE in ID table only since it has unhashable entries as well */ + hash_iterate(zrouter.nhgs_id, zebra_nhg_hash_free_zero_id, NULL); + hash_clean(zrouter.nhgs_id, zebra_nhg_hash_free); + hash_free(zrouter.nhgs_id); + hash_clean(zrouter.nhgs, NULL); + hash_free(zrouter.nhgs); + + hash_clean(zrouter.rules_hash, zebra_pbr_rules_free); + 
hash_free(zrouter.rules_hash); + + hash_clean(zrouter.ipset_entry_hash, zebra_pbr_ipset_entry_free), + hash_clean(zrouter.ipset_hash, zebra_pbr_ipset_free); + hash_free(zrouter.ipset_hash); + hash_free(zrouter.ipset_entry_hash); + hash_clean(zrouter.iptable_hash, zebra_pbr_iptable_free); + hash_free(zrouter.iptable_hash); + +#ifdef HAVE_SCRIPTING + zebra_script_destroy(); +#endif + + /* OS-specific deinit */ + kernel_router_terminate(); +} + +bool zebra_router_notify_on_ack(void) +{ + return !zrouter.asic_offloaded || zrouter.notify_on_ack; +} + +void zebra_router_init(bool asic_offload, bool notify_on_ack) +{ + zrouter.sequence_num = 0; + + zrouter.allow_delete = false; + + zrouter.packets_to_process = ZEBRA_ZAPI_PACKETS_TO_PROCESS; + + zrouter.nhg_keep = ZEBRA_DEFAULT_NHG_KEEP_TIMER; + + zebra_vxlan_init(); + zebra_mlag_init(); + zebra_neigh_init(); + + zrouter.rules_hash = hash_create_size(8, zebra_pbr_rules_hash_key, + zebra_pbr_rules_hash_equal, + "Rules Hash"); + + zrouter.ipset_hash = + hash_create_size(8, zebra_pbr_ipset_hash_key, + zebra_pbr_ipset_hash_equal, "IPset Hash"); + + zrouter.ipset_entry_hash = hash_create_size( + 8, zebra_pbr_ipset_entry_hash_key, + zebra_pbr_ipset_entry_hash_equal, "IPset Hash Entry"); + + zrouter.iptable_hash = hash_create_size(8, zebra_pbr_iptable_hash_key, + zebra_pbr_iptable_hash_equal, + "IPtable Hash Entry"); + + zrouter.nhgs = + hash_create_size(8, zebra_nhg_hash_key, zebra_nhg_hash_equal, + "Zebra Router Nexthop Groups"); + zrouter.nhgs_id = + hash_create_size(8, zebra_nhg_id_key, zebra_nhg_hash_id_equal, + "Zebra Router Nexthop Groups ID index"); + + zrouter.asic_offloaded = asic_offload; + zrouter.notify_on_ack = notify_on_ack; + +#ifdef HAVE_SCRIPTING + zebra_script_init(); +#endif + + /* OS-specific init */ + kernel_router_init(); +} diff --git a/zebra/zebra_router.h b/zebra/zebra_router.h new file mode 100644 index 0000000..992bcd5 --- /dev/null +++ b/zebra/zebra_router.h @@ -0,0 +1,300 @@ +/* Zebra Router header. 
+ * Copyright (C) 2018 Cumulus Networks, Inc. + * Donald Sharp + * + * This file is part of FRR. + * + * FRR is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License as published by the + * Free Software Foundation; either version 2, or (at your option) any + * later version. + * + * FRR is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with FRR; see the file COPYING. If not, write to the Free + * Software Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA + * 02111-1307, USA. + */ +#ifndef __ZEBRA_ROUTER_H__ +#define __ZEBRA_ROUTER_H__ + +#include "lib/mlag.h" + +#include "zebra/zebra_ns.h" + +#ifdef __cplusplus +extern "C" { +#endif + +/* + * This header file contains the idea of a router and as such + * owns data that is associated with a router from zebra's + * perspective. 
+ */ + +struct zebra_router_table { + RB_ENTRY(zebra_router_table) zebra_router_table_entry; + + uint32_t tableid; + afi_t afi; + safi_t safi; + ns_id_t ns_id; + + struct route_table *table; +}; +RB_HEAD(zebra_router_table_head, zebra_router_table); +RB_PROTOTYPE(zebra_router_table_head, zebra_router_table, + zebra_router_table_entry, zebra_router_table_entry_compare) + +/* RPF lookup behaviour */ +enum multicast_mode { + MCAST_NO_CONFIG = 0, /* MIX_MRIB_FIRST, but no show in config write */ + MCAST_MRIB_ONLY, /* MRIB only */ + MCAST_URIB_ONLY, /* URIB only */ + MCAST_MIX_MRIB_FIRST, /* MRIB, if nothing at all then URIB */ + MCAST_MIX_DISTANCE, /* MRIB & URIB, lower distance wins */ + MCAST_MIX_PFXLEN, /* MRIB & URIB, longer prefix wins */ + /* on equal value, MRIB wins for last 2 */ +}; + +/* An interface can be error-disabled if a protocol (such as EVPN or + * VRRP) detects a problem with keeping it operationally-up. + * If any of the protodown bits are set protodown-on is programmed + * in the dataplane. This results in a carrier/L1 down on the + * physical device. 
+ */ +enum protodown_reasons { + /* A process outside of FRR's control protodowned the interface */ + ZEBRA_PROTODOWN_EXTERNAL = (1 << 0), + /* On startup local ESs are held down for some time to + * allow the underlay to converge and EVPN routes to + * get learnt + */ + ZEBRA_PROTODOWN_EVPN_STARTUP_DELAY = (1 << 1), + /* If all the uplinks are down the switch has lost access + * to the VxLAN overlay and must shut down the access + * ports to allow servers to re-direct their traffic to + * other switches on the Ethernet Segment + */ + ZEBRA_PROTODOWN_EVPN_UPLINK_DOWN = (1 << 2), + ZEBRA_PROTODOWN_EVPN_ALL = (ZEBRA_PROTODOWN_EVPN_UPLINK_DOWN | + ZEBRA_PROTODOWN_EVPN_STARTUP_DELAY), + ZEBRA_PROTODOWN_VRRP = (1 << 3), + /* This reason used exclusively for testing */ + ZEBRA_PROTODOWN_SHARP = (1 << 4), + /* Just used to clear our fields on shutdown, externel not included */ + ZEBRA_PROTODOWN_ALL = (ZEBRA_PROTODOWN_EVPN_ALL | ZEBRA_PROTODOWN_VRRP | + ZEBRA_PROTODOWN_SHARP) +}; +#define ZEBRA_PROTODOWN_RC_STR_LEN 80 + +struct zebra_mlag_info { + /* Role this zebra router is playing */ + enum mlag_role role; + + /* The peerlink being used for mlag */ + char *peerlink; + ifindex_t peerlink_ifindex; + + /* The system mac being used */ + struct ethaddr mac; + /* + * Zebra will open the communication channel with MLAGD only if any + * clients are interested and it is controlled dynamically based on + * client registers & un-registers. + */ + uint32_t clients_interested_cnt; + + /* coomunication channel with MLAGD is established */ + bool connected; + + /* connection retry timer is running */ + bool timer_running; + + /* Holds the client data(unencoded) that need to be pushed to MCLAGD*/ + struct stream_fifo *mlag_fifo; + + /* + * A new Kernel thread will be created to post the data to MCLAGD. + * where as, read will be performed from the zebra main thread, because + * read involves accessing client registartion data structures. 
+ */ + struct frr_pthread *zebra_pth_mlag; + + /* MLAG Thread context 'master' */ + struct thread_master *th_master; + + /* + * Event for Initial MLAG Connection setup & Data Read + * Read can be performed only after successful connection establishment, + * so no issues. + * + */ + struct thread *t_read; + /* Event for MLAG write */ + struct thread *t_write; +}; + +struct zebra_router { + atomic_bool in_shutdown; + + /* Thread master */ + struct thread_master *master; + + /* Lists of clients who have connected to us */ + struct list *client_list; + + /* List of clients in GR */ + struct list *stale_client_list; + + struct zebra_router_table_head tables; + + /* L3-VNI hash table (for EVPN). Only in default instance */ + struct hash *l3vni_table; + + /* Tables and other global info maintained for EVPN multihoming */ + struct zebra_evpn_mh_info *mh_info; + + struct zebra_neigh_info *neigh_info; + + /* EVPN MH broadcast domains indexed by the VID */ + struct hash *evpn_vlan_table; + + struct hash *rules_hash; + + struct hash *ipset_hash; + + struct hash *ipset_entry_hash; + + struct hash *iptable_hash; + + /* A sequence number used for tracking routes */ + _Atomic uint32_t sequence_num; + + /* rib work queue */ +#define ZEBRA_RIB_PROCESS_HOLD_TIME 10 +#define ZEBRA_RIB_PROCESS_RETRY_TIME 1 + struct work_queue *ribq; + + /* Meta Queue Information */ + struct meta_queue *mq; + + /* LSP work queue */ + struct work_queue *lsp_process_q; + +#define ZEBRA_ZAPI_PACKETS_TO_PROCESS 1000 + _Atomic uint32_t packets_to_process; + + /* Mlag information for the router */ + struct zebra_mlag_info mlag_info; + + /* + * The EVPN instance, if any + */ + struct zebra_vrf *evpn_vrf; + + uint32_t multipath_num; + + /* RPF Lookup behavior */ + enum multicast_mode ipv4_multicast_mode; + + /* + * Time for when we sweep the rib from old routes + */ + time_t startup_time; + struct thread *sweeper; + + /* + * The hash of nexthop groups associated with this router + */ + struct hash *nhgs; + 
struct hash *nhgs_id; + + /* + * Does the underlying system provide an asic offload + */ + bool asic_offloaded; + bool notify_on_ack; + + bool supports_nhgs; + + bool all_mc_forwardingv4, default_mc_forwardingv4; + bool all_mc_forwardingv6, default_mc_forwardingv6; + bool all_linkdownv4, default_linkdownv4; + bool all_linkdownv6, default_linkdownv6; + +#define ZEBRA_DEFAULT_NHG_KEEP_TIMER 180 + uint32_t nhg_keep; + + /* Should we allow non FRR processes to delete our routes */ + bool allow_delete; +}; + +#define GRACEFUL_RESTART_TIME 60 + +extern struct zebra_router zrouter; +extern uint32_t rcvbufsize; + +extern void zebra_router_init(bool asic_offload, bool notify_on_ack); +extern void zebra_router_cleanup(void); +extern void zebra_router_terminate(void); + +extern struct zebra_router_table *zebra_router_find_zrt(struct zebra_vrf *zvrf, + uint32_t tableid, + afi_t afi, safi_t safi); +extern struct route_table *zebra_router_find_table(struct zebra_vrf *zvrf, + uint32_t tableid, afi_t afi, + safi_t safi); +extern struct route_table *zebra_router_get_table(struct zebra_vrf *zvrf, + uint32_t tableid, afi_t afi, + safi_t safi); +extern void zebra_router_release_table(struct zebra_vrf *zvrf, uint32_t tableid, + afi_t afi, safi_t safi); + +extern int zebra_router_config_write(struct vty *vty); + +extern void zebra_router_sweep_route(void); +extern void zebra_router_sweep_nhgs(void); + +extern void zebra_router_show_table_summary(struct vty *vty); + +extern uint32_t zebra_router_get_next_sequence(void); + +static inline vrf_id_t zebra_vrf_get_evpn_id(void) +{ + return zrouter.evpn_vrf ? zvrf_id(zrouter.evpn_vrf) : VRF_DEFAULT; +} +static inline struct zebra_vrf *zebra_vrf_get_evpn(void) +{ + return zrouter.evpn_vrf ? 
zrouter.evpn_vrf + : zebra_vrf_lookup_by_id(VRF_DEFAULT); +} + +extern void multicast_mode_ipv4_set(enum multicast_mode mode); + +extern enum multicast_mode multicast_mode_ipv4_get(void); + +extern bool zebra_router_notify_on_ack(void); + +static inline void zebra_router_set_supports_nhgs(bool support) +{ + zrouter.supports_nhgs = support; +} + +static inline bool zebra_router_in_shutdown(void) +{ + return atomic_load_explicit(&zrouter.in_shutdown, memory_order_relaxed); +} + +/* zebra_northbound.c */ +extern const struct frr_yang_module_info frr_zebra_info; + +#ifdef __cplusplus +} +#endif + +#endif diff --git a/zebra/zebra_script.c b/zebra/zebra_script.c new file mode 100644 index 0000000..e9e8857 --- /dev/null +++ b/zebra/zebra_script.c @@ -0,0 +1,430 @@ +/* + * frrscript encoders and decoders for data structures in Zebra + * Copyright (C) 2021 Donald Lee + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License as published by the Free + * Software Foundation; either version 2 of the License, or (at your option) + * any later version. + * + * This program is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for + * more details. 
+ * + * You should have received a copy of the GNU General Public License along + * with this program; see the file COPYING; if not, write to the Free Software + * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA + */ + +#include "zebra.h" + +#include "zebra_script.h" + +#ifdef HAVE_SCRIPTING + +void zebra_script_init(void) +{ + frrscript_names_add_function_name(ZEBRA_ON_RIB_PROCESS_HOOK_CALL); +} + +void zebra_script_destroy(void) +{ + frrscript_names_destroy(); +} + +void lua_pushnh_grp(lua_State *L, const struct nh_grp *nh_grp) +{ + lua_newtable(L); + lua_pushinteger(L, nh_grp->id); + lua_setfield(L, -2, "id"); + lua_pushinteger(L, nh_grp->weight); + lua_setfield(L, -2, "weight"); +} + +void lua_pushzebra_dplane_ctx(lua_State *L, const struct zebra_dplane_ctx *ctx) +{ + + lua_newtable(L); + lua_pushinteger(L, dplane_ctx_get_op(ctx)); + lua_setfield(L, -2, "zd_op"); + lua_pushinteger(L, dplane_ctx_get_status(ctx)); + lua_setfield(L, -2, "zd_status"); + lua_pushinteger(L, dplane_ctx_get_provider(ctx)); + lua_setfield(L, -2, "zd_provider"); + lua_pushinteger(L, dplane_ctx_get_vrf(ctx)); + lua_setfield(L, -2, "zd_vrf_id"); + lua_pushinteger(L, dplane_ctx_get_table(ctx)); + lua_setfield(L, -2, "zd_table_id"); + lua_pushstring(L, dplane_ctx_get_ifname(ctx)); + lua_setfield(L, -2, "zd_ifname"); + lua_pushinteger(L, dplane_ctx_get_ifindex(ctx)); + lua_setfield(L, -2, "zd_ifindex"); + + switch (dplane_ctx_get_op(ctx)) { + case DPLANE_OP_ROUTE_INSTALL: + case DPLANE_OP_ROUTE_UPDATE: + case DPLANE_OP_ROUTE_DELETE: + case DPLANE_OP_ROUTE_NOTIFY: + case DPLANE_OP_NH_INSTALL: + case DPLANE_OP_NH_UPDATE: + case DPLANE_OP_NH_DELETE: + /* rinfo */ + lua_newtable(L); + { + lua_pushprefix(L, dplane_ctx_get_dest(ctx)); + lua_setfield(L, -2, "zd_dest"); + const struct prefix *src_pfx = dplane_ctx_get_src(ctx); + + if (src_pfx) { + lua_pushprefix(L, src_pfx); + lua_setfield(L, -2, "zd_src"); + } + lua_pushinteger(L, dplane_ctx_get_afi(ctx)); + lua_setfield(L, 
-2, "zd_afi"); + lua_pushinteger(L, dplane_ctx_get_safi(ctx)); + lua_setfield(L, -2, "zd_safi"); + lua_pushinteger(L, dplane_ctx_get_type(ctx)); + lua_setfield(L, -2, "zd_type"); + lua_pushinteger(L, dplane_ctx_get_old_type(ctx)); + lua_setfield(L, -2, "zd_old_type"); + lua_pushinteger(L, dplane_ctx_get_tag(ctx)); + lua_setfield(L, -2, "zd_tag"); + lua_pushinteger(L, dplane_ctx_get_old_tag(ctx)); + lua_setfield(L, -2, "zd_old_tag"); + lua_pushinteger(L, dplane_ctx_get_metric(ctx)); + lua_setfield(L, -2, "zd_metric"); + lua_pushinteger(L, dplane_ctx_get_old_metric(ctx)); + lua_setfield(L, -2, "zd_old_metric"); + lua_pushinteger(L, dplane_ctx_get_instance(ctx)); + lua_setfield(L, -2, "zd_instance"); + lua_pushinteger(L, dplane_ctx_get_old_instance(ctx)); + lua_setfield(L, -2, "zd_old_instance"); + lua_pushinteger(L, dplane_ctx_get_distance(ctx)); + lua_setfield(L, -2, "zd_distance"); + lua_pushinteger(L, dplane_ctx_get_old_distance(ctx)); + lua_setfield(L, -2, "zd_old_distance"); + lua_pushinteger(L, dplane_ctx_get_mtu(ctx)); + lua_setfield(L, -2, "zd_mtu"); + lua_pushinteger(L, dplane_ctx_get_nh_mtu(ctx)); + lua_setfield(L, -2, "zd_nexthop_mtu"); + /* nhe */ + lua_newtable(L); + { + lua_pushinteger(L, dplane_ctx_get_nhe_id(ctx)); + lua_setfield(L, -2, "id"); + lua_pushinteger(L, + dplane_ctx_get_old_nhe_id(ctx)); + lua_setfield(L, -2, "old_id"); + lua_pushinteger(L, dplane_ctx_get_nhe_afi(ctx)); + lua_setfield(L, -2, "afi"); + lua_pushinteger(L, + dplane_ctx_get_nhe_vrf_id(ctx)); + lua_setfield(L, -2, "vrf_id"); + lua_pushinteger(L, + dplane_ctx_get_nhe_type(ctx)); + lua_setfield(L, -2, "type"); + lua_pushnexthop_group( + L, dplane_ctx_get_nhe_ng(ctx)); + lua_setfield(L, -2, "ng"); + lua_pushnh_grp(L, + dplane_ctx_get_nhe_nh_grp(ctx)); + lua_setfield(L, -2, "nh_grp"); + lua_pushinteger( + L, + dplane_ctx_get_nhe_nh_grp_count(ctx)); + lua_setfield(L, -2, "nh_grp_count"); + } + lua_setfield(L, -2, "nhe"); + lua_pushinteger(L, dplane_ctx_get_nhg_id(ctx)); + 
lua_setfield(L, -2, "zd_nhg_id"); + lua_pushnexthop_group(L, dplane_ctx_get_ng(ctx)); + lua_setfield(L, -2, "zd_ng"); + lua_pushnexthop_group(L, dplane_ctx_get_backup_ng(ctx)); + lua_setfield(L, -2, "backup_ng"); + lua_pushnexthop_group(L, dplane_ctx_get_old_ng(ctx)); + lua_setfield(L, -2, "zd_old_ng"); + lua_pushnexthop_group( + L, dplane_ctx_get_old_backup_ng(ctx)); + lua_setfield(L, -2, "old_backup_ng"); + } + lua_setfield(L, -2, "rinfo"); + break; + case DPLANE_OP_LSP_INSTALL: + case DPLANE_OP_LSP_UPDATE: + case DPLANE_OP_LSP_DELETE: + case DPLANE_OP_LSP_NOTIFY: + lua_pushinteger(L, (int)dplane_ctx_get_in_label(ctx)); + lua_setfield(L, -2, "label"); + break; + case DPLANE_OP_PW_INSTALL: + case DPLANE_OP_PW_UNINSTALL: + /* pw*/ + lua_newtable(L); + { + lua_pushinteger(L, dplane_ctx_get_pw_type(ctx)); + lua_setfield(L, -2, "type"); + lua_pushinteger(L, dplane_ctx_get_pw_af(ctx)); + lua_setfield(L, -2, "af"); + lua_pushinteger(L, dplane_ctx_get_pw_status(ctx)); + lua_setfield(L, -2, "status"); + lua_pushinteger(L, dplane_ctx_get_pw_flags(ctx)); + lua_setfield(L, -2, "flags"); + lua_pushinteger(L, dplane_ctx_get_pw_local_label(ctx)); + lua_setfield(L, -2, "local_label"); + lua_pushinteger(L, dplane_ctx_get_pw_remote_label(ctx)); + lua_setfield(L, -2, "remote_label"); + } + lua_setfield(L, -2, "pw"); + break; + case DPLANE_OP_SYS_ROUTE_ADD: + case DPLANE_OP_SYS_ROUTE_DELETE: + /* nothing to encode */ + break; + case DPLANE_OP_MAC_INSTALL: + case DPLANE_OP_MAC_DELETE: + /* macinfo */ + lua_newtable(L); + { + lua_pushinteger(L, dplane_ctx_mac_get_vlan(ctx)); + lua_setfield(L, -2, "vid"); + lua_pushinteger(L, dplane_ctx_mac_get_br_ifindex(ctx)); + lua_setfield(L, -2, "br_ifindex"); + lua_pushethaddr(L, dplane_ctx_mac_get_addr(ctx)); + lua_setfield(L, -2, "mac"); + lua_pushinaddr(L, dplane_ctx_mac_get_vtep_ip(ctx)); + lua_setfield(L, -2, "vtep_ip"); + lua_pushinteger(L, dplane_ctx_mac_is_sticky(ctx)); + lua_setfield(L, -2, "is_sticky"); + lua_pushinteger(L, 
dplane_ctx_mac_get_nhg_id(ctx)); + lua_setfield(L, -2, "nhg_id"); + lua_pushinteger(L, + dplane_ctx_mac_get_update_flags(ctx)); + lua_setfield(L, -2, "update_flags"); + } + lua_setfield(L, -2, "macinfo"); + break; + case DPLANE_OP_RULE_ADD: + case DPLANE_OP_RULE_DELETE: + case DPLANE_OP_RULE_UPDATE: + /* rule */ + lua_newtable(L); + { + lua_pushinteger(L, dplane_ctx_rule_get_sock(ctx)); + lua_setfield(L, -2, "sock"); + lua_pushinteger(L, dplane_ctx_rule_get_unique(ctx)); + lua_setfield(L, -2, "unique"); + lua_pushinteger(L, dplane_ctx_rule_get_seq(ctx)); + lua_setfield(L, -2, "seq"); + lua_pushstring(L, dplane_ctx_rule_get_ifname(ctx)); + lua_setfield(L, -2, "ifname"); + lua_pushinteger(L, dplane_ctx_rule_get_priority(ctx)); + lua_setfield(L, -2, "priority"); + lua_pushinteger(L, + dplane_ctx_rule_get_old_priority(ctx)); + lua_setfield(L, -2, "old_priority"); + lua_pushinteger(L, dplane_ctx_rule_get_table(ctx)); + lua_setfield(L, -2, "table"); + lua_pushinteger(L, dplane_ctx_rule_get_old_table(ctx)); + lua_setfield(L, -2, "old_table"); + lua_pushinteger(L, dplane_ctx_rule_get_filter_bm(ctx)); + lua_setfield(L, -2, "filter_bm"); + lua_pushinteger(L, + dplane_ctx_rule_get_old_filter_bm(ctx)); + lua_setfield(L, -2, "old_filter_bm"); + lua_pushinteger(L, dplane_ctx_rule_get_fwmark(ctx)); + lua_setfield(L, -2, "fwmark"); + lua_pushinteger(L, dplane_ctx_rule_get_old_fwmark(ctx)); + lua_setfield(L, -2, "old_fwmark"); + lua_pushinteger(L, dplane_ctx_rule_get_dsfield(ctx)); + lua_setfield(L, -2, "dsfield"); + lua_pushinteger(L, + dplane_ctx_rule_get_old_dsfield(ctx)); + lua_setfield(L, -2, "old_dsfield"); + lua_pushinteger(L, dplane_ctx_rule_get_ipproto(ctx)); + lua_setfield(L, -2, "ip_proto"); + lua_pushinteger(L, + dplane_ctx_rule_get_old_ipproto(ctx)); + lua_setfield(L, -2, "old_ip_proto"); + lua_pushprefix(L, dplane_ctx_rule_get_src_ip(ctx)); + lua_setfield(L, -2, "src_ip"); + lua_pushprefix(L, dplane_ctx_rule_get_old_src_ip(ctx)); + lua_setfield(L, -2, "old_src_ip"); + 
lua_pushprefix(L, dplane_ctx_rule_get_dst_ip(ctx)); + lua_setfield(L, -2, "dst_ip"); + lua_pushprefix(L, dplane_ctx_rule_get_old_dst_ip(ctx)); + lua_setfield(L, -2, "old_dst_ip"); + } + lua_setfield(L, -2, "rule"); + break; + case DPLANE_OP_IPTABLE_ADD: + case DPLANE_OP_IPTABLE_DELETE: { + struct zebra_pbr_iptable iptable; + + dplane_ctx_get_pbr_iptable(ctx, &iptable); + /* iptable */ + lua_newtable(L); + { + lua_pushinteger(L, iptable.sock); + lua_setfield(L, -2, "sock"); + lua_pushinteger(L, iptable.vrf_id); + lua_setfield(L, -2, "vrf_id"); + lua_pushinteger(L, iptable.unique); + lua_setfield(L, -2, "unique"); + lua_pushinteger(L, iptable.type); + lua_setfield(L, -2, "type"); + lua_pushinteger(L, iptable.filter_bm); + lua_setfield(L, -2, "filter_bm"); + lua_pushinteger(L, iptable.fwmark); + lua_setfield(L, -2, "fwmark"); + lua_pushinteger(L, iptable.action); + lua_setfield(L, -2, "action"); + lua_pushinteger(L, iptable.pkt_len_min); + lua_setfield(L, -2, "pkt_len_min"); + lua_pushinteger(L, iptable.pkt_len_max); + lua_setfield(L, -2, "pkt_len_max"); + lua_pushinteger(L, iptable.tcp_flags); + lua_setfield(L, -2, "tcp_flags"); + lua_pushinteger(L, iptable.dscp_value); + lua_setfield(L, -2, "dscp_value"); + lua_pushinteger(L, iptable.fragment); + lua_setfield(L, -2, "fragment"); + lua_pushinteger(L, iptable.protocol); + lua_setfield(L, -2, "protocol"); + lua_pushinteger(L, iptable.nb_interface); + lua_setfield(L, -2, "nb_interface"); + lua_pushinteger(L, iptable.flow_label); + lua_setfield(L, -2, "flow_label"); + lua_pushinteger(L, iptable.family); + lua_setfield(L, -2, "family"); + lua_pushstring(L, iptable.ipset_name); + lua_setfield(L, -2, "ipset_name"); + } + lua_setfield(L, -2, "iptable"); + break; + } + case DPLANE_OP_IPSET_ADD: + case DPLANE_OP_IPSET_DELETE: + case DPLANE_OP_IPSET_ENTRY_ADD: + case DPLANE_OP_IPSET_ENTRY_DELETE: { + struct zebra_pbr_ipset ipset; + + dplane_ctx_get_pbr_ipset(ctx, &ipset); + /* ipset */ + lua_newtable(L); + { + 
lua_pushinteger(L, ipset.sock); + lua_setfield(L, -2, "sock"); + lua_pushinteger(L, ipset.vrf_id); + lua_setfield(L, -2, "vrf_id"); + lua_pushinteger(L, ipset.unique); + lua_setfield(L, -2, "unique"); + lua_pushinteger(L, ipset.type); + lua_setfield(L, -2, "type"); + lua_pushinteger(L, ipset.family); + lua_setfield(L, -2, "family"); + lua_pushstring(L, ipset.ipset_name); + lua_setfield(L, -2, "ipset_name"); + } + lua_setfield(L, -2, "ipset"); + break; + } + case DPLANE_OP_NEIGH_INSTALL: + case DPLANE_OP_NEIGH_UPDATE: + case DPLANE_OP_NEIGH_DELETE: + case DPLANE_OP_NEIGH_DISCOVER: + case DPLANE_OP_NEIGH_IP_INSTALL: + case DPLANE_OP_NEIGH_IP_DELETE: + /* neigh */ + lua_newtable(L); + { + lua_pushipaddr(L, dplane_ctx_neigh_get_ipaddr(ctx)); + lua_setfield(L, -2, "ip_addr"); + /* link */ + lua_newtable(L); + { + lua_pushethaddr(L, + dplane_ctx_neigh_get_mac(ctx)); + lua_setfield(L, -2, "mac"); + lua_pushipaddr( + L, dplane_ctx_neigh_get_link_ip(ctx)); + lua_setfield(L, -2, "ip_addr"); + } + lua_setfield(L, -2, "link"); + lua_pushinteger(L, dplane_ctx_neigh_get_flags(ctx)); + lua_setfield(L, -2, "flags"); + lua_pushinteger(L, dplane_ctx_neigh_get_state(ctx)); + lua_setfield(L, -2, "state"); + lua_pushinteger(L, + dplane_ctx_neigh_get_update_flags(ctx)); + lua_setfield(L, -2, "update_flags"); + } + lua_setfield(L, -2, "neigh"); + break; + case DPLANE_OP_VTEP_ADD: + case DPLANE_OP_VTEP_DELETE: + break; + case DPLANE_OP_BR_PORT_UPDATE: + /* br_port */ + lua_newtable(L); + { + lua_pushinteger( + L, dplane_ctx_get_br_port_sph_filter_cnt(ctx)); + lua_setfield(L, -2, "sph_filter_cnt"); + lua_pushinteger(L, dplane_ctx_get_br_port_flags(ctx)); + lua_setfield(L, -2, "flags"); + lua_pushinteger( + L, dplane_ctx_get_br_port_backup_nhg_id(ctx)); + lua_setfield(L, -2, "backup_nhg_id"); + } + lua_setfield(L, -2, "br_port"); + break; + case DPLANE_OP_NEIGH_TABLE_UPDATE: + /* neightable */ + lua_newtable(L); + { + lua_pushinteger(L, + dplane_ctx_neightable_get_family(ctx)); + 
lua_setfield(L, -2, "family"); + lua_pushinteger( + L, dplane_ctx_neightable_get_app_probes(ctx)); + lua_setfield(L, -2, "app_probes"); + lua_pushinteger( + L, dplane_ctx_neightable_get_mcast_probes(ctx)); + lua_setfield(L, -2, "ucast_probes"); + lua_pushinteger( + L, dplane_ctx_neightable_get_ucast_probes(ctx)); + lua_setfield(L, -2, "mcast_probes"); + } + lua_setfield(L, -2, "neightable"); + break; + case DPLANE_OP_GRE_SET: + /* gre */ + lua_newtable(L); + { + lua_pushinteger(L, + dplane_ctx_gre_get_link_ifindex(ctx)); + lua_setfield(L, -2, "link_ifindex"); + lua_pushinteger(L, dplane_ctx_gre_get_mtu(ctx)); + lua_setfield(L, -2, "mtu"); + } + lua_setfield(L, -2, "gre"); + + case DPLANE_OP_ADDR_INSTALL: + case DPLANE_OP_ADDR_UNINSTALL: + case DPLANE_OP_INTF_ADDR_ADD: + case DPLANE_OP_INTF_ADDR_DEL: + case DPLANE_OP_INTF_INSTALL: + case DPLANE_OP_INTF_UPDATE: + case DPLANE_OP_INTF_DELETE: + /* Not currently handled */ + case DPLANE_OP_INTF_NETCONFIG: /*NYI*/ + case DPLANE_OP_NONE: + case DPLANE_OP_TC_INSTALL: + case DPLANE_OP_TC_UPDATE: + case DPLANE_OP_TC_DELETE: + break; + } /* Dispatch by op code */ +} + +#endif /* HAVE_SCRIPTING */ diff --git a/zebra/zebra_script.h b/zebra/zebra_script.h new file mode 100644 index 0000000..5eb5975 --- /dev/null +++ b/zebra/zebra_script.h @@ -0,0 +1,41 @@ +/* + * frrscript encoders and decoders for data structures in Zebra + * Copyright (C) 2021 Donald Lee + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License as published by the Free + * Software Foundation; either version 2 of the License, or (at your option) + * any later version. + * + * This program is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for + * more details. 
+ * + * You should have received a copy of the GNU General Public License along + * with this program; see the file COPYING; if not, write to the Free Software + * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA + */ + +#ifndef _ZEBRA_SCRIPT_H +#define _ZEBRA_SCRIPT_H + +#include "zebra.h" +#include "zebra/zebra_dplane.h" +#include "zebra/zebra_pbr.h" + +#ifdef HAVE_SCRIPTING + +#include "frrlua.h" + +void zebra_script_init(void); + +void zebra_script_destroy(void); + +void lua_pushnh_grp(lua_State *L, const struct nh_grp *nh_grp); + +void lua_pushzebra_dplane_ctx(lua_State *L, const struct zebra_dplane_ctx *ctx); + +#endif /* HAVE_SCRIPTING */ + +#endif /* _ZEBRA_SCRIPT_H */ diff --git a/zebra/zebra_snmp.c b/zebra/zebra_snmp.c new file mode 100644 index 0000000..6fe24df --- /dev/null +++ b/zebra/zebra_snmp.c @@ -0,0 +1,570 @@ +/* FIB SNMP. + * Copyright (C) 1999 Kunihiro Ishiguro + * + * This file is part of GNU Zebra. + * + * GNU Zebra is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License as published by the + * Free Software Foundation; either version 2, or (at your option) any + * later version. + * + * GNU Zebra is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. + * + * You should have received a copy of the GNU General Public License along + * with this program; see the file COPYING; if not, write to the Free Software + * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA + */ + +/* + * Currently SNMP is only running properly for MIBs in the default VRF. 
+ */ + +#include <zebra.h> + +#include <net-snmp/net-snmp-config.h> +#include <net-snmp/net-snmp-includes.h> + +#include "if.h" +#include "log.h" +#include "prefix.h" +#include "command.h" +#include "smux.h" +#include "table.h" +#include "vrf.h" +#include "hook.h" +#include "libfrr.h" +#include "lib/version.h" + +#include "zebra/rib.h" +#include "zebra/zserv.h" +#include "zebra/zebra_vrf.h" + +#define IPFWMIB 1,3,6,1,2,1,4,24 + +/* ipForwardTable */ +#define IPFORWARDDEST 1 +#define IPFORWARDMASK 2 +#define IPFORWARDPOLICY 3 +#define IPFORWARDNEXTHOP 4 +#define IPFORWARDIFINDEX 5 +#define IPFORWARDTYPE 6 +#define IPFORWARDPROTO 7 +#define IPFORWARDAGE 8 +#define IPFORWARDINFO 9 +#define IPFORWARDNEXTHOPAS 10 +#define IPFORWARDMETRIC1 11 +#define IPFORWARDMETRIC2 12 +#define IPFORWARDMETRIC3 13 +#define IPFORWARDMETRIC4 14 +#define IPFORWARDMETRIC5 15 + +/* ipCidrRouteTable */ +#define IPCIDRROUTEDEST 1 +#define IPCIDRROUTEMASK 2 +#define IPCIDRROUTETOS 3 +#define IPCIDRROUTENEXTHOP 4 +#define IPCIDRROUTEIFINDEX 5 +#define IPCIDRROUTETYPE 6 +#define IPCIDRROUTEPROTO 7 +#define IPCIDRROUTEAGE 8 +#define IPCIDRROUTEINFO 9 +#define IPCIDRROUTENEXTHOPAS 10 +#define IPCIDRROUTEMETRIC1 11 +#define IPCIDRROUTEMETRIC2 12 +#define IPCIDRROUTEMETRIC3 13 +#define IPCIDRROUTEMETRIC4 14 +#define IPCIDRROUTEMETRIC5 15 +#define IPCIDRROUTESTATUS 16 + +#define INTEGER32 ASN_INTEGER +#define GAUGE32 ASN_GAUGE +#define ENUMERATION ASN_INTEGER +#define ROWSTATUS ASN_INTEGER +#define IPADDRESS ASN_IPADDRESS +#define OBJECTIDENTIFIER ASN_OBJECT_ID + +static oid ipfw_oid[] = {IPFWMIB}; + +/* Hook functions. 
*/ +static uint8_t *ipFwNumber(struct variable *, oid[], size_t *, int, size_t *, + WriteMethod **); +static uint8_t *ipFwTable(struct variable *, oid[], size_t *, int, size_t *, + WriteMethod **); +static uint8_t *ipCidrNumber(struct variable *, oid[], size_t *, int, size_t *, + WriteMethod **); +static uint8_t *ipCidrTable(struct variable *, oid[], size_t *, int, size_t *, + WriteMethod **); + +static struct variable zebra_variables[] = { + {0, GAUGE32, RONLY, ipFwNumber, 1, {1}}, + {IPFORWARDDEST, IPADDRESS, RONLY, ipFwTable, 3, {2, 1, 1}}, + {IPFORWARDMASK, IPADDRESS, RONLY, ipFwTable, 3, {2, 1, 2}}, + {IPFORWARDPOLICY, INTEGER32, RONLY, ipFwTable, 3, {2, 1, 3}}, + {IPFORWARDNEXTHOP, IPADDRESS, RONLY, ipFwTable, 3, {2, 1, 4}}, + {IPFORWARDIFINDEX, INTEGER32, RONLY, ipFwTable, 3, {2, 1, 5}}, + {IPFORWARDTYPE, ENUMERATION, RONLY, ipFwTable, 3, {2, 1, 6}}, + {IPFORWARDPROTO, ENUMERATION, RONLY, ipFwTable, 3, {2, 1, 7}}, + {IPFORWARDAGE, INTEGER32, RONLY, ipFwTable, 3, {2, 1, 8}}, + {IPFORWARDINFO, OBJECTIDENTIFIER, RONLY, ipFwTable, 3, {2, 1, 9}}, + {IPFORWARDNEXTHOPAS, INTEGER32, RONLY, ipFwTable, 3, {2, 1, 10}}, + {IPFORWARDMETRIC1, INTEGER32, RONLY, ipFwTable, 3, {2, 1, 11}}, + {IPFORWARDMETRIC2, INTEGER32, RONLY, ipFwTable, 3, {2, 1, 12}}, + {IPFORWARDMETRIC3, INTEGER32, RONLY, ipFwTable, 3, {2, 1, 13}}, + {IPFORWARDMETRIC4, INTEGER32, RONLY, ipFwTable, 3, {2, 1, 14}}, + {IPFORWARDMETRIC5, INTEGER32, RONLY, ipFwTable, 3, {2, 1, 15}}, + {0, GAUGE32, RONLY, ipCidrNumber, 1, {3}}, + {IPCIDRROUTEDEST, IPADDRESS, RONLY, ipCidrTable, 3, {4, 1, 1}}, + {IPCIDRROUTEMASK, IPADDRESS, RONLY, ipCidrTable, 3, {4, 1, 2}}, + {IPCIDRROUTETOS, INTEGER32, RONLY, ipCidrTable, 3, {4, 1, 3}}, + {IPCIDRROUTENEXTHOP, IPADDRESS, RONLY, ipCidrTable, 3, {4, 1, 4}}, + {IPCIDRROUTEIFINDEX, INTEGER32, RONLY, ipCidrTable, 3, {4, 1, 5}}, + {IPCIDRROUTETYPE, ENUMERATION, RONLY, ipCidrTable, 3, {4, 1, 6}}, + {IPCIDRROUTEPROTO, ENUMERATION, RONLY, ipCidrTable, 3, {4, 1, 7}}, + {IPCIDRROUTEAGE, 
INTEGER32, RONLY, ipCidrTable, 3, {4, 1, 8}}, + {IPCIDRROUTEINFO, OBJECTIDENTIFIER, RONLY, ipCidrTable, 3, {4, 1, 9}}, + {IPCIDRROUTENEXTHOPAS, INTEGER32, RONLY, ipCidrTable, 3, {4, 1, 10}}, + {IPCIDRROUTEMETRIC1, INTEGER32, RONLY, ipCidrTable, 3, {4, 1, 11}}, + {IPCIDRROUTEMETRIC2, INTEGER32, RONLY, ipCidrTable, 3, {4, 1, 12}}, + {IPCIDRROUTEMETRIC3, INTEGER32, RONLY, ipCidrTable, 3, {4, 1, 13}}, + {IPCIDRROUTEMETRIC4, INTEGER32, RONLY, ipCidrTable, 3, {4, 1, 14}}, + {IPCIDRROUTEMETRIC5, INTEGER32, RONLY, ipCidrTable, 3, {4, 1, 15}}, + {IPCIDRROUTESTATUS, ROWSTATUS, RONLY, ipCidrTable, 3, {4, 1, 16}}}; + + +static uint8_t *ipFwNumber(struct variable *v, oid objid[], size_t *objid_len, + int exact, size_t *val_len, + WriteMethod **write_method) +{ + static int result; + struct route_table *table; + struct route_node *rn; + struct route_entry *re; + + if (smux_header_generic(v, objid, objid_len, exact, val_len, + write_method) + == MATCH_FAILED) + return NULL; + + table = zebra_vrf_table(AFI_IP, SAFI_UNICAST, VRF_DEFAULT); + if (!table) + return NULL; + + /* Return number of routing entries. */ + result = 0; + for (rn = route_top(table); rn; rn = route_next(rn)) + RNODE_FOREACH_RE (rn, re) { + result++; + } + + return (uint8_t *)&result; +} + +static uint8_t *ipCidrNumber(struct variable *v, oid objid[], size_t *objid_len, + int exact, size_t *val_len, + WriteMethod **write_method) +{ + static int result; + struct route_table *table; + struct route_node *rn; + struct route_entry *re; + + if (smux_header_generic(v, objid, objid_len, exact, val_len, + write_method) + == MATCH_FAILED) + return NULL; + + table = zebra_vrf_table(AFI_IP, SAFI_UNICAST, VRF_DEFAULT); + if (!table) + return 0; + + /* Return number of routing entries. 
*/ + result = 0; + for (rn = route_top(table); rn; rn = route_next(rn)) + RNODE_FOREACH_RE (rn, re) { + result++; + } + + return (uint8_t *)&result; +} + +static int in_addr_cmp(uint8_t *p1, uint8_t *p2) +{ + int i; + + for (i = 0; i < 4; i++) { + if (*p1 < *p2) + return -1; + if (*p1 > *p2) + return 1; + p1++; + p2++; + } + return 0; +} + +static int in_addr_add(uint8_t *p, int num) +{ + int i, ip0; + + ip0 = *p; + p += 4; + for (i = 3; 0 <= i; i--) { + p--; + if (*p + num > 255) { + *p += num; + num = 1; + } else { + *p += num; + return 1; + } + } + if (ip0 > *p) { + /* ip + num > 0xffffffff */ + return 0; + } + + return 1; +} + +static int proto_trans(int type) +{ + switch (type) { + case ZEBRA_ROUTE_SYSTEM: + return 1; /* other */ + case ZEBRA_ROUTE_KERNEL: + return 1; /* other */ + case ZEBRA_ROUTE_CONNECT: + return 2; /* local interface */ + case ZEBRA_ROUTE_STATIC: + return 3; /* static route */ + case ZEBRA_ROUTE_RIP: + return 8; /* rip */ + case ZEBRA_ROUTE_RIPNG: + return 1; /* shouldn't happen */ + case ZEBRA_ROUTE_OSPF: + return 13; /* ospf */ + case ZEBRA_ROUTE_OSPF6: + return 1; /* shouldn't happen */ + case ZEBRA_ROUTE_BGP: + return 14; /* bgp */ + default: + return 1; /* other */ + } +} + +static void check_replace(struct route_node *np2, struct route_entry *re2, + struct route_node **np, struct route_entry **re) +{ + int proto, proto2; + + if (!*np) { + *np = np2; + *re = re2; + return; + } + + if (prefix_cmp(&(*np)->p, &np2->p) < 0) + return; + if (prefix_cmp(&(*np)->p, &np2->p) > 0) { + *np = np2; + *re = re2; + return; + } + + proto = proto_trans((*re)->type); + proto2 = proto_trans(re2->type); + + if (proto2 > proto) + return; + if (proto2 < proto) { + *np = np2; + *re = re2; + return; + } + + if (in_addr_cmp((uint8_t *)&(*re)->nhe->nhg.nexthop->gate.ipv4, + (uint8_t *)&re2->nhe->nhg.nexthop->gate.ipv4) + <= 0) + return; + + *np = np2; + *re = re2; + return; +} + +static void get_fwtable_route_node(struct variable *v, oid objid[], + size_t 
*objid_len, int exact, + struct route_node **np, + struct route_entry **re) +{ + struct in_addr dest; + struct route_table *table; + struct route_node *np2; + struct route_entry *re2; + int proto; + int policy; + struct in_addr nexthop; + uint8_t *pnt; + int i; + + /* Init index variables */ + + pnt = (uint8_t *)&dest; + for (i = 0; i < 4; i++) + *pnt++ = 0; + + pnt = (uint8_t *)&nexthop; + for (i = 0; i < 4; i++) + *pnt++ = 0; + + proto = 0; + policy = 0; + + /* Init return variables */ + + *np = NULL; + *re = NULL; + + /* Short circuit exact matches of wrong length */ + + if (exact && (*objid_len != (unsigned)v->namelen + 10)) + return; + + table = zebra_vrf_table(AFI_IP, SAFI_UNICAST, VRF_DEFAULT); + if (!table) + return; + + /* Get INDEX information out of OID. + * ipForwardDest, ipForwardProto, ipForwardPolicy, ipForwardNextHop + */ + + if (*objid_len > (unsigned)v->namelen) + oid2in_addr(objid + v->namelen, + MIN(4U, *objid_len - v->namelen), &dest); + + if (*objid_len > (unsigned)v->namelen + 4) + proto = objid[v->namelen + 4]; + + if (*objid_len > (unsigned)v->namelen + 5) + policy = objid[v->namelen + 5]; + + if (*objid_len > (unsigned)v->namelen + 6) + oid2in_addr(objid + v->namelen + 6, + MIN(4U, *objid_len - v->namelen - 6), &nexthop); + + /* Apply GETNEXT on not exact search */ + + if (!exact && (*objid_len >= (unsigned)v->namelen + 10)) { + if (!in_addr_add((uint8_t *)&nexthop, 1)) + return; + } + + /* For exact: search matching entry in rib table. */ + + if (exact) { + if (policy) /* Not supported (yet?) 
*/ + return; + for (*np = route_top(table); *np; *np = route_next(*np)) { + if (!in_addr_cmp(&(*np)->p.u.prefix, + (uint8_t *)&dest)) { + RNODE_FOREACH_RE (*np, *re) { + if (!in_addr_cmp((uint8_t *)&(*re)->nhe + ->nhg.nexthop + ->gate.ipv4, + (uint8_t *)&nexthop)) + if (proto + == proto_trans((*re)->type)) + return; + } + } + } + return; + } + + /* Search next best entry */ + + for (np2 = route_top(table); np2; np2 = route_next(np2)) { + + /* Check destination first */ + if (in_addr_cmp(&np2->p.u.prefix, (uint8_t *)&dest) > 0) + RNODE_FOREACH_RE (np2, re2) { + check_replace(np2, re2, np, re); + } + + if (in_addr_cmp(&np2->p.u.prefix, (uint8_t *)&dest) + == 0) { /* have to look at each re individually */ + RNODE_FOREACH_RE (np2, re2) { + int proto2, policy2; + + proto2 = proto_trans(re2->type); + policy2 = 0; + + if ((policy < policy2) + || ((policy == policy2) && (proto < proto2)) + || ((policy == policy2) && (proto == proto2) + && (in_addr_cmp( + (uint8_t *)&re2->nhe + ->nhg.nexthop->gate.ipv4, + (uint8_t *)&nexthop) + >= 0))) + check_replace(np2, re2, np, re); + } + } + } + + if (!*re) + return; + + policy = 0; + proto = proto_trans((*re)->type); + + *objid_len = v->namelen + 10; + pnt = (uint8_t *)&(*np)->p.u.prefix; + for (i = 0; i < 4; i++) + objid[v->namelen + i] = *pnt++; + + objid[v->namelen + 4] = proto; + objid[v->namelen + 5] = policy; + + { + struct nexthop *nexthop; + + nexthop = (*re)->nhe->nhg.nexthop; + if (nexthop) { + pnt = (uint8_t *)&nexthop->gate.ipv4; + for (i = 0; i < 4; i++) + objid[i + v->namelen + 6] = *pnt++; + } + } + + return; +} + +static uint8_t *ipFwTable(struct variable *v, oid objid[], size_t *objid_len, + int exact, size_t *val_len, + WriteMethod **write_method) +{ + struct route_node *np; + struct route_entry *re; + static int result; + static int resarr[2]; + static struct in_addr netmask; + struct nexthop *nexthop; + + if (smux_header_table(v, objid, objid_len, exact, val_len, write_method) + == MATCH_FAILED) + return NULL; + + 
get_fwtable_route_node(v, objid, objid_len, exact, &np, &re); + if (!np) + return NULL; + + nexthop = re->nhe->nhg.nexthop; + if (!nexthop) + return NULL; + + switch (v->magic) { + case IPFORWARDDEST: + *val_len = 4; + return &np->p.u.prefix; + case IPFORWARDMASK: + masklen2ip(np->p.prefixlen, &netmask); + *val_len = 4; + return (uint8_t *)&netmask; + case IPFORWARDPOLICY: + result = 0; + *val_len = sizeof(int); + return (uint8_t *)&result; + case IPFORWARDNEXTHOP: + *val_len = 4; + return (uint8_t *)&nexthop->gate.ipv4; + case IPFORWARDIFINDEX: + *val_len = sizeof(int); + return (uint8_t *)&nexthop->ifindex; + case IPFORWARDTYPE: + if (nexthop->type == NEXTHOP_TYPE_IFINDEX) + result = 3; + else + result = 4; + *val_len = sizeof(int); + return (uint8_t *)&result; + case IPFORWARDPROTO: + result = proto_trans(re->type); + *val_len = sizeof(int); + return (uint8_t *)&result; + case IPFORWARDAGE: + result = 0; + *val_len = sizeof(int); + return (uint8_t *)&result; + case IPFORWARDINFO: + resarr[0] = 0; + resarr[1] = 0; + *val_len = 2 * sizeof(int); + return (uint8_t *)resarr; + case IPFORWARDNEXTHOPAS: + result = -1; + *val_len = sizeof(int); + return (uint8_t *)&result; + case IPFORWARDMETRIC1: + result = 0; + *val_len = sizeof(int); + return (uint8_t *)&result; + case IPFORWARDMETRIC2: + result = 0; + *val_len = sizeof(int); + return (uint8_t *)&result; + case IPFORWARDMETRIC3: + result = 0; + *val_len = sizeof(int); + return (uint8_t *)&result; + case IPFORWARDMETRIC4: + result = 0; + *val_len = sizeof(int); + return (uint8_t *)&result; + case IPFORWARDMETRIC5: + result = 0; + *val_len = sizeof(int); + return (uint8_t *)&result; + default: + return NULL; + } + return NULL; +} + +static uint8_t *ipCidrTable(struct variable *v, oid objid[], size_t *objid_len, + int exact, size_t *val_len, + WriteMethod **write_method) +{ + if (smux_header_table(v, objid, objid_len, exact, val_len, write_method) + == MATCH_FAILED) + return NULL; + + switch (v->magic) { + case 
IPCIDRROUTEDEST: + break; + default: + return NULL; + } + return NULL; +} + +static int zebra_snmp_init(struct thread_master *tm) +{ + smux_init(tm); + REGISTER_MIB("mibII/ipforward", zebra_variables, variable, ipfw_oid); + return 0; +} + +static int zebra_snmp_module_init(void) +{ + hook_register(frr_late_init, zebra_snmp_init); + return 0; +} + +FRR_MODULE_SETUP(.name = "zebra_snmp", .version = FRR_VERSION, + .description = "zebra AgentX SNMP module", + .init = zebra_snmp_module_init, +); diff --git a/zebra/zebra_srte.c b/zebra/zebra_srte.c new file mode 100644 index 0000000..c0f18dd --- /dev/null +++ b/zebra/zebra_srte.c @@ -0,0 +1,389 @@ +/* Zebra SR-TE code + * Copyright (C) 2020 NetDEF, Inc. + * + * This file is part of GNU Zebra. + * + * GNU Zebra is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License as published by the + * Free Software Foundation; either version 2, or (at your option) any + * later version. + * + * GNU Zebra is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. + * + * You should have received a copy of the GNU General Public License along + * with this program; see the file COPYING; if not, write to the Free Software + * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA + */ + +#include <zebra.h> + +#include "lib/zclient.h" +#include "lib/lib_errors.h" + +#include "zebra/zebra_srte.h" +#include "zebra/zebra_mpls.h" +#include "zebra/zebra_rnh.h" +#include "zebra/zapi_msg.h" + +DEFINE_MTYPE_STATIC(ZEBRA, ZEBRA_SR_POLICY, "SR Policy"); + +static void zebra_sr_policy_deactivate(struct zebra_sr_policy *policy); + +/* Generate rb-tree of SR Policy instances. 
*/ +static inline int +zebra_sr_policy_instance_compare(const struct zebra_sr_policy *a, + const struct zebra_sr_policy *b) +{ + return sr_policy_compare(&a->endpoint, &b->endpoint, a->color, + b->color); +} +RB_GENERATE(zebra_sr_policy_instance_head, zebra_sr_policy, entry, + zebra_sr_policy_instance_compare) + +struct zebra_sr_policy_instance_head zebra_sr_policy_instances = + RB_INITIALIZER(&zebra_sr_policy_instances); + +struct zebra_sr_policy *zebra_sr_policy_add(uint32_t color, + struct ipaddr *endpoint, char *name) +{ + struct zebra_sr_policy *policy; + + policy = XCALLOC(MTYPE_ZEBRA_SR_POLICY, sizeof(*policy)); + policy->color = color; + policy->endpoint = *endpoint; + strlcpy(policy->name, name, sizeof(policy->name)); + policy->status = ZEBRA_SR_POLICY_DOWN; + RB_INSERT(zebra_sr_policy_instance_head, &zebra_sr_policy_instances, + policy); + + return policy; +} + +void zebra_sr_policy_del(struct zebra_sr_policy *policy) +{ + if (policy->status == ZEBRA_SR_POLICY_UP) + zebra_sr_policy_deactivate(policy); + RB_REMOVE(zebra_sr_policy_instance_head, &zebra_sr_policy_instances, + policy); + XFREE(MTYPE_ZEBRA_SR_POLICY, policy); +} + +struct zebra_sr_policy *zebra_sr_policy_find(uint32_t color, + struct ipaddr *endpoint) +{ + struct zebra_sr_policy policy = {}; + + policy.color = color; + policy.endpoint = *endpoint; + return RB_FIND(zebra_sr_policy_instance_head, + &zebra_sr_policy_instances, &policy); +} + +struct zebra_sr_policy *zebra_sr_policy_find_by_name(char *name) +{ + struct zebra_sr_policy *policy; + + // TODO: create index for policy names + RB_FOREACH (policy, zebra_sr_policy_instance_head, + &zebra_sr_policy_instances) { + if (strcmp(policy->name, name) == 0) + return policy; + } + + return NULL; +} + +static int zebra_sr_policy_notify_update_client(struct zebra_sr_policy *policy, + struct zserv *client) +{ + const struct zebra_nhlfe *nhlfe; + struct stream *s; + uint32_t message = 0; + unsigned long nump = 0; + uint8_t num; + struct zapi_nexthop 
znh; + int ret; + + /* Get output stream. */ + s = stream_new(ZEBRA_MAX_PACKET_SIZ); + + zclient_create_header(s, ZEBRA_NEXTHOP_UPDATE, zvrf_id(policy->zvrf)); + + /* Message flags. */ + SET_FLAG(message, ZAPI_MESSAGE_SRTE); + stream_putl(s, message); + + stream_putw(s, SAFI_UNICAST); + /* + * The prefix is copied twice because the ZEBRA_NEXTHOP_UPDATE + * code was modified to send back both the matched against + * as well as the actual matched. There does not appear to + * be an equivalent here so just send the same thing twice. + */ + switch (policy->endpoint.ipa_type) { + case IPADDR_V4: + stream_putw(s, AF_INET); + stream_putc(s, IPV4_MAX_BITLEN); + stream_put_in_addr(s, &policy->endpoint.ipaddr_v4); + stream_putw(s, AF_INET); + stream_putc(s, IPV4_MAX_BITLEN); + stream_put_in_addr(s, &policy->endpoint.ipaddr_v4); + break; + case IPADDR_V6: + stream_putw(s, AF_INET6); + stream_putc(s, IPV6_MAX_BITLEN); + stream_put(s, &policy->endpoint.ipaddr_v6, IPV6_MAX_BYTELEN); + stream_putw(s, AF_INET6); + stream_putc(s, IPV6_MAX_BITLEN); + stream_put(s, &policy->endpoint.ipaddr_v6, IPV6_MAX_BYTELEN); + break; + default: + flog_warn(EC_LIB_DEVELOPMENT, + "%s: unknown policy endpoint address family: %u", + __func__, policy->endpoint.ipa_type); + exit(1); + } + stream_putl(s, policy->color); + + num = 0; + frr_each (nhlfe_list_const, &policy->lsp->nhlfe_list, nhlfe) { + if (!CHECK_FLAG(nhlfe->flags, NHLFE_FLAG_SELECTED) + || CHECK_FLAG(nhlfe->flags, NHLFE_FLAG_DELETED)) + continue; + + if (num == 0) { + stream_putc(s, re_type_from_lsp_type(nhlfe->type)); + stream_putw(s, 0); /* instance - not available */ + stream_putc(s, nhlfe->distance); + stream_putl(s, 0); /* metric - not available */ + nump = stream_get_endp(s); + stream_putc(s, 0); + } + + zapi_nexthop_from_nexthop(&znh, nhlfe->nexthop); + ret = zapi_nexthop_encode(s, &znh, 0, message); + if (ret < 0) + goto failure; + + num++; + } + stream_putc_at(s, nump, num); + stream_putw_at(s, 0, stream_get_endp(s)); + + 
client->nh_last_upd_time = monotime(NULL); + return zserv_send_message(client, s); + +failure: + + stream_free(s); + return -1; +} + +static void zebra_sr_policy_notify_update(struct zebra_sr_policy *policy) +{ + struct rnh *rnh; + struct prefix p = {}; + struct zebra_vrf *zvrf; + struct listnode *node; + struct zserv *client; + + zvrf = policy->zvrf; + switch (policy->endpoint.ipa_type) { + case IPADDR_V4: + p.family = AF_INET; + p.prefixlen = IPV4_MAX_BITLEN; + p.u.prefix4 = policy->endpoint.ipaddr_v4; + break; + case IPADDR_V6: + p.family = AF_INET6; + p.prefixlen = IPV6_MAX_BITLEN; + p.u.prefix6 = policy->endpoint.ipaddr_v6; + break; + default: + flog_warn(EC_LIB_DEVELOPMENT, + "%s: unknown policy endpoint address family: %u", + __func__, policy->endpoint.ipa_type); + exit(1); + } + + rnh = zebra_lookup_rnh(&p, zvrf_id(zvrf), SAFI_UNICAST); + if (!rnh) + return; + + for (ALL_LIST_ELEMENTS_RO(rnh->client_list, node, client)) { + if (policy->status == ZEBRA_SR_POLICY_UP) + zebra_sr_policy_notify_update_client(policy, client); + else + /* Fallback to the IGP shortest path. 
*/ + zebra_send_rnh_update(rnh, client, zvrf_id(zvrf), + policy->color); + } +} + +static void zebra_sr_policy_activate(struct zebra_sr_policy *policy, + struct zebra_lsp *lsp) +{ + policy->status = ZEBRA_SR_POLICY_UP; + policy->lsp = lsp; + (void)zebra_sr_policy_bsid_install(policy); + zsend_sr_policy_notify_status(policy->color, &policy->endpoint, + policy->name, ZEBRA_SR_POLICY_UP); + zebra_sr_policy_notify_update(policy); +} + +static void zebra_sr_policy_update(struct zebra_sr_policy *policy, + struct zebra_lsp *lsp, + struct zapi_srte_tunnel *old_tunnel) +{ + bool bsid_changed; + bool segment_list_changed; + + policy->lsp = lsp; + + bsid_changed = + policy->segment_list.local_label != old_tunnel->local_label; + segment_list_changed = + policy->segment_list.label_num != old_tunnel->label_num + || memcmp(policy->segment_list.labels, old_tunnel->labels, + sizeof(mpls_label_t) + * policy->segment_list.label_num); + + /* Re-install label stack if necessary. */ + if (bsid_changed || segment_list_changed) { + zebra_sr_policy_bsid_uninstall(policy, old_tunnel->local_label); + (void)zebra_sr_policy_bsid_install(policy); + } + + zsend_sr_policy_notify_status(policy->color, &policy->endpoint, + policy->name, ZEBRA_SR_POLICY_UP); + + /* Handle segment-list update. 
*/ + if (segment_list_changed) + zebra_sr_policy_notify_update(policy); +} + +static void zebra_sr_policy_deactivate(struct zebra_sr_policy *policy) +{ + policy->status = ZEBRA_SR_POLICY_DOWN; + policy->lsp = NULL; + zebra_sr_policy_bsid_uninstall(policy, + policy->segment_list.local_label); + zsend_sr_policy_notify_status(policy->color, &policy->endpoint, + policy->name, ZEBRA_SR_POLICY_DOWN); + zebra_sr_policy_notify_update(policy); +} + +int zebra_sr_policy_validate(struct zebra_sr_policy *policy, + struct zapi_srte_tunnel *new_tunnel) +{ + struct zapi_srte_tunnel old_tunnel = policy->segment_list; + struct zebra_lsp *lsp; + + if (new_tunnel) + policy->segment_list = *new_tunnel; + + /* Try to resolve the Binding-SID nexthops. */ + lsp = mpls_lsp_find(policy->zvrf, policy->segment_list.labels[0]); + if (!lsp || !lsp->best_nhlfe + || lsp->addr_family != ipaddr_family(&policy->endpoint)) { + if (policy->status == ZEBRA_SR_POLICY_UP) + zebra_sr_policy_deactivate(policy); + return -1; + } + + /* First label was resolved successfully. */ + if (policy->status == ZEBRA_SR_POLICY_DOWN) + zebra_sr_policy_activate(policy, lsp); + else + zebra_sr_policy_update(policy, lsp, &old_tunnel); + + return 0; +} + +int zebra_sr_policy_bsid_install(struct zebra_sr_policy *policy) +{ + struct zapi_srte_tunnel *zt = &policy->segment_list; + struct zebra_nhlfe *nhlfe; + + if (zt->local_label == MPLS_LABEL_NONE) + return 0; + + frr_each_safe (nhlfe_list, &policy->lsp->nhlfe_list, nhlfe) { + uint8_t num_out_labels; + mpls_label_t *out_labels; + mpls_label_t null_label = MPLS_LABEL_IMPLICIT_NULL; + + if (!CHECK_FLAG(nhlfe->flags, NHLFE_FLAG_SELECTED) + || CHECK_FLAG(nhlfe->flags, NHLFE_FLAG_DELETED)) + continue; + + /* + * Don't push the first SID if the corresponding action in the + * LFIB is POP. 
+ */ + if (!nhlfe->nexthop->nh_label + || !nhlfe->nexthop->nh_label->num_labels + || nhlfe->nexthop->nh_label->label[0] + == MPLS_LABEL_IMPLICIT_NULL) { + if (zt->label_num > 1) { + num_out_labels = zt->label_num - 1; + out_labels = &zt->labels[1]; + } else { + num_out_labels = 1; + out_labels = &null_label; + } + } else { + num_out_labels = zt->label_num; + out_labels = zt->labels; + } + + if (mpls_lsp_install( + policy->zvrf, zt->type, zt->local_label, + num_out_labels, out_labels, nhlfe->nexthop->type, + &nhlfe->nexthop->gate, nhlfe->nexthop->ifindex) + < 0) + return -1; + } + + return 0; +} + +void zebra_sr_policy_bsid_uninstall(struct zebra_sr_policy *policy, + mpls_label_t old_bsid) +{ + struct zapi_srte_tunnel *zt = &policy->segment_list; + + mpls_lsp_uninstall_all_vrf(policy->zvrf, zt->type, old_bsid); +} + +int zebra_sr_policy_label_update(mpls_label_t label, + enum zebra_sr_policy_update_label_mode mode) +{ + struct zebra_sr_policy *policy; + + RB_FOREACH (policy, zebra_sr_policy_instance_head, + &zebra_sr_policy_instances) { + mpls_label_t next_hop_label; + + next_hop_label = policy->segment_list.labels[0]; + if (next_hop_label != label) + continue; + + switch (mode) { + case ZEBRA_SR_POLICY_LABEL_CREATED: + case ZEBRA_SR_POLICY_LABEL_UPDATED: + case ZEBRA_SR_POLICY_LABEL_REMOVED: + zebra_sr_policy_validate(policy, NULL); + break; + } + } + + return 0; +} + +void zebra_srte_init(void) +{ +} diff --git a/zebra/zebra_srte.h b/zebra/zebra_srte.h new file mode 100644 index 0000000..fe77809 --- /dev/null +++ b/zebra/zebra_srte.h @@ -0,0 +1,74 @@ +/* Zebra's client header. + * Copyright (C) 2020 Netdef, Inc. + * + * This file is part of GNU Zebra. + * + * GNU Zebra is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2, or (at your option) + * any later version. 
+ * + * GNU Zebra is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. + * + * You should have received a copy of the GNU General Public License along + * with this program; see the file COPYING; if not, write to the Free Software + * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA + */ + +#ifndef _ZEBRA_SRTE_H +#define _ZEBRA_SRTE_H + +#include "zebra/zebra_mpls.h" + +#include "lib/zclient.h" +#include "lib/srte.h" + +#ifdef __cplusplus +extern "C" { +#endif + +enum zebra_sr_policy_update_label_mode { + ZEBRA_SR_POLICY_LABEL_CREATED = 1, + ZEBRA_SR_POLICY_LABEL_UPDATED = 2, + ZEBRA_SR_POLICY_LABEL_REMOVED = 3, +}; + +struct zebra_sr_policy { + RB_ENTRY(zebra_sr_policy) entry; + uint32_t color; + struct ipaddr endpoint; + char name[SRTE_POLICY_NAME_MAX_LENGTH]; + enum zebra_sr_policy_status status; + struct zapi_srte_tunnel segment_list; + struct zebra_lsp *lsp; + struct zebra_vrf *zvrf; +}; +RB_HEAD(zebra_sr_policy_instance_head, zebra_sr_policy); +RB_PROTOTYPE(zebra_sr_policy_instance_head, zebra_sr_policy, entry, + zebra_sr_policy_instance_compare) + +extern struct zebra_sr_policy_instance_head zebra_sr_policy_instances; + +struct zebra_sr_policy * +zebra_sr_policy_add(uint32_t color, struct ipaddr *endpoint, char *name); +void zebra_sr_policy_del(struct zebra_sr_policy *policy); +struct zebra_sr_policy *zebra_sr_policy_find(uint32_t color, + struct ipaddr *endpoint); +struct zebra_sr_policy *zebra_sr_policy_find_by_name(char *name); +int zebra_sr_policy_validate(struct zebra_sr_policy *policy, + struct zapi_srte_tunnel *new_tunnel); +int zebra_sr_policy_bsid_install(struct zebra_sr_policy *policy); +void zebra_sr_policy_bsid_uninstall(struct zebra_sr_policy *policy, + mpls_label_t old_bsid); +void zebra_srte_init(void); +int 
zebra_sr_policy_label_update(mpls_label_t label, + enum zebra_sr_policy_update_label_mode mode); + +#ifdef __cplusplus +} +#endif + +#endif /* _ZEBRA_SRTE_H */ diff --git a/zebra/zebra_srv6.c b/zebra/zebra_srv6.c new file mode 100644 index 0000000..36506ca --- /dev/null +++ b/zebra/zebra_srv6.c @@ -0,0 +1,386 @@ +/* + * Zebra SRv6 definitions + * Copyright (C) 2020 Hiroki Shirokura, LINE Corporation + * Copyright (C) 2020 Masakazu Asama + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License as published by the Free + * Software Foundation; either version 2 of the License, or (at your option) + * any later version. + * + * This program is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for + * more details. + * + * You should have received a copy of the GNU General Public License along + * with this program; see the file COPYING; if not, write to the Free Software + * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA + */ + +#include <zebra.h> + +#include "network.h" +#include "prefix.h" +#include "stream.h" +#include "srv6.h" +#include "zebra/debug.h" +#include "zebra/zapi_msg.h" +#include "zebra/zserv.h" +#include "zebra/zebra_router.h" +#include "zebra/zebra_srv6.h" +#include "zebra/zebra_errors.h" +#include <stdio.h> +#include <string.h> +#include <stdlib.h> +#include <arpa/inet.h> +#include <netinet/in.h> + +#include <stdio.h> +#include <string.h> +#include <stdlib.h> +#include <arpa/inet.h> +#include <netinet/in.h> + + +DEFINE_MGROUP(SRV6_MGR, "SRv6 Manager"); +DEFINE_MTYPE_STATIC(SRV6_MGR, SRV6M_CHUNK, "SRv6 Manager Chunk"); + +/* define hooks for the basic API, so that it can be specialized or served + * externally + */ + +DEFINE_HOOK(srv6_manager_client_connect, + (struct zserv *client, vrf_id_t 
vrf_id), + (client, vrf_id)); +DEFINE_HOOK(srv6_manager_client_disconnect, + (struct zserv *client), (client)); +DEFINE_HOOK(srv6_manager_get_chunk, + (struct srv6_locator **loc, + struct zserv *client, + const char *locator_name, + vrf_id_t vrf_id), + (loc, client, locator_name, vrf_id)); +DEFINE_HOOK(srv6_manager_release_chunk, + (struct zserv *client, + const char *locator_name, + vrf_id_t vrf_id), + (client, locator_name, vrf_id)); + +/* define wrappers to be called in zapi_msg.c (as hooks must be called in + * source file where they were defined) + */ + +void srv6_manager_client_connect_call(struct zserv *client, vrf_id_t vrf_id) +{ + hook_call(srv6_manager_client_connect, client, vrf_id); +} + +void srv6_manager_get_locator_chunk_call(struct srv6_locator **loc, + struct zserv *client, + const char *locator_name, + vrf_id_t vrf_id) +{ + hook_call(srv6_manager_get_chunk, loc, client, locator_name, vrf_id); +} + +void srv6_manager_release_locator_chunk_call(struct zserv *client, + const char *locator_name, + vrf_id_t vrf_id) +{ + hook_call(srv6_manager_release_chunk, client, locator_name, vrf_id); +} + +int srv6_manager_client_disconnect_cb(struct zserv *client) +{ + hook_call(srv6_manager_client_disconnect, client); + return 0; +} + +static int zebra_srv6_cleanup(struct zserv *client) +{ + return 0; +} + +void zebra_srv6_locator_add(struct srv6_locator *locator) +{ + struct zebra_srv6 *srv6 = zebra_srv6_get_default(); + struct srv6_locator *tmp; + struct listnode *node; + struct zserv *client; + + tmp = zebra_srv6_locator_lookup(locator->name); + if (!tmp) + listnode_add(srv6->locators, locator); + + /* + * Notify new locator info to zclients. + * + * The srv6 locators and their prefixes are managed by zserv(zebra). + * And an actual configuration the srv6 sid in the srv6 locator is done + * by zclient(bgpd, isisd, etc). The configuration of each locator + * allocation and specify it by zserv and zclient should be + * asynchronous. 
For that, zclient should be received the event via + * ZAPI when a srv6 locator is added on zebra. + * Basically, in SRv6, adding/removing SRv6 locators is performed less + * frequently than adding rib entries, so a broad to all zclients will + * not degrade the overall performance of FRRouting. + */ + for (ALL_LIST_ELEMENTS_RO(zrouter.client_list, node, client)) + zsend_zebra_srv6_locator_add(client, locator); +} + +void zebra_srv6_locator_delete(struct srv6_locator *locator) +{ + struct listnode *n; + struct srv6_locator_chunk *c; + struct zebra_srv6 *srv6 = zebra_srv6_get_default(); + struct zserv *client; + + /* + * Notify deleted locator info to zclients if needed. + * + * zclient(bgpd,isisd,etc) allocates a sid from srv6 locator chunk and + * uses it for its own purpose. For example, in the case of BGP L3VPN, + * the SID assigned to vpn unicast rib will be given. + * And when the locator is deleted by zserv(zebra), those SIDs need to + * be withdrawn. The zclient must initiate the withdrawal of the SIDs + * by ZEBRA_SRV6_LOCATOR_DELETE, and this notification is sent to the + * owner of each chunk. 
+ */ + for (ALL_LIST_ELEMENTS_RO((struct list *)locator->chunks, n, c)) { + if (c->proto == ZEBRA_ROUTE_SYSTEM) + continue; + client = zserv_find_client(c->proto, c->instance); + if (!client) { + zlog_warn( + "%s: Not found zclient(proto=%u, instance=%u).", + __func__, c->proto, c->instance); + continue; + } + zsend_zebra_srv6_locator_delete(client, locator); + } + + listnode_delete(srv6->locators, locator); + srv6_locator_free(locator); +} + +struct srv6_locator *zebra_srv6_locator_lookup(const char *name) +{ + struct zebra_srv6 *srv6 = zebra_srv6_get_default(); + struct srv6_locator *locator; + struct listnode *node; + + for (ALL_LIST_ELEMENTS_RO(srv6->locators, node, locator)) + if (!strncmp(name, locator->name, SRV6_LOCNAME_SIZE)) + return locator; + return NULL; +} + +struct zebra_srv6 *zebra_srv6_get_default(void) +{ + static struct zebra_srv6 srv6; + static bool first_execution = true; + + if (first_execution) { + first_execution = false; + srv6.locators = list_new(); + } + return &srv6; +} + +/** + * Core function, assigns srv6-locator chunks + * + * It first searches through the list to check if there's one available + * (previously released). 
Otherwise it creates and assigns a new one + * + * @param proto Daemon protocol of client, to identify the owner + * @param instance Instance, to identify the owner + * @param session_id SessionID of client + * @param name Name of SRv6-locator + * @return Pointer to the assigned srv6-locator chunk, + * or NULL if the request could not be satisfied + */ +static struct srv6_locator * +assign_srv6_locator_chunk(uint8_t proto, + uint16_t instance, + uint32_t session_id, + const char *locator_name) +{ + bool chunk_found = false; + struct listnode *node = NULL; + struct srv6_locator *loc = NULL; + struct srv6_locator_chunk *chunk = NULL; + + loc = zebra_srv6_locator_lookup(locator_name); + if (!loc) { + zlog_info("%s: locator %s was not found", + __func__, locator_name); + return NULL; + } + + for (ALL_LIST_ELEMENTS_RO((struct list *)loc->chunks, node, chunk)) { + if (chunk->proto != NO_PROTO && chunk->proto != proto) + continue; + chunk_found = true; + break; + } + + if (!chunk_found) { + zlog_info("%s: locator is already owned", __func__); + return NULL; + } + + chunk->proto = proto; + chunk->instance = instance; + chunk->session_id = session_id; + return loc; +} + +static int zebra_srv6_manager_get_locator_chunk(struct srv6_locator **loc, + struct zserv *client, + const char *locator_name, + vrf_id_t vrf_id) +{ + int ret = 0; + + *loc = assign_srv6_locator_chunk(client->proto, client->instance, + client->session_id, locator_name); + + if (!*loc) + zlog_err("Unable to assign locator chunk to %s instance %u", + zebra_route_string(client->proto), client->instance); + else if (IS_ZEBRA_DEBUG_PACKET) + zlog_info("Assigned locator chunk %s to %s instance %u", + (*loc)->name, zebra_route_string(client->proto), + client->instance); + + if (*loc && (*loc)->status_up) + ret = zsend_srv6_manager_get_locator_chunk_response(client, + vrf_id, + *loc); + return ret; +} + +/** + * Core function, release no longer used srv6-locator chunks + * + * @param proto Daemon protocol of 
client, to identify the owner + * @param instance Instance, to identify the owner + * @param session_id Zclient session ID, to identify the zclient session + * @param locator_name SRv6-locator name, to identify the actual locator + * @return 0 on success, -1 otherwise + */ +static int release_srv6_locator_chunk(uint8_t proto, uint16_t instance, + uint32_t session_id, + const char *locator_name) +{ + int ret = -1; + struct listnode *node; + struct srv6_locator_chunk *chunk; + struct srv6_locator *loc = NULL; + + loc = zebra_srv6_locator_lookup(locator_name); + if (!loc) + return -1; + + if (IS_ZEBRA_DEBUG_PACKET) + zlog_debug("%s: Releasing srv6-locator on %s", __func__, + locator_name); + + for (ALL_LIST_ELEMENTS_RO((struct list *)loc->chunks, node, chunk)) { + if (chunk->proto != proto || + chunk->instance != instance || + chunk->session_id != session_id) + continue; + chunk->proto = NO_PROTO; + chunk->instance = 0; + chunk->session_id = 0; + chunk->keep = 0; + ret = 0; + break; + } + + if (ret != 0) + flog_err(EC_ZEBRA_SRV6M_UNRELEASED_LOCATOR_CHUNK, + "%s: SRv6 locator chunk not released", __func__); + + return ret; +} + +static int zebra_srv6_manager_release_locator_chunk(struct zserv *client, + const char *locator_name, + vrf_id_t vrf_id) +{ + if (vrf_id != VRF_DEFAULT) { + zlog_err("SRv6 locator doesn't support vrf"); + return -1; + } + + return release_srv6_locator_chunk(client->proto, client->instance, + client->session_id, locator_name); +} + +/** + * Release srv6-locator chunks from a client. + * + * Called on client disconnection or reconnection. It only releases chunks + * with empty keep value. 
+ * + * @param proto Daemon protocol of client, to identify the owner + * @param instance Instance, to identify the owner + * @return Number of chunks released + */ +int release_daemon_srv6_locator_chunks(struct zserv *client) +{ + int ret; + int count = 0; + struct zebra_srv6 *srv6 = zebra_srv6_get_default(); + struct listnode *loc_node; + struct listnode *chunk_node; + struct srv6_locator *loc; + struct srv6_locator_chunk *chunk; + + if (IS_ZEBRA_DEBUG_PACKET) + zlog_debug("%s: Releasing chunks for client proto %s, instance %d, session %u", + __func__, zebra_route_string(client->proto), + client->instance, client->session_id); + + for (ALL_LIST_ELEMENTS_RO(srv6->locators, loc_node, loc)) { + for (ALL_LIST_ELEMENTS_RO(loc->chunks, chunk_node, chunk)) { + if (chunk->proto == client->proto && + chunk->instance == client->instance && + chunk->session_id == client->session_id && + chunk->keep == 0) { + ret = release_srv6_locator_chunk( + chunk->proto, chunk->instance, + chunk->session_id, loc->name); + if (ret == 0) + count++; + } + } + } + + if (IS_ZEBRA_DEBUG_PACKET) + zlog_debug("%s: Released %d srv6-locator chunks", + __func__, count); + + return count; +} + +void zebra_srv6_init(void) +{ + hook_register(zserv_client_close, zebra_srv6_cleanup); + hook_register(srv6_manager_get_chunk, + zebra_srv6_manager_get_locator_chunk); + hook_register(srv6_manager_release_chunk, + zebra_srv6_manager_release_locator_chunk); +} + +bool zebra_srv6_is_enable(void) +{ + struct zebra_srv6 *srv6 = zebra_srv6_get_default(); + + return listcount(srv6->locators); +} diff --git a/zebra/zebra_srv6.h b/zebra/zebra_srv6.h new file mode 100644 index 0000000..84fcc30 --- /dev/null +++ b/zebra/zebra_srv6.h @@ -0,0 +1,80 @@ +/* + * Zebra SRv6 definitions + * Copyright (C) 2020 Hiroki Shirokura, LINE Corporation + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License as published by the Free + * Software Foundation; 
either version 2 of the License, or (at your option) + * any later version. + * + * This program is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for + * more details. + * + * You should have received a copy of the GNU General Public License along + * with this program; see the file COPYING; if not, write to the Free Software + * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA + */ + +#ifndef _ZEBRA_SRV6_H +#define _ZEBRA_SRV6_H + +#include <zebra.h> +#include <arpa/inet.h> +#include <netinet/in.h> + +#include "qobj.h" +#include "prefix.h" +#include <pthread.h> +#include <plist.h> + +/* SRv6 instance structure. */ +struct zebra_srv6 { + struct list *locators; +}; + +/* declare hooks for the basic API, so that it can be specialized or served + * externally. Also declare a hook when those functions have been registered, + * so that any external module wanting to replace those can react + */ + +DECLARE_HOOK(srv6_manager_client_connect, + (struct zserv *client, vrf_id_t vrf_id), + (client, vrf_id)); +DECLARE_HOOK(srv6_manager_client_disconnect, + (struct zserv *client), (client)); +DECLARE_HOOK(srv6_manager_get_chunk, + (struct srv6_locator **loc, + struct zserv *client, + const char *locator_name, + vrf_id_t vrf_id), + (mc, client, keep, size, base, vrf_id)); +DECLARE_HOOK(srv6_manager_release_chunk, + (struct zserv *client, + const char *locator_name, + vrf_id_t vrf_id), + (client, locator_name, vrf_id)); + + +extern void zebra_srv6_locator_add(struct srv6_locator *locator); +extern void zebra_srv6_locator_delete(struct srv6_locator *locator); +extern struct srv6_locator *zebra_srv6_locator_lookup(const char *name); + +extern void zebra_srv6_init(void); +extern struct zebra_srv6 *zebra_srv6_get_default(void); +extern bool zebra_srv6_is_enable(void); + +extern void 
srv6_manager_client_connect_call(struct zserv *client, + vrf_id_t vrf_id); +extern void srv6_manager_get_locator_chunk_call(struct srv6_locator **loc, + struct zserv *client, + const char *locator_name, + vrf_id_t vrf_id); +extern void srv6_manager_release_locator_chunk_call(struct zserv *client, + const char *locator_name, + vrf_id_t vrf_id); +extern int srv6_manager_client_disconnect_cb(struct zserv *client); +extern int release_daemon_srv6_locator_chunks(struct zserv *client); + +#endif /* _ZEBRA_SRV6_H */ diff --git a/zebra/zebra_srv6_vty.c b/zebra/zebra_srv6_vty.c new file mode 100644 index 0000000..62ce173 --- /dev/null +++ b/zebra/zebra_srv6_vty.c @@ -0,0 +1,402 @@ +/* + * Zebra SRv6 VTY functions + * Copyright (C) 2020 Hiroki Shirokura, LINE Corporation + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License as published by the Free + * Software Foundation; either version 2 of the License, or (at your option) + * any later version. + * + * This program is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for + * more details. 
+ * + * You should have received a copy of the GNU General Public License along + * with this program; see the file COPYING; if not, write to the Free Software + * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA + */ + +#include <zebra.h> + +#include "memory.h" +#include "if.h" +#include "prefix.h" +#include "command.h" +#include "table.h" +#include "rib.h" +#include "nexthop.h" +#include "vrf.h" +#include "srv6.h" +#include "lib/json.h" + +#include "zebra/zserv.h" +#include "zebra/zebra_router.h" +#include "zebra/zebra_vrf.h" +#include "zebra/zebra_srv6.h" +#include "zebra/zebra_srv6_vty.h" +#include "zebra/zebra_rnh.h" +#include "zebra/redistribute.h" +#include "zebra/zebra_routemap.h" +#include "zebra/zebra_dplane.h" + +#ifndef VTYSH_EXTRACT_PL +#include "zebra/zebra_srv6_vty_clippy.c" +#endif + +static int zebra_sr_config(struct vty *vty); + +static struct cmd_node sr_node = { + .name = "sr", + .node = SEGMENT_ROUTING_NODE, + .parent_node = CONFIG_NODE, + .prompt = "%s(config-sr)# ", + .config_write = zebra_sr_config, +}; + +static struct cmd_node srv6_node = { + .name = "srv6", + .node = SRV6_NODE, + .parent_node = SEGMENT_ROUTING_NODE, + .prompt = "%s(config-srv6)# ", + +}; + +static struct cmd_node srv6_locs_node = { + .name = "srv6-locators", + .node = SRV6_LOCS_NODE, + .parent_node = SRV6_NODE, + .prompt = "%s(config-srv6-locators)# ", +}; + +static struct cmd_node srv6_loc_node = { + .name = "srv6-locator", + .node = SRV6_LOC_NODE, + .parent_node = SRV6_LOCS_NODE, + .prompt = "%s(config-srv6-locator)# " +}; + +DEFUN (show_srv6_locator, + show_srv6_locator_cmd, + "show segment-routing srv6 locator [json]", + SHOW_STR + "Segment Routing\n" + "Segment Routing SRv6\n" + "Locator Information\n" + JSON_STR) +{ + const bool uj = use_json(argc, argv); + struct zebra_srv6 *srv6 = zebra_srv6_get_default(); + struct srv6_locator *locator; + struct listnode *node; + char str[256]; + int id; + json_object *json = NULL; + json_object 
*json_locators = NULL; + json_object *json_locator = NULL; + + if (uj) { + json = json_object_new_object(); + json_locators = json_object_new_array(); + json_object_object_add(json, "locators", json_locators); + + for (ALL_LIST_ELEMENTS_RO(srv6->locators, node, locator)) { + json_locator = srv6_locator_json(locator); + if (!json_locator) + continue; + json_object_array_add(json_locators, json_locator); + + } + + vty_json(vty, json); + } else { + vty_out(vty, "Locator:\n"); + vty_out(vty, "Name ID Prefix Status\n"); + vty_out(vty, "-------------------- ------- ------------------------ -------\n"); + + id = 1; + for (ALL_LIST_ELEMENTS_RO(srv6->locators, node, locator)) { + prefix2str(&locator->prefix, str, sizeof(str)); + vty_out(vty, "%-20s %7d %-24s %s\n", + locator->name, id, str, + locator->status_up ? "Up" : "Down"); + ++id; + } + vty_out(vty, "\n"); + } + + return CMD_SUCCESS; +} + +DEFUN (show_srv6_locator_detail, + show_srv6_locator_detail_cmd, + "show segment-routing srv6 locator NAME detail [json]", + SHOW_STR + "Segment Routing\n" + "Segment Routing SRv6\n" + "Locator Information\n" + "Locator Name\n" + "Detailed information\n" + JSON_STR) +{ + const bool uj = use_json(argc, argv); + struct zebra_srv6 *srv6 = zebra_srv6_get_default(); + struct srv6_locator *locator; + struct listnode *node; + char str[256]; + const char *locator_name = argv[4]->arg; + json_object *json_locator = NULL; + + if (uj) { + locator = zebra_srv6_locator_lookup(locator_name); + if (!locator) + return CMD_WARNING; + + json_locator = srv6_locator_detailed_json(locator); + vty_json(vty, json_locator); + return CMD_SUCCESS; + } + + for (ALL_LIST_ELEMENTS_RO(srv6->locators, node, locator)) { + struct listnode *node; + struct srv6_locator_chunk *chunk; + + if (strcmp(locator->name, locator_name) != 0) + continue; + + prefix2str(&locator->prefix, str, sizeof(str)); + vty_out(vty, "Name: %s\n", locator->name); + vty_out(vty, "Prefix: %s\n", str); + vty_out(vty, "Function-Bit-Len: %u\n", + 
locator->function_bits_length); + + vty_out(vty, "Chunks:\n"); + for (ALL_LIST_ELEMENTS_RO((struct list *)locator->chunks, node, + chunk)) { + prefix2str(&chunk->prefix, str, sizeof(str)); + vty_out(vty, "- prefix: %s, owner: %s\n", str, + zebra_route_string(chunk->proto)); + } + } + + + return CMD_SUCCESS; +} + +DEFUN_NOSH (segment_routing, + segment_routing_cmd, + "segment-routing", + "Segment Routing\n") +{ + vty->node = SEGMENT_ROUTING_NODE; + return CMD_SUCCESS; +} + +DEFUN_NOSH (srv6, + srv6_cmd, + "srv6", + "Segment Routing SRv6\n") +{ + vty->node = SRV6_NODE; + return CMD_SUCCESS; +} + +DEFUN (no_srv6, + no_srv6_cmd, + "no srv6", + NO_STR + "Segment Routing SRv6\n") +{ + struct zebra_srv6 *srv6 = zebra_srv6_get_default(); + struct srv6_locator *locator; + struct listnode *node, *nnode; + + for (ALL_LIST_ELEMENTS(srv6->locators, node, nnode, locator)) + zebra_srv6_locator_delete(locator); + return CMD_SUCCESS; +} + +DEFUN_NOSH (srv6_locators, + srv6_locators_cmd, + "locators", + "Segment Routing SRv6 locators\n") +{ + vty->node = SRV6_LOCS_NODE; + return CMD_SUCCESS; +} + +DEFUN_NOSH (srv6_locator, + srv6_locator_cmd, + "locator WORD", + "Segment Routing SRv6 locator\n" + "Specify locator-name\n") +{ + struct srv6_locator *locator = NULL; + + locator = zebra_srv6_locator_lookup(argv[1]->arg); + if (locator) { + VTY_PUSH_CONTEXT(SRV6_LOC_NODE, locator); + locator->status_up = true; + return CMD_SUCCESS; + } + + locator = srv6_locator_alloc(argv[1]->arg); + if (!locator) { + vty_out(vty, "%% Alloc failed\n"); + return CMD_WARNING_CONFIG_FAILED; + } + locator->status_up = true; + + VTY_PUSH_CONTEXT(SRV6_LOC_NODE, locator); + vty->node = SRV6_LOC_NODE; + return CMD_SUCCESS; +} + +DEFUN (no_srv6_locator, + no_srv6_locator_cmd, + "no locator WORD", + NO_STR + "Segment Routing SRv6 locator\n" + "Specify locator-name\n") +{ + struct srv6_locator *locator = zebra_srv6_locator_lookup(argv[2]->arg); + if (!locator) { + vty_out(vty, "%% Can't find SRv6 locator\n"); + 
return CMD_WARNING_CONFIG_FAILED;
+	}
+
+	zebra_srv6_locator_delete(locator);
+	return CMD_SUCCESS;
+}
+
+DEFPY (locator_prefix,
+       locator_prefix_cmd,
+       "prefix X:X::X:X/M$prefix [func-bits (16-64)$func_bit_len]",
+       "Configure SRv6 locator prefix\n"
+       "Specify SRv6 locator prefix\n"
+       "Configure SRv6 locator function length in bits\n"
+       "Specify SRv6 locator function length in bits\n")
+{
+	VTY_DECLVAR_CONTEXT(srv6_locator, locator);
+	struct srv6_locator_chunk *chunk = NULL;
+	struct listnode *node = NULL;
+
+	locator->prefix = *prefix;
+
+	/*
+	 * TODO(slankdev): please support variable node-bit-length.
+	 * In draft-ietf-bess-srv6-services-05#section-3.2.1.
+	 * Locator block length and Locator node length are defined.
+	 * Which are defined as "locator-len == block-len + node-len".
+	 * In current implementation, node bits length is hardcoded as 24.
+	 * It should be supported various val.
+	 *
+	 * Cisco IOS-XR support only following pattern.
+	 *  (1) The locator length should be 64-bits long.
+	 *  (2) The SID block portion (MSBs) cannot exceed 40 bits.
+	 *      If this value is less than 40 bits,
+	 *      user should use a pattern of zeros as a filler.
+	 *  (3) The Node Id portion (LSBs) cannot exceed 24 bits.
+ */ + locator->block_bits_length = prefix->prefixlen - 24; + locator->node_bits_length = 24; + locator->function_bits_length = func_bit_len; + locator->argument_bits_length = 0; + + if (list_isempty(locator->chunks)) { + chunk = srv6_locator_chunk_alloc(); + chunk->prefix = *prefix; + chunk->proto = 0; + listnode_add(locator->chunks, chunk); + } else { + for (ALL_LIST_ELEMENTS_RO(locator->chunks, node, chunk)) { + uint8_t zero[16] = {0}; + + if (memcmp(&chunk->prefix.prefix, zero, 16) == 0) { + struct zserv *client; + struct listnode *client_node; + + chunk->prefix = *prefix; + for (ALL_LIST_ELEMENTS_RO(zrouter.client_list, + client_node, + client)) { + struct srv6_locator *tmp; + + if (client->proto != chunk->proto) + continue; + + srv6_manager_get_locator_chunk_call( + &tmp, client, + locator->name, + VRF_DEFAULT); + } + } + } + } + + zebra_srv6_locator_add(locator); + return CMD_SUCCESS; +} + +static int zebra_sr_config(struct vty *vty) +{ + struct zebra_srv6 *srv6 = zebra_srv6_get_default(); + struct listnode *node; + struct srv6_locator *locator; + char str[256]; + + vty_out(vty, "!\n"); + if (zebra_srv6_is_enable()) { + vty_out(vty, "segment-routing\n"); + vty_out(vty, " srv6\n"); + vty_out(vty, " locators\n"); + for (ALL_LIST_ELEMENTS_RO(srv6->locators, node, locator)) { + inet_ntop(AF_INET6, &locator->prefix.prefix, + str, sizeof(str)); + vty_out(vty, " locator %s\n", locator->name); + vty_out(vty, " prefix %s/%u", str, + locator->prefix.prefixlen); + if (locator->function_bits_length) + vty_out(vty, " func-bits %u", + locator->function_bits_length); + vty_out(vty, "\n"); + vty_out(vty, " exit\n"); + vty_out(vty, " !\n"); + } + vty_out(vty, " exit\n"); + vty_out(vty, " !\n"); + vty_out(vty, " exit\n"); + vty_out(vty, " !\n"); + vty_out(vty, "exit\n"); + vty_out(vty, "!\n"); + } + return 0; +} + +void zebra_srv6_vty_init(void) +{ + /* Install nodes and its default commands */ + install_node(&sr_node); + install_node(&srv6_node); + 
install_node(&srv6_locs_node); + install_node(&srv6_loc_node); + install_default(SEGMENT_ROUTING_NODE); + install_default(SRV6_NODE); + install_default(SRV6_LOCS_NODE); + install_default(SRV6_LOC_NODE); + + /* Command for change node */ + install_element(CONFIG_NODE, &segment_routing_cmd); + install_element(SEGMENT_ROUTING_NODE, &srv6_cmd); + install_element(SEGMENT_ROUTING_NODE, &no_srv6_cmd); + install_element(SRV6_NODE, &srv6_locators_cmd); + install_element(SRV6_LOCS_NODE, &srv6_locator_cmd); + install_element(SRV6_LOCS_NODE, &no_srv6_locator_cmd); + + /* Command for configuration */ + install_element(SRV6_LOC_NODE, &locator_prefix_cmd); + + /* Command for operation */ + install_element(VIEW_NODE, &show_srv6_locator_cmd); + install_element(VIEW_NODE, &show_srv6_locator_detail_cmd); +} diff --git a/zebra/zebra_srv6_vty.h b/zebra/zebra_srv6_vty.h new file mode 100644 index 0000000..42d6aef --- /dev/null +++ b/zebra/zebra_srv6_vty.h @@ -0,0 +1,25 @@ +/* + * Zebra SRv6 VTY functions + * Copyright (C) 2020 Hiroki Shirokura, LINE Corporation + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License as published by the Free + * Software Foundation; either version 2 of the License, or (at your option) + * any later version. + * + * This program is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for + * more details. 
+ * + * You should have received a copy of the GNU General Public License along + * with this program; see the file COPYING; if not, write to the Free Software + * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA + */ + +#ifndef _ZEBRA_SRV6_VTY_H +#define _ZEBRA_SRV6_VTY_H + +extern void zebra_srv6_vty_init(void); + +#endif /* _ZEBRA_SRV6_VTY_H */ diff --git a/zebra/zebra_trace.c b/zebra/zebra_trace.c new file mode 100644 index 0000000..fef5ad2 --- /dev/null +++ b/zebra/zebra_trace.c @@ -0,0 +1,6 @@ +#define TRACEPOINT_CREATE_PROBES +#define TRACEPOINT_DEFINE + +#include <zebra.h> + +#include "zebra_trace.h" diff --git a/zebra/zebra_trace.h b/zebra/zebra_trace.h new file mode 100644 index 0000000..374305f --- /dev/null +++ b/zebra/zebra_trace.h @@ -0,0 +1,173 @@ +/* Tracing for zebra + * + * Copyright (C) 2020 NVIDIA Corporation + * Donald Sharp + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License as published by the Free + * Software Foundation; either version 2 of the License, or (at your option) + * any later version. + * + * This program is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for + * more details. 
+ * + * You should have received a copy of the GNU General Public License along + * with this program; see the file COPYING; if not, write to the Free Software + * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA + */ + +#if !defined(__ZEBRA_TRACE_H__) || defined(TRACEPOINT_HEADER_MULTI_READ) +#define __ZEBRA_TRACE_H__ + +#include "lib/trace.h" + +#ifdef HAVE_LTTNG + +#undef TRACEPOINT_PROVIDER +#define TRACEPOINT_PROVIDER frr_zebra + +#undef TRACEPOINT_INCLUDE +#define TRACEPOINT_INCLUDE "zebra/zebra_trace.h" + +#include <lttng/tracepoint.h> + +#include <lib/ns.h> +#include <lib/table.h> + +#include <zebra/zebra_ns.h> + +TRACEPOINT_EVENT( + frr_zebra, + netlink_request_intf_addr, + TP_ARGS(struct nlsock *, netlink_cmd, + int, family, + int, type, + uint32_t, filter_mask), + TP_FIELDS( + ctf_integer_hex(intptr_t, netlink_cmd, netlink_cmd) + ctf_integer(int, family, family) + ctf_integer(int, type, type) + ctf_integer(uint32_t, filter_mask, filter_mask) + ) + ) + +TRACEPOINT_EVENT( + frr_zebra, + netlink_interface, + TP_ARGS( + struct nlmsghdr *, header, + ns_id_t, ns_id, + int, startup), + TP_FIELDS( + ctf_integer_hex(intptr_t, header, header) + ctf_integer(uint32_t, ns_id, ns_id) + ctf_integer(uint32_t, startup, startup) + ) + ) + +TRACEPOINT_EVENT( + frr_zebra, + netlink_nexthop_change, + TP_ARGS( + struct nlmsghdr *, header, + ns_id_t, ns_id, + int, startup), + TP_FIELDS( + ctf_integer_hex(intptr_t, header, header) + ctf_integer(uint32_t, ns_id, ns_id) + ctf_integer(uint32_t, startup, startup) + ) + ) + +TRACEPOINT_EVENT( + frr_zebra, + netlink_interface_addr, + TP_ARGS( + struct nlmsghdr *, header, + ns_id_t, ns_id, + int, startup), + TP_FIELDS( + ctf_integer_hex(intptr_t, header, header) + ctf_integer(uint32_t, ns_id, ns_id) + ctf_integer(uint32_t, startup, startup) + ) + ) + +TRACEPOINT_EVENT( + frr_zebra, + netlink_route_change_read_unicast, + TP_ARGS( + struct nlmsghdr *, header, + ns_id_t, ns_id, + int, startup), + TP_FIELDS( + 
ctf_integer_hex(intptr_t, header, header) + ctf_integer(uint32_t, ns_id, ns_id) + ctf_integer(uint32_t, startup, startup) + ) + ) + +TRACEPOINT_EVENT( + frr_zebra, + netlink_rule_change, + TP_ARGS( + struct nlmsghdr *, header, + ns_id_t, ns_id, + int, startup), + TP_FIELDS( + ctf_integer_hex(intptr_t, header, header) + ctf_integer(uint32_t, ns_id, ns_id) + ctf_integer(uint32_t, startup, startup) + ) + ) + +TRACEPOINT_EVENT( + frr_zebra, + netlink_tc_qdisc_change, + TP_ARGS( + struct nlmsghdr *, header, + ns_id_t, ns_id, + int, startup), + TP_FIELDS( + ctf_integer_hex(intptr_t, header, header) + ctf_integer(uint32_t, ns_id, ns_id) + ctf_integer(uint32_t, startup, startup) + ) + ) + +TRACEPOINT_EVENT( + frr_zebra, + netlink_tc_class_change, + TP_ARGS( + struct nlmsghdr *, header, + ns_id_t, ns_id, + int, startup), + TP_FIELDS( + ctf_integer_hex(intptr_t, header, header) + ctf_integer(uint32_t, ns_id, ns_id) + ctf_integer(uint32_t, startup, startup) + ) + ) + + +TRACEPOINT_EVENT( + frr_zebra, + netlink_tc_filter_change, + TP_ARGS( + struct nlmsghdr *, header, + ns_id_t, ns_id, + int, startup), + TP_FIELDS( + ctf_integer_hex(intptr_t, header, header) + ctf_integer(uint32_t, ns_id, ns_id) + ctf_integer(uint32_t, startup, startup) + ) + ) + +#include <lttng/tracepoint-event.h> + +#endif /* HAVE_LTTNG */ + +#endif /* __ZEBRA_TRACE_H__ */ diff --git a/zebra/zebra_vrf.c b/zebra/zebra_vrf.c new file mode 100644 index 0000000..52daf94 --- /dev/null +++ b/zebra/zebra_vrf.c @@ -0,0 +1,653 @@ +/* + * Copyright (C) 2016 CumulusNetworks + * Donald Sharp + * + * This file is part of Quagga + * + * Quagga is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License as published by the + * Free Software Foundation; either version 2, or (at your option) any + * later version. 
+ * + * Quagga is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. + * + * You should have received a copy of the GNU General Public License along + * with this program; see the file COPYING; if not, write to the Free Software + * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA + */ +#include <zebra.h> + +/* for basename */ +#include <libgen.h> + +#include "log.h" +#include "linklist.h" +#include "command.h" +#include "memory.h" +#include "srcdest_table.h" +#include "vrf.h" +#include "vty.h" + +#include "zebra/zebra_router.h" +#include "zebra/rtadv.h" +#include "zebra/debug.h" +#include "zebra/zapi_msg.h" +#include "zebra/rib.h" +#include "zebra/zebra_vrf.h" +#include "zebra/zebra_rnh.h" +#include "zebra/router-id.h" +#include "zebra/interface.h" +#include "zebra/zebra_mpls.h" +#include "zebra/zebra_vxlan.h" +#include "zebra/zebra_netns_notify.h" +#include "zebra/zebra_routemap.h" +#ifndef VTYSH_EXTRACT_PL +#include "zebra/zebra_vrf_clippy.c" +#endif +#include "zebra/table_manager.h" + +static void zebra_vrf_table_create(struct zebra_vrf *zvrf, afi_t afi, + safi_t safi); +static void zebra_rnhtable_node_cleanup(struct route_table *table, + struct route_node *node); + +DEFINE_MTYPE_STATIC(ZEBRA, ZEBRA_VRF, "ZEBRA VRF"); +DEFINE_MTYPE_STATIC(ZEBRA, OTHER_TABLE, "Other Table"); + +/* VRF information update. */ +static void zebra_vrf_add_update(struct zebra_vrf *zvrf) +{ + struct listnode *node, *nnode; + struct zserv *client; + + if (IS_ZEBRA_DEBUG_EVENT) + zlog_debug("MESSAGE: ZEBRA_VRF_ADD %s", zvrf_name(zvrf)); + + for (ALL_LIST_ELEMENTS(zrouter.client_list, node, nnode, client)) { + /* Do not send unsolicited messages to synchronous clients. 
*/ + if (client->synchronous) + continue; + + zsend_vrf_add(client, zvrf); + } +} + +static void zebra_vrf_delete_update(struct zebra_vrf *zvrf) +{ + struct listnode *node, *nnode; + struct zserv *client; + + if (IS_ZEBRA_DEBUG_EVENT) + zlog_debug("MESSAGE: ZEBRA_VRF_DELETE %s", zvrf_name(zvrf)); + + for (ALL_LIST_ELEMENTS(zrouter.client_list, node, nnode, client)) { + /* Do not send unsolicited messages to synchronous clients. */ + if (client->synchronous) + continue; + + zsend_vrf_delete(client, zvrf); + } +} + +void zebra_vrf_update_all(struct zserv *client) +{ + struct vrf *vrf; + + RB_FOREACH (vrf, vrf_id_head, &vrfs_by_id) { + if (vrf->vrf_id != VRF_UNKNOWN) + zsend_vrf_add(client, vrf_info_lookup(vrf->vrf_id)); + } +} + +/* Callback upon creating a new VRF. */ +static int zebra_vrf_new(struct vrf *vrf) +{ + struct zebra_vrf *zvrf; + + if (IS_ZEBRA_DEBUG_EVENT) + zlog_debug("VRF %s created, id %u", vrf->name, vrf->vrf_id); + + zvrf = zebra_vrf_alloc(vrf); + if (!vrf_is_backend_netns()) + zvrf->zns = zebra_ns_lookup(NS_DEFAULT); + + otable_init(&zvrf->other_tables); + + router_id_init(zvrf); + + /* Initiate Table Manager per ZNS */ + table_manager_enable(zvrf); + + return 0; +} + +/* Callback upon enabling a VRF. */ +static int zebra_vrf_enable(struct vrf *vrf) +{ + struct zebra_vrf *zvrf = vrf->info; + struct route_table *table; + afi_t afi; + safi_t safi; + + assert(zvrf); + if (IS_ZEBRA_DEBUG_EVENT) + zlog_debug("VRF %s id %u is now active", zvrf_name(zvrf), + zvrf_id(zvrf)); + + if (vrf_is_backend_netns()) + zvrf->zns = zebra_ns_lookup((ns_id_t)vrf->vrf_id); + else + zvrf->zns = zebra_ns_lookup(NS_DEFAULT); + + rtadv_vrf_init(zvrf); + + /* Inform clients that the VRF is now active. This is an + * add for the clients. 
+ */ + + zebra_vrf_add_update(zvrf); + /* Allocate tables */ + for (afi = AFI_IP; afi <= AFI_IP6; afi++) { + for (safi = SAFI_UNICAST; safi <= SAFI_MULTICAST; safi++) + zebra_vrf_table_create(zvrf, afi, safi); + + table = route_table_init(); + table->cleanup = zebra_rnhtable_node_cleanup; + zvrf->rnh_table[afi] = table; + + table = route_table_init(); + table->cleanup = zebra_rnhtable_node_cleanup; + zvrf->rnh_table_multicast[afi] = table; + } + + /* Kick off any VxLAN-EVPN processing. */ + zebra_vxlan_vrf_enable(zvrf); + + return 0; +} + +/* Callback upon disabling a VRF. */ +static int zebra_vrf_disable(struct vrf *vrf) +{ + struct zebra_vrf *zvrf = vrf->info; + struct interface *ifp; + afi_t afi; + safi_t safi; + + assert(zvrf); + if (IS_ZEBRA_DEBUG_EVENT) + zlog_debug("VRF %s id %u is now inactive", zvrf_name(zvrf), + zvrf_id(zvrf)); + + /* Stop any VxLAN-EVPN processing. */ + zebra_vxlan_vrf_disable(zvrf); + + rtadv_vrf_terminate(zvrf); + + /* Inform clients that the VRF is now inactive. This is a + * delete for the clients. + */ + zebra_vrf_delete_update(zvrf); + + /* If asked to retain routes, there's nothing more to do. */ + if (CHECK_FLAG(zvrf->flags, ZEBRA_VRF_RETAIN)) + return 0; + + /* Remove all routes. */ + for (afi = AFI_IP; afi <= AFI_IP6; afi++) { + route_table_finish(zvrf->rnh_table[afi]); + zvrf->rnh_table[afi] = NULL; + route_table_finish(zvrf->rnh_table_multicast[afi]); + zvrf->rnh_table_multicast[afi] = NULL; + + for (safi = SAFI_UNICAST; safi <= SAFI_MULTICAST; safi++) + rib_close_table(zvrf->table[afi][safi]); + } + + /* Cleanup Vxlan, MPLS and PW tables. */ + zebra_vxlan_cleanup_tables(zvrf); + zebra_mpls_cleanup_tables(zvrf); + zebra_pw_exit(zvrf); + + /* Remove link-local IPv4 addresses created for BGP unnumbered peering. + */ + FOR_ALL_INTERFACES (vrf, ifp) + if_nbr_ipv6ll_to_ipv4ll_neigh_del_all(ifp); + + /* clean-up work queues */ + meta_queue_free(zrouter.mq, zvrf); + + /* Cleanup (free) routing tables and NHT tables. 
*/ + for (afi = AFI_IP; afi <= AFI_IP6; afi++) { + /* + * Set the table pointer to NULL as that + * we no-longer need a copy of it, nor do we + * own this data, the zebra_router structure + * owns these tables. Once we've cleaned up the + * table, see rib_close_table above + * we no-longer need this pointer. + */ + for (safi = SAFI_UNICAST; safi <= SAFI_MULTICAST; safi++) { + zebra_router_release_table(zvrf, zvrf->table_id, afi, + safi); + zvrf->table[afi][safi] = NULL; + } + } + + return 0; +} + +static int zebra_vrf_delete(struct vrf *vrf) +{ + struct zebra_vrf *zvrf = vrf->info; + struct other_route_table *otable; + + assert(zvrf); + if (IS_ZEBRA_DEBUG_EVENT) + zlog_debug("VRF %s id %u deleted", zvrf_name(zvrf), + zvrf_id(zvrf)); + + table_manager_disable(zvrf); + + /* clean-up work queues */ + meta_queue_free(zrouter.mq, zvrf); + + /* Free Vxlan and MPLS. */ + zebra_vxlan_close_tables(zvrf); + zebra_mpls_close_tables(zvrf); + + otable = otable_pop(&zvrf->other_tables); + while (otable) { + zebra_router_release_table(zvrf, otable->table_id, + otable->afi, otable->safi); + XFREE(MTYPE_OTHER_TABLE, otable); + + otable = otable_pop(&zvrf->other_tables); + } + + /* Cleanup EVPN states for vrf */ + zebra_vxlan_vrf_delete(zvrf); + zebra_routemap_vrf_delete(zvrf); + + list_delete_all_node(zvrf->rid_all_sorted_list); + list_delete_all_node(zvrf->rid_lo_sorted_list); + + list_delete_all_node(zvrf->rid6_all_sorted_list); + list_delete_all_node(zvrf->rid6_lo_sorted_list); + + otable_fini(&zvrf->other_tables); + XFREE(MTYPE_ZEBRA_VRF, zvrf); + vrf->info = NULL; + + return 0; +} + +/* Lookup the routing table in a VRF based on both VRF-Id and table-id. 
+ * NOTE: Table-id is relevant on two modes: + * - case VRF backend is default : on default VRF only + * - case VRF backend is netns : on all VRFs + */ +struct route_table *zebra_vrf_lookup_table_with_table_id(afi_t afi, safi_t safi, + vrf_id_t vrf_id, + uint32_t table_id) +{ + struct zebra_vrf *zvrf = vrf_info_lookup(vrf_id); + struct other_route_table ort, *otable; + + if (!zvrf) + return NULL; + + if (afi >= AFI_MAX || safi >= SAFI_MAX) + return NULL; + + if (table_id == zvrf->table_id) + return zebra_vrf_table(afi, safi, vrf_id); + + ort.afi = afi; + ort.safi = safi; + ort.table_id = table_id; + otable = otable_find(&zvrf->other_tables, &ort); + + if (otable) + return otable->table; + + return NULL; +} + +struct route_table *zebra_vrf_get_table_with_table_id(afi_t afi, safi_t safi, + vrf_id_t vrf_id, + uint32_t table_id) +{ + struct zebra_vrf *zvrf = vrf_info_lookup(vrf_id); + struct other_route_table *otable; + struct route_table *table; + + table = zebra_vrf_lookup_table_with_table_id(afi, safi, vrf_id, + table_id); + + if (table) + goto done; + + /* Create it as an `other` table */ + table = zebra_router_get_table(zvrf, table_id, afi, safi); + + otable = XCALLOC(MTYPE_OTHER_TABLE, sizeof(*otable)); + otable->afi = afi; + otable->safi = safi; + otable->table_id = table_id; + otable->table = table; + otable_add(&zvrf->other_tables, otable); + +done: + return table; +} + +static void zebra_rnhtable_node_cleanup(struct route_table *table, + struct route_node *node) +{ + if (node->info) + zebra_free_rnh(node->info); +} + +/* + * Create a routing table for the specific AFI/SAFI in the given VRF. 
+ */ +static void zebra_vrf_table_create(struct zebra_vrf *zvrf, afi_t afi, + safi_t safi) +{ + struct route_node *rn; + struct prefix p; + + assert(!zvrf->table[afi][safi]); + + zvrf->table[afi][safi] = + zebra_router_get_table(zvrf, zvrf->table_id, afi, safi); + + memset(&p, 0, sizeof(p)); + p.family = afi2family(afi); + + rn = srcdest_rnode_get(zvrf->table[afi][safi], &p, NULL); + zebra_rib_create_dest(rn); +} + +/* Allocate new zebra VRF. */ +struct zebra_vrf *zebra_vrf_alloc(struct vrf *vrf) +{ + struct zebra_vrf *zvrf; + + zvrf = XCALLOC(MTYPE_ZEBRA_VRF, sizeof(struct zebra_vrf)); + + zvrf->vrf = vrf; + vrf->info = zvrf; + + zebra_vxlan_init_tables(zvrf); + zebra_mpls_init_tables(zvrf); + zebra_pw_init(zvrf); + zvrf->table_id = RT_TABLE_MAIN; + /* by default table ID is default one */ + return zvrf; +} + +/* Lookup VRF by identifier. */ +struct zebra_vrf *zebra_vrf_lookup_by_id(vrf_id_t vrf_id) +{ + return vrf_info_lookup(vrf_id); +} + +/* Lookup VRF by name. */ +struct zebra_vrf *zebra_vrf_lookup_by_name(const char *name) +{ + struct vrf *vrf; + + if (!name) + name = VRF_DEFAULT_NAME; + + vrf = vrf_lookup_by_name(name); + if (vrf) + return ((struct zebra_vrf *)vrf->info); + + return NULL; +} + +/* Lookup the routing table in an enabled VRF. */ +struct route_table *zebra_vrf_table(afi_t afi, safi_t safi, vrf_id_t vrf_id) +{ + struct zebra_vrf *zvrf = vrf_info_lookup(vrf_id); + + if (!zvrf) + return NULL; + + if (afi >= AFI_MAX || safi >= SAFI_MAX) + return NULL; + + return zvrf->table[afi][safi]; +} + +static int vrf_config_write(struct vty *vty) +{ + struct vrf *vrf; + struct zebra_vrf *zvrf; + + RB_FOREACH (vrf, vrf_name_head, &vrfs_by_name) { + zvrf = vrf->info; + + if (!zvrf) + continue; + + if (zvrf_id(zvrf) == VRF_DEFAULT) { + if (zvrf->l3vni) + vty_out(vty, "vni %u%s\n", zvrf->l3vni, + is_l3vni_for_prefix_routes_only( + zvrf->l3vni) + ? 
" prefix-routes-only" + : ""); + if (zvrf->zebra_rnh_ip_default_route) + vty_out(vty, "ip nht resolve-via-default\n"); + + if (zvrf->zebra_rnh_ipv6_default_route) + vty_out(vty, "ipv6 nht resolve-via-default\n"); + + if (zvrf->tbl_mgr + && (zvrf->tbl_mgr->start || zvrf->tbl_mgr->end)) + vty_out(vty, "ip table range %u %u\n", + zvrf->tbl_mgr->start, + zvrf->tbl_mgr->end); + } else { + vty_frame(vty, "vrf %s\n", zvrf_name(zvrf)); + if (zvrf->l3vni) + vty_out(vty, " vni %u%s\n", zvrf->l3vni, + is_l3vni_for_prefix_routes_only( + zvrf->l3vni) + ? " prefix-routes-only" + : ""); + zebra_ns_config_write(vty, (struct ns *)vrf->ns_ctxt); + if (zvrf->zebra_rnh_ip_default_route) + vty_out(vty, " ip nht resolve-via-default\n"); + + if (zvrf->zebra_rnh_ipv6_default_route) + vty_out(vty, " ipv6 nht resolve-via-default\n"); + + if (zvrf->tbl_mgr && vrf_is_backend_netns() + && (zvrf->tbl_mgr->start || zvrf->tbl_mgr->end)) + vty_out(vty, " ip table range %u %u\n", + zvrf->tbl_mgr->start, + zvrf->tbl_mgr->end); + } + + + zebra_routemap_config_write_protocol(vty, zvrf); + router_id_write(vty, zvrf); + + if (zvrf_id(zvrf) != VRF_DEFAULT) + vty_endframe(vty, "exit-vrf\n!\n"); + else + vty_out(vty, "!\n"); + } + return 0; +} + +DEFPY (vrf_netns, + vrf_netns_cmd, + "netns NAME$netns_name", + "Attach VRF to a Namespace\n" + "The file name in " NS_RUN_DIR ", or a full pathname\n") +{ + char *pathname = ns_netns_pathname(vty, netns_name); + int ret; + + VTY_DECLVAR_CONTEXT(vrf, vrf); + + if (!pathname) + return CMD_WARNING_CONFIG_FAILED; + + frr_with_privs(&zserv_privs) { + ret = zebra_vrf_netns_handler_create( + vty, vrf, pathname, NS_UNKNOWN, NS_UNKNOWN, NS_UNKNOWN); + } + + return ret; +} + +DEFUN (no_vrf_netns, + no_vrf_netns_cmd, + "no netns [NAME]", + NO_STR + "Detach VRF from a Namespace\n" + "The file name in " NS_RUN_DIR ", or a full pathname\n") +{ + struct ns *ns = NULL; + + VTY_DECLVAR_CONTEXT(vrf, vrf); + + if (!vrf_is_backend_netns()) { + vty_out(vty, "VRF backend is not Netns. 
Aborting\n"); + return CMD_WARNING_CONFIG_FAILED; + } + if (!vrf->ns_ctxt) { + vty_out(vty, "VRF %s(%u) is not configured with NetNS\n", + vrf->name, vrf->vrf_id); + return CMD_WARNING_CONFIG_FAILED; + } + + ns = (struct ns *)vrf->ns_ctxt; + + ns->vrf_ctxt = NULL; + vrf_disable(vrf); + /* vrf ID from VRF is necessary for Zebra + * so that propagate to other clients is done + */ + ns_delete(ns); + vrf->ns_ctxt = NULL; + return CMD_SUCCESS; +} + +/* if ns_id is different and not VRF_UNKNOWN, + * then update vrf identifier, and enable VRF + */ +static void vrf_update_vrf_id(ns_id_t ns_id, void *opaqueptr) +{ + ns_id_t vrf_id = (vrf_id_t)ns_id; + vrf_id_t old_vrf_id; + struct vrf *vrf = (struct vrf *)opaqueptr; + + if (!vrf) + return; + old_vrf_id = vrf->vrf_id; + if (vrf_id == vrf->vrf_id) + return; + if (vrf->vrf_id != VRF_UNKNOWN) + RB_REMOVE(vrf_id_head, &vrfs_by_id, vrf); + vrf->vrf_id = vrf_id; + RB_INSERT(vrf_id_head, &vrfs_by_id, vrf); + if (old_vrf_id == VRF_UNKNOWN) + vrf_enable(vrf); +} + +int zebra_vrf_netns_handler_create(struct vty *vty, struct vrf *vrf, + char *pathname, ns_id_t ns_id, + ns_id_t internal_ns_id, + ns_id_t rel_def_ns_id) +{ + struct ns *ns = NULL; + + if (!vrf) + return CMD_WARNING_CONFIG_FAILED; + if (vrf->vrf_id != VRF_UNKNOWN && vrf->ns_ctxt == NULL) { + if (vty) + vty_out(vty, + "VRF %u is already configured with VRF %s\n", + vrf->vrf_id, vrf->name); + else + zlog_info("VRF %u is already configured with VRF %s", + vrf->vrf_id, vrf->name); + return CMD_WARNING_CONFIG_FAILED; + } + if (vrf->ns_ctxt != NULL) { + ns = (struct ns *)vrf->ns_ctxt; + if (!strcmp(ns->name, pathname)) { + if (vty) + vty_out(vty, + "VRF %u already configured with NETNS %s\n", + vrf->vrf_id, ns->name); + else + zlog_info( + "VRF %u already configured with NETNS %s", + vrf->vrf_id, ns->name); + return CMD_WARNING; + } + } + ns = ns_lookup_name(pathname); + if (ns && ns->vrf_ctxt) { + struct vrf *vrf2 = (struct vrf *)ns->vrf_ctxt; + + if (vrf2 == vrf) + return 
CMD_SUCCESS; + if (vty) + vty_out(vty, + "NS %s is already configured with VRF %u(%s)\n", + ns->name, vrf2->vrf_id, vrf2->name); + else + zlog_info("NS %s is already configured with VRF %u(%s)", + ns->name, vrf2->vrf_id, vrf2->name); + return CMD_WARNING_CONFIG_FAILED; + } + ns = ns_get_created(ns, pathname, ns_id); + ns->internal_ns_id = internal_ns_id; + ns->relative_default_ns = rel_def_ns_id; + ns->vrf_ctxt = (void *)vrf; + vrf->ns_ctxt = (void *)ns; + /* update VRF netns NAME */ + strlcpy(vrf->data.l.netns_name, basename(pathname), NS_NAMSIZ); + + if (!ns_enable(ns, vrf_update_vrf_id)) { + if (vty) + vty_out(vty, "Can not associate NS %u with NETNS %s\n", + ns->ns_id, ns->name); + else + zlog_info("Can not associate NS %u with NETNS %s", + ns->ns_id, ns->name); + return CMD_WARNING_CONFIG_FAILED; + } + + return CMD_SUCCESS; +} + +/* Zebra VRF initialization. */ +void zebra_vrf_init(void) +{ + vrf_init(zebra_vrf_new, zebra_vrf_enable, zebra_vrf_disable, + zebra_vrf_delete); + + hook_register(zserv_client_close, release_daemon_table_chunks); + + vrf_cmd_init(vrf_config_write); + + if (vrf_is_backend_netns() && ns_have_netns()) { + /* Install NS commands. */ + install_element(VRF_NODE, &vrf_netns_cmd); + install_element(VRF_NODE, &no_vrf_netns_cmd); + } +} diff --git a/zebra/zebra_vrf.h b/zebra/zebra_vrf.h new file mode 100644 index 0000000..02e3c19 --- /dev/null +++ b/zebra/zebra_vrf.h @@ -0,0 +1,275 @@ +/* + * Zebra Vrf Header + * Copyright (C) 2016 Cumulus Networks + * Donald Sharp + * + * This file is part of Quagga. + * + * Quagga is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License as published by the + * Free Software Foundation; either version 2, or (at your option) any + * later version. + * + * Quagga is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. 
See the GNU + * General Public License for more details. + * + * You should have received a copy of the GNU General Public License along + * with this program; see the file COPYING; if not, write to the Free Software + * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA + */ +#if !defined(__ZEBRA_VRF_H__) +#define __ZEBRA_VRF_H__ + +#include "vxlan.h" + +#include <zebra/zebra_ns.h> +#include <zebra/zebra_pw.h> +#include <zebra/rtadv.h> +#include <lib/vxlan.h> + +#ifdef __cplusplus +extern "C" { +#endif + +/* MPLS (Segment Routing) global block */ +struct mpls_srgb { + uint32_t start_label; + uint32_t end_label; +}; + +struct zebra_rmap { + char *name; + struct route_map *map; +}; + +PREDECL_RBTREE_UNIQ(otable); + +struct other_route_table { + struct otable_item next; + + afi_t afi; + safi_t safi; + uint32_t table_id; + + struct route_table *table; +}; + +/* Routing table instance. */ +struct zebra_vrf { + /* Back pointer */ + struct vrf *vrf; + + /* Description. */ + char *desc; + + /* FIB identifier. */ + uint8_t fib_id; + + /* Flags. */ + uint16_t flags; +#define ZEBRA_VRF_RETAIN (1 << 0) +#define ZEBRA_PIM_SEND_VXLAN_SG (1 << 1) + + uint32_t table_id; + + /* Routing table. */ + struct route_table *table[AFI_MAX][SAFI_MAX]; + + /* Recursive Nexthop table */ + struct route_table *rnh_table[AFI_MAX]; + struct route_table *rnh_table_multicast[AFI_MAX]; + + struct otable_head other_tables; + + /* 2nd pointer type used primarily to quell a warning on + * ALL_LIST_ELEMENTS_RO + */ + struct list _rid_all_sorted_list; + struct list _rid_lo_sorted_list; + struct list *rid_all_sorted_list; + struct list *rid_lo_sorted_list; + struct prefix rid_user_assigned; + struct list _rid6_all_sorted_list; + struct list _rid6_lo_sorted_list; + struct list *rid6_all_sorted_list; + struct list *rid6_lo_sorted_list; + struct prefix rid6_user_assigned; + + /* + * Back pointer to the owning namespace. 
+ */ + struct zebra_ns *zns; + + /* MPLS Label to handle L3VPN <-> vrf popping */ + mpls_label_t label[AFI_MAX]; + uint8_t label_proto[AFI_MAX]; + + /* MPLS static LSP config table */ + struct hash *slsp_table; + + /* MPLS label forwarding table */ + struct hash *lsp_table; + + /* MPLS FEC binding table */ + struct route_table *fec_table[AFI_MAX]; + + /* MPLS Segment Routing Global block */ + struct mpls_srgb mpls_srgb; + + /* Pseudowires. */ + struct zebra_pw_head pseudowires; + struct zebra_static_pw_head static_pseudowires; + + struct zebra_rmap proto_rm[AFI_MAX][ZEBRA_ROUTE_MAX + 1]; + struct zebra_rmap nht_rm[AFI_MAX][ZEBRA_ROUTE_MAX + 1]; + + /* MPLS processing flags */ + uint16_t mpls_flags; +#define MPLS_FLAG_SCHEDULE_LSPS (1 << 0) + + /* + * EVPN hash table. Only in the EVPN instance. + */ + struct hash *evpn_table; + + /* + * Whether EVPN is enabled or not. Only in the EVPN instance. + */ + int advertise_all_vni; + + /* + * Whether we are advertising g/w macip in EVPN or not. + * Only in the EVPN instance. + */ + int advertise_gw_macip; + + int advertise_svi_macip; + + /* l3-vni info */ + vni_t l3vni; + + /* pim mroutes installed for vxlan flooding */ + struct hash *vxlan_sg_table; + + bool dup_addr_detect; + + int dad_time; + uint32_t dad_max_moves; + bool dad_freeze; + uint32_t dad_freeze_time; + + /* + * Flooding mechanism for BUM packets for VxLAN-EVPN. 
+ */ + enum vxlan_flood_control vxlan_flood_ctrl; + + /* Install stats */ + uint64_t installs; + uint64_t removals; + uint64_t installs_queued; + uint64_t removals_queued; + uint64_t neigh_updates; + uint64_t lsp_installs_queued; + uint64_t lsp_removals_queued; + uint64_t lsp_installs; + uint64_t lsp_removals; + + struct table_manager *tbl_mgr; + + struct rtadv rtadv; + + bool zebra_rnh_ip_default_route; + bool zebra_rnh_ipv6_default_route; +}; +#define PROTO_RM_NAME(zvrf, afi, rtype) zvrf->proto_rm[afi][rtype].name +#define NHT_RM_NAME(zvrf, afi, rtype) zvrf->nht_rm[afi][rtype].name +#define PROTO_RM_MAP(zvrf, afi, rtype) zvrf->proto_rm[afi][rtype].map +#define NHT_RM_MAP(zvrf, afi, rtype) zvrf->nht_rm[afi][rtype].map + +/* + * special macro to allow us to get the correct zebra_vrf + */ +#define ZEBRA_DECLVAR_CONTEXT_VRF(vrfptr, zvrfptr) \ + VTY_DECLVAR_CONTEXT_VRF(vrfptr); \ + struct zebra_vrf *zvrfptr = vrfptr->info; \ + MACRO_REQUIRE_SEMICOLON() /* end */ + +static inline vrf_id_t zvrf_id(struct zebra_vrf *zvrf) +{ + if (!zvrf || !zvrf->vrf) + return VRF_DEFAULT; + return zvrf->vrf->vrf_id; +} + +static inline const char *zvrf_ns_name(struct zebra_vrf *zvrf) +{ + if (!zvrf->vrf || !zvrf->vrf->ns_ctxt) + return NULL; + return ns_get_name((struct ns *)zvrf->vrf->ns_ctxt); +} + +static inline const char *zvrf_name(struct zebra_vrf *zvrf) +{ + if (!zvrf || !zvrf->vrf) + return "Unknown"; + return zvrf->vrf->name; +} + +static inline bool zvrf_is_active(struct zebra_vrf *zvrf) +{ + return zvrf->vrf->status & VRF_ACTIVE; +} + +static inline int +zvrf_other_table_compare_func(const struct other_route_table *a, + const struct other_route_table *b) +{ + if (a->afi != b->afi) + return a->afi - b->afi; + + if (a->safi != b->safi) + return a->safi - b->safi; + + if (a->table_id != b->table_id) + return a->table_id - b->table_id; + + return 0; +} + +DECLARE_RBTREE_UNIQ(otable, struct other_route_table, next, + zvrf_other_table_compare_func); + +extern struct route_table * 
+zebra_vrf_lookup_table_with_table_id(afi_t afi, safi_t safi, vrf_id_t vrf_id, + uint32_t table_id); +extern struct route_table *zebra_vrf_get_table_with_table_id(afi_t afi, + safi_t safi, + vrf_id_t vrf_id, + uint32_t table_id); + +extern void zebra_vrf_update_all(struct zserv *client); +extern struct zebra_vrf *zebra_vrf_lookup_by_id(vrf_id_t vrf_id); +extern struct zebra_vrf *zebra_vrf_lookup_by_name(const char *); +extern struct zebra_vrf *zebra_vrf_alloc(struct vrf *vrf); +extern struct route_table *zebra_vrf_table(afi_t, safi_t, vrf_id_t); + +/* + * API to associate a VRF with a NETNS. + * Called either from vty or through discovery. + */ +extern int zebra_vrf_netns_handler_create(struct vty *vty, struct vrf *vrf, + char *pathname, ns_id_t ext_ns_id, + ns_id_t ns_id, ns_id_t rel_def_ns_id); + +extern void zebra_vrf_init(void); + +extern void zebra_rtable_node_cleanup(struct route_table *table, + struct route_node *node); + +#ifdef __cplusplus +} +#endif + +#endif /* ZEBRA_VRF_H */ diff --git a/zebra/zebra_vty.c b/zebra/zebra_vty.c new file mode 100644 index 0000000..525e036 --- /dev/null +++ b/zebra/zebra_vty.c @@ -0,0 +1,4623 @@ +/* Zebra VTY functions + * Copyright (C) 2002 Kunihiro Ishiguro + * + * This file is part of GNU Zebra. + * + * GNU Zebra is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License as published by the + * Free Software Foundation; either version 2, or (at your option) any + * later version. + * + * GNU Zebra is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. 
+ * + * You should have received a copy of the GNU General Public License along + * with this program; see the file COPYING; if not, write to the Free Software + * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA + */ + +#include <zebra.h> + +#include "memory.h" +#include "if.h" +#include "prefix.h" +#include "command.h" +#include "table.h" +#include "rib.h" +#include "nexthop.h" +#include "vrf.h" +#include "linklist.h" +#include "mpls.h" +#include "routemap.h" +#include "srcdest_table.h" +#include "vxlan.h" +#include "termtable.h" + +#include "zebra/zebra_router.h" +#include "zebra/zserv.h" +#include "zebra/zebra_vrf.h" +#include "zebra/zebra_mpls.h" +#include "zebra/zebra_rnh.h" +#include "zebra/redistribute.h" +#include "zebra/zebra_routemap.h" +#include "lib/json.h" +#include "lib/route_opaque.h" +#include "zebra/zebra_vxlan.h" +#include "zebra/zebra_evpn_mh.h" +#ifndef VTYSH_EXTRACT_PL +#include "zebra/zebra_vty_clippy.c" +#endif +#include "zebra/zserv.h" +#include "zebra/router-id.h" +#include "zebra/ipforward.h" +#include "zebra/zebra_vxlan_private.h" +#include "zebra/zebra_pbr.h" +#include "zebra/zebra_nhg.h" +#include "zebra/zebra_evpn_mh.h" +#include "zebra/interface.h" +#include "northbound_cli.h" +#include "zebra/zebra_nb.h" +#include "zebra/kernel_netlink.h" +#include "zebra/if_netlink.h" +#include "zebra/table_manager.h" +#include "zebra/zebra_script.h" +#include "zebra/rtadv.h" +#include "zebra/zebra_neigh.h" + +/* context to manage dumps in multiple tables or vrfs */ +struct route_show_ctx { + bool multi; /* dump multiple tables or vrf */ + bool header_done; /* common header already displayed */ +}; + +static int do_show_ip_route(struct vty *vty, const char *vrf_name, afi_t afi, + safi_t safi, bool use_fib, bool use_json, + route_tag_t tag, + const struct prefix *longer_prefix_p, + bool supernets_only, int type, + unsigned short ospf_instance_id, uint32_t tableid, + bool show_ng, struct route_show_ctx *ctx); +static void 
vty_show_ip_route_detail(struct vty *vty, struct route_node *rn, + int mcast, bool use_fib, bool show_ng); +static void vty_show_ip_route_summary(struct vty *vty, + struct route_table *table, bool use_json); +static void vty_show_ip_route_summary_prefix(struct vty *vty, + struct route_table *table, + bool use_json); +/* Helper api to format a nexthop in the 'detailed' output path. */ +static void show_nexthop_detail_helper(struct vty *vty, + const struct route_entry *re, + const struct nexthop *nexthop, + bool is_backup); + +static void show_ip_route_dump_vty(struct vty *vty, struct route_table *table); +static void show_ip_route_nht_dump(struct vty *vty, struct nexthop *nexthop, + struct route_entry *re, unsigned int num); + +DEFUN (ip_multicast_mode, + ip_multicast_mode_cmd, + "ip multicast rpf-lookup-mode <urib-only|mrib-only|mrib-then-urib|lower-distance|longer-prefix>", + IP_STR + "Multicast options\n" + "RPF lookup behavior\n" + "Lookup in unicast RIB only\n" + "Lookup in multicast RIB only\n" + "Try multicast RIB first, fall back to unicast RIB\n" + "Lookup both, use entry with lower distance\n" + "Lookup both, use entry with longer prefix\n") +{ + char *mode = argv[3]->text; + + if (strmatch(mode, "urib-only")) + multicast_mode_ipv4_set(MCAST_URIB_ONLY); + else if (strmatch(mode, "mrib-only")) + multicast_mode_ipv4_set(MCAST_MRIB_ONLY); + else if (strmatch(mode, "mrib-then-urib")) + multicast_mode_ipv4_set(MCAST_MIX_MRIB_FIRST); + else if (strmatch(mode, "lower-distance")) + multicast_mode_ipv4_set(MCAST_MIX_DISTANCE); + else if (strmatch(mode, "longer-prefix")) + multicast_mode_ipv4_set(MCAST_MIX_PFXLEN); + else { + vty_out(vty, "Invalid mode specified\n"); + return CMD_WARNING_CONFIG_FAILED; + } + + return CMD_SUCCESS; +} + +DEFUN (no_ip_multicast_mode, + no_ip_multicast_mode_cmd, + "no ip multicast rpf-lookup-mode [<urib-only|mrib-only|mrib-then-urib|lower-distance|longer-prefix>]", + NO_STR + IP_STR + "Multicast options\n" + "RPF lookup behavior\n" + 
"Lookup in unicast RIB only\n" + "Lookup in multicast RIB only\n" + "Try multicast RIB first, fall back to unicast RIB\n" + "Lookup both, use entry with lower distance\n" + "Lookup both, use entry with longer prefix\n") +{ + multicast_mode_ipv4_set(MCAST_NO_CONFIG); + return CMD_SUCCESS; +} + + +DEFUN (show_ip_rpf, + show_ip_rpf_cmd, + "show ip rpf [json]", + SHOW_STR + IP_STR + "Display RPF information for multicast source\n" + JSON_STR) +{ + bool uj = use_json(argc, argv); + struct route_show_ctx ctx = { + .multi = false, + }; + + return do_show_ip_route(vty, VRF_DEFAULT_NAME, AFI_IP, SAFI_MULTICAST, + false, uj, 0, NULL, false, 0, 0, 0, false, + &ctx); +} + +DEFUN (show_ip_rpf_addr, + show_ip_rpf_addr_cmd, + "show ip rpf A.B.C.D", + SHOW_STR + IP_STR + "Display RPF information for multicast source\n" + "IP multicast source address (e.g. 10.0.0.0)\n") +{ + int idx_ipv4 = 3; + struct in_addr addr; + struct route_node *rn; + struct route_entry *re; + int ret; + + ret = inet_aton(argv[idx_ipv4]->arg, &addr); + if (ret == 0) { + vty_out(vty, "%% Malformed address\n"); + return CMD_WARNING; + } + + re = rib_match_ipv4_multicast(VRF_DEFAULT, addr, &rn); + + if (re) + vty_show_ip_route_detail(vty, rn, 1, false, false); + else + vty_out(vty, "%% No match for RPF lookup\n"); + + return CMD_SUCCESS; +} + +static char re_status_output_char(const struct route_entry *re, + const struct nexthop *nhop, + bool is_fib) +{ + if (CHECK_FLAG(re->status, ROUTE_ENTRY_INSTALLED)) { + bool star_p = false; + + if (nhop && + !CHECK_FLAG(nhop->flags, NEXTHOP_FLAG_DUPLICATE) && + !CHECK_FLAG(nhop->flags, NEXTHOP_FLAG_RECURSIVE)) { + /* More-specific test for 'fib' output */ + if (is_fib) { + star_p = !!CHECK_FLAG(nhop->flags, + NEXTHOP_FLAG_FIB); + } else + star_p = true; + } + + if (zrouter.asic_offloaded && + CHECK_FLAG(re->status, ROUTE_ENTRY_QUEUED)) + return 'q'; + + if (zrouter.asic_offloaded + && CHECK_FLAG(re->flags, ZEBRA_FLAG_TRAPPED)) + return 't'; + + if (zrouter.asic_offloaded 
+ && CHECK_FLAG(re->flags, ZEBRA_FLAG_OFFLOAD_FAILED)) + return 'o'; + + if (star_p) + return '*'; + else + return ' '; + } + + if (CHECK_FLAG(re->status, ROUTE_ENTRY_FAILED)) { + if (CHECK_FLAG(re->status, ROUTE_ENTRY_QUEUED)) + return 'q'; + + return 'r'; + } + + if (CHECK_FLAG(re->status, ROUTE_ENTRY_QUEUED)) + return 'q'; + + return ' '; +} + +/* + * Show backup nexthop info, in the 'detailed' output path + */ +static void show_nh_backup_helper(struct vty *vty, + const struct route_entry *re, + const struct nexthop *nexthop) +{ + const struct nexthop *start, *backup, *temp; + int i, idx; + + /* Double-check that there _is_ a backup */ + if (!CHECK_FLAG(nexthop->flags, NEXTHOP_FLAG_HAS_BACKUP) || + re->nhe->backup_info == NULL || re->nhe->backup_info->nhe == NULL || + re->nhe->backup_info->nhe->nhg.nexthop == NULL) + return; + + /* Locate the backup nexthop(s) */ + start = re->nhe->backup_info->nhe->nhg.nexthop; + for (i = 0; i < nexthop->backup_num; i++) { + /* Format the backup(s) (indented) */ + backup = start; + for (idx = 0; idx < nexthop->backup_idx[i]; idx++) { + backup = backup->next; + if (backup == NULL) + break; + } + + /* It's possible for backups to be recursive too, + * so walk the recursive resolution list if present. + */ + temp = backup; + while (backup) { + vty_out(vty, " "); + show_nexthop_detail_helper(vty, re, backup, + true /*backup*/); + vty_out(vty, "\n"); + + if (backup->resolved && temp == backup) + backup = backup->resolved; + else + backup = nexthop_next(backup); + + if (backup == temp->next) + break; + } + } + +} + +/* + * Helper api to format output for a nexthop, used in the 'detailed' + * output path. + */ +static void show_nexthop_detail_helper(struct vty *vty, + const struct route_entry *re, + const struct nexthop *nexthop, + bool is_backup) +{ + char addrstr[32]; + char buf[MPLS_LABEL_STRLEN]; + int i; + + if (is_backup) + vty_out(vty, " b%s", + nexthop->rparent ? 
" " : ""); + else + vty_out(vty, " %c%s", + re_status_output_char(re, nexthop, false), + nexthop->rparent ? " " : ""); + + switch (nexthop->type) { + case NEXTHOP_TYPE_IPV4: + case NEXTHOP_TYPE_IPV4_IFINDEX: + vty_out(vty, " %pI4", + &nexthop->gate.ipv4); + if (nexthop->ifindex) + vty_out(vty, ", via %s", + ifindex2ifname( + nexthop->ifindex, + nexthop->vrf_id)); + break; + case NEXTHOP_TYPE_IPV6: + case NEXTHOP_TYPE_IPV6_IFINDEX: + vty_out(vty, " %s", + inet_ntop(AF_INET6, &nexthop->gate.ipv6, + buf, sizeof(buf))); + if (nexthop->ifindex) + vty_out(vty, ", via %s", + ifindex2ifname( + nexthop->ifindex, + nexthop->vrf_id)); + break; + + case NEXTHOP_TYPE_IFINDEX: + vty_out(vty, " directly connected, %s", + ifindex2ifname(nexthop->ifindex, + nexthop->vrf_id)); + break; + case NEXTHOP_TYPE_BLACKHOLE: + vty_out(vty, " unreachable"); + switch (nexthop->bh_type) { + case BLACKHOLE_REJECT: + vty_out(vty, " (ICMP unreachable)"); + break; + case BLACKHOLE_ADMINPROHIB: + vty_out(vty, + " (ICMP admin-prohibited)"); + break; + case BLACKHOLE_NULL: + vty_out(vty, " (blackhole)"); + break; + case BLACKHOLE_UNSPEC: + break; + } + break; + } + + if (re->vrf_id != nexthop->vrf_id) { + struct vrf *vrf = vrf_lookup_by_id(nexthop->vrf_id); + + vty_out(vty, "(vrf %s)", VRF_LOGNAME(vrf)); + } + + if (CHECK_FLAG(nexthop->flags, NEXTHOP_FLAG_DUPLICATE)) + vty_out(vty, " (duplicate nexthop removed)"); + + if (!CHECK_FLAG(nexthop->flags, NEXTHOP_FLAG_ACTIVE)) + vty_out(vty, " inactive"); + + if (CHECK_FLAG(nexthop->flags, NEXTHOP_FLAG_ONLINK)) + vty_out(vty, " onlink"); + + if (CHECK_FLAG(nexthop->flags, NEXTHOP_FLAG_LINKDOWN)) + vty_out(vty, " linkdown"); + + if (CHECK_FLAG(nexthop->flags, NEXTHOP_FLAG_RECURSIVE)) + vty_out(vty, " (recursive)"); + + /* Source specified? 
*/ + switch (nexthop->type) { + case NEXTHOP_TYPE_IPV4: + case NEXTHOP_TYPE_IPV4_IFINDEX: + if (nexthop->src.ipv4.s_addr) { + if (inet_ntop(AF_INET, &nexthop->src.ipv4, + addrstr, sizeof(addrstr))) + vty_out(vty, ", src %s", + addrstr); + } + break; + + case NEXTHOP_TYPE_IPV6: + case NEXTHOP_TYPE_IPV6_IFINDEX: + if (!IPV6_ADDR_SAME(&nexthop->src.ipv6, + &in6addr_any)) { + if (inet_ntop(AF_INET6, &nexthop->src.ipv6, + addrstr, sizeof(addrstr))) + vty_out(vty, ", src %s", + addrstr); + } + break; + + default: + break; + } + + if (re->nexthop_mtu) + vty_out(vty, ", mtu %u", re->nexthop_mtu); + + /* Label information */ + if (nexthop->nh_label && nexthop->nh_label->num_labels) { + vty_out(vty, ", label %s", + mpls_label2str(nexthop->nh_label->num_labels, + nexthop->nh_label->label, buf, + sizeof(buf), 1 /*pretty*/)); + } + + if (nexthop->weight) + vty_out(vty, ", weight %u", nexthop->weight); + + if (CHECK_FLAG(nexthop->flags, NEXTHOP_FLAG_HAS_BACKUP)) { + vty_out(vty, ", backup %d", nexthop->backup_idx[0]); + + for (i = 1; i < nexthop->backup_num; i++) + vty_out(vty, ",%d", nexthop->backup_idx[i]); + } +} + +static void zebra_show_ip_route_opaque(struct vty *vty, struct route_entry *re, + struct json_object *json) +{ + struct bgp_zebra_opaque bzo = {}; + struct ospf_zebra_opaque ozo = {}; + + if (!re->opaque) + return; + + switch (re->type) { + case ZEBRA_ROUTE_SHARP: + if (json) + json_object_string_add(json, "opaque", + (char *)re->opaque->data); + else + vty_out(vty, " Opaque Data: %s", + (char *)re->opaque->data); + break; + + case ZEBRA_ROUTE_BGP: + memcpy(&bzo, re->opaque->data, re->opaque->length); + + if (json) { + json_object_string_add(json, "asPath", bzo.aspath); + json_object_string_add(json, "communities", + bzo.community); + json_object_string_add(json, "largeCommunities", + bzo.lcommunity); + json_object_string_add(json, "selectionReason", + bzo.selection_reason); + } else { + vty_out(vty, " AS-Path : %s\n", bzo.aspath); + + if (bzo.community[0] != 
'\0') + vty_out(vty, " Communities : %s\n", + bzo.community); + + if (bzo.lcommunity[0] != '\0') + vty_out(vty, " Large-Communities: %s\n", + bzo.lcommunity); + + vty_out(vty, " Selection reason : %s\n", + bzo.selection_reason); + } + break; + case ZEBRA_ROUTE_OSPF: + case ZEBRA_ROUTE_OSPF6: + memcpy(&ozo, re->opaque->data, re->opaque->length); + + if (json) { + json_object_string_add(json, "ospfPathType", + ozo.path_type); + if (ozo.area_id[0] != '\0') + json_object_string_add(json, "ospfAreaId", + ozo.area_id); + if (ozo.tag[0] != '\0') + json_object_string_add(json, "ospfTag", + ozo.tag); + } else { + vty_out(vty, " OSPF path type : %s\n", + ozo.path_type); + if (ozo.area_id[0] != '\0') + vty_out(vty, " OSPF area ID : %s\n", + ozo.area_id); + if (ozo.tag[0] != '\0') + vty_out(vty, " OSPF tag : %s\n", + ozo.tag); + } + break; + default: + break; + } +} + +static void uptime2str(time_t uptime, char *buf, size_t bufsize) +{ + time_t cur; + + cur = monotime(NULL); + cur -= uptime; + + frrtime_to_interval(cur, buf, bufsize); +} + +/* New RIB. Detailed information for IPv4 route. */ +static void vty_show_ip_route_detail(struct vty *vty, struct route_node *rn, + int mcast, bool use_fib, bool show_ng) +{ + struct route_entry *re; + struct nexthop *nexthop; + char buf[SRCDEST2STR_BUFFER]; + struct zebra_vrf *zvrf; + rib_dest_t *dest; + + dest = rib_dest_from_rnode(rn); + + RNODE_FOREACH_RE (rn, re) { + /* + * If re not selected for forwarding, skip re + * for "show ip/ipv6 fib <prefix>" + */ + if (use_fib && re != dest->selected_fib) + continue; + + const char *mcast_info = ""; + if (mcast) { + struct rib_table_info *info = + srcdest_rnode_table_info(rn); + mcast_info = (info->safi == SAFI_MULTICAST) + ? 
" using Multicast RIB" + : " using Unicast RIB"; + } + + vty_out(vty, "Routing entry for %s%s\n", + srcdest_rnode2str(rn, buf, sizeof(buf)), mcast_info); + vty_out(vty, " Known via \"%s", zebra_route_string(re->type)); + if (re->instance) + vty_out(vty, "[%d]", re->instance); + vty_out(vty, "\""); + vty_out(vty, ", distance %u, metric %u", re->distance, + re->metric); + if (re->tag) { + vty_out(vty, ", tag %u", re->tag); +#if defined(SUPPORT_REALMS) + if (re->tag > 0 && re->tag <= 255) + vty_out(vty, "(realm)"); +#endif + } + if (re->mtu) + vty_out(vty, ", mtu %u", re->mtu); + if (re->vrf_id != VRF_DEFAULT) { + zvrf = vrf_info_lookup(re->vrf_id); + vty_out(vty, ", vrf %s", zvrf_name(zvrf)); + } + if (CHECK_FLAG(re->flags, ZEBRA_FLAG_SELECTED)) + vty_out(vty, ", best"); + vty_out(vty, "\n"); + + uptime2str(re->uptime, buf, sizeof(buf)); + + vty_out(vty, " Last update %s ago\n", buf); + + if (show_ng) { + vty_out(vty, " Nexthop Group ID: %u\n", re->nhe_id); + if (re->nhe_installed_id != 0 + && re->nhe_id != re->nhe_installed_id) + vty_out(vty, + " Installed Nexthop Group ID: %u\n", + re->nhe_installed_id); + } + + for (ALL_NEXTHOPS(re->nhe->nhg, nexthop)) { + /* Use helper to format each nexthop */ + show_nexthop_detail_helper(vty, re, nexthop, + false /*not backup*/); + vty_out(vty, "\n"); + + /* Include backup(s), if present */ + if (CHECK_FLAG(nexthop->flags, NEXTHOP_FLAG_HAS_BACKUP)) + show_nh_backup_helper(vty, re, nexthop); + } + zebra_show_ip_route_opaque(vty, re, NULL); + + vty_out(vty, "\n"); + } +} + +/* + * Helper for nexthop output, used in the 'show ip route' path + */ +static void show_route_nexthop_helper(struct vty *vty, + const struct route_entry *re, + const struct nexthop *nexthop) +{ + char buf[MPLS_LABEL_STRLEN]; + int i; + + switch (nexthop->type) { + case NEXTHOP_TYPE_IPV4: + case NEXTHOP_TYPE_IPV4_IFINDEX: + vty_out(vty, " via %pI4", &nexthop->gate.ipv4); + if (nexthop->ifindex) + vty_out(vty, ", %s", + ifindex2ifname(nexthop->ifindex, + 
nexthop->vrf_id)); + break; + case NEXTHOP_TYPE_IPV6: + case NEXTHOP_TYPE_IPV6_IFINDEX: + vty_out(vty, " via %s", + inet_ntop(AF_INET6, &nexthop->gate.ipv6, buf, + sizeof(buf))); + if (nexthop->ifindex) + vty_out(vty, ", %s", + ifindex2ifname(nexthop->ifindex, + nexthop->vrf_id)); + break; + + case NEXTHOP_TYPE_IFINDEX: + vty_out(vty, " is directly connected, %s", + ifindex2ifname(nexthop->ifindex, + nexthop->vrf_id)); + break; + case NEXTHOP_TYPE_BLACKHOLE: + vty_out(vty, " unreachable"); + switch (nexthop->bh_type) { + case BLACKHOLE_REJECT: + vty_out(vty, " (ICMP unreachable)"); + break; + case BLACKHOLE_ADMINPROHIB: + vty_out(vty, " (ICMP admin-prohibited)"); + break; + case BLACKHOLE_NULL: + vty_out(vty, " (blackhole)"); + break; + case BLACKHOLE_UNSPEC: + break; + } + break; + } + + if ((re == NULL || (nexthop->vrf_id != re->vrf_id))) + vty_out(vty, " (vrf %s)", vrf_id_to_name(nexthop->vrf_id)); + + if (!CHECK_FLAG(nexthop->flags, NEXTHOP_FLAG_ACTIVE)) + vty_out(vty, " inactive"); + + if (CHECK_FLAG(nexthop->flags, NEXTHOP_FLAG_ONLINK)) + vty_out(vty, " onlink"); + + if (CHECK_FLAG(nexthop->flags, NEXTHOP_FLAG_LINKDOWN)) + vty_out(vty, " linkdown"); + + if (CHECK_FLAG(nexthop->flags, NEXTHOP_FLAG_RECURSIVE)) + vty_out(vty, " (recursive)"); + + switch (nexthop->type) { + case NEXTHOP_TYPE_IPV4: + case NEXTHOP_TYPE_IPV4_IFINDEX: + if (nexthop->src.ipv4.s_addr) { + if (inet_ntop(AF_INET, &nexthop->src.ipv4, buf, + sizeof(buf))) + vty_out(vty, ", src %s", buf); + /* SR-TE information */ + if (nexthop->srte_color) + vty_out(vty, ", SR-TE color %u", + nexthop->srte_color); + } + break; + case NEXTHOP_TYPE_IPV6: + case NEXTHOP_TYPE_IPV6_IFINDEX: + if (!IPV6_ADDR_SAME(&nexthop->src.ipv6, &in6addr_any)) { + if (inet_ntop(AF_INET6, &nexthop->src.ipv6, buf, + sizeof(buf))) + vty_out(vty, ", src %s", buf); + } + break; + default: + break; + } + + /* Label information */ + if (nexthop->nh_label && nexthop->nh_label->num_labels) { + vty_out(vty, ", label %s", + 
mpls_label2str(nexthop->nh_label->num_labels, + nexthop->nh_label->label, buf, + sizeof(buf), 1)); + } + + if (nexthop->nh_srv6) { + seg6local_context2str(buf, sizeof(buf), + &nexthop->nh_srv6->seg6local_ctx, + nexthop->nh_srv6->seg6local_action); + vty_out(vty, ", seg6local %s %s", seg6local_action2str( + nexthop->nh_srv6->seg6local_action), buf); + + inet_ntop(AF_INET6, &nexthop->nh_srv6->seg6_segs, buf, + sizeof(buf)); + vty_out(vty, ", seg6 %s", buf); + } + + if (nexthop->weight) + vty_out(vty, ", weight %u", nexthop->weight); + + if (CHECK_FLAG(nexthop->flags, NEXTHOP_FLAG_HAS_BACKUP)) { + vty_out(vty, ", backup %d", nexthop->backup_idx[0]); + + for (i = 1; i < nexthop->backup_num; i++) + vty_out(vty, ",%d", nexthop->backup_idx[i]); + } +} + +/* + * Render a nexthop into a json object; the caller allocates and owns + * the json object memory. + */ +static void show_nexthop_json_helper(json_object *json_nexthop, + const struct nexthop *nexthop, + const struct route_entry *re) +{ + char buf[SRCDEST2STR_BUFFER]; + json_object *json_labels = NULL; + json_object *json_backups = NULL; + json_object *json_seg6local = NULL; + json_object *json_seg6 = NULL; + int i; + + json_object_int_add(json_nexthop, "flags", + nexthop->flags); + + if (CHECK_FLAG(nexthop->flags, NEXTHOP_FLAG_DUPLICATE)) + json_object_boolean_true_add(json_nexthop, + "duplicate"); + + if (CHECK_FLAG(nexthop->flags, NEXTHOP_FLAG_FIB)) + json_object_boolean_true_add(json_nexthop, + "fib"); + + switch (nexthop->type) { + case NEXTHOP_TYPE_IPV4: + case NEXTHOP_TYPE_IPV4_IFINDEX: + json_object_string_addf(json_nexthop, "ip", "%pI4", + &nexthop->gate.ipv4); + json_object_string_add(json_nexthop, "afi", + "ipv4"); + + if (nexthop->ifindex) { + json_object_int_add(json_nexthop, + "interfaceIndex", + nexthop->ifindex); + json_object_string_add( + json_nexthop, "interfaceName", + ifindex2ifname( + nexthop->ifindex, + nexthop->vrf_id)); + } + break; + case NEXTHOP_TYPE_IPV6: + case NEXTHOP_TYPE_IPV6_IFINDEX: + 
json_object_string_addf(json_nexthop, "ip", "%pI6", + &nexthop->gate.ipv6); + json_object_string_add(json_nexthop, "afi", + "ipv6"); + + if (nexthop->ifindex) { + json_object_int_add(json_nexthop, + "interfaceIndex", + nexthop->ifindex); + json_object_string_add( + json_nexthop, "interfaceName", + ifindex2ifname( + nexthop->ifindex, + nexthop->vrf_id)); + } + break; + + case NEXTHOP_TYPE_IFINDEX: + json_object_boolean_true_add( + json_nexthop, "directlyConnected"); + json_object_int_add(json_nexthop, + "interfaceIndex", + nexthop->ifindex); + json_object_string_add( + json_nexthop, "interfaceName", + ifindex2ifname(nexthop->ifindex, + nexthop->vrf_id)); + break; + case NEXTHOP_TYPE_BLACKHOLE: + json_object_boolean_true_add(json_nexthop, + "unreachable"); + switch (nexthop->bh_type) { + case BLACKHOLE_REJECT: + json_object_boolean_true_add( + json_nexthop, "reject"); + break; + case BLACKHOLE_ADMINPROHIB: + json_object_boolean_true_add( + json_nexthop, + "admin-prohibited"); + break; + case BLACKHOLE_NULL: + json_object_boolean_true_add( + json_nexthop, "blackhole"); + break; + case BLACKHOLE_UNSPEC: + break; + } + break; + } + + /* This nexthop is a resolver for the parent nexthop. + * Set resolver flag for better clarity and delimiter + * in flat list of nexthops in json. 
+ */ + if (nexthop->rparent) + json_object_boolean_true_add(json_nexthop, "resolver"); + + if (nexthop->vrf_id != re->vrf_id) + json_object_string_add(json_nexthop, "vrf", + vrf_id_to_name(nexthop->vrf_id)); + + if (CHECK_FLAG(nexthop->flags, NEXTHOP_FLAG_DUPLICATE)) + json_object_boolean_true_add(json_nexthop, + "duplicate"); + + if (CHECK_FLAG(nexthop->flags, NEXTHOP_FLAG_ACTIVE)) + json_object_boolean_true_add(json_nexthop, + "active"); + + if (CHECK_FLAG(nexthop->flags, NEXTHOP_FLAG_ONLINK)) + json_object_boolean_true_add(json_nexthop, + "onLink"); + + if (CHECK_FLAG(nexthop->flags, NEXTHOP_FLAG_LINKDOWN)) + json_object_boolean_true_add(json_nexthop, "linkDown"); + + if (CHECK_FLAG(nexthop->flags, NEXTHOP_FLAG_RECURSIVE)) + json_object_boolean_true_add(json_nexthop, + "recursive"); + + if (CHECK_FLAG(nexthop->flags, NEXTHOP_FLAG_HAS_BACKUP)) { + json_backups = json_object_new_array(); + for (i = 0; i < nexthop->backup_num; i++) { + json_object_array_add( + json_backups, + json_object_new_int(nexthop->backup_idx[i])); + } + + json_object_object_add(json_nexthop, "backupIndex", + json_backups); + } + + switch (nexthop->type) { + case NEXTHOP_TYPE_IPV4: + case NEXTHOP_TYPE_IPV4_IFINDEX: + if (nexthop->src.ipv4.s_addr) { + if (inet_ntop(AF_INET, + &nexthop->src.ipv4, buf, + sizeof(buf))) + json_object_string_add( + json_nexthop, "source", + buf); + } + break; + case NEXTHOP_TYPE_IPV6: + case NEXTHOP_TYPE_IPV6_IFINDEX: + if (!IPV6_ADDR_SAME(&nexthop->src.ipv6, + &in6addr_any)) { + if (inet_ntop(AF_INET6, + &nexthop->src.ipv6, buf, + sizeof(buf))) + json_object_string_add( + json_nexthop, "source", + buf); + } + break; + default: + break; + } + + if (nexthop->nh_label + && nexthop->nh_label->num_labels) { + json_labels = json_object_new_array(); + + for (int label_index = 0; + label_index + < nexthop->nh_label->num_labels; + label_index++) + json_object_array_add( + json_labels, + json_object_new_int( + nexthop->nh_label->label + [label_index])); + + 
json_object_object_add(json_nexthop, "labels", + json_labels); + } + + if (nexthop->weight) + json_object_int_add(json_nexthop, "weight", + nexthop->weight); + + if (nexthop->srte_color) + json_object_int_add(json_nexthop, "srteColor", + nexthop->srte_color); + + if (nexthop->nh_srv6) { + json_seg6local = json_object_new_object(); + json_object_string_add( + json_seg6local, "action", seg6local_action2str( + nexthop->nh_srv6->seg6local_action)); + json_object_object_add(json_nexthop, "seg6local", + json_seg6local); + + json_seg6 = json_object_new_object(); + inet_ntop(AF_INET6, &nexthop->nh_srv6->seg6_segs, buf, + sizeof(buf)); + json_object_string_add(json_seg6, "segs", buf); + json_object_object_add(json_nexthop, "seg6", json_seg6); + } +} + +static void vty_show_ip_route(struct vty *vty, struct route_node *rn, + struct route_entry *re, json_object *json, + bool is_fib, bool show_ng) +{ + const struct nexthop *nexthop; + int len = 0; + char buf[SRCDEST2STR_BUFFER]; + json_object *json_nexthops = NULL; + json_object *json_nexthop = NULL; + json_object *json_route = NULL; + const rib_dest_t *dest = rib_dest_from_rnode(rn); + const struct nexthop_group *nhg; + char up_str[MONOTIME_STRLEN]; + bool first_p = true; + bool nhg_from_backup = false; + + uptime2str(re->uptime, up_str, sizeof(up_str)); + + /* If showing fib information, use the fib view of the + * nexthops. 
+ */ + if (is_fib) + nhg = rib_get_fib_nhg(re); + else + nhg = &(re->nhe->nhg); + + if (json) { + json_route = json_object_new_object(); + json_nexthops = json_object_new_array(); + + json_object_string_add(json_route, "prefix", + srcdest_rnode2str(rn, buf, sizeof(buf))); + json_object_int_add(json_route, "prefixLen", rn->p.prefixlen); + json_object_string_add(json_route, "protocol", + zebra_route_string(re->type)); + + if (re->instance) + json_object_int_add(json_route, "instance", + re->instance); + + json_object_int_add(json_route, "vrfId", re->vrf_id); + json_object_string_add(json_route, "vrfName", + vrf_id_to_name(re->vrf_id)); + + if (CHECK_FLAG(re->flags, ZEBRA_FLAG_SELECTED)) + json_object_boolean_true_add(json_route, "selected"); + + if (dest->selected_fib == re) + json_object_boolean_true_add(json_route, + "destSelected"); + + json_object_int_add(json_route, "distance", + re->distance); + json_object_int_add(json_route, "metric", re->metric); + + if (CHECK_FLAG(re->status, ROUTE_ENTRY_INSTALLED)) + json_object_boolean_true_add(json_route, "installed"); + + if (CHECK_FLAG(re->status, ROUTE_ENTRY_FAILED)) + json_object_boolean_true_add(json_route, "failed"); + + if (CHECK_FLAG(re->status, ROUTE_ENTRY_QUEUED)) + json_object_boolean_true_add(json_route, "queued"); + + if (CHECK_FLAG(re->flags, ZEBRA_FLAG_TRAPPED)) + json_object_boolean_true_add(json_route, "trapped"); + + if (CHECK_FLAG(re->flags, ZEBRA_FLAG_OFFLOADED)) + json_object_boolean_true_add(json_route, "offloaded"); + + if (CHECK_FLAG(re->flags, ZEBRA_FLAG_OFFLOAD_FAILED)) + json_object_boolean_false_add(json_route, "offloaded"); + + if (re->tag) + json_object_int_add(json_route, "tag", re->tag); + + if (re->table) + json_object_int_add(json_route, "table", re->table); + + json_object_int_add(json_route, "internalStatus", + re->status); + json_object_int_add(json_route, "internalFlags", + re->flags); + json_object_int_add(json_route, "internalNextHopNum", + 
nexthop_group_nexthop_num(&(re->nhe->nhg))); + json_object_int_add(json_route, "internalNextHopActiveNum", + nexthop_group_active_nexthop_num( + &(re->nhe->nhg))); + json_object_int_add(json_route, "nexthopGroupId", re->nhe_id); + + if (re->nhe_installed_id != 0) + json_object_int_add(json_route, + "installedNexthopGroupId", + re->nhe_installed_id); + + json_object_string_add(json_route, "uptime", up_str); + + for (ALL_NEXTHOPS_PTR(nhg, nexthop)) { + json_nexthop = json_object_new_object(); + show_nexthop_json_helper(json_nexthop, + nexthop, re); + + json_object_array_add(json_nexthops, + json_nexthop); + } + + json_object_object_add(json_route, "nexthops", json_nexthops); + + /* If there are backup nexthops, include them */ + if (is_fib) + nhg = rib_get_fib_backup_nhg(re); + else + nhg = zebra_nhg_get_backup_nhg(re->nhe); + + if (nhg && nhg->nexthop) { + json_nexthops = json_object_new_array(); + + for (ALL_NEXTHOPS_PTR(nhg, nexthop)) { + json_nexthop = json_object_new_object(); + + show_nexthop_json_helper(json_nexthop, + nexthop, re); + json_object_array_add(json_nexthops, + json_nexthop); + } + + json_object_object_add(json_route, "backupNexthops", + json_nexthops); + } + zebra_show_ip_route_opaque(NULL, re, json_route); + + json_object_array_add(json, json_route); + return; + } + + /* Prefix information, and first nexthop. If we're showing 'fib', + * and there are no installed primary nexthops, see if there are any + * backup nexthops and start with those. + */ + if (is_fib && nhg->nexthop == NULL) { + nhg = rib_get_fib_backup_nhg(re); + nhg_from_backup = true; + } + + len = vty_out(vty, "%c", zebra_route_char(re->type)); + if (re->instance) + len += vty_out(vty, "[%d]", re->instance); + if (nhg_from_backup && nhg->nexthop) { + len += vty_out( + vty, "%cb%c %s", + CHECK_FLAG(re->flags, ZEBRA_FLAG_SELECTED) ? 
'>' : ' ', + re_status_output_char(re, nhg->nexthop, is_fib), + srcdest_rnode2str(rn, buf, sizeof(buf))); + } else { + len += vty_out( + vty, "%c%c %s", + CHECK_FLAG(re->flags, ZEBRA_FLAG_SELECTED) ? '>' : ' ', + re_status_output_char(re, nhg->nexthop, is_fib), + srcdest_rnode2str(rn, buf, sizeof(buf))); + } + + /* Distance and metric display. */ + if (((re->type == ZEBRA_ROUTE_CONNECT) && + (re->distance || re->metric)) || + (re->type != ZEBRA_ROUTE_CONNECT)) + len += vty_out(vty, " [%u/%u]", re->distance, + re->metric); + + if (show_ng) + len += vty_out(vty, " (%u)", re->nhe_id); + + /* Nexthop information. */ + for (ALL_NEXTHOPS_PTR(nhg, nexthop)) { + if (first_p) { + first_p = false; + } else if (nhg_from_backup) { + vty_out(vty, " b%c%*c", + re_status_output_char(re, nexthop, is_fib), + len - 3 + (2 * nexthop_level(nexthop)), ' '); + } else { + vty_out(vty, " %c%*c", + re_status_output_char(re, nexthop, is_fib), + len - 3 + (2 * nexthop_level(nexthop)), ' '); + } + + show_route_nexthop_helper(vty, re, nexthop); + vty_out(vty, ", %s\n", up_str); + } + + /* If we only had backup nexthops, we're done */ + if (nhg_from_backup) + return; + + /* Check for backup nexthop info if present */ + if (is_fib) + nhg = rib_get_fib_backup_nhg(re); + else + nhg = zebra_nhg_get_backup_nhg(re->nhe); + + if (nhg == NULL) + return; + + /* Print backup info */ + for (ALL_NEXTHOPS_PTR(nhg, nexthop)) { + bool star_p = false; + + if (is_fib) + star_p = CHECK_FLAG(nexthop->flags, NEXTHOP_FLAG_FIB); + + /* TODO -- it'd be nice to be able to include + * the entire list of backups, *and* include the + * real installation state. + */ + vty_out(vty, " b%c %*c", + (star_p ? 
'*' : ' '), + len - 3 + (2 * nexthop_level(nexthop)), ' '); + show_route_nexthop_helper(vty, re, nexthop); + vty_out(vty, "\n"); + } + +} + +static void vty_show_ip_route_detail_json(struct vty *vty, + struct route_node *rn, bool use_fib) +{ + json_object *json = NULL; + json_object *json_prefix = NULL; + struct route_entry *re; + char buf[BUFSIZ]; + rib_dest_t *dest; + + dest = rib_dest_from_rnode(rn); + + json = json_object_new_object(); + json_prefix = json_object_new_array(); + + RNODE_FOREACH_RE (rn, re) { + /* + * If re not selected for forwarding, skip re + * for "show ip/ipv6 fib <prefix> json" + */ + if (use_fib && re != dest->selected_fib) + continue; + vty_show_ip_route(vty, rn, re, json_prefix, use_fib, false); + } + + prefix2str(&rn->p, buf, sizeof(buf)); + json_object_object_add(json, buf, json_prefix); + vty_json(vty, json); +} + +static void do_show_route_helper(struct vty *vty, struct zebra_vrf *zvrf, + struct route_table *table, afi_t afi, + bool use_fib, route_tag_t tag, + const struct prefix *longer_prefix_p, + bool supernets_only, int type, + unsigned short ospf_instance_id, bool use_json, + uint32_t tableid, bool show_ng, + struct route_show_ctx *ctx) +{ + struct route_node *rn; + struct route_entry *re; + int first = 1; + rib_dest_t *dest; + json_object *json = NULL; + json_object *json_prefix = NULL; + uint32_t addr; + char buf[BUFSIZ]; + + /* + * ctx->multi indicates if we are dumping multiple tables or vrfs. + * if set: + * => display the common header at most once + * => add newline at each call except first + * => always display the VRF and table + * else: + * => display the common header if at least one entry is found + * => display the VRF and table if specific + */ + + if (use_json) + json = json_object_new_object(); + + /* Show all routes. 
*/ + for (rn = route_top(table); rn; rn = srcdest_route_next(rn)) { + dest = rib_dest_from_rnode(rn); + + RNODE_FOREACH_RE (rn, re) { + if (use_fib && re != dest->selected_fib) + continue; + + if (tag && re->tag != tag) + continue; + + if (longer_prefix_p + && !prefix_match(longer_prefix_p, &rn->p)) + continue; + + /* This can only be true when the afi is IPv4 */ + if (supernets_only) { + addr = ntohl(rn->p.u.prefix4.s_addr); + + if (IN_CLASSC(addr) && rn->p.prefixlen >= 24) + continue; + + if (IN_CLASSB(addr) && rn->p.prefixlen >= 16) + continue; + + if (IN_CLASSA(addr) && rn->p.prefixlen >= 8) + continue; + } + + if (type && re->type != type) + continue; + + if (ospf_instance_id + && (re->type != ZEBRA_ROUTE_OSPF + || re->instance != ospf_instance_id)) + continue; + + if (use_json) { + if (!json_prefix) + json_prefix = json_object_new_array(); + } else if (first) { + if (!ctx->header_done) { + if (afi == AFI_IP) + vty_out(vty, + SHOW_ROUTE_V4_HEADER); + else + vty_out(vty, + SHOW_ROUTE_V6_HEADER); + } + if (ctx->multi && ctx->header_done) + vty_out(vty, "\n"); + if (ctx->multi || zvrf_id(zvrf) != VRF_DEFAULT + || tableid) { + if (!tableid) + vty_out(vty, "VRF %s:\n", + zvrf_name(zvrf)); + else + vty_out(vty, + "VRF %s table %u:\n", + zvrf_name(zvrf), + tableid); + } + ctx->header_done = true; + first = 0; + } + + vty_show_ip_route(vty, rn, re, json_prefix, use_fib, + show_ng); + } + + if (json_prefix) { + prefix2str(&rn->p, buf, sizeof(buf)); + json_object_object_add(json, buf, json_prefix); + json_prefix = NULL; + } + } + + if (use_json) + vty_json(vty, json); +} + +static void do_show_ip_route_all(struct vty *vty, struct zebra_vrf *zvrf, + afi_t afi, bool use_fib, bool use_json, + route_tag_t tag, + const struct prefix *longer_prefix_p, + bool supernets_only, int type, + unsigned short ospf_instance_id, bool show_ng, + struct route_show_ctx *ctx) +{ + struct zebra_router_table *zrt; + struct rib_table_info *info; + + RB_FOREACH (zrt, zebra_router_table_head, + 
&zrouter.tables) { + info = route_table_get_info(zrt->table); + + if (zvrf != info->zvrf) + continue; + if (zrt->afi != afi || + zrt->safi != SAFI_UNICAST) + continue; + + do_show_ip_route(vty, zvrf_name(zvrf), afi, SAFI_UNICAST, + use_fib, use_json, tag, longer_prefix_p, + supernets_only, type, ospf_instance_id, + zrt->tableid, show_ng, ctx); + } +} + +static int do_show_ip_route(struct vty *vty, const char *vrf_name, afi_t afi, + safi_t safi, bool use_fib, bool use_json, + route_tag_t tag, + const struct prefix *longer_prefix_p, + bool supernets_only, int type, + unsigned short ospf_instance_id, uint32_t tableid, + bool show_ng, struct route_show_ctx *ctx) +{ + struct route_table *table; + struct zebra_vrf *zvrf = NULL; + + if (!(zvrf = zebra_vrf_lookup_by_name(vrf_name))) { + if (use_json) + vty_out(vty, "{}\n"); + else + vty_out(vty, "vrf %s not defined\n", vrf_name); + return CMD_SUCCESS; + } + + if (zvrf_id(zvrf) == VRF_UNKNOWN) { + if (use_json) + vty_out(vty, "{}\n"); + else + vty_out(vty, "vrf %s inactive\n", vrf_name); + return CMD_SUCCESS; + } + + if (tableid) + table = zebra_router_find_table(zvrf, tableid, afi, SAFI_UNICAST); + else + table = zebra_vrf_table(afi, safi, zvrf_id(zvrf)); + if (!table) { + if (use_json) + vty_out(vty, "{}\n"); + return CMD_SUCCESS; + } + + do_show_route_helper(vty, zvrf, table, afi, use_fib, tag, + longer_prefix_p, supernets_only, type, + ospf_instance_id, use_json, tableid, show_ng, ctx); + + return CMD_SUCCESS; +} + +DEFPY (show_ip_nht, + show_ip_nht_cmd, + "show <ip$ipv4|ipv6$ipv6> <nht|import-check>$type [<A.B.C.D|X:X::X:X>$addr|vrf NAME$vrf_name [<A.B.C.D|X:X::X:X>$addr]|vrf all$vrf_all] [mrib$mrib]", + SHOW_STR + IP_STR + IP6_STR + "IP nexthop tracking table\n" + "IP import check tracking table\n" + "IPv4 Address\n" + "IPv6 Address\n" + VRF_CMD_HELP_STR + "IPv4 Address\n" + "IPv6 Address\n" + VRF_ALL_CMD_HELP_STR + "Show Multicast (MRIB) NHT state\n") +{ + afi_t afi = ipv4 ? 
AFI_IP : AFI_IP6; + vrf_id_t vrf_id = VRF_DEFAULT; + struct prefix prefix, *p = NULL; + safi_t safi = mrib ? SAFI_MULTICAST : SAFI_UNICAST; + + if (vrf_all) { + struct vrf *vrf; + struct zebra_vrf *zvrf; + + RB_FOREACH (vrf, vrf_name_head, &vrfs_by_name) + if ((zvrf = vrf->info) != NULL) { + vty_out(vty, "\nVRF %s:\n", zvrf_name(zvrf)); + zebra_print_rnh_table(zvrf_id(zvrf), afi, safi, + vty, NULL); + } + return CMD_SUCCESS; + } + if (vrf_name) + VRF_GET_ID(vrf_id, vrf_name, false); + + memset(&prefix, 0, sizeof(prefix)); + if (addr) { + p = sockunion2hostprefix(addr, &prefix); + if (!p) + return CMD_WARNING; + } + + zebra_print_rnh_table(vrf_id, afi, safi, vty, p); + return CMD_SUCCESS; +} + +DEFUN (ip_nht_default_route, + ip_nht_default_route_cmd, + "ip nht resolve-via-default", + IP_STR + "Filter Next Hop tracking route resolution\n" + "Resolve via default route\n") +{ + ZEBRA_DECLVAR_CONTEXT_VRF(vrf, zvrf); + + if (!zvrf) + return CMD_WARNING; + + if (zvrf->zebra_rnh_ip_default_route) + return CMD_SUCCESS; + + zvrf->zebra_rnh_ip_default_route = true; + + zebra_evaluate_rnh(zvrf, AFI_IP, 0, NULL, SAFI_UNICAST); + return CMD_SUCCESS; +} + +static void show_nexthop_group_out(struct vty *vty, struct nhg_hash_entry *nhe) +{ + struct nexthop *nexthop = NULL; + struct nhg_connected *rb_node_dep = NULL; + struct nexthop_group *backup_nhg; + char up_str[MONOTIME_STRLEN]; + char time_left[MONOTIME_STRLEN]; + + uptime2str(nhe->uptime, up_str, sizeof(up_str)); + + vty_out(vty, "ID: %u (%s)\n", nhe->id, zebra_route_string(nhe->type)); + vty_out(vty, " RefCnt: %u", nhe->refcnt); + if (thread_is_scheduled(nhe->timer)) + vty_out(vty, " Time to Deletion: %s", + thread_timer_to_hhmmss(time_left, sizeof(time_left), + nhe->timer)); + vty_out(vty, "\n"); + + vty_out(vty, " Uptime: %s\n", up_str); + vty_out(vty, " VRF: %s\n", vrf_id_to_name(nhe->vrf_id)); + + + if (CHECK_FLAG(nhe->flags, NEXTHOP_GROUP_VALID)) { + vty_out(vty, " Valid"); + if (CHECK_FLAG(nhe->flags, 
NEXTHOP_GROUP_INSTALLED)) + vty_out(vty, ", Installed"); + vty_out(vty, "\n"); + } + if (nhe->ifp) + vty_out(vty, " Interface Index: %d\n", nhe->ifp->ifindex); + + if (!zebra_nhg_depends_is_empty(nhe)) { + vty_out(vty, " Depends:"); + frr_each(nhg_connected_tree, &nhe->nhg_depends, rb_node_dep) { + vty_out(vty, " (%u)", rb_node_dep->nhe->id); + } + vty_out(vty, "\n"); + } + + /* Output nexthops */ + for (ALL_NEXTHOPS(nhe->nhg, nexthop)) { + if (!CHECK_FLAG(nexthop->flags, NEXTHOP_FLAG_RECURSIVE)) + vty_out(vty, " "); + else + /* Make recursive nexthops a bit more clear */ + vty_out(vty, " "); + + show_route_nexthop_helper(vty, NULL, nexthop); + + if (nhe->backup_info == NULL || nhe->backup_info->nhe == NULL) { + if (CHECK_FLAG(nexthop->flags, + NEXTHOP_FLAG_HAS_BACKUP)) + vty_out(vty, " [backup %d]", + nexthop->backup_idx[0]); + + vty_out(vty, "\n"); + continue; + } + + /* TODO -- print more useful backup info */ + if (CHECK_FLAG(nexthop->flags, NEXTHOP_FLAG_HAS_BACKUP)) { + int i; + + vty_out(vty, "[backup"); + for (i = 0; i < nexthop->backup_num; i++) + vty_out(vty, " %d", nexthop->backup_idx[i]); + + vty_out(vty, "]"); + } + + vty_out(vty, "\n"); + } + + /* Output backup nexthops (if any) */ + backup_nhg = zebra_nhg_get_backup_nhg(nhe); + if (backup_nhg) { + vty_out(vty, " Backups:\n"); + + for (ALL_NEXTHOPS_PTR(backup_nhg, nexthop)) { + if (!CHECK_FLAG(nexthop->flags, NEXTHOP_FLAG_RECURSIVE)) + vty_out(vty, " "); + else + /* Make recursive nexthops a bit more clear */ + vty_out(vty, " "); + + show_route_nexthop_helper(vty, NULL, nexthop); + vty_out(vty, "\n"); + } + } + + if (!zebra_nhg_dependents_is_empty(nhe)) { + vty_out(vty, " Dependents:"); + frr_each(nhg_connected_tree, &nhe->nhg_dependents, + rb_node_dep) { + vty_out(vty, " (%u)", rb_node_dep->nhe->id); + } + vty_out(vty, "\n"); + } + +} + +static int show_nexthop_group_id_cmd_helper(struct vty *vty, uint32_t id) +{ + struct nhg_hash_entry *nhe = NULL; + + nhe = zebra_nhg_lookup_id(id); + + if (nhe) + 
show_nexthop_group_out(vty, nhe); + else { + vty_out(vty, "Nexthop Group ID: %u does not exist\n", id); + return CMD_WARNING; + } + return CMD_SUCCESS; +} + +/* Helper function for iteration through the hash of nexthop-groups/nhe-s */ + +struct nhe_show_context { + struct vty *vty; + vrf_id_t vrf_id; + afi_t afi; + int type; +}; + +static int nhe_show_walker(struct hash_bucket *bucket, void *arg) +{ + struct nhe_show_context *ctx = arg; + struct nhg_hash_entry *nhe; + + nhe = bucket->data; /* We won't be offered NULL buckets */ + + if (ctx->afi && nhe->afi != ctx->afi) + goto done; + + if (ctx->vrf_id && nhe->vrf_id != ctx->vrf_id) + goto done; + + if (ctx->type && nhe->type != ctx->type) + goto done; + + show_nexthop_group_out(ctx->vty, nhe); + +done: + return HASHWALK_CONTINUE; +} + +static void show_nexthop_group_cmd_helper(struct vty *vty, + struct zebra_vrf *zvrf, afi_t afi, + int type) +{ + struct nhe_show_context ctx; + + ctx.vty = vty; + ctx.afi = afi; + ctx.vrf_id = zvrf->vrf->vrf_id; + ctx.type = type; + + hash_walk(zrouter.nhgs_id, nhe_show_walker, &ctx); +} + +static void if_nexthop_group_dump_vty(struct vty *vty, struct interface *ifp) +{ + struct zebra_if *zebra_if = NULL; + struct nhg_connected *rb_node_dep = NULL; + + zebra_if = ifp->info; + + if (!if_nhg_dependents_is_empty(ifp)) { + vty_out(vty, "Interface %s:\n", ifp->name); + + frr_each(nhg_connected_tree, &zebra_if->nhg_dependents, + rb_node_dep) { + vty_out(vty, " "); + show_nexthop_group_out(vty, rb_node_dep->nhe); + } + } +} + +DEFPY (show_interface_nexthop_group, + show_interface_nexthop_group_cmd, + "show interface [IFNAME$if_name] nexthop-group", + SHOW_STR + "Interface status and configuration\n" + "Interface name\n" + "Show Nexthop Groups\n") +{ + struct vrf *vrf = NULL; + struct interface *ifp = NULL; + bool found = false; + + RB_FOREACH (vrf, vrf_name_head, &vrfs_by_name) { + if (if_name) { + ifp = if_lookup_by_name(if_name, vrf->vrf_id); + if (ifp) { + if_nexthop_group_dump_vty(vty, 
ifp); + found = true; + } + } else { + FOR_ALL_INTERFACES (vrf, ifp) + if_nexthop_group_dump_vty(vty, ifp); + found = true; + } + } + + if (!found) { + vty_out(vty, "%% Can't find interface %s\n", if_name); + return CMD_WARNING; + } + + return CMD_SUCCESS; +} + +DEFPY (show_nexthop_group, + show_nexthop_group_cmd, + "show nexthop-group rib <(0-4294967295)$id|[singleton <ip$v4|ipv6$v6>] [<kernel|zebra|bgp|sharp>$type_str] [vrf <NAME$vrf_name|all$vrf_all>]>", + SHOW_STR + "Show Nexthop Groups\n" + "RIB information\n" + "Nexthop Group ID\n" + "Show Singleton Nexthop-Groups\n" + IP_STR + IP6_STR + "Kernel (not installed via the zebra RIB)\n" + "Zebra (implicitly created by zebra)\n" + "Border Gateway Protocol (BGP)\n" + "Super Happy Advanced Routing Protocol (SHARP)\n" + VRF_FULL_CMD_HELP_STR) +{ + + struct zebra_vrf *zvrf = NULL; + afi_t afi = AFI_UNSPEC; + int type = 0; + + if (id) + return show_nexthop_group_id_cmd_helper(vty, id); + + if (v4) + afi = AFI_IP; + else if (v6) + afi = AFI_IP6; + + if (type_str) { + type = proto_redistnum((afi ? 
afi : AFI_IP), type_str); + if (type < 0) { + /* assume zebra */ + type = ZEBRA_ROUTE_NHG; + } + } + + if (!vrf_is_backend_netns() && (vrf_name || vrf_all)) { + vty_out(vty, + "VRF subcommand does not make any sense in l3mdev based vrf's\n"); + return CMD_WARNING; + } + + if (vrf_all) { + struct vrf *vrf; + + RB_FOREACH (vrf, vrf_name_head, &vrfs_by_name) { + struct zebra_vrf *zvrf; + + zvrf = vrf->info; + if (!zvrf) + continue; + + vty_out(vty, "VRF: %s\n", vrf->name); + show_nexthop_group_cmd_helper(vty, zvrf, afi, type); + } + + return CMD_SUCCESS; + } + + if (vrf_name) + zvrf = zebra_vrf_lookup_by_name(vrf_name); + else + zvrf = zebra_vrf_lookup_by_name(VRF_DEFAULT_NAME); + + if (!zvrf) { + vty_out(vty, "%% VRF '%s' specified does not exist\n", + vrf_name); + return CMD_WARNING; + } + + show_nexthop_group_cmd_helper(vty, zvrf, afi, type); + + return CMD_SUCCESS; +} + +DEFPY_HIDDEN(nexthop_group_use_enable, + nexthop_group_use_enable_cmd, + "[no] zebra nexthop kernel enable", + NO_STR + ZEBRA_STR + "Nexthop configuration \n" + "Configure use of kernel nexthops\n" + "Enable kernel nexthops\n") +{ + zebra_nhg_enable_kernel_nexthops(!no); + return CMD_SUCCESS; +} + +DEFPY_HIDDEN(proto_nexthop_group_only, proto_nexthop_group_only_cmd, + "[no] zebra nexthop proto only", + NO_STR ZEBRA_STR + "Nexthop configuration\n" + "Configure exclusive use of proto nexthops\n" + "Only use proto nexthops\n") +{ + zebra_nhg_set_proto_nexthops_only(!no); + return CMD_SUCCESS; +} + +DEFPY_HIDDEN(backup_nexthop_recursive_use_enable, + backup_nexthop_recursive_use_enable_cmd, + "[no] zebra nexthop resolve-via-backup", + NO_STR + ZEBRA_STR + "Nexthop configuration \n" + "Configure use of backup nexthops in recursive resolution\n") +{ + zebra_nhg_set_recursive_use_backups(!no); + return CMD_SUCCESS; +} + +DEFUN (no_ip_nht_default_route, + no_ip_nht_default_route_cmd, + "no ip nht resolve-via-default", + NO_STR + IP_STR + "Filter Next Hop tracking route resolution\n" + "Resolve via default 
route\n") +{ + ZEBRA_DECLVAR_CONTEXT_VRF(vrf, zvrf); + + if (!zvrf) + return CMD_WARNING; + + if (!zvrf->zebra_rnh_ip_default_route) + return CMD_SUCCESS; + + zvrf->zebra_rnh_ip_default_route = false; + zebra_evaluate_rnh(zvrf, AFI_IP, 0, NULL, SAFI_UNICAST); + return CMD_SUCCESS; +} + +DEFUN (ipv6_nht_default_route, + ipv6_nht_default_route_cmd, + "ipv6 nht resolve-via-default", + IP6_STR + "Filter Next Hop tracking route resolution\n" + "Resolve via default route\n") +{ + ZEBRA_DECLVAR_CONTEXT_VRF(vrf, zvrf); + + if (!zvrf) + return CMD_WARNING; + + if (zvrf->zebra_rnh_ipv6_default_route) + return CMD_SUCCESS; + + zvrf->zebra_rnh_ipv6_default_route = true; + zebra_evaluate_rnh(zvrf, AFI_IP6, 0, NULL, SAFI_UNICAST); + return CMD_SUCCESS; +} + +DEFUN (no_ipv6_nht_default_route, + no_ipv6_nht_default_route_cmd, + "no ipv6 nht resolve-via-default", + NO_STR + IP6_STR + "Filter Next Hop tracking route resolution\n" + "Resolve via default route\n") +{ + ZEBRA_DECLVAR_CONTEXT_VRF(vrf, zvrf); + + if (!zvrf) + return CMD_WARNING; + + if (!zvrf->zebra_rnh_ipv6_default_route) + return CMD_SUCCESS; + + zvrf->zebra_rnh_ipv6_default_route = false; + zebra_evaluate_rnh(zvrf, AFI_IP6, 0, NULL, SAFI_UNICAST); + return CMD_SUCCESS; +} + +DEFPY_HIDDEN(rnh_hide_backups, rnh_hide_backups_cmd, + "[no] ip nht hide-backup-events", + NO_STR + IP_STR + "Nexthop-tracking configuration\n" + "Hide notification about backup nexthops\n") +{ + rnh_set_hide_backups(!no); + return CMD_SUCCESS; +} + +DEFPY (show_route, + show_route_cmd, + "show\ + <\ + ip$ipv4 <fib$fib|route> [table <(1-4294967295)$table|all$table_all>]\ + [vrf <NAME$vrf_name|all$vrf_all>]\ + [{\ + tag (1-4294967295)\ + |A.B.C.D/M$prefix longer-prefixes\ + |supernets-only$supernets_only\ + }]\ + [<\ + " FRR_IP_REDIST_STR_ZEBRA "$type_str\ + |ospf$type_str (1-65535)$ospf_instance_id\ + >]\ + |ipv6$ipv6 <fib$fib|route> [table <(1-4294967295)$table|all$table_all>]\ + [vrf <NAME$vrf_name|all$vrf_all>]\ + [{\ + tag (1-4294967295)\ + 
|X:X::X:X/M$prefix longer-prefixes\ + }]\ + [" FRR_IP6_REDIST_STR_ZEBRA "$type_str]\ + >\ + [<json$json|nexthop-group$ng>]", + SHOW_STR + IP_STR + "IP forwarding table\n" + "IP routing table\n" + "Table to display\n" + "The table number to display\n" + "All tables\n" + VRF_FULL_CMD_HELP_STR + "Show only routes with tag\n" + "Tag value\n" + "IP prefix <network>/<length>, e.g., 35.0.0.0/8\n" + "Show route matching the specified Network/Mask pair only\n" + "Show supernet entries only\n" + FRR_IP_REDIST_HELP_STR_ZEBRA + "Open Shortest Path First (OSPFv2)\n" + "Instance ID\n" + IPV6_STR + "IP forwarding table\n" + "IP routing table\n" + "Table to display\n" + "The table number to display\n" + "All tables\n" + VRF_FULL_CMD_HELP_STR + "Show only routes with tag\n" + "Tag value\n" + "IPv6 prefix\n" + "Show route matching the specified Network/Mask pair only\n" + FRR_IP6_REDIST_HELP_STR_ZEBRA + JSON_STR + "Nexthop Group Information\n") +{ + afi_t afi = ipv4 ? AFI_IP : AFI_IP6; + struct vrf *vrf; + int type = 0; + struct zebra_vrf *zvrf; + struct route_show_ctx ctx = { + .multi = vrf_all || table_all, + }; + + if (!vrf_is_backend_netns()) { + if ((vrf_all || vrf_name) && (table || table_all)) { + if (!!json) + vty_out(vty, "{}\n"); + else { + vty_out(vty, "Linux vrf backend already points to table id\n"); + vty_out(vty, "Either remove table parameter or vrf parameter\n"); + } + return CMD_SUCCESS; + } + } + if (type_str) { + type = proto_redistnum(afi, type_str); + if (type < 0) { + vty_out(vty, "Unknown route type\n"); + return CMD_WARNING; + } + } + + if (vrf_all) { + RB_FOREACH (vrf, vrf_name_head, &vrfs_by_name) { + if ((zvrf = vrf->info) == NULL + || (zvrf->table[afi][SAFI_UNICAST] == NULL)) + continue; + + if (table_all) + do_show_ip_route_all( + vty, zvrf, afi, !!fib, !!json, tag, + prefix_str ? 
prefix : NULL, + !!supernets_only, type, + ospf_instance_id, !!ng, &ctx); + else + do_show_ip_route( + vty, zvrf_name(zvrf), afi, SAFI_UNICAST, + !!fib, !!json, tag, + prefix_str ? prefix : NULL, + !!supernets_only, type, + ospf_instance_id, table, !!ng, &ctx); + } + } else { + vrf_id_t vrf_id = VRF_DEFAULT; + + if (vrf_name) + VRF_GET_ID(vrf_id, vrf_name, !!json); + vrf = vrf_lookup_by_id(vrf_id); + if (!vrf) + return CMD_SUCCESS; + + zvrf = vrf->info; + if (!zvrf) + return CMD_SUCCESS; + + if (table_all) + do_show_ip_route_all(vty, zvrf, afi, !!fib, !!json, tag, + prefix_str ? prefix : NULL, + !!supernets_only, type, + ospf_instance_id, !!ng, &ctx); + else + do_show_ip_route(vty, vrf->name, afi, SAFI_UNICAST, + !!fib, !!json, tag, + prefix_str ? prefix : NULL, + !!supernets_only, type, + ospf_instance_id, table, !!ng, &ctx); + } + + return CMD_SUCCESS; +} + +ALIAS_HIDDEN (show_route, + show_ro_cmd, + "show <ip$ipv4|ipv6$ipv6> ro", + SHOW_STR + IP_STR + IPV6_STR + "IP routing table\n"); + + +DEFPY (show_route_detail, + show_route_detail_cmd, + "show\ + <\ + ip$ipv4 <fib$fib|route> [vrf <NAME$vrf_name|all$vrf_all>]\ + <\ + A.B.C.D$address\ + |A.B.C.D/M$prefix\ + >\ + |ipv6$ipv6 <fib$fib|route> [vrf <NAME$vrf_name|all$vrf_all>]\ + <\ + X:X::X:X$address\ + |X:X::X:X/M$prefix\ + >\ + >\ + [json$json] [nexthop-group$ng]", + SHOW_STR + IP_STR + "IPv6 forwarding table\n" + "IP routing table\n" + VRF_FULL_CMD_HELP_STR + "Network in the IP routing table to display\n" + "IP prefix <network>/<length>, e.g., 35.0.0.0/8\n" + IP6_STR + "IPv6 forwarding table\n" + "IPv6 routing table\n" + VRF_FULL_CMD_HELP_STR + "IPv6 Address\n" + "IPv6 prefix\n" + JSON_STR + "Nexthop Group Information\n") +{ + afi_t afi = ipv4 ? 
AFI_IP : AFI_IP6; + struct route_table *table; + struct prefix p; + struct route_node *rn; + bool use_fib = !!fib; + rib_dest_t *dest; + bool network_found = false; + bool show_ng = !!ng; + + if (address_str) + prefix_str = address_str; + if (str2prefix(prefix_str, &p) < 0) { + vty_out(vty, "%% Malformed address\n"); + return CMD_WARNING; + } + + if (vrf_all) { + struct vrf *vrf; + struct zebra_vrf *zvrf; + + RB_FOREACH (vrf, vrf_name_head, &vrfs_by_name) { + if ((zvrf = vrf->info) == NULL + || (table = zvrf->table[afi][SAFI_UNICAST]) == NULL) + continue; + + rn = route_node_match(table, &p); + if (!rn) + continue; + if (!address_str && rn->p.prefixlen != p.prefixlen) { + route_unlock_node(rn); + continue; + } + + dest = rib_dest_from_rnode(rn); + if (use_fib && !dest->selected_fib) { + route_unlock_node(rn); + continue; + } + + network_found = true; + if (json) + vty_show_ip_route_detail_json(vty, rn, use_fib); + else + vty_show_ip_route_detail(vty, rn, 0, use_fib, + show_ng); + + route_unlock_node(rn); + } + + if (!network_found) { + if (json) + vty_out(vty, "{}\n"); + else { + if (use_fib) + vty_out(vty, + "%% Network not in FIB\n"); + else + vty_out(vty, + "%% Network not in RIB\n"); + } + return CMD_WARNING; + } + } else { + vrf_id_t vrf_id = VRF_DEFAULT; + + if (vrf_name) + VRF_GET_ID(vrf_id, vrf_name, false); + + table = zebra_vrf_table(afi, SAFI_UNICAST, vrf_id); + if (!table) + return CMD_SUCCESS; + + rn = route_node_match(table, &p); + if (rn) + dest = rib_dest_from_rnode(rn); + + if (!rn || (!address_str && rn->p.prefixlen != p.prefixlen) || + (use_fib && dest && !dest->selected_fib)) { + if (json) + vty_out(vty, "{}\n"); + else { + if (use_fib) + vty_out(vty, + "%% Network not in FIB\n"); + else + vty_out(vty, + "%% Network not in table\n"); + } + if (rn) + route_unlock_node(rn); + return CMD_WARNING; + } + + if (json) + vty_show_ip_route_detail_json(vty, rn, use_fib); + else + vty_show_ip_route_detail(vty, rn, 0, use_fib, show_ng); + + 
route_unlock_node(rn); + } + + return CMD_SUCCESS; +} + +DEFPY (show_route_summary, + show_route_summary_cmd, + "show <ip$ipv4|ipv6$ipv6> route [vrf <NAME$vrf_name|all$vrf_all>] \ + summary [table (1-4294967295)$table_id] [prefix$prefix] [json]", + SHOW_STR + IP_STR + IP6_STR + "IP routing table\n" + VRF_FULL_CMD_HELP_STR + "Summary of all routes\n" + "Table to display summary for\n" + "The table number\n" + "Prefix routes\n" + JSON_STR) +{ + afi_t afi = ipv4 ? AFI_IP : AFI_IP6; + struct route_table *table; + bool uj = use_json(argc, argv); + + if (vrf_all) { + struct vrf *vrf; + struct zebra_vrf *zvrf; + + RB_FOREACH (vrf, vrf_name_head, &vrfs_by_name) { + if ((zvrf = vrf->info) == NULL) + continue; + + if (table_id == 0) + table = zebra_vrf_table(afi, SAFI_UNICAST, + zvrf->vrf->vrf_id); + else + table = zebra_vrf_lookup_table_with_table_id( + afi, SAFI_UNICAST, zvrf->vrf->vrf_id, + table_id); + + if (!table) + continue; + + if (prefix) + vty_show_ip_route_summary_prefix(vty, table, + uj); + else + vty_show_ip_route_summary(vty, table, uj); + } + } else { + vrf_id_t vrf_id = VRF_DEFAULT; + + if (vrf_name) + VRF_GET_ID(vrf_id, vrf_name, false); + + if (table_id == 0) + table = zebra_vrf_table(afi, SAFI_UNICAST, vrf_id); + else + table = zebra_vrf_lookup_table_with_table_id( + afi, SAFI_UNICAST, vrf_id, table_id); + if (!table) + return CMD_SUCCESS; + + if (prefix) + vty_show_ip_route_summary_prefix(vty, table, uj); + else + vty_show_ip_route_summary(vty, table, uj); + } + + return CMD_SUCCESS; +} + +DEFUN_HIDDEN (show_route_zebra_dump, + show_route_zebra_dump_cmd, + "show <ip|ipv6> zebra route dump [vrf VRFNAME]", + SHOW_STR + IP_STR + IP6_STR + "Zebra daemon\n" + "Routing table\n" + "All information\n" + VRF_CMD_HELP_STR) +{ + afi_t afi = AFI_IP; + struct route_table *table; + const char *vrf_name = NULL; + int idx = 0; + + afi = strmatch(argv[1]->text, "ipv6") ? 
AFI_IP6 : AFI_IP; + + if (argv_find(argv, argc, "vrf", &idx)) + vrf_name = argv[++idx]->arg; + + if (!vrf_name) { + struct vrf *vrf; + struct zebra_vrf *zvrf; + + RB_FOREACH (vrf, vrf_name_head, &vrfs_by_name) { + zvrf = vrf->info; + if ((zvrf == NULL) + || (zvrf->table[afi][SAFI_UNICAST] == NULL)) + continue; + + table = zvrf->table[afi][SAFI_UNICAST]; + show_ip_route_dump_vty(vty, table); + } + } else { + vrf_id_t vrf_id = VRF_DEFAULT; + + VRF_GET_ID(vrf_id, vrf_name, true); + + table = zebra_vrf_table(afi, SAFI_UNICAST, vrf_id); + if (!table) + return CMD_SUCCESS; + + show_ip_route_dump_vty(vty, table); + } + + return CMD_SUCCESS; +} + +static void show_ip_route_nht_dump(struct vty *vty, struct nexthop *nexthop, + struct route_entry *re, unsigned int num) +{ + + char buf[SRCDEST2STR_BUFFER]; + + vty_out(vty, " Nexthop %u:\n", num); + vty_out(vty, " type: %u\n", nexthop->type); + vty_out(vty, " flags: %u\n", nexthop->flags); + switch (nexthop->type) { + case NEXTHOP_TYPE_IPV4: + case NEXTHOP_TYPE_IPV4_IFINDEX: + vty_out(vty, " ip address: %s\n", + inet_ntop(AF_INET, &nexthop->gate.ipv4, buf, + sizeof(buf))); + vty_out(vty, " afi: ipv4\n"); + + if (nexthop->ifindex) { + vty_out(vty, " interface index: %d\n", + nexthop->ifindex); + vty_out(vty, " interface name: %s\n", + ifindex2ifname(nexthop->ifindex, + nexthop->vrf_id)); + } + + if (nexthop->src.ipv4.s_addr + && (inet_ntop(AF_INET, &nexthop->src.ipv4, buf, + sizeof(buf)))) + vty_out(vty, " source: %s\n", buf); + break; + case NEXTHOP_TYPE_IPV6: + case NEXTHOP_TYPE_IPV6_IFINDEX: + vty_out(vty, " ip: %s\n", + inet_ntop(AF_INET6, &nexthop->gate.ipv6, buf, + sizeof(buf))); + vty_out(vty, " afi: ipv6\n"); + + if (nexthop->ifindex) { + vty_out(vty, " interface index: %d\n", + nexthop->ifindex); + vty_out(vty, " interface name: %s\n", + ifindex2ifname(nexthop->ifindex, + nexthop->vrf_id)); + } + + if (!IPV6_ADDR_SAME(&nexthop->src.ipv6, &in6addr_any)) { + if (inet_ntop(AF_INET6, &nexthop->src.ipv6, buf, + sizeof(buf))) 
+ vty_out(vty, " source: %s\n", buf); + } + break; + case NEXTHOP_TYPE_IFINDEX: + vty_out(vty, + " Nexthop is an interface (directly connected).\n"); + vty_out(vty, " interface index: %d\n", nexthop->ifindex); + vty_out(vty, " interface name: %s\n", + ifindex2ifname(nexthop->ifindex, nexthop->vrf_id)); + break; + case NEXTHOP_TYPE_BLACKHOLE: + vty_out(vty, " Nexthop type is blackhole.\n"); + + switch (nexthop->bh_type) { + case BLACKHOLE_REJECT: + vty_out(vty, " Blackhole type: reject\n"); + break; + case BLACKHOLE_ADMINPROHIB: + vty_out(vty, + " Blackhole type: admin-prohibited\n"); + break; + case BLACKHOLE_NULL: + vty_out(vty, " Blackhole type: NULL0\n"); + break; + case BLACKHOLE_UNSPEC: + break; + } + break; + } +} + +static void show_ip_route_dump_vty(struct vty *vty, struct route_table *table) +{ + struct route_node *rn; + struct route_entry *re; + char buf[SRCDEST2STR_BUFFER]; + char time[20]; + time_t uptime; + struct tm tm; + struct timeval tv; + struct nexthop *nexthop = NULL; + int nexthop_num = 0; + + vty_out(vty, "\nIPv4/IPv6 Routing table dump\n"); + vty_out(vty, "----------------------------\n"); + + for (rn = route_top(table); rn; rn = route_next(rn)) { + RNODE_FOREACH_RE (rn, re) { + vty_out(vty, "Route: %s\n", + srcdest_rnode2str(rn, buf, sizeof(buf))); + vty_out(vty, " protocol: %s\n", + zebra_route_string(re->type)); + vty_out(vty, " instance: %u\n", re->instance); + vty_out(vty, " VRF ID: %u\n", re->vrf_id); + vty_out(vty, " VRF name: %s\n", + vrf_id_to_name(re->vrf_id)); + vty_out(vty, " flags: %u\n", re->flags); + + if (re->type != ZEBRA_ROUTE_CONNECT) { + vty_out(vty, " distance: %u\n", re->distance); + vty_out(vty, " metric: %u\n", re->metric); + } + + vty_out(vty, " tag: %u\n", re->tag); + + uptime = monotime(&tv); + uptime -= re->uptime; + gmtime_r(&uptime, &tm); + + if (uptime < ONE_DAY_SECOND) + snprintf(time, sizeof(time), "%02d:%02d:%02d", + tm.tm_hour, tm.tm_min, tm.tm_sec); + else if (uptime < ONE_WEEK_SECOND) + snprintf(time, 
sizeof(time), "%dd%02dh%02dm", + tm.tm_yday, tm.tm_hour, tm.tm_min); + else + snprintf(time, sizeof(time), "%02dw%dd%02dh", + tm.tm_yday / 7, + tm.tm_yday - ((tm.tm_yday / 7) * 7), + tm.tm_hour); + + vty_out(vty, " status: %u\n", re->status); + vty_out(vty, " nexthop_num: %u\n", + nexthop_group_nexthop_num(&(re->nhe->nhg))); + vty_out(vty, " nexthop_active_num: %u\n", + nexthop_group_active_nexthop_num( + &(re->nhe->nhg))); + vty_out(vty, " table: %u\n", re->table); + vty_out(vty, " uptime: %s\n", time); + + for (ALL_NEXTHOPS_PTR(&(re->nhe->nhg), nexthop)) { + nexthop_num++; + show_ip_route_nht_dump(vty, nexthop, re, + nexthop_num); + } + + nexthop_num = 0; + vty_out(vty, "\n"); + } + } +} + +static void vty_show_ip_route_summary(struct vty *vty, + struct route_table *table, bool use_json) +{ + struct route_node *rn; + struct route_entry *re; +#define ZEBRA_ROUTE_IBGP ZEBRA_ROUTE_MAX +#define ZEBRA_ROUTE_TOTAL (ZEBRA_ROUTE_IBGP + 1) + uint32_t rib_cnt[ZEBRA_ROUTE_TOTAL + 1]; + uint32_t fib_cnt[ZEBRA_ROUTE_TOTAL + 1]; + uint32_t offload_cnt[ZEBRA_ROUTE_TOTAL + 1]; + uint32_t trap_cnt[ZEBRA_ROUTE_TOTAL + 1]; + uint32_t i; + uint32_t is_ibgp; + json_object *json_route_summary = NULL; + json_object *json_route_routes = NULL; + + memset(&rib_cnt, 0, sizeof(rib_cnt)); + memset(&fib_cnt, 0, sizeof(fib_cnt)); + memset(&offload_cnt, 0, sizeof(offload_cnt)); + memset(&trap_cnt, 0, sizeof(trap_cnt)); + + if (use_json) { + json_route_summary = json_object_new_object(); + json_route_routes = json_object_new_array(); + json_object_object_add(json_route_summary, "routes", + json_route_routes); + } + + for (rn = route_top(table); rn; rn = srcdest_route_next(rn)) + RNODE_FOREACH_RE (rn, re) { + is_ibgp = (re->type == ZEBRA_ROUTE_BGP + && CHECK_FLAG(re->flags, ZEBRA_FLAG_IBGP)); + + rib_cnt[ZEBRA_ROUTE_TOTAL]++; + if (is_ibgp) + rib_cnt[ZEBRA_ROUTE_IBGP]++; + else + rib_cnt[re->type]++; + + if (CHECK_FLAG(re->status, ROUTE_ENTRY_INSTALLED)) { + fib_cnt[ZEBRA_ROUTE_TOTAL]++; + + if 
(is_ibgp) + fib_cnt[ZEBRA_ROUTE_IBGP]++; + else + fib_cnt[re->type]++; + } + + if (CHECK_FLAG(re->flags, ZEBRA_FLAG_TRAPPED)) { + if (is_ibgp) + trap_cnt[ZEBRA_ROUTE_IBGP]++; + else + trap_cnt[re->type]++; + } + + if (CHECK_FLAG(re->flags, ZEBRA_FLAG_OFFLOADED)) { + if (is_ibgp) + offload_cnt[ZEBRA_ROUTE_IBGP]++; + else + offload_cnt[re->type]++; + } + } + + if (!use_json) + vty_out(vty, "%-20s %-20s %s (vrf %s)\n", "Route Source", + "Routes", "FIB", + zvrf_name(((struct rib_table_info *) + route_table_get_info(table)) + ->zvrf)); + + for (i = 0; i < ZEBRA_ROUTE_MAX; i++) { + if ((rib_cnt[i] > 0) || (i == ZEBRA_ROUTE_BGP + && rib_cnt[ZEBRA_ROUTE_IBGP] > 0)) { + if (i == ZEBRA_ROUTE_BGP) { + if (use_json) { + json_object *json_route_ebgp = + json_object_new_object(); + + json_object_int_add( + json_route_ebgp, "fib", + fib_cnt[ZEBRA_ROUTE_BGP]); + json_object_int_add( + json_route_ebgp, "rib", + rib_cnt[ZEBRA_ROUTE_BGP]); + json_object_int_add( + json_route_ebgp, "fibOffLoaded", + offload_cnt[ZEBRA_ROUTE_BGP]); + json_object_int_add( + json_route_ebgp, "fibTrapped", + trap_cnt[ZEBRA_ROUTE_BGP]); + + json_object_string_add(json_route_ebgp, + "type", "ebgp"); + json_object_array_add(json_route_routes, + json_route_ebgp); + + json_object *json_route_ibgp = + json_object_new_object(); + + json_object_int_add( + json_route_ibgp, "fib", + fib_cnt[ZEBRA_ROUTE_IBGP]); + json_object_int_add( + json_route_ibgp, "rib", + rib_cnt[ZEBRA_ROUTE_IBGP]); + json_object_int_add( + json_route_ibgp, "fibOffLoaded", + offload_cnt[ZEBRA_ROUTE_IBGP]); + json_object_int_add( + json_route_ibgp, "fibTrapped", + trap_cnt[ZEBRA_ROUTE_IBGP]); + json_object_string_add(json_route_ibgp, + "type", "ibgp"); + json_object_array_add(json_route_routes, + json_route_ibgp); + } else { + vty_out(vty, "%-20s %-20d %-20d \n", + "ebgp", + rib_cnt[ZEBRA_ROUTE_BGP], + fib_cnt[ZEBRA_ROUTE_BGP]); + vty_out(vty, "%-20s %-20d %-20d \n", + "ibgp", + rib_cnt[ZEBRA_ROUTE_IBGP], + fib_cnt[ZEBRA_ROUTE_IBGP]); + } + } 
else { + if (use_json) { + json_object *json_route_type = + json_object_new_object(); + + json_object_int_add(json_route_type, + "fib", fib_cnt[i]); + json_object_int_add(json_route_type, + "rib", rib_cnt[i]); + + json_object_int_add(json_route_type, + "fibOffLoaded", + offload_cnt[i]); + json_object_int_add(json_route_type, + "fibTrapped", + trap_cnt[i]); + json_object_string_add( + json_route_type, "type", + zebra_route_string(i)); + json_object_array_add(json_route_routes, + json_route_type); + } else + vty_out(vty, "%-20s %-20d %-20d \n", + zebra_route_string(i), + rib_cnt[i], fib_cnt[i]); + } + } + } + + if (use_json) { + json_object_int_add(json_route_summary, "routesTotal", + rib_cnt[ZEBRA_ROUTE_TOTAL]); + json_object_int_add(json_route_summary, "routesTotalFib", + fib_cnt[ZEBRA_ROUTE_TOTAL]); + + vty_json(vty, json_route_summary); + } else { + vty_out(vty, "------\n"); + vty_out(vty, "%-20s %-20d %-20d \n", "Totals", + rib_cnt[ZEBRA_ROUTE_TOTAL], fib_cnt[ZEBRA_ROUTE_TOTAL]); + vty_out(vty, "\n"); + } +} + +/* + * Implementation of the ip route summary prefix command. + * + * This command prints the primary prefixes that have been installed by various + * protocols on the box. 
+ * + */ +static void vty_show_ip_route_summary_prefix(struct vty *vty, + struct route_table *table, + bool use_json) +{ + struct route_node *rn; + struct route_entry *re; + struct nexthop *nexthop; +#define ZEBRA_ROUTE_IBGP ZEBRA_ROUTE_MAX +#define ZEBRA_ROUTE_TOTAL (ZEBRA_ROUTE_IBGP + 1) + uint32_t rib_cnt[ZEBRA_ROUTE_TOTAL + 1]; + uint32_t fib_cnt[ZEBRA_ROUTE_TOTAL + 1]; + uint32_t i; + int cnt; + json_object *json_route_summary = NULL; + json_object *json_route_routes = NULL; + + memset(&rib_cnt, 0, sizeof(rib_cnt)); + memset(&fib_cnt, 0, sizeof(fib_cnt)); + + if (use_json) { + json_route_summary = json_object_new_object(); + json_route_routes = json_object_new_array(); + json_object_object_add(json_route_summary, "prefixRoutes", + json_route_routes); + } + + for (rn = route_top(table); rn; rn = srcdest_route_next(rn)) + RNODE_FOREACH_RE (rn, re) { + + /* + * In case of ECMP, count only once. + */ + cnt = 0; + if (CHECK_FLAG(re->status, ROUTE_ENTRY_INSTALLED)) { + fib_cnt[ZEBRA_ROUTE_TOTAL]++; + fib_cnt[re->type]++; + } + for (nexthop = re->nhe->nhg.nexthop; (!cnt && nexthop); + nexthop = nexthop->next) { + cnt++; + rib_cnt[ZEBRA_ROUTE_TOTAL]++; + rib_cnt[re->type]++; + if (re->type == ZEBRA_ROUTE_BGP + && CHECK_FLAG(re->flags, ZEBRA_FLAG_IBGP)) { + rib_cnt[ZEBRA_ROUTE_IBGP]++; + if (CHECK_FLAG(re->status, + ROUTE_ENTRY_INSTALLED)) + fib_cnt[ZEBRA_ROUTE_IBGP]++; + } + } + } + + if (!use_json) + vty_out(vty, "%-20s %-20s %s (vrf %s)\n", "Route Source", + "Prefix Routes", "FIB", + zvrf_name(((struct rib_table_info *) + route_table_get_info(table)) + ->zvrf)); + + for (i = 0; i < ZEBRA_ROUTE_MAX; i++) { + if (rib_cnt[i] > 0) { + if (i == ZEBRA_ROUTE_BGP) { + if (use_json) { + json_object *json_route_ebgp = + json_object_new_object(); + + json_object_int_add( + json_route_ebgp, "fib", + fib_cnt[ZEBRA_ROUTE_BGP] + - fib_cnt[ZEBRA_ROUTE_IBGP]); + json_object_int_add( + json_route_ebgp, "rib", + rib_cnt[ZEBRA_ROUTE_BGP] + - rib_cnt[ZEBRA_ROUTE_IBGP]); + 
json_object_string_add(json_route_ebgp, + "type", "ebgp"); + json_object_array_add(json_route_routes, + json_route_ebgp); + + json_object *json_route_ibgp = + json_object_new_object(); + + json_object_int_add( + json_route_ibgp, "fib", + fib_cnt[ZEBRA_ROUTE_IBGP]); + json_object_int_add( + json_route_ibgp, "rib", + rib_cnt[ZEBRA_ROUTE_IBGP]); + json_object_string_add(json_route_ibgp, + "type", "ibgp"); + json_object_array_add(json_route_routes, + json_route_ibgp); + } else { + vty_out(vty, "%-20s %-20d %-20d \n", + "ebgp", + rib_cnt[ZEBRA_ROUTE_BGP] + - rib_cnt[ZEBRA_ROUTE_IBGP], + fib_cnt[ZEBRA_ROUTE_BGP] + - fib_cnt[ZEBRA_ROUTE_IBGP]); + vty_out(vty, "%-20s %-20d %-20d \n", + "ibgp", + rib_cnt[ZEBRA_ROUTE_IBGP], + fib_cnt[ZEBRA_ROUTE_IBGP]); + } + } else { + if (use_json) { + json_object *json_route_type = + json_object_new_object(); + + json_object_int_add(json_route_type, + "fib", fib_cnt[i]); + json_object_int_add(json_route_type, + "rib", rib_cnt[i]); + json_object_string_add( + json_route_type, "type", + zebra_route_string(i)); + json_object_array_add(json_route_routes, + json_route_type); + } else + vty_out(vty, "%-20s %-20d %-20d \n", + zebra_route_string(i), + rib_cnt[i], fib_cnt[i]); + } + } + } + + if (use_json) { + json_object_int_add(json_route_summary, "prefixRoutesTotal", + rib_cnt[ZEBRA_ROUTE_TOTAL]); + json_object_int_add(json_route_summary, "prefixRoutesTotalFib", + fib_cnt[ZEBRA_ROUTE_TOTAL]); + + vty_json(vty, json_route_summary); + } else { + vty_out(vty, "------\n"); + vty_out(vty, "%-20s %-20d %-20d \n", "Totals", + rib_cnt[ZEBRA_ROUTE_TOTAL], fib_cnt[ZEBRA_ROUTE_TOTAL]); + vty_out(vty, "\n"); + } +} + +DEFUN (allow_external_route_update, + allow_external_route_update_cmd, + "allow-external-route-update", + "Allow FRR routes to be overwritten by external processes\n") +{ + zrouter.allow_delete = true; + + return CMD_SUCCESS; +} + +DEFUN (no_allow_external_route_update, + no_allow_external_route_update_cmd, + "no allow-external-route-update", 
+ NO_STR + "Allow FRR routes to be overwritten by external processes\n") +{ + zrouter.allow_delete = false; + + return CMD_SUCCESS; +} + +/* show vrf */ +DEFUN (show_vrf, + show_vrf_cmd, + "show vrf", + SHOW_STR + "VRF\n") +{ + struct vrf *vrf; + struct zebra_vrf *zvrf; + + if (vrf_is_backend_netns()) + vty_out(vty, "netns-based vrfs\n"); + + RB_FOREACH (vrf, vrf_name_head, &vrfs_by_name) { + if (!(zvrf = vrf->info)) + continue; + if (zvrf_id(zvrf) == VRF_DEFAULT) + continue; + + vty_out(vty, "vrf %s ", zvrf_name(zvrf)); + if (zvrf_id(zvrf) == VRF_UNKNOWN || !zvrf_is_active(zvrf)) + vty_out(vty, "inactive"); + else if (zvrf_ns_name(zvrf)) + vty_out(vty, "id %u netns %s", zvrf_id(zvrf), + zvrf_ns_name(zvrf)); + else + vty_out(vty, "id %u table %u", zvrf_id(zvrf), + zvrf->table_id); + if (vrf_is_user_cfged(vrf)) + vty_out(vty, " (configured)"); + vty_out(vty, "\n"); + } + + return CMD_SUCCESS; +} + +DEFPY (evpn_mh_mac_holdtime, + evpn_mh_mac_holdtime_cmd, + "[no$no] evpn mh mac-holdtime (0-86400)$duration", + NO_STR + "EVPN\n" + "Multihoming\n" + "MAC hold time\n" + "Duration in seconds\n") +{ + return zebra_evpn_mh_mac_holdtime_update(vty, duration, + no ? true : false); +} + +DEFPY (evpn_mh_neigh_holdtime, + evpn_mh_neigh_holdtime_cmd, + "[no$no] evpn mh neigh-holdtime (0-86400)$duration", + NO_STR + "EVPN\n" + "Multihoming\n" + "Neighbor entry hold time\n" + "Duration in seconds\n") +{ + + return zebra_evpn_mh_neigh_holdtime_update(vty, duration, + no ? true : false); +} + +DEFPY (evpn_mh_startup_delay, + evpn_mh_startup_delay_cmd, + "[no] evpn mh startup-delay(0-3600)$duration", + NO_STR + "EVPN\n" + "Multihoming\n" + "Startup delay\n" + "duration in seconds\n") +{ + + return zebra_evpn_mh_startup_delay_update(vty, duration, + no ? 
true : false); +} + +DEFPY(evpn_mh_redirect_off, evpn_mh_redirect_off_cmd, + "[no$no] evpn mh redirect-off", + NO_STR + "EVPN\n" + "Multihoming\n" + "ES bond redirect for fast-failover off\n") +{ + bool redirect_off; + + redirect_off = no ? false : true; + + return zebra_evpn_mh_redirect_off(vty, redirect_off); +} + +DEFUN (default_vrf_vni_mapping, + default_vrf_vni_mapping_cmd, + "vni " CMD_VNI_RANGE "[prefix-routes-only]", + "VNI corresponding to the DEFAULT VRF\n" + "VNI-ID\n" + "Prefix routes only \n") +{ + char xpath[XPATH_MAXLEN]; + struct zebra_vrf *zvrf = NULL; + int filter = 0; + + zvrf = vrf_info_lookup(VRF_DEFAULT); + if (!zvrf) + return CMD_WARNING; + + if (argc == 3) + filter = 1; + + snprintf(xpath, sizeof(xpath), FRR_VRF_KEY_XPATH "/frr-zebra:zebra", + VRF_DEFAULT_NAME); + nb_cli_enqueue_change(vty, xpath, NB_OP_CREATE, NULL); + + snprintf(xpath, sizeof(xpath), + FRR_VRF_KEY_XPATH "/frr-zebra:zebra/l3vni-id", + VRF_DEFAULT_NAME); + nb_cli_enqueue_change(vty, xpath, NB_OP_MODIFY, argv[1]->arg); + + if (filter) { + snprintf(xpath, sizeof(xpath), + FRR_VRF_KEY_XPATH "/frr-zebra:zebra/prefix-only", + VRF_DEFAULT_NAME); + nb_cli_enqueue_change(vty, xpath, NB_OP_MODIFY, "true"); + } + + return nb_cli_apply_changes(vty, NULL); +} + +DEFUN (no_default_vrf_vni_mapping, + no_default_vrf_vni_mapping_cmd, + "no vni " CMD_VNI_RANGE "[prefix-routes-only]", + NO_STR + "VNI corresponding to DEFAULT VRF\n" + "VNI-ID\n" + "Prefix routes only \n") +{ + char xpath[XPATH_MAXLEN]; + int filter = 0; + vni_t vni = strtoul(argv[2]->arg, NULL, 10); + struct zebra_vrf *zvrf = NULL; + + zvrf = vrf_info_lookup(VRF_DEFAULT); + if (!zvrf) + return CMD_WARNING; + + if (argc == 4) + filter = 1; + + if (zvrf->l3vni != vni) { + vty_out(vty, "VNI %d doesn't exist in VRF: %s \n", vni, + zvrf->vrf->name); + return CMD_WARNING; + } + + snprintf(xpath, sizeof(xpath), + FRR_VRF_KEY_XPATH "/frr-zebra:zebra/l3vni-id", + VRF_DEFAULT_NAME); + nb_cli_enqueue_change(vty, xpath, NB_OP_DESTROY, 
argv[2]->arg); + + if (filter) { + snprintf(xpath, sizeof(xpath), + FRR_VRF_KEY_XPATH "/frr-zebra:zebra/prefix-only", + VRF_DEFAULT_NAME); + nb_cli_enqueue_change(vty, xpath, NB_OP_DESTROY, "true"); + } + + snprintf(xpath, sizeof(xpath), FRR_VRF_KEY_XPATH "/frr-zebra:zebra", + VRF_DEFAULT_NAME); + nb_cli_enqueue_change(vty, xpath, NB_OP_DESTROY, NULL); + + return nb_cli_apply_changes(vty, NULL); +} + +DEFUN (vrf_vni_mapping, + vrf_vni_mapping_cmd, + "vni " CMD_VNI_RANGE "[prefix-routes-only]", + "VNI corresponding to tenant VRF\n" + "VNI-ID\n" + "prefix-routes-only\n") +{ + int filter = 0; + + ZEBRA_DECLVAR_CONTEXT_VRF(vrf, zvrf); + + assert(vrf); + assert(zvrf); + + if (argc == 3) + filter = 1; + + nb_cli_enqueue_change(vty, "./frr-zebra:zebra", NB_OP_CREATE, NULL); + nb_cli_enqueue_change(vty, "./frr-zebra:zebra/l3vni-id", NB_OP_MODIFY, + argv[1]->arg); + + if (filter) + nb_cli_enqueue_change(vty, "./frr-zebra:zebra/prefix-only", + NB_OP_MODIFY, "true"); + + return nb_cli_apply_changes(vty, NULL); +} + +DEFUN (no_vrf_vni_mapping, + no_vrf_vni_mapping_cmd, + "no vni " CMD_VNI_RANGE "[prefix-routes-only]", + NO_STR + "VNI corresponding to tenant VRF\n" + "VNI-ID\n" + "prefix-routes-only\n") +{ + int filter = 0; + + ZEBRA_DECLVAR_CONTEXT_VRF(vrf, zvrf); + vni_t vni = strtoul(argv[2]->arg, NULL, 10); + + assert(vrf); + assert(zvrf); + + if (argc == 4) + filter = 1; + + if (zvrf->l3vni != vni) { + vty_out(vty, "VNI %d doesn't exist in VRF: %s \n", vni, + zvrf->vrf->name); + return CMD_WARNING; + } + + nb_cli_enqueue_change(vty, "./frr-zebra:zebra/l3vni-id", NB_OP_DESTROY, + argv[2]->arg); + + if (filter) + nb_cli_enqueue_change(vty, "./frr-zebra:zebra/prefix-only", + NB_OP_DESTROY, "true"); + + nb_cli_enqueue_change(vty, "./frr-zebra:zebra", NB_OP_DESTROY, NULL); + + return nb_cli_apply_changes(vty, NULL); +} + +/* show vrf */ +DEFUN (show_vrf_vni, + show_vrf_vni_cmd, + "show vrf vni [json]", + SHOW_STR + "VRF\n" + "VNI\n" + JSON_STR) +{ + struct vrf *vrf; + struct 
zebra_vrf *zvrf; + json_object *json = NULL; + json_object *json_vrfs = NULL; + bool uj = use_json(argc, argv); + + if (uj) { + json = json_object_new_object(); + json_vrfs = json_object_new_array(); + } + + if (!uj) + vty_out(vty, "%-37s %-10s %-20s %-20s %-5s %-18s\n", "VRF", + "VNI", "VxLAN IF", "L3-SVI", "State", "Rmac"); + + RB_FOREACH (vrf, vrf_name_head, &vrfs_by_name) { + zvrf = vrf->info; + if (!zvrf) + continue; + + zebra_vxlan_print_vrf_vni(vty, zvrf, json_vrfs); + } + + if (uj) { + json_object_object_add(json, "vrfs", json_vrfs); + vty_json(vty, json); + } + + return CMD_SUCCESS; +} + +DEFUN (show_evpn_global, + show_evpn_global_cmd, + "show evpn [json]", + SHOW_STR + "EVPN\n" + JSON_STR) +{ + bool uj = use_json(argc, argv); + + zebra_vxlan_print_evpn(vty, uj); + return CMD_SUCCESS; +} + +DEFPY(show_evpn_neigh, show_neigh_cmd, "show ip neigh", + SHOW_STR IP_STR "neighbors\n") + +{ + zebra_neigh_show(vty); + + return CMD_SUCCESS; +} + +DEFPY(show_evpn_l2_nh, + show_evpn_l2_nh_cmd, + "show evpn l2-nh [json$json]", + SHOW_STR + "EVPN\n" + "Layer2 nexthops\n" + JSON_STR) +{ + bool uj = !!json; + + zebra_evpn_l2_nh_show(vty, uj); + + return CMD_SUCCESS; +} + +DEFPY(show_evpn_es, + show_evpn_es_cmd, + "show evpn es [NAME$esi_str|detail$detail] [json$json]", + SHOW_STR + "EVPN\n" + "Ethernet Segment\n" + "ES ID\n" + "Detailed information\n" + JSON_STR) +{ + esi_t esi; + bool uj = !!json; + + if (esi_str) { + if (!str_to_esi(esi_str, &esi)) { + vty_out(vty, "%% Malformed ESI\n"); + return CMD_WARNING; + } + zebra_evpn_es_show_esi(vty, uj, &esi); + } else { + if (detail) + zebra_evpn_es_show_detail(vty, uj); + else + zebra_evpn_es_show(vty, uj); + } + + return CMD_SUCCESS; +} + +DEFPY(show_evpn_es_evi, + show_evpn_es_evi_cmd, + "show evpn es-evi [vni (1-16777215)$vni] [detail$detail] [json$json]", + SHOW_STR + "EVPN\n" + "Ethernet Segment per EVI\n" + "VxLAN Network Identifier\n" + "VNI\n" + "Detailed information\n" + JSON_STR) +{ + bool uj = !!json; + bool ud = 
!!detail; + + if (vni) + zebra_evpn_es_evi_show_vni(vty, uj, vni, ud); + else + zebra_evpn_es_evi_show(vty, uj, ud); + + return CMD_SUCCESS; +} + +DEFPY(show_evpn_access_vlan, + show_evpn_access_vlan_cmd, + "show evpn access-vlan [(1-4094)$vid | detail$detail] [json$json]", + SHOW_STR + "EVPN\n" + "Access VLANs\n" + "VLAN ID\n" + "Detailed information\n" + JSON_STR) +{ + bool uj = !!json; + + if (vid) { + zebra_evpn_acc_vl_show_vid(vty, uj, vid); + } else { + if (detail) + zebra_evpn_acc_vl_show_detail(vty, uj); + else + zebra_evpn_acc_vl_show(vty, uj); + } + + return CMD_SUCCESS; +} + +DEFUN (show_evpn_vni, + show_evpn_vni_cmd, + "show evpn vni [json]", + SHOW_STR + "EVPN\n" + "VxLAN Network Identifier\n" + JSON_STR) +{ + struct zebra_vrf *zvrf; + bool uj = use_json(argc, argv); + + zvrf = zebra_vrf_get_evpn(); + zebra_vxlan_print_vnis(vty, zvrf, uj); + return CMD_SUCCESS; +} + +DEFUN (show_evpn_vni_detail, show_evpn_vni_detail_cmd, + "show evpn vni detail [json]", + SHOW_STR + "EVPN\n" + "VxLAN Network Identifier\n" + "Detailed Information On Each VNI\n" + JSON_STR) +{ + struct zebra_vrf *zvrf; + bool uj = use_json(argc, argv); + + zvrf = zebra_vrf_get_evpn(); + zebra_vxlan_print_vnis_detail(vty, zvrf, uj); + return CMD_SUCCESS; +} + +DEFUN (show_evpn_vni_vni, + show_evpn_vni_vni_cmd, + "show evpn vni " CMD_VNI_RANGE "[json]", + SHOW_STR + "EVPN\n" + "VxLAN Network Identifier\n" + "VNI number\n" + JSON_STR) +{ + struct zebra_vrf *zvrf; + vni_t vni; + bool uj = use_json(argc, argv); + + vni = strtoul(argv[3]->arg, NULL, 10); + zvrf = zebra_vrf_get_evpn(); + zebra_vxlan_print_vni(vty, zvrf, vni, uj, NULL); + return CMD_SUCCESS; +} + +DEFUN (show_evpn_rmac_vni_mac, + show_evpn_rmac_vni_mac_cmd, + "show evpn rmac vni " CMD_VNI_RANGE " mac WORD [json]", + SHOW_STR + "EVPN\n" + "RMAC\n" + "L3 VNI\n" + "VNI number\n" + "MAC\n" + "mac-address (e.g. 
0a:0a:0a:0a:0a:0a)\n" + JSON_STR) +{ + vni_t l3vni = 0; + struct ethaddr mac; + bool uj = use_json(argc, argv); + + l3vni = strtoul(argv[4]->arg, NULL, 10); + if (!prefix_str2mac(argv[6]->arg, &mac)) { + vty_out(vty, "%% Malformed MAC address\n"); + return CMD_WARNING; + } + zebra_vxlan_print_specific_rmac_l3vni(vty, l3vni, &mac, uj); + return CMD_SUCCESS; +} + +DEFUN (show_evpn_rmac_vni, + show_evpn_rmac_vni_cmd, + "show evpn rmac vni " CMD_VNI_RANGE "[json]", + SHOW_STR + "EVPN\n" + "RMAC\n" + "L3 VNI\n" + "VNI number\n" + JSON_STR) +{ + vni_t l3vni = 0; + bool uj = use_json(argc, argv); + + l3vni = strtoul(argv[4]->arg, NULL, 10); + zebra_vxlan_print_rmacs_l3vni(vty, l3vni, uj); + + return CMD_SUCCESS; +} + +DEFUN (show_evpn_rmac_vni_all, + show_evpn_rmac_vni_all_cmd, + "show evpn rmac vni all [json]", + SHOW_STR + "EVPN\n" + "RMAC addresses\n" + "L3 VNI\n" + "All VNIs\n" + JSON_STR) +{ + bool uj = use_json(argc, argv); + + zebra_vxlan_print_rmacs_all_l3vni(vty, uj); + + return CMD_SUCCESS; +} + +DEFUN (show_evpn_nh_vni_ip, + show_evpn_nh_vni_ip_cmd, + "show evpn next-hops vni " CMD_VNI_RANGE " ip WORD [json]", + SHOW_STR + "EVPN\n" + "Remote Vteps\n" + "L3 VNI\n" + "VNI number\n" + "Ip address\n" + "Host address (ipv4 or ipv6)\n" + JSON_STR) +{ + vni_t l3vni; + struct ipaddr ip; + bool uj = use_json(argc, argv); + + l3vni = strtoul(argv[4]->arg, NULL, 10); + if (str2ipaddr(argv[6]->arg, &ip) != 0) { + if (!uj) + vty_out(vty, "%% Malformed Neighbor address\n"); + return CMD_WARNING; + } + zebra_vxlan_print_specific_nh_l3vni(vty, l3vni, &ip, uj); + + return CMD_SUCCESS; +} + +DEFUN (show_evpn_nh_vni, + show_evpn_nh_vni_cmd, + "show evpn next-hops vni " CMD_VNI_RANGE "[json]", + SHOW_STR + "EVPN\n" + "Remote Vteps\n" + "L3 VNI\n" + "VNI number\n" + JSON_STR) +{ + vni_t l3vni; + bool uj = use_json(argc, argv); + + l3vni = strtoul(argv[4]->arg, NULL, 10); + zebra_vxlan_print_nh_l3vni(vty, l3vni, uj); + + return CMD_SUCCESS; +} + +DEFUN (show_evpn_nh_vni_all, + 
show_evpn_nh_vni_all_cmd, + "show evpn next-hops vni all [json]", + SHOW_STR + "EVPN\n" + "Remote VTEPs\n" + "L3 VNI\n" + "All VNIs\n" + JSON_STR) +{ + bool uj = use_json(argc, argv); + + zebra_vxlan_print_nh_all_l3vni(vty, uj); + + return CMD_SUCCESS; +} + +DEFUN (show_evpn_mac_vni, + show_evpn_mac_vni_cmd, + "show evpn mac vni " CMD_VNI_RANGE "[json]", + SHOW_STR + "EVPN\n" + "MAC addresses\n" + "VxLAN Network Identifier\n" + "VNI number\n" + JSON_STR) +{ + struct zebra_vrf *zvrf; + vni_t vni; + bool uj = use_json(argc, argv); + + vni = strtoul(argv[4]->arg, NULL, 10); + zvrf = zebra_vrf_get_evpn(); + zebra_vxlan_print_macs_vni(vty, zvrf, vni, uj); + return CMD_SUCCESS; +} + +DEFUN (show_evpn_mac_vni_all, + show_evpn_mac_vni_all_cmd, + "show evpn mac vni all [json]", + SHOW_STR + "EVPN\n" + "MAC addresses\n" + "VxLAN Network Identifier\n" + "All VNIs\n" + JSON_STR) +{ + struct zebra_vrf *zvrf; + bool uj = use_json(argc, argv); + + zvrf = zebra_vrf_get_evpn(); + zebra_vxlan_print_macs_all_vni(vty, zvrf, false, uj); + return CMD_SUCCESS; +} + +DEFUN (show_evpn_mac_vni_all_detail, show_evpn_mac_vni_all_detail_cmd, + "show evpn mac vni all detail [json]", + SHOW_STR + "EVPN\n" + "MAC addresses\n" + "VxLAN Network Identifier\n" + "All VNIs\n" + "Detailed Information On Each VNI MAC\n" + JSON_STR) +{ + struct zebra_vrf *zvrf; + bool uj = use_json(argc, argv); + + zvrf = zebra_vrf_get_evpn(); + zebra_vxlan_print_macs_all_vni_detail(vty, zvrf, false, uj); + return CMD_SUCCESS; +} + +DEFUN (show_evpn_mac_vni_all_vtep, + show_evpn_mac_vni_all_vtep_cmd, + "show evpn mac vni all vtep A.B.C.D [json]", + SHOW_STR + "EVPN\n" + "MAC addresses\n" + "VxLAN Network Identifier\n" + "All VNIs\n" + "Remote VTEP\n" + "Remote VTEP IP address\n" + JSON_STR) +{ + struct zebra_vrf *zvrf; + struct in_addr vtep_ip; + bool uj = use_json(argc, argv); + + if (!inet_aton(argv[6]->arg, &vtep_ip)) { + if (!uj) + vty_out(vty, "%% Malformed VTEP IP address\n"); + return CMD_WARNING; + } + zvrf = 
zebra_vrf_get_evpn(); + zebra_vxlan_print_macs_all_vni_vtep(vty, zvrf, vtep_ip, uj); + + return CMD_SUCCESS; +} + + +DEFUN (show_evpn_mac_vni_mac, + show_evpn_mac_vni_mac_cmd, + "show evpn mac vni " CMD_VNI_RANGE " mac WORD [json]", + SHOW_STR + "EVPN\n" + "MAC addresses\n" + "VxLAN Network Identifier\n" + "VNI number\n" + "MAC\n" + "MAC address (e.g., 00:e0:ec:20:12:62)\n" + JSON_STR) + +{ + struct zebra_vrf *zvrf; + vni_t vni; + struct ethaddr mac; + bool uj = use_json(argc, argv); + + vni = strtoul(argv[4]->arg, NULL, 10); + if (!prefix_str2mac(argv[6]->arg, &mac)) { + vty_out(vty, "%% Malformed MAC address\n"); + return CMD_WARNING; + } + zvrf = zebra_vrf_get_evpn(); + zebra_vxlan_print_specific_mac_vni(vty, zvrf, vni, &mac, uj); + return CMD_SUCCESS; +} + +DEFUN (show_evpn_mac_vni_vtep, + show_evpn_mac_vni_vtep_cmd, + "show evpn mac vni " CMD_VNI_RANGE " vtep A.B.C.D" "[json]", + SHOW_STR + "EVPN\n" + "MAC addresses\n" + "VxLAN Network Identifier\n" + "VNI number\n" + "Remote VTEP\n" + "Remote VTEP IP address\n" + JSON_STR) +{ + struct zebra_vrf *zvrf; + vni_t vni; + struct in_addr vtep_ip; + bool uj = use_json(argc, argv); + + vni = strtoul(argv[4]->arg, NULL, 10); + if (!inet_aton(argv[6]->arg, &vtep_ip)) { + if (!uj) + vty_out(vty, "%% Malformed VTEP IP address\n"); + return CMD_WARNING; + } + + zvrf = zebra_vrf_get_evpn(); + zebra_vxlan_print_macs_vni_vtep(vty, zvrf, vni, vtep_ip, uj); + return CMD_SUCCESS; +} + +DEFPY (show_evpn_mac_vni_all_dad, + show_evpn_mac_vni_all_dad_cmd, + "show evpn mac vni all duplicate [json]", + SHOW_STR + "EVPN\n" + "MAC addresses\n" + "VxLAN Network Identifier\n" + "All VNIs\n" + "Duplicate address list\n" + JSON_STR) +{ + struct zebra_vrf *zvrf; + bool uj = use_json(argc, argv); + + zvrf = zebra_vrf_get_evpn(); + zebra_vxlan_print_macs_all_vni(vty, zvrf, true, uj); + return CMD_SUCCESS; +} + + +DEFPY (show_evpn_mac_vni_dad, + show_evpn_mac_vni_dad_cmd, + "show evpn mac vni " CMD_VNI_RANGE " duplicate [json]", + SHOW_STR + 
"EVPN\n" + "MAC addresses\n" + "VxLAN Network Identifier\n" + "VNI number\n" + "Duplicate address list\n" + JSON_STR) +{ + struct zebra_vrf *zvrf; + bool uj = use_json(argc, argv); + + zvrf = zebra_vrf_get_evpn(); + + zebra_vxlan_print_macs_vni_dad(vty, zvrf, vni, uj); + + return CMD_SUCCESS; +} + +DEFPY (show_evpn_neigh_vni_dad, + show_evpn_neigh_vni_dad_cmd, + "show evpn arp-cache vni " CMD_VNI_RANGE "duplicate [json]", + SHOW_STR + "EVPN\n" + "ARP and ND cache\n" + "VxLAN Network Identifier\n" + "VNI number\n" + "Duplicate address list\n" + JSON_STR) +{ + struct zebra_vrf *zvrf; + bool uj = use_json(argc, argv); + + zvrf = zebra_vrf_get_evpn(); + zebra_vxlan_print_neigh_vni_dad(vty, zvrf, vni, uj); + return CMD_SUCCESS; +} + +DEFPY (show_evpn_neigh_vni_all_dad, + show_evpn_neigh_vni_all_dad_cmd, + "show evpn arp-cache vni all duplicate [json]", + SHOW_STR + "EVPN\n" + "ARP and ND cache\n" + "VxLAN Network Identifier\n" + "All VNIs\n" + "Duplicate address list\n" + JSON_STR) +{ + struct zebra_vrf *zvrf; + bool uj = use_json(argc, argv); + + zvrf = zebra_vrf_get_evpn(); + zebra_vxlan_print_neigh_all_vni(vty, zvrf, true, uj); + return CMD_SUCCESS; +} + + +DEFUN (show_evpn_neigh_vni, + show_evpn_neigh_vni_cmd, + "show evpn arp-cache vni " CMD_VNI_RANGE "[json]", + SHOW_STR + "EVPN\n" + "ARP and ND cache\n" + "VxLAN Network Identifier\n" + "VNI number\n" + JSON_STR) +{ + struct zebra_vrf *zvrf; + vni_t vni; + bool uj = use_json(argc, argv); + + vni = strtoul(argv[4]->arg, NULL, 10); + zvrf = zebra_vrf_get_evpn(); + zebra_vxlan_print_neigh_vni(vty, zvrf, vni, uj); + return CMD_SUCCESS; +} + +DEFUN (show_evpn_neigh_vni_all, + show_evpn_neigh_vni_all_cmd, + "show evpn arp-cache vni all [json]", + SHOW_STR + "EVPN\n" + "ARP and ND cache\n" + "VxLAN Network Identifier\n" + "All VNIs\n" + JSON_STR) +{ + struct zebra_vrf *zvrf; + bool uj = use_json(argc, argv); + + zvrf = zebra_vrf_get_evpn(); + zebra_vxlan_print_neigh_all_vni(vty, zvrf, false, uj); + return CMD_SUCCESS; +} 
+ +DEFUN (show_evpn_neigh_vni_all_detail, show_evpn_neigh_vni_all_detail_cmd, + "show evpn arp-cache vni all detail [json]", + SHOW_STR + "EVPN\n" + "ARP and ND cache\n" + "VxLAN Network Identifier\n" + "All VNIs\n" + "Neighbor details for all vnis in detail\n" JSON_STR) +{ + struct zebra_vrf *zvrf; + bool uj = use_json(argc, argv); + + zvrf = zebra_vrf_get_evpn(); + zebra_vxlan_print_neigh_all_vni_detail(vty, zvrf, false, uj); + return CMD_SUCCESS; +} + +DEFUN (show_evpn_neigh_vni_neigh, + show_evpn_neigh_vni_neigh_cmd, + "show evpn arp-cache vni " CMD_VNI_RANGE " ip WORD [json]", + SHOW_STR + "EVPN\n" + "ARP and ND cache\n" + "VxLAN Network Identifier\n" + "VNI number\n" + "Neighbor\n" + "Neighbor address (IPv4 or IPv6 address)\n" + JSON_STR) +{ + struct zebra_vrf *zvrf; + vni_t vni; + struct ipaddr ip; + bool uj = use_json(argc, argv); + + vni = strtoul(argv[4]->arg, NULL, 10); + if (str2ipaddr(argv[6]->arg, &ip) != 0) { + if (!uj) + vty_out(vty, "%% Malformed Neighbor address\n"); + return CMD_WARNING; + } + zvrf = zebra_vrf_get_evpn(); + zebra_vxlan_print_specific_neigh_vni(vty, zvrf, vni, &ip, uj); + return CMD_SUCCESS; +} + +DEFUN (show_evpn_neigh_vni_vtep, + show_evpn_neigh_vni_vtep_cmd, + "show evpn arp-cache vni " CMD_VNI_RANGE " vtep A.B.C.D [json]", + SHOW_STR + "EVPN\n" + "ARP and ND cache\n" + "VxLAN Network Identifier\n" + "VNI number\n" + "Remote VTEP\n" + "Remote VTEP IP address\n" + JSON_STR) +{ + struct zebra_vrf *zvrf; + vni_t vni; + struct in_addr vtep_ip; + bool uj = use_json(argc, argv); + + vni = strtoul(argv[4]->arg, NULL, 10); + if (!inet_aton(argv[6]->arg, &vtep_ip)) { + if (!uj) + vty_out(vty, "%% Malformed VTEP IP address\n"); + return CMD_WARNING; + } + + zvrf = zebra_vrf_get_evpn(); + zebra_vxlan_print_neigh_vni_vtep(vty, zvrf, vni, vtep_ip, uj); + return CMD_SUCCESS; +} + +/* policy routing contexts */ +DEFUN (show_pbr_ipset, + show_pbr_ipset_cmd, + "show pbr ipset [WORD]", + SHOW_STR + "Policy-Based Routing\n" + "IPset Context 
information\n" + "IPset Name information\n") +{ + int idx = 0; + int found = 0; + found = argv_find(argv, argc, "WORD", &idx); + if (!found) + zebra_pbr_show_ipset_list(vty, NULL); + else + zebra_pbr_show_ipset_list(vty, argv[idx]->arg); + return CMD_SUCCESS; +} + +/* policy routing contexts */ +DEFUN (show_pbr_iptable, + show_pbr_iptable_cmd, + "show pbr iptable [WORD]", + SHOW_STR + "Policy-Based Routing\n" + "IPtable Context information\n" + "IPtable Name information\n") +{ + int idx = 0; + int found = 0; + + found = argv_find(argv, argc, "WORD", &idx); + if (!found) + zebra_pbr_show_iptable(vty, NULL); + else + zebra_pbr_show_iptable(vty, argv[idx]->arg); + return CMD_SUCCESS; +} + +/* policy routing contexts */ +DEFPY (show_pbr_rule, + show_pbr_rule_cmd, + "show pbr rule", + SHOW_STR + "Policy-Based Routing\n" + "Rule\n") +{ + zebra_pbr_show_rule(vty); + return CMD_SUCCESS; +} + +DEFPY (pbr_nexthop_resolve, + pbr_nexthop_resolve_cmd, + "[no$no] pbr nexthop-resolve", + NO_STR + "Policy Based Routing\n" + "Resolve nexthop for dataplane programming\n") +{ + zebra_pbr_expand_action_update(!no); + return CMD_SUCCESS; +} + +DEFPY (clear_evpn_dup_addr, + clear_evpn_dup_addr_cmd, + "clear evpn dup-addr vni <all$vni_all |" CMD_VNI_RANGE"$vni [mac X:X:X:X:X:X | ip <A.B.C.D|X:X::X:X>]>", + CLEAR_STR + "EVPN\n" + "Duplicate address \n" + "VxLAN Network Identifier\n" + "VNI number\n" + "All VNIs\n" + "MAC\n" + "MAC address (e.g., 00:e0:ec:20:12:62)\n" + "IP\n" + "IPv4 address\n" + "IPv6 address\n") +{ + struct ipaddr host_ip = {.ipa_type = IPADDR_NONE }; + int ret = CMD_SUCCESS; + struct list *input; + struct yang_data *yang_dup = NULL, *yang_dup_ip = NULL, + *yang_dup_mac = NULL; + + input = list_new(); + + if (!vni_str) { + yang_dup = yang_data_new( + "/frr-zebra:clear-evpn-dup-addr/input/clear-dup-choice", + "all-case"); + } else { + yang_dup = yang_data_new_uint32( + "/frr-zebra:clear-evpn-dup-addr/input/clear-dup-choice/single-case/vni-id", + vni); + if 
(!is_zero_mac(&mac->eth_addr)) { + yang_dup_mac = yang_data_new_mac( + "/frr-zebra:clear-evpn-dup-addr/input/clear-dup-choice/single-case/vni-id/mac-addr", + &mac->eth_addr); + if (yang_dup_mac) + listnode_add(input, yang_dup_mac); + } else if (ip) { + if (sockunion_family(ip) == AF_INET) { + host_ip.ipa_type = IPADDR_V4; + host_ip.ipaddr_v4.s_addr = sockunion2ip(ip); + } else { + host_ip.ipa_type = IPADDR_V6; + memcpy(&host_ip.ipaddr_v6, &ip->sin6.sin6_addr, + sizeof(struct in6_addr)); + } + + yang_dup_ip = yang_data_new_ip( + "/frr-zebra:clear-evpn-dup-addr/input/clear-dup-choice/single-case/vni-id/vni-ipaddr", + &host_ip); + + if (yang_dup_ip) + listnode_add(input, yang_dup_ip); + } + } + + if (yang_dup) { + listnode_add(input, yang_dup); + ret = nb_cli_rpc(vty, "/frr-zebra:clear-evpn-dup-addr", input, + NULL); + } + + list_delete(&input); + + return ret; +} + +/* Static ip route configuration write function. */ +static int zebra_ip_config(struct vty *vty) +{ + int write = 0; + + write += zebra_import_table_config(vty, VRF_DEFAULT); + + return write; +} + +DEFUN (ip_zebra_import_table_distance, + ip_zebra_import_table_distance_cmd, + "ip import-table (1-252) [distance (1-255)] [route-map RMAP_NAME]", + IP_STR + "import routes from non-main kernel table\n" + "kernel routing table id\n" + "Distance for imported routes\n" + "Default distance value\n" + "route-map for filtering\n" + "route-map name\n") +{ + uint32_t table_id = 0; + + table_id = strtoul(argv[2]->arg, NULL, 10); + int distance = ZEBRA_TABLE_DISTANCE_DEFAULT; + char *rmap = + strmatch(argv[argc - 2]->text, "route-map") + ? XSTRDUP(MTYPE_ROUTE_MAP_NAME, argv[argc - 1]->arg) + : NULL; + int ret; + + if (argc == 7 || (argc == 5 && !rmap)) + distance = strtoul(argv[4]->arg, NULL, 10); + + if (!is_zebra_valid_kernel_table(table_id)) { + vty_out(vty, + "Invalid routing table ID, %d. 
Must be in range 1-252\n", + table_id); + if (rmap) + XFREE(MTYPE_ROUTE_MAP_NAME, rmap); + return CMD_WARNING; + } + + if (is_zebra_main_routing_table(table_id)) { + vty_out(vty, + "Invalid routing table ID, %d. Must be non-default table\n", + table_id); + if (rmap) + XFREE(MTYPE_ROUTE_MAP_NAME, rmap); + return CMD_WARNING; + } + + ret = zebra_import_table(AFI_IP, VRF_DEFAULT, table_id, + distance, rmap, 1); + if (rmap) + XFREE(MTYPE_ROUTE_MAP_NAME, rmap); + + return ret; +} + +DEFUN_HIDDEN (zebra_packet_process, + zebra_packet_process_cmd, + "zebra zapi-packets (1-10000)", + ZEBRA_STR + "Zapi Protocol\n" + "Number of packets to process before relinquishing thread\n") +{ + uint32_t packets = strtoul(argv[2]->arg, NULL, 10); + + atomic_store_explicit(&zrouter.packets_to_process, packets, + memory_order_relaxed); + + return CMD_SUCCESS; +} + +DEFUN_HIDDEN (no_zebra_packet_process, + no_zebra_packet_process_cmd, + "no zebra zapi-packets [(1-10000)]", + NO_STR + ZEBRA_STR + "Zapi Protocol\n" + "Number of packets to process before relinquishing thread\n") +{ + atomic_store_explicit(&zrouter.packets_to_process, + ZEBRA_ZAPI_PACKETS_TO_PROCESS, + memory_order_relaxed); + + return CMD_SUCCESS; +} + +DEFUN_HIDDEN (zebra_workqueue_timer, + zebra_workqueue_timer_cmd, + "zebra work-queue (0-10000)", + ZEBRA_STR + "Work Queue\n" + "Time in milliseconds\n") +{ + uint32_t timer = strtoul(argv[2]->arg, NULL, 10); + zrouter.ribq->spec.hold = timer; + + return CMD_SUCCESS; +} + +DEFUN_HIDDEN (no_zebra_workqueue_timer, + no_zebra_workqueue_timer_cmd, + "no zebra work-queue [(0-10000)]", + NO_STR + ZEBRA_STR + "Work Queue\n" + "Time in milliseconds\n") +{ + zrouter.ribq->spec.hold = ZEBRA_RIB_PROCESS_HOLD_TIME; + + return CMD_SUCCESS; +} + +DEFUN (no_ip_zebra_import_table, + no_ip_zebra_import_table_cmd, + "no ip import-table (1-252) [distance (1-255)] [route-map NAME]", + NO_STR + IP_STR + "import routes from non-main kernel table\n" + "kernel routing table id\n" + "Distance for 
imported routes\n" + "Default distance value\n" + "route-map for filtering\n" + "route-map name\n") +{ + uint32_t table_id = 0; + table_id = strtoul(argv[3]->arg, NULL, 10); + + if (!is_zebra_valid_kernel_table(table_id)) { + vty_out(vty, + "Invalid routing table ID. Must be in range 1-252\n"); + return CMD_WARNING; + } + + if (is_zebra_main_routing_table(table_id)) { + vty_out(vty, + "Invalid routing table ID, %d. Must be non-default table\n", + table_id); + return CMD_WARNING; + } + + if (!is_zebra_import_table_enabled(AFI_IP, VRF_DEFAULT, table_id)) + return CMD_SUCCESS; + + return (zebra_import_table(AFI_IP, VRF_DEFAULT, table_id, 0, NULL, 0)); +} + +DEFPY (zebra_nexthop_group_keep, + zebra_nexthop_group_keep_cmd, + "[no] zebra nexthop-group keep (1-3600)", + NO_STR + ZEBRA_STR + "Nexthop-Group\n" + "How long to keep\n" + "Time in seconds from 1-3600\n") +{ + if (no) + zrouter.nhg_keep = ZEBRA_DEFAULT_NHG_KEEP_TIMER; + else + zrouter.nhg_keep = keep; + + return CMD_SUCCESS; +} + +static int config_write_protocol(struct vty *vty) +{ + if (zrouter.allow_delete) + vty_out(vty, "allow-external-route-update\n"); + + if (zrouter.nhg_keep != ZEBRA_DEFAULT_NHG_KEEP_TIMER) + vty_out(vty, "zebra nexthop-group keep %u\n", zrouter.nhg_keep); + + if (zrouter.ribq->spec.hold != ZEBRA_RIB_PROCESS_HOLD_TIME) + vty_out(vty, "zebra work-queue %u\n", zrouter.ribq->spec.hold); + + if (zrouter.packets_to_process != ZEBRA_ZAPI_PACKETS_TO_PROCESS) + vty_out(vty, "zebra zapi-packets %u\n", + zrouter.packets_to_process); + + enum multicast_mode ipv4_multicast_mode = multicast_mode_ipv4_get(); + + if (ipv4_multicast_mode != MCAST_NO_CONFIG) + vty_out(vty, "ip multicast rpf-lookup-mode %s\n", + ipv4_multicast_mode == MCAST_URIB_ONLY + ? "urib-only" + : ipv4_multicast_mode == MCAST_MRIB_ONLY + ? "mrib-only" + : ipv4_multicast_mode + == MCAST_MIX_MRIB_FIRST + ? "mrib-then-urib" + : ipv4_multicast_mode + == MCAST_MIX_DISTANCE + ? 
"lower-distance" + : "longer-prefix"); + + /* Include dataplane info */ + dplane_config_write_helper(vty); + + zebra_evpn_mh_config_write(vty); + + zebra_pbr_config_write(vty); + + /* Include nexthop-group config */ + if (!zebra_nhg_kernel_nexthops_enabled()) + vty_out(vty, "no zebra nexthop kernel enable\n"); + + if (zebra_nhg_proto_nexthops_only()) + vty_out(vty, "zebra nexthop proto only\n"); + + if (!zebra_nhg_recursive_use_backups()) + vty_out(vty, "no zebra nexthop resolve-via-backup\n"); + + if (rnh_get_hide_backups()) + vty_out(vty, "ip nht hide-backup-events\n"); + +#ifdef HAVE_NETLINK + /* Include netlink info */ + netlink_config_write_helper(vty); +#endif /* HAVE_NETLINK */ + + return 1; +} + +DEFUN (show_zebra, + show_zebra_cmd, + "show zebra", + SHOW_STR + ZEBRA_STR) +{ + struct vrf *vrf; + struct ttable *table = ttable_new(&ttable_styles[TTSTYLE_BLANK]); + char *out; + + ttable_rowseps(table, 0, BOTTOM, true, '-'); + ttable_add_row(table, "OS|%s(%s)", cmd_system_get(), cmd_release_get()); + ttable_add_row(table, "ECMP Maximum|%d", zrouter.multipath_num); + ttable_add_row(table, "v4 Forwarding|%s", ipforward() ? "On" : "Off"); + ttable_add_row(table, "v6 Forwarding|%s", + ipforward_ipv6() ? "On" : "Off"); + ttable_add_row(table, "MPLS|%s", mpls_enabled ? "On" : "Off"); + ttable_add_row(table, "EVPN|%s", is_evpn_enabled() ? "On" : "Off"); + ttable_add_row(table, "Kernel socket buffer size|%d", rcvbufsize); + + +#ifdef GNU_LINUX + if (!vrf_is_backend_netns()) + ttable_add_row(table, "VRF|l3mdev Available"); + else + ttable_add_row(table, "VRF|Namespaces"); +#else + ttable_add_row(table, "VRF|Not Available"); +#endif + + ttable_add_row(table, "ASIC offload|%s", + zrouter.asic_offloaded ? "Used" : "Unavailable"); + + ttable_add_row(table, "RA|%s", + rtadv_compiled_in() ? "Compiled in" : "Not Compiled in"); + ttable_add_row(table, "RFC 5549|%s", + rtadv_get_interfaces_configured_from_bgp() + ? 
"BGP is using" + : "BGP is not using"); + + ttable_add_row(table, "Kernel NHG|%s", + zrouter.supports_nhgs ? "Available" : "Unavailable"); + + ttable_add_row(table, "Allow Non FRR route deletion|%s", + zrouter.allow_delete ? "Yes" : "No"); + ttable_add_row(table, "v4 All LinkDown Routes|%s", + zrouter.all_linkdownv4 ? "On" : "Off"); + ttable_add_row(table, "v4 Default LinkDown Routes|%s", + zrouter.default_linkdownv4 ? "On" : "Off"); + ttable_add_row(table, "v6 All LinkDown Routes|%s", + zrouter.all_linkdownv6 ? "On" : "Off"); + ttable_add_row(table, "v6 Default LinkDown Routes|%s", + zrouter.default_linkdownv6 ? "On" : "Off"); + + ttable_add_row(table, "v4 All MC Forwarding|%s", + zrouter.all_mc_forwardingv4 ? "On" : "Off"); + ttable_add_row(table, "v4 Default MC Forwarding|%s", + zrouter.default_mc_forwardingv4 ? "On" : "Off"); + ttable_add_row(table, "v6 All MC Forwarding|%s", + zrouter.all_mc_forwardingv6 ? "On" : "Off"); + ttable_add_row(table, "v6 Default MC Forwarding|%s", + zrouter.default_mc_forwardingv6 ? 
"On" : "Off"); + + out = ttable_dump(table, "\n"); + vty_out(vty, "%s\n", out); + XFREE(MTYPE_TMP, out); + + ttable_del(table); + vty_out(vty, + " Route Route Neighbor LSP LSP\n"); + vty_out(vty, + "VRF Installs Removals Updates Installs Removals\n"); + + RB_FOREACH (vrf, vrf_name_head, &vrfs_by_name) { + struct zebra_vrf *zvrf = vrf->info; + + vty_out(vty, "%-25s %10" PRIu64 " %10" PRIu64 " %10" PRIu64" %10" PRIu64 " %10" PRIu64 "\n", + vrf->name, zvrf->installs, zvrf->removals, + zvrf->neigh_updates, zvrf->lsp_installs, + zvrf->lsp_removals); + } + + return CMD_SUCCESS; +} + +DEFUN (ip_forwarding, + ip_forwarding_cmd, + "ip forwarding", + IP_STR + "Turn on IP forwarding\n") +{ + int ret; + + ret = ipforward(); + if (ret == 0) + ret = ipforward_on(); + + if (ret == 0) { + vty_out(vty, "Can't turn on IP forwarding\n"); + return CMD_WARNING_CONFIG_FAILED; + } + + return CMD_SUCCESS; +} + +DEFUN (no_ip_forwarding, + no_ip_forwarding_cmd, + "no ip forwarding", + NO_STR + IP_STR + "Turn off IP forwarding\n") +{ + int ret; + + ret = ipforward(); + if (ret != 0) + ret = ipforward_off(); + + if (ret != 0) { + vty_out(vty, "Can't turn off IP forwarding\n"); + return CMD_WARNING_CONFIG_FAILED; + } + + return CMD_SUCCESS; +} + +/* Only display ip forwarding is enabled or not. */ +DEFUN (show_ip_forwarding, + show_ip_forwarding_cmd, + "show ip forwarding", + SHOW_STR + IP_STR + "IP forwarding status\n") +{ + int ret; + + ret = ipforward(); + + if (ret == 0) + vty_out(vty, "IP forwarding is off\n"); + else + vty_out(vty, "IP forwarding is on\n"); + return CMD_SUCCESS; +} + +/* Only display ipv6 forwarding is enabled or not. 
*/ +DEFUN (show_ipv6_forwarding, + show_ipv6_forwarding_cmd, + "show ipv6 forwarding", + SHOW_STR + "IPv6 information\n" + "Forwarding status\n") +{ + int ret; + + ret = ipforward_ipv6(); + + switch (ret) { + case -1: + vty_out(vty, "ipv6 forwarding is unknown\n"); + break; + case 0: + vty_out(vty, "ipv6 forwarding is %s\n", "off"); + break; + case 1: + vty_out(vty, "ipv6 forwarding is %s\n", "on"); + break; + default: + vty_out(vty, "ipv6 forwarding is %s\n", "off"); + break; + } + return CMD_SUCCESS; +} + +DEFUN (ipv6_forwarding, + ipv6_forwarding_cmd, + "ipv6 forwarding", + IPV6_STR + "Turn on IPv6 forwarding\n") +{ + int ret; + + ret = ipforward_ipv6(); + if (ret == 0) + ret = ipforward_ipv6_on(); + + if (ret == 0) { + vty_out(vty, "Can't turn on IPv6 forwarding\n"); + return CMD_WARNING_CONFIG_FAILED; + } + + return CMD_SUCCESS; +} + +DEFUN (no_ipv6_forwarding, + no_ipv6_forwarding_cmd, + "no ipv6 forwarding", + NO_STR + IPV6_STR + "Turn off IPv6 forwarding\n") +{ + int ret; + + ret = ipforward_ipv6(); + if (ret != 0) + ret = ipforward_ipv6_off(); + + if (ret != 0) { + vty_out(vty, "Can't turn off IPv6 forwarding\n"); + return CMD_WARNING_CONFIG_FAILED; + } + + return CMD_SUCCESS; +} + +/* Display dataplane info */ +DEFUN (show_dataplane, + show_dataplane_cmd, + "show zebra dplane [detailed]", + SHOW_STR + ZEBRA_STR + "Zebra dataplane information\n" + "Detailed output\n") +{ + int idx = 0; + bool detailed = false; + + if (argv_find(argv, argc, "detailed", &idx)) + detailed = true; + + return dplane_show_helper(vty, detailed); +} + +/* Display dataplane providers info */ +DEFUN (show_dataplane_providers, + show_dataplane_providers_cmd, + "show zebra dplane providers [detailed]", + SHOW_STR + ZEBRA_STR + "Zebra dataplane information\n" + "Zebra dataplane provider information\n" + "Detailed output\n") +{ + int idx = 0; + bool detailed = false; + + if (argv_find(argv, argc, "detailed", &idx)) + detailed = true; + + return dplane_show_provs_helper(vty, detailed); 
+} + +/* Configure dataplane incoming queue limit */ +DEFUN (zebra_dplane_queue_limit, + zebra_dplane_queue_limit_cmd, + "zebra dplane limit (0-10000)", + ZEBRA_STR + "Zebra dataplane\n" + "Limit incoming queued updates\n" + "Number of queued updates\n") +{ + uint32_t limit = 0; + + limit = strtoul(argv[3]->arg, NULL, 10); + + dplane_set_in_queue_limit(limit, true); + + return CMD_SUCCESS; +} + +/* Reset dataplane queue limit to default value */ +DEFUN (no_zebra_dplane_queue_limit, + no_zebra_dplane_queue_limit_cmd, + "no zebra dplane limit [(0-10000)]", + NO_STR + ZEBRA_STR + "Zebra dataplane\n" + "Limit incoming queued updates\n" + "Number of queued updates\n") +{ + dplane_set_in_queue_limit(0, false); + + return CMD_SUCCESS; +} + +DEFUN (zebra_show_routing_tables_summary, + zebra_show_routing_tables_summary_cmd, + "show zebra router table summary", + SHOW_STR + ZEBRA_STR + "The Zebra Router Information\n" + "Table Information about this Zebra Router\n" + "Summary Information\n") +{ + zebra_router_show_table_summary(vty); + + return CMD_SUCCESS; +} + +/* Table configuration write function. */ +static int config_write_table(struct vty *vty) +{ + return 0; +} + +/* IPForwarding configuration write function. */ +static int config_write_forwarding(struct vty *vty) +{ + if (!ipforward()) + vty_out(vty, "no ip forwarding\n"); + if (!ipforward_ipv6()) + vty_out(vty, "no ipv6 forwarding\n"); + vty_out(vty, "!\n"); + return 0; +} + +DEFUN_HIDDEN (show_frr, + show_frr_cmd, + "show frr", + SHOW_STR + "FRR\n") +{ + vty_out(vty, "........ .. . .. . ..... ...77:................................................\n"); + vty_out(vty, ".............................7777:..............................................\n"); + vty_out(vty, ".............................777777,............................................\n"); + vty_out(vty, "... 
.........................77777777,..........................................\n"); + vty_out(vty, "............................=7777777777:........................................\n"); + vty_out(vty, "........................:7777777777777777,......................................\n"); + vty_out(vty, ".................... ~7777777777777?~,..........................................\n"); + vty_out(vty, "...................I7777777777+.................................................\n"); + vty_out(vty, "................,777777777?............ .......................................\n"); + vty_out(vty, "..............:77777777?..........~?77777.......................................\n"); + vty_out(vty, ".............77777777~........=7777777777.......................................\n"); + vty_out(vty, ".......... +7777777,.......?7777777777777.......................................\n"); + vty_out(vty, "..........7777777~......:7777777777777777......77?,.............................\n"); + vty_out(vty, "........:777777?......+777777777777777777......777777I,.........................\n"); + vty_out(vty, ".......?777777,.....+77777777777777777777......777777777?.......................\n"); + vty_out(vty, "......?777777......7777777777777777777777......,?777777777?.....................\n"); + vty_out(vty, ".....?77777?.....=7777777777777777777I~............,I7777777~...................\n"); + vty_out(vty, "....+77777+.....I77777777777777777:...................+777777I..................\n"); + vty_out(vty, "...~77777+.....7777777777777777=........................?777777...... 
.......\n"); + vty_out(vty, "...77777I.....I77777777777777~.........:?................,777777.....I777.......\n"); + vty_out(vty, "..777777.....I7777777777777I .......?7777..................777777.....777?......\n"); + vty_out(vty, ".~77777,....=7777777777777:......,7777777..................,77777+....+777......\n"); + vty_out(vty, ".77777I.....7777777777777,......777777777.......ONNNN.......=77777.....777~.....\n"); + vty_out(vty, ",77777.....I777777777777,.....:7777777777......DNNNNNN.......77777+ ...7777.....\n"); + vty_out(vty, "I7777I.....777777777777=.....~77777777777......NNNNNNN~......=7777I....=777.....\n"); + vty_out(vty, "77777:....=777777777777.....,777777777777......$NNNNND ......:77777....:777.....\n"); + vty_out(vty, "77777. ...777777777777~.....7777777777777........7DZ,........:77777.....777.....\n"); + vty_out(vty, "????? . ..777777777777.....,7777777777777....................:77777I....777.....\n"); + vty_out(vty, "....... ..777777777777.....+7777777777777....................=7777777+...?7.....\n"); + vty_out(vty, "..........77777777777I.....I7777777777777....................7777777777:........\n"); + vty_out(vty, "..........77777777777I.....?7777777777777...................~777777777777.......\n"); + vty_out(vty, "..........777777777777.....~7777777777777..................,77777777777777+.....\n"); + vty_out(vty, "..........777777777777......7777777777777..................77777777777777777,...\n"); + vty_out(vty, "..... ....?77777777777I.....~777777777777................,777777.....,:+77777I..\n"); + vty_out(vty, "........ .:777777777777,.....?77777777777...............?777777..............,:=\n"); + vty_out(vty, ".......... 7777777777777..... ?7777777777.............=7777777.....~777I........\n"); + vty_out(vty, "...........:777777777777I......~777777777...........I7777777~.....+777I.........\n"); + vty_out(vty, "..... 
......7777777777777I.......I7777777.......+777777777I......7777I..........\n"); + vty_out(vty, ".............77777777777777........?77777......777777777?......=7777=...........\n"); + vty_out(vty, ".............,77777777777777+.........~77......777777I,......:77777.............\n"); + vty_out(vty, "..............~777777777777777~................777777......:77777=..............\n"); + vty_out(vty, "...............:7777777777777777?..............:777777,.....=77=................\n"); + vty_out(vty, "................,777777777777777777?,...........,777777:.....,..................\n"); + vty_out(vty, "........... ......I777777777777777777777I.........777777~.......................\n"); + vty_out(vty, "...................,777777777777777777777..........777777+......................\n"); + vty_out(vty, ".....................+7777777777777777777...........777777?.....................\n"); + vty_out(vty, ".......................=77777777777777777............777777I....................\n"); + vty_out(vty, ".........................:777777777777777.............I77777I...................\n"); + vty_out(vty, "............................~777777777777..............+777777..................\n"); + vty_out(vty, "................................~77777777...............=777777.................\n"); + vty_out(vty, ".....................................:=?I................~777777................\n"); + vty_out(vty, "..........................................................:777777,..............\n"); + vty_out(vty, ".... ... ... . . .... ....... ....... 
....................:777777..............\n"); + + return CMD_SUCCESS; +} + +#ifdef HAVE_NETLINK +DEFUN_HIDDEN(zebra_kernel_netlink_batch_tx_buf, + zebra_kernel_netlink_batch_tx_buf_cmd, + "zebra kernel netlink batch-tx-buf (1-1048576) (1-1048576)", + ZEBRA_STR + "Zebra kernel interface\n" + "Set Netlink parameters\n" + "Set batch buffer size and send threshold\n" + "Size of the buffer\n" + "Send threshold\n") +{ + uint32_t bufsize = 0, threshold = 0; + + bufsize = strtoul(argv[4]->arg, NULL, 10); + threshold = strtoul(argv[5]->arg, NULL, 10); + + netlink_set_batch_buffer_size(bufsize, threshold, true); + + return CMD_SUCCESS; +} + +DEFUN_HIDDEN(no_zebra_kernel_netlink_batch_tx_buf, + no_zebra_kernel_netlink_batch_tx_buf_cmd, + "no zebra kernel netlink batch-tx-buf [(0-1048576)] [(0-1048576)]", + NO_STR ZEBRA_STR + "Zebra kernel interface\n" + "Set Netlink parameters\n" + "Set batch buffer size and send threshold\n" + "Size of the buffer\n" + "Send threshold\n") +{ + netlink_set_batch_buffer_size(0, 0, false); + + return CMD_SUCCESS; +} + +DEFPY (zebra_protodown_bit, + zebra_protodown_bit_cmd, + "zebra protodown reason-bit (0-31)$bit", + ZEBRA_STR + "Protodown Configuration\n" + "Reason Bit used in the kernel for application\n" + "Reason Bit range\n") +{ + if_netlink_set_frr_protodown_r_bit(bit); + return CMD_SUCCESS; +} + +DEFPY (no_zebra_protodown_bit, + no_zebra_protodown_bit_cmd, + "no zebra protodown reason-bit [(0-31)$bit]", + NO_STR + ZEBRA_STR + "Protodown Configuration\n" + "Reason Bit used in the kernel for setting protodown\n" + "Reason Bit Range\n") +{ + if_netlink_unset_frr_protodown_r_bit(); + return CMD_SUCCESS; +} + +#endif /* HAVE_NETLINK */ + +DEFUN(ip_table_range, ip_table_range_cmd, + "[no] ip table range (1-4294967295) (1-4294967295)", + NO_STR IP_STR + "table configuration\n" + "Configure table range\n" + "Start Routing Table\n" + "End Routing Table\n") +{ + ZEBRA_DECLVAR_CONTEXT_VRF(vrf, zvrf); + + if (!zvrf) + return CMD_WARNING; + + if 
(zvrf_id(zvrf) != VRF_DEFAULT && !vrf_is_backend_netns()) { + vty_out(vty, + "VRF subcommand does not make any sense in l3mdev based vrf's\n"); + return CMD_WARNING; + } + + if (strmatch(argv[0]->text, "no")) + return table_manager_range(vty, false, zvrf, NULL, NULL); + + return table_manager_range(vty, true, zvrf, argv[3]->arg, argv[4]->arg); +} + +#ifdef HAVE_SCRIPTING + +DEFUN(zebra_on_rib_process_script, zebra_on_rib_process_script_cmd, + "zebra on-rib-process script SCRIPT", + ZEBRA_STR + "on_rib_process_dplane_results hook call\n" + "Set a script\n" + "Script name (same as filename in /etc/frr/scripts/, without .lua)\n") +{ + + if (frrscript_names_set_script_name(ZEBRA_ON_RIB_PROCESS_HOOK_CALL, + argv[3]->arg) + == 0) { + vty_out(vty, "Successfully added script %s for hook call %s\n", + argv[3]->arg, ZEBRA_ON_RIB_PROCESS_HOOK_CALL); + } else { + vty_out(vty, "Failed to add script %s for hook call %s\n", + argv[3]->arg, ZEBRA_ON_RIB_PROCESS_HOOK_CALL); + } + return CMD_SUCCESS; +} + +#endif /* HAVE_SCRIPTING */ + +/* IP node for static routes. */ +static int zebra_ip_config(struct vty *vty); +static struct cmd_node ip_node = { + .name = "static ip", + .node = IP_NODE, + .prompt = "", + .config_write = zebra_ip_config, +}; +static int config_write_protocol(struct vty *vty); +static struct cmd_node protocol_node = { + .name = "protocol", + .node = PROTOCOL_NODE, + .prompt = "", + .config_write = config_write_protocol, +}; +/* table node for routing tables. */ +static int config_write_table(struct vty *vty); +static struct cmd_node table_node = { + .name = "table", + .node = TABLE_NODE, + .prompt = "", + .config_write = config_write_table, +}; +static int config_write_forwarding(struct vty *vty); +static struct cmd_node forwarding_node = { + .name = "forwarding", + .node = FORWARDING_NODE, + .prompt = "", + .config_write = config_write_forwarding, +}; + +/* Route VTY. */ +void zebra_vty_init(void) +{ + /* Install configuration write function. 
*/ + install_node(&table_node); + install_node(&forwarding_node); + + install_element(VIEW_NODE, &show_ip_forwarding_cmd); + install_element(CONFIG_NODE, &ip_forwarding_cmd); + install_element(CONFIG_NODE, &no_ip_forwarding_cmd); + install_element(ENABLE_NODE, &show_zebra_cmd); + + install_element(VIEW_NODE, &show_ipv6_forwarding_cmd); + install_element(CONFIG_NODE, &ipv6_forwarding_cmd); + install_element(CONFIG_NODE, &no_ipv6_forwarding_cmd); + + /* Route-map */ + zebra_route_map_init(); + + install_node(&ip_node); + install_node(&protocol_node); + + install_element(CONFIG_NODE, &allow_external_route_update_cmd); + install_element(CONFIG_NODE, &no_allow_external_route_update_cmd); + + install_element(CONFIG_NODE, &ip_multicast_mode_cmd); + install_element(CONFIG_NODE, &no_ip_multicast_mode_cmd); + + install_element(CONFIG_NODE, &zebra_nexthop_group_keep_cmd); + install_element(CONFIG_NODE, &ip_zebra_import_table_distance_cmd); + install_element(CONFIG_NODE, &no_ip_zebra_import_table_cmd); + install_element(CONFIG_NODE, &zebra_workqueue_timer_cmd); + install_element(CONFIG_NODE, &no_zebra_workqueue_timer_cmd); + install_element(CONFIG_NODE, &zebra_packet_process_cmd); + install_element(CONFIG_NODE, &no_zebra_packet_process_cmd); + install_element(CONFIG_NODE, &nexthop_group_use_enable_cmd); + install_element(CONFIG_NODE, &proto_nexthop_group_only_cmd); + install_element(CONFIG_NODE, &backup_nexthop_recursive_use_enable_cmd); + + install_element(VIEW_NODE, &show_nexthop_group_cmd); + install_element(VIEW_NODE, &show_interface_nexthop_group_cmd); + + install_element(VIEW_NODE, &show_vrf_cmd); + install_element(VIEW_NODE, &show_vrf_vni_cmd); + install_element(VIEW_NODE, &show_route_cmd); + install_element(VIEW_NODE, &show_ro_cmd); + install_element(VIEW_NODE, &show_route_detail_cmd); + install_element(VIEW_NODE, &show_route_summary_cmd); + install_element(VIEW_NODE, &show_ip_nht_cmd); + + install_element(VIEW_NODE, &show_ip_rpf_cmd); + install_element(VIEW_NODE, 
&show_ip_rpf_addr_cmd); + + install_element(CONFIG_NODE, &ip_nht_default_route_cmd); + install_element(CONFIG_NODE, &no_ip_nht_default_route_cmd); + install_element(CONFIG_NODE, &ipv6_nht_default_route_cmd); + install_element(CONFIG_NODE, &no_ipv6_nht_default_route_cmd); + install_element(VRF_NODE, &ip_nht_default_route_cmd); + install_element(VRF_NODE, &no_ip_nht_default_route_cmd); + install_element(VRF_NODE, &ipv6_nht_default_route_cmd); + install_element(VRF_NODE, &no_ipv6_nht_default_route_cmd); + install_element(CONFIG_NODE, &rnh_hide_backups_cmd); + + install_element(VIEW_NODE, &show_frr_cmd); + install_element(VIEW_NODE, &show_evpn_global_cmd); + install_element(VIEW_NODE, &show_evpn_vni_cmd); + install_element(VIEW_NODE, &show_evpn_vni_detail_cmd); + install_element(VIEW_NODE, &show_evpn_vni_vni_cmd); + install_element(VIEW_NODE, &show_evpn_l2_nh_cmd); + install_element(VIEW_NODE, &show_evpn_es_cmd); + install_element(VIEW_NODE, &show_evpn_es_evi_cmd); + install_element(VIEW_NODE, &show_evpn_access_vlan_cmd); + install_element(VIEW_NODE, &show_evpn_rmac_vni_mac_cmd); + install_element(VIEW_NODE, &show_evpn_rmac_vni_cmd); + install_element(VIEW_NODE, &show_evpn_rmac_vni_all_cmd); + install_element(VIEW_NODE, &show_evpn_nh_vni_ip_cmd); + install_element(VIEW_NODE, &show_evpn_nh_vni_cmd); + install_element(VIEW_NODE, &show_evpn_nh_vni_all_cmd); + install_element(VIEW_NODE, &show_evpn_mac_vni_cmd); + install_element(VIEW_NODE, &show_evpn_mac_vni_all_cmd); + install_element(VIEW_NODE, &show_evpn_mac_vni_all_detail_cmd); + install_element(VIEW_NODE, &show_evpn_mac_vni_all_vtep_cmd); + install_element(VIEW_NODE, &show_evpn_mac_vni_mac_cmd); + install_element(VIEW_NODE, &show_evpn_mac_vni_vtep_cmd); + install_element(VIEW_NODE, &show_evpn_mac_vni_dad_cmd); + install_element(VIEW_NODE, &show_evpn_mac_vni_all_dad_cmd); + install_element(VIEW_NODE, &show_evpn_neigh_vni_cmd); + install_element(VIEW_NODE, &show_evpn_neigh_vni_all_cmd); + install_element(VIEW_NODE, 
&show_evpn_neigh_vni_all_detail_cmd); + install_element(VIEW_NODE, &show_evpn_neigh_vni_neigh_cmd); + install_element(VIEW_NODE, &show_evpn_neigh_vni_vtep_cmd); + install_element(VIEW_NODE, &show_evpn_neigh_vni_dad_cmd); + install_element(VIEW_NODE, &show_evpn_neigh_vni_all_dad_cmd); + install_element(ENABLE_NODE, &clear_evpn_dup_addr_cmd); + + install_element(VIEW_NODE, &show_neigh_cmd); + + install_element(VIEW_NODE, &show_pbr_ipset_cmd); + install_element(VIEW_NODE, &show_pbr_iptable_cmd); + install_element(VIEW_NODE, &show_pbr_rule_cmd); + install_element(CONFIG_NODE, &pbr_nexthop_resolve_cmd); + install_element(VIEW_NODE, &show_route_zebra_dump_cmd); + + install_element(CONFIG_NODE, &evpn_mh_mac_holdtime_cmd); + install_element(CONFIG_NODE, &evpn_mh_neigh_holdtime_cmd); + install_element(CONFIG_NODE, &evpn_mh_startup_delay_cmd); + install_element(CONFIG_NODE, &evpn_mh_redirect_off_cmd); + install_element(CONFIG_NODE, &default_vrf_vni_mapping_cmd); + install_element(CONFIG_NODE, &no_default_vrf_vni_mapping_cmd); + install_element(VRF_NODE, &vrf_vni_mapping_cmd); + install_element(VRF_NODE, &no_vrf_vni_mapping_cmd); + + install_element(VIEW_NODE, &show_dataplane_cmd); + install_element(VIEW_NODE, &show_dataplane_providers_cmd); + install_element(CONFIG_NODE, &zebra_dplane_queue_limit_cmd); + install_element(CONFIG_NODE, &no_zebra_dplane_queue_limit_cmd); + + install_element(CONFIG_NODE, &ip_table_range_cmd); + install_element(VRF_NODE, &ip_table_range_cmd); + +#ifdef HAVE_NETLINK + install_element(CONFIG_NODE, &zebra_kernel_netlink_batch_tx_buf_cmd); + install_element(CONFIG_NODE, &no_zebra_kernel_netlink_batch_tx_buf_cmd); + install_element(CONFIG_NODE, &zebra_protodown_bit_cmd); + install_element(CONFIG_NODE, &no_zebra_protodown_bit_cmd); +#endif /* HAVE_NETLINK */ + +#ifdef HAVE_SCRIPTING + install_element(CONFIG_NODE, &zebra_on_rib_process_script_cmd); +#endif /* HAVE_SCRIPTING */ + + install_element(VIEW_NODE, &zebra_show_routing_tables_summary_cmd); +} 
diff --git a/zebra/zebra_vxlan.c b/zebra/zebra_vxlan.c new file mode 100644 index 0000000..34cce71 --- /dev/null +++ b/zebra/zebra_vxlan.c @@ -0,0 +1,6291 @@ +/* + * Zebra EVPN for VxLAN code + * Copyright (C) 2016, 2017 Cumulus Networks, Inc. + * + * This file is part of FRR. + * + * FRR is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License as published by the + * Free Software Foundation; either version 2, or (at your option) any + * later version. + * + * FRR is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with FRR; see the file COPYING. If not, write to the Free + * Software Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA + * 02111-1307, USA. + */ + +#include <zebra.h> + +#include "hash.h" +#include "if.h" +#include "jhash.h" +#include "linklist.h" +#include "log.h" +#include "memory.h" +#include "prefix.h" +#include "stream.h" +#include "table.h" +#include "vlan.h" +#include "vxlan.h" +#ifdef GNU_LINUX +#include <linux/neighbour.h> +#endif +#include "lib/printfrr.h" + +#include "zebra/zebra_router.h" +#include "zebra/debug.h" +#include "zebra/interface.h" +#include "zebra/rib.h" +#include "zebra/rt.h" +#include "zebra/rt_netlink.h" +#include "zebra/zebra_errors.h" +#include "zebra/zebra_l2.h" +#include "zebra/zebra_ns.h" +#include "zebra/zebra_vrf.h" +#include "zebra/zebra_vxlan.h" +#include "zebra/zebra_evpn.h" +#include "zebra/zebra_evpn_mac.h" +#include "zebra/zebra_evpn_neigh.h" +#include "zebra/zebra_vxlan_private.h" +#include "zebra/zebra_evpn_mh.h" +#include "zebra/zebra_evpn_vxlan.h" +#include "zebra/zebra_router.h" + +DEFINE_MTYPE_STATIC(ZEBRA, HOST_PREFIX, "host prefix"); +DEFINE_MTYPE_STATIC(ZEBRA, ZL3VNI, "L3 VNI 
hash"); +DEFINE_MTYPE_STATIC(ZEBRA, L3VNI_MAC, "EVPN L3VNI MAC"); +DEFINE_MTYPE_STATIC(ZEBRA, L3NEIGH, "EVPN Neighbor"); +DEFINE_MTYPE_STATIC(ZEBRA, ZVXLAN_SG, "zebra VxLAN multicast group"); +DEFINE_MTYPE_STATIC(ZEBRA, EVPN_VTEP, "zebra VxLAN VTEP IP"); + +DEFINE_HOOK(zebra_rmac_update, + (struct zebra_mac * rmac, struct zebra_l3vni *zl3vni, bool delete, + const char *reason), + (rmac, zl3vni, delete, reason)); + +/* static function declarations */ +static void zevpn_print_neigh_hash_all_evpn(struct hash_bucket *bucket, + void **args); +static void zl3vni_print_nh(struct zebra_neigh *n, struct vty *vty, + json_object *json); +static void zl3vni_print_rmac(struct zebra_mac *zrmac, struct vty *vty, + json_object *json); +static void zevpn_print_mac_hash_all_evpn(struct hash_bucket *bucket, void *ctxt); + +/* l3-vni next-hop neigh related APIs */ +static struct zebra_neigh *zl3vni_nh_lookup(struct zebra_l3vni *zl3vni, + const struct ipaddr *ip); +static void *zl3vni_nh_alloc(void *p); +static struct zebra_neigh *zl3vni_nh_add(struct zebra_l3vni *zl3vni, + const struct ipaddr *vtep_ip, + const struct ethaddr *rmac); +static int zl3vni_nh_del(struct zebra_l3vni *zl3vni, struct zebra_neigh *n); +static int zl3vni_nh_install(struct zebra_l3vni *zl3vni, struct zebra_neigh *n); +static int zl3vni_nh_uninstall(struct zebra_l3vni *zl3vni, + struct zebra_neigh *n); + +/* l3-vni rmac related APIs */ +static void zl3vni_print_rmac_hash(struct hash_bucket *, void *); +static struct zebra_mac *zl3vni_rmac_lookup(struct zebra_l3vni *zl3vni, + const struct ethaddr *rmac); +static void *zl3vni_rmac_alloc(void *p); +static struct zebra_mac *zl3vni_rmac_add(struct zebra_l3vni *zl3vni, + const struct ethaddr *rmac); +static int zl3vni_rmac_del(struct zebra_l3vni *zl3vni, struct zebra_mac *zrmac); +static int zl3vni_rmac_install(struct zebra_l3vni *zl3vni, + struct zebra_mac *zrmac); +static int zl3vni_rmac_uninstall(struct zebra_l3vni *zl3vni, + struct zebra_mac *zrmac); + +/* l3-vni 
related APIs*/ +static void *zl3vni_alloc(void *p); +static struct zebra_l3vni *zl3vni_add(vni_t vni, vrf_id_t vrf_id); +static int zl3vni_del(struct zebra_l3vni *zl3vni); +static void zebra_vxlan_process_l3vni_oper_up(struct zebra_l3vni *zl3vni); +static void zebra_vxlan_process_l3vni_oper_down(struct zebra_l3vni *zl3vni); + +static void zevpn_build_hash_table(void); +static unsigned int zebra_vxlan_sg_hash_key_make(const void *p); +static bool zebra_vxlan_sg_hash_eq(const void *p1, const void *p2); +static void zebra_vxlan_sg_do_deref(struct zebra_vrf *zvrf, + struct in_addr sip, struct in_addr mcast_grp); +static struct zebra_vxlan_sg *zebra_vxlan_sg_do_ref(struct zebra_vrf *vrf, + struct in_addr sip, + struct in_addr mcast_grp); +static void zebra_vxlan_sg_deref(struct in_addr local_vtep_ip, + struct in_addr mcast_grp); +static void zebra_vxlan_sg_ref(struct in_addr local_vtep_ip, + struct in_addr mcast_grp); +static void zebra_vxlan_cleanup_sg_table(struct zebra_vrf *zvrf); + +bool zebra_evpn_do_dup_addr_detect(struct zebra_vrf *zvrf) +{ + return zvrf->dup_addr_detect && zebra_evpn_mh_do_dup_addr_detect(); +} + +/* Private functions */ +static int host_rb_entry_compare(const struct host_rb_entry *hle1, + const struct host_rb_entry *hle2) +{ + if (hle1->p.family < hle2->p.family) + return -1; + + if (hle1->p.family > hle2->p.family) + return 1; + + if (hle1->p.prefixlen < hle2->p.prefixlen) + return -1; + + if (hle1->p.prefixlen > hle2->p.prefixlen) + return 1; + + if (hle1->p.family == AF_INET) { + if (hle1->p.u.prefix4.s_addr < hle2->p.u.prefix4.s_addr) + return -1; + + if (hle1->p.u.prefix4.s_addr > hle2->p.u.prefix4.s_addr) + return 1; + + return 0; + } else if (hle1->p.family == AF_INET6) { + return memcmp(&hle1->p.u.prefix6, &hle2->p.u.prefix6, + IPV6_MAX_BYTELEN); + } else if (hle1->p.family == AF_EVPN) { + uint8_t family1; + uint8_t family2; + + /* two (v4/v6) dummy prefixes of route_type BGP_EVPN_AD_ROUTE + * are used for all nexthops associated with a 
non-zero ESI + */ + family1 = is_evpn_prefix_ipaddr_v4( + (const struct prefix_evpn *)&hle1->p) + ? AF_INET + : AF_INET6; + family2 = is_evpn_prefix_ipaddr_v4( + (const struct prefix_evpn *)&hle2->p) + ? AF_INET + : AF_INET6; + + + if (family1 < family2) + return -1; + + if (family1 > family2) + return 1; + + return 0; + } else { + zlog_debug("%s: Unexpected family type: %d", __func__, + hle1->p.family); + return 0; + } +} +RB_GENERATE(host_rb_tree_entry, host_rb_entry, hl_entry, host_rb_entry_compare); + +static uint32_t rb_host_count(struct host_rb_tree_entry *hrbe) +{ + struct host_rb_entry *hle; + uint32_t count = 0; + + RB_FOREACH (hle, host_rb_tree_entry, hrbe) + count++; + + return count; +} + +static int l3vni_rmac_nh_list_cmp(void *p1, void *p2) +{ + const struct ipaddr *vtep_ip1 = p1; + const struct ipaddr *vtep_ip2 = p2; + + return !ipaddr_cmp(vtep_ip1, vtep_ip2); +} + +static void l3vni_rmac_nh_free(struct ipaddr *vtep_ip) +{ + XFREE(MTYPE_EVPN_VTEP, vtep_ip); +} + +static void l3vni_rmac_nh_list_nh_delete(struct zebra_l3vni *zl3vni, + struct zebra_mac *zrmac, + struct ipaddr *vtep_ip) +{ + struct listnode *node = NULL, *nnode = NULL; + struct ipaddr *vtep = NULL; + + for (ALL_LIST_ELEMENTS(zrmac->nh_list, node, nnode, vtep)) { + if (ipaddr_cmp(vtep, vtep_ip) == 0) + break; + } + + if (node) { + l3vni_rmac_nh_free(vtep); + list_delete_node(zrmac->nh_list, node); + } +} + +/* + * Print neighbors for all EVPN. 
+ */ +static void zevpn_print_neigh_hash_all_evpn(struct hash_bucket *bucket, + void **args) +{ + struct vty *vty; + json_object *json = NULL, *json_evpn = NULL; + struct zebra_evpn *zevpn; + uint32_t num_neigh; + struct neigh_walk_ctx wctx; + char vni_str[VNI_STR_LEN]; + uint32_t print_dup; + + vty = (struct vty *)args[0]; + json = (json_object *)args[1]; + print_dup = (uint32_t)(uintptr_t)args[2]; + + zevpn = (struct zebra_evpn *)bucket->data; + + num_neigh = hashcount(zevpn->neigh_table); + + if (print_dup) + num_neigh = num_dup_detected_neighs(zevpn); + + if (json == NULL) { + vty_out(vty, + "\nVNI %u #ARP (IPv4 and IPv6, local and remote) %u\n\n", + zevpn->vni, num_neigh); + } else { + json_evpn = json_object_new_object(); + json_object_int_add(json_evpn, "numArpNd", num_neigh); + snprintf(vni_str, VNI_STR_LEN, "%u", zevpn->vni); + } + + if (!num_neigh) { + if (json) + json_object_object_add(json, vni_str, json_evpn); + return; + } + + /* Since we have IPv6 addresses to deal with which can vary widely in + * size, we try to be a bit more elegant in display by first computing + * the maximum width. + */ + memset(&wctx, 0, sizeof(wctx)); + wctx.zevpn = zevpn; + wctx.vty = vty; + wctx.addr_width = 15; + wctx.json = json_evpn; + hash_iterate(zevpn->neigh_table, zebra_evpn_find_neigh_addr_width, + &wctx); + + if (json == NULL) + zebra_evpn_print_neigh_hdr(vty, &wctx); + + if (print_dup) + hash_iterate(zevpn->neigh_table, + zebra_evpn_print_dad_neigh_hash, &wctx); + else + hash_iterate(zevpn->neigh_table, zebra_evpn_print_neigh_hash, + &wctx); + + if (json) + json_object_object_add(json, vni_str, json_evpn); +} + +/* + * Print neighbors for all EVPNs in detail. 
+ */ +static void zevpn_print_neigh_hash_all_evpn_detail(struct hash_bucket *bucket, + void **args) +{ + struct vty *vty; + json_object *json = NULL, *json_evpn = NULL; + struct zebra_evpn *zevpn; + uint32_t num_neigh; + struct neigh_walk_ctx wctx; + char vni_str[VNI_STR_LEN]; + uint32_t print_dup; + + vty = (struct vty *)args[0]; + json = (json_object *)args[1]; + print_dup = (uint32_t)(uintptr_t)args[2]; + + zevpn = (struct zebra_evpn *)bucket->data; + if (!zevpn) { + if (json) + vty_out(vty, "{}\n"); + return; + } + num_neigh = hashcount(zevpn->neigh_table); + + if (print_dup && num_dup_detected_neighs(zevpn) == 0) + return; + + if (json == NULL) { + vty_out(vty, + "\nVNI %u #ARP (IPv4 and IPv6, local and remote) %u\n\n", + zevpn->vni, num_neigh); + } else { + json_evpn = json_object_new_object(); + json_object_int_add(json_evpn, "numArpNd", num_neigh); + snprintf(vni_str, VNI_STR_LEN, "%u", zevpn->vni); + } + if (!num_neigh) { + if (json) + json_object_object_add(json, vni_str, json_evpn); + return; + } + + memset(&wctx, 0, sizeof(wctx)); + wctx.zevpn = zevpn; + wctx.vty = vty; + wctx.addr_width = 15; + wctx.json = json_evpn; + + if (print_dup) + hash_iterate(zevpn->neigh_table, + zebra_evpn_print_dad_neigh_hash_detail, &wctx); + else + hash_iterate(zevpn->neigh_table, + zebra_evpn_print_neigh_hash_detail, &wctx); + + if (json) + json_object_object_add(json, vni_str, json_evpn); +} + +/* print a specific next hop for an l3vni */ +static void zl3vni_print_nh(struct zebra_neigh *n, struct vty *vty, + json_object *json) +{ + char buf1[ETHER_ADDR_STRLEN]; + char buf2[INET6_ADDRSTRLEN]; + json_object *json_hosts = NULL; + struct host_rb_entry *hle; + + if (!json) { + vty_out(vty, "Ip: %s\n", + ipaddr2str(&n->ip, buf2, sizeof(buf2))); + vty_out(vty, " RMAC: %s\n", + prefix_mac2str(&n->emac, buf1, sizeof(buf1))); + vty_out(vty, " Refcount: %d\n", + rb_host_count(&n->host_rb)); + vty_out(vty, " Prefixes:\n"); + RB_FOREACH (hle, host_rb_tree_entry, &n->host_rb) + 
vty_out(vty, " %pFX\n", &hle->p); + } else { + json_hosts = json_object_new_array(); + json_object_string_add( + json, "ip", ipaddr2str(&(n->ip), buf2, sizeof(buf2))); + json_object_string_add( + json, "routerMac", + prefix_mac2str(&n->emac, buf2, sizeof(buf2))); + json_object_int_add(json, "refCount", + rb_host_count(&n->host_rb)); + RB_FOREACH (hle, host_rb_tree_entry, &n->host_rb) + json_object_array_add(json_hosts, + json_object_new_string(prefix2str( + &hle->p, buf2, sizeof(buf2)))); + json_object_object_add(json, "prefixList", json_hosts); + } +} + +/* Print a specific RMAC entry */ +static void zl3vni_print_rmac(struct zebra_mac *zrmac, struct vty *vty, + json_object *json) +{ + struct listnode *node = NULL; + struct ipaddr *vtep = NULL; + json_object *json_nhs = NULL; + + if (!json) { + vty_out(vty, "MAC: %pEA\n", &zrmac->macaddr); + vty_out(vty, " Remote VTEP: %pI4\n", + &zrmac->fwd_info.r_vtep_ip); + } else { + json_nhs = json_object_new_array(); + json_object_string_addf(json, "routerMac", "%pEA", + &zrmac->macaddr); + json_object_string_addf(json, "vtepIp", "%pI4", + &zrmac->fwd_info.r_vtep_ip); + for (ALL_LIST_ELEMENTS_RO(zrmac->nh_list, node, vtep)) { + json_object_array_add(json_nhs, json_object_new_stringf( + "%pIA", vtep)); + } + json_object_object_add(json, "nexthops", json_nhs); + } +} + +/* + * Print MACs for all EVPNs. 
+ */ +static void zevpn_print_mac_hash_all_evpn(struct hash_bucket *bucket, void *ctxt) +{ + struct vty *vty; + json_object *json = NULL, *json_evpn = NULL; + json_object *json_mac = NULL; + struct zebra_evpn *zevpn; + uint32_t num_macs; + struct mac_walk_ctx *wctx = ctxt; + char vni_str[VNI_STR_LEN]; + + vty = wctx->vty; + json = wctx->json; + + zevpn = (struct zebra_evpn *)bucket->data; + wctx->zevpn = zevpn; + + /*We are iterating over a new VNI, set the count to 0*/ + wctx->count = 0; + + num_macs = num_valid_macs(zevpn); + if (!num_macs) + return; + + if (wctx->print_dup) + num_macs = num_dup_detected_macs(zevpn); + + if (json) { + json_evpn = json_object_new_object(); + json_mac = json_object_new_object(); + snprintf(vni_str, VNI_STR_LEN, "%u", zevpn->vni); + } + + if (!CHECK_FLAG(wctx->flags, SHOW_REMOTE_MAC_FROM_VTEP)) { + if (json == NULL) { + vty_out(vty, "\nVNI %u #MACs (local and remote) %u\n\n", + zevpn->vni, num_macs); + vty_out(vty, + "Flags: N=sync-neighs, I=local-inactive, P=peer-active, X=peer-proxy\n"); + vty_out(vty, "%-17s %-6s %-5s %-30s %-5s %s\n", "MAC", + "Type", "Flags", "Intf/Remote ES/VTEP", + "VLAN", "Seq #'s"); + } else + json_object_int_add(json_evpn, "numMacs", num_macs); + } + + if (!num_macs) { + if (json) { + json_object_int_add(json_evpn, "numMacs", num_macs); + json_object_object_add(json, vni_str, json_evpn); + } + return; + } + + /* assign per-evpn to wctx->json object to fill macs + * under the evpn. Re-assign primary json object to fill + * next evpn information. + */ + wctx->json = json_mac; + if (wctx->print_dup) + hash_iterate(zevpn->mac_table, zebra_evpn_print_dad_mac_hash, + wctx); + else + hash_iterate(zevpn->mac_table, zebra_evpn_print_mac_hash, wctx); + wctx->json = json; + if (json) { + if (wctx->count) + json_object_object_add(json_evpn, "macs", json_mac); + json_object_object_add(json, vni_str, json_evpn); + } +} + +/* + * Print MACs in detail for all EVPNs. 
+ */ +static void zevpn_print_mac_hash_all_evpn_detail(struct hash_bucket *bucket, + void *ctxt) +{ + struct vty *vty; + json_object *json = NULL, *json_evpn = NULL; + json_object *json_mac = NULL; + struct zebra_evpn *zevpn; + uint32_t num_macs; + struct mac_walk_ctx *wctx = ctxt; + char vni_str[VNI_STR_LEN]; + + vty = wctx->vty; + json = wctx->json; + + zevpn = (struct zebra_evpn *)bucket->data; + if (!zevpn) { + if (json) + vty_out(vty, "{}\n"); + return; + } + wctx->zevpn = zevpn; + + /*We are iterating over a new EVPN, set the count to 0*/ + wctx->count = 0; + + num_macs = num_valid_macs(zevpn); + if (!num_macs) + return; + + if (wctx->print_dup && (num_dup_detected_macs(zevpn) == 0)) + return; + + if (json) { + json_evpn = json_object_new_object(); + json_mac = json_object_new_object(); + snprintf(vni_str, VNI_STR_LEN, "%u", zevpn->vni); + } + + if (!CHECK_FLAG(wctx->flags, SHOW_REMOTE_MAC_FROM_VTEP)) { + if (json == NULL) { + vty_out(vty, "\nVNI %u #MACs (local and remote) %u\n\n", + zevpn->vni, num_macs); + } else + json_object_int_add(json_evpn, "numMacs", num_macs); + } + /* assign per-evpn to wctx->json object to fill macs + * under the evpn. Re-assign primary json object to fill + * next evpn information. 
+ */ + wctx->json = json_mac; + if (wctx->print_dup) + hash_iterate(zevpn->mac_table, + zebra_evpn_print_dad_mac_hash_detail, wctx); + else + hash_iterate(zevpn->mac_table, zebra_evpn_print_mac_hash_detail, + wctx); + wctx->json = json; + if (json) { + if (wctx->count) + json_object_object_add(json_evpn, "macs", json_mac); + json_object_object_add(json, vni_str, json_evpn); + } +} + +static void zl3vni_print_nh_hash(struct hash_bucket *bucket, void *ctx) +{ + struct nh_walk_ctx *wctx = NULL; + struct vty *vty = NULL; + struct json_object *json_evpn = NULL; + struct json_object *json_nh = NULL; + struct zebra_neigh *n = NULL; + char buf1[ETHER_ADDR_STRLEN]; + char buf2[INET6_ADDRSTRLEN]; + + wctx = (struct nh_walk_ctx *)ctx; + vty = wctx->vty; + json_evpn = wctx->json; + if (json_evpn) + json_nh = json_object_new_object(); + n = (struct zebra_neigh *)bucket->data; + + if (!json_evpn) { + vty_out(vty, "%-15s %-17s\n", + ipaddr2str(&(n->ip), buf2, sizeof(buf2)), + prefix_mac2str(&n->emac, buf1, sizeof(buf1))); + } else { + json_object_string_add(json_nh, "nexthopIp", + ipaddr2str(&n->ip, buf2, sizeof(buf2))); + json_object_string_add( + json_nh, "routerMac", + prefix_mac2str(&n->emac, buf1, sizeof(buf1))); + json_object_object_add(json_evpn, + ipaddr2str(&(n->ip), buf2, sizeof(buf2)), + json_nh); + } +} + +static void zl3vni_print_nh_hash_all_vni(struct hash_bucket *bucket, + void **args) +{ + struct vty *vty = NULL; + json_object *json = NULL; + json_object *json_evpn = NULL; + struct zebra_l3vni *zl3vni = NULL; + uint32_t num_nh = 0; + struct nh_walk_ctx wctx; + char vni_str[VNI_STR_LEN]; + + vty = (struct vty *)args[0]; + json = (struct json_object *)args[1]; + + zl3vni = (struct zebra_l3vni *)bucket->data; + + num_nh = hashcount(zl3vni->nh_table); + if (!num_nh) + return; + + if (json) { + json_evpn = json_object_new_object(); + snprintf(vni_str, VNI_STR_LEN, "%u", zl3vni->vni); + } + + if (json == NULL) { + vty_out(vty, "\nVNI %u #Next-Hops %u\n\n", zl3vni->vni, 
num_nh); + vty_out(vty, "%-15s %-17s\n", "IP", "RMAC"); + } else + json_object_int_add(json_evpn, "numNextHops", num_nh); + + memset(&wctx, 0, sizeof(wctx)); + wctx.vty = vty; + wctx.json = json_evpn; + hash_iterate(zl3vni->nh_table, zl3vni_print_nh_hash, &wctx); + if (json) + json_object_object_add(json, vni_str, json_evpn); +} + +static void zl3vni_print_rmac_hash_all_vni(struct hash_bucket *bucket, + void **args) +{ + struct vty *vty = NULL; + json_object *json = NULL; + json_object *json_evpn = NULL; + struct zebra_l3vni *zl3vni = NULL; + uint32_t num_rmacs; + struct rmac_walk_ctx wctx; + char vni_str[VNI_STR_LEN]; + + vty = (struct vty *)args[0]; + json = (struct json_object *)args[1]; + + zl3vni = (struct zebra_l3vni *)bucket->data; + + num_rmacs = hashcount(zl3vni->rmac_table); + if (!num_rmacs) + return; + + if (json) { + json_evpn = json_object_new_object(); + snprintf(vni_str, VNI_STR_LEN, "%u", zl3vni->vni); + } + + if (json == NULL) { + vty_out(vty, "\nVNI %u #RMACs %u\n\n", zl3vni->vni, num_rmacs); + vty_out(vty, "%-17s %-21s\n", "RMAC", "Remote VTEP"); + } else + json_object_int_add(json_evpn, "numRmacs", num_rmacs); + + /* assign per-vni to wctx->json object to fill macs + * under the vni. Re-assign primary json object to fill + * next vni information. 
+ */ + memset(&wctx, 0, sizeof(wctx)); + wctx.vty = vty; + wctx.json = json_evpn; + hash_iterate(zl3vni->rmac_table, zl3vni_print_rmac_hash, &wctx); + if (json) + json_object_object_add(json, vni_str, json_evpn); +} + +static void zl3vni_print_rmac_hash(struct hash_bucket *bucket, void *ctx) +{ + struct zebra_mac *zrmac = NULL; + struct rmac_walk_ctx *wctx = NULL; + struct vty *vty = NULL; + struct json_object *json = NULL; + struct json_object *json_rmac = NULL; + char buf[PREFIX_STRLEN]; + + wctx = (struct rmac_walk_ctx *)ctx; + vty = wctx->vty; + json = wctx->json; + if (json) + json_rmac = json_object_new_object(); + zrmac = (struct zebra_mac *)bucket->data; + + if (!json) { + vty_out(vty, "%-17s %-21pI4\n", + prefix_mac2str(&zrmac->macaddr, buf, sizeof(buf)), + &zrmac->fwd_info.r_vtep_ip); + } else { + json_object_string_add( + json_rmac, "routerMac", + prefix_mac2str(&zrmac->macaddr, buf, sizeof(buf))); + json_object_string_addf(json_rmac, "vtepIp", "%pI4", + &zrmac->fwd_info.r_vtep_ip); + json_object_object_add( + json, prefix_mac2str(&zrmac->macaddr, buf, sizeof(buf)), + json_rmac); + } +} + +/* print a specific L3 VNI entry */ +static void zl3vni_print(struct zebra_l3vni *zl3vni, void **ctx) +{ + char buf[PREFIX_STRLEN]; + struct vty *vty = NULL; + json_object *json = NULL; + struct zebra_evpn *zevpn = NULL; + json_object *json_evpn_list = NULL; + struct listnode *node = NULL, *nnode = NULL; + + vty = ctx[0]; + json = ctx[1]; + + if (!json) { + vty_out(vty, "VNI: %u\n", zl3vni->vni); + vty_out(vty, " Type: %s\n", "L3"); + vty_out(vty, " Tenant VRF: %s\n", zl3vni_vrf_name(zl3vni)); + vty_out(vty, " Local Vtep Ip: %pI4\n", + &zl3vni->local_vtep_ip); + vty_out(vty, " Vxlan-Intf: %s\n", + zl3vni_vxlan_if_name(zl3vni)); + vty_out(vty, " SVI-If: %s\n", zl3vni_svi_if_name(zl3vni)); + vty_out(vty, " State: %s\n", zl3vni_state2str(zl3vni)); + vty_out(vty, " VNI Filter: %s\n", + CHECK_FLAG(zl3vni->filter, PREFIX_ROUTES_ONLY) + ? 
"prefix-routes-only" + : "none"); + vty_out(vty, " System MAC: %s\n", + zl3vni_sysmac2str(zl3vni, buf, sizeof(buf))); + vty_out(vty, " Router MAC: %s\n", + zl3vni_rmac2str(zl3vni, buf, sizeof(buf))); + vty_out(vty, " L2 VNIs: "); + for (ALL_LIST_ELEMENTS(zl3vni->l2vnis, node, nnode, zevpn)) + vty_out(vty, "%u ", zevpn->vni); + vty_out(vty, "\n"); + } else { + json_evpn_list = json_object_new_array(); + json_object_int_add(json, "vni", zl3vni->vni); + json_object_string_add(json, "type", "L3"); + json_object_string_addf(json, "localVtepIp", "%pI4", + &zl3vni->local_vtep_ip); + json_object_string_add(json, "vxlanIntf", + zl3vni_vxlan_if_name(zl3vni)); + json_object_string_add(json, "sviIntf", + zl3vni_svi_if_name(zl3vni)); + json_object_string_add(json, "state", zl3vni_state2str(zl3vni)); + json_object_string_add(json, "vrf", zl3vni_vrf_name(zl3vni)); + json_object_string_add( + json, "sysMac", + zl3vni_sysmac2str(zl3vni, buf, sizeof(buf))); + json_object_string_add( + json, "routerMac", + zl3vni_rmac2str(zl3vni, buf, sizeof(buf))); + json_object_string_add( + json, "vniFilter", + CHECK_FLAG(zl3vni->filter, PREFIX_ROUTES_ONLY) + ? 
"prefix-routes-only" + : "none"); + for (ALL_LIST_ELEMENTS(zl3vni->l2vnis, node, nnode, zevpn)) { + json_object_array_add(json_evpn_list, + json_object_new_int(zevpn->vni)); + } + json_object_object_add(json, "l2Vnis", json_evpn_list); + } +} + +/* print a L3 VNI hash entry */ +static void zl3vni_print_hash(struct hash_bucket *bucket, void *ctx[]) +{ + struct vty *vty = NULL; + json_object *json = NULL; + json_object *json_evpn = NULL; + struct zebra_l3vni *zl3vni = NULL; + + vty = (struct vty *)ctx[0]; + json = (json_object *)ctx[1]; + + zl3vni = (struct zebra_l3vni *)bucket->data; + + if (!json) { + vty_out(vty, "%-10u %-4s %-21s %-8lu %-8lu %-15s %-37s\n", + zl3vni->vni, "L3", zl3vni_vxlan_if_name(zl3vni), + hashcount(zl3vni->rmac_table), + hashcount(zl3vni->nh_table), "n/a", + zl3vni_vrf_name(zl3vni)); + } else { + char vni_str[VNI_STR_LEN]; + + snprintf(vni_str, VNI_STR_LEN, "%u", zl3vni->vni); + json_evpn = json_object_new_object(); + json_object_int_add(json_evpn, "vni", zl3vni->vni); + json_object_string_add(json_evpn, "vxlanIf", + zl3vni_vxlan_if_name(zl3vni)); + json_object_int_add(json_evpn, "numMacs", + hashcount(zl3vni->rmac_table)); + json_object_int_add(json_evpn, "numArpNd", + hashcount(zl3vni->nh_table)); + json_object_string_add(json_evpn, "numRemoteVteps", "n/a"); + json_object_string_add(json_evpn, "type", "L3"); + json_object_string_add(json_evpn, "tenantVrf", + zl3vni_vrf_name(zl3vni)); + json_object_object_add(json, vni_str, json_evpn); + } +} + +/* print a L3 VNI hash entry in detail*/ +static void zl3vni_print_hash_detail(struct hash_bucket *bucket, void *data) +{ + struct vty *vty = NULL; + struct zebra_l3vni *zl3vni = NULL; + json_object *json_array = NULL; + bool use_json = false; + struct zebra_evpn_show *zes = data; + + vty = zes->vty; + json_array = zes->json; + use_json = zes->use_json; + + zl3vni = (struct zebra_l3vni *)bucket->data; + + zebra_vxlan_print_vni(vty, zes->zvrf, zl3vni->vni, + use_json, json_array); + + if (!use_json) + 
vty_out(vty, "\n"); +} + +static int zvni_map_to_svi_ns(struct ns *ns, + void *_in_param, + void **_p_ifp) +{ + struct zebra_ns *zns = ns->info; + struct route_node *rn; + struct zebra_from_svi_param *in_param = + (struct zebra_from_svi_param *)_in_param; + struct zebra_l2info_vlan *vl; + struct interface *tmp_if = NULL; + struct interface **p_ifp = (struct interface **)_p_ifp; + struct zebra_if *zif; + + assert(in_param && p_ifp); + + /* TODO: Optimize with a hash. */ + for (rn = route_top(zns->if_table); rn; rn = route_next(rn)) { + tmp_if = (struct interface *)rn->info; + /* Check oper status of the SVI. */ + if (!tmp_if || !if_is_operative(tmp_if)) + continue; + zif = tmp_if->info; + if (!zif || zif->zif_type != ZEBRA_IF_VLAN + || zif->link != in_param->br_if) + continue; + vl = (struct zebra_l2info_vlan *)&zif->l2info.vl; + + if (vl->vid == in_param->vid) { + *p_ifp = tmp_if; + return NS_WALK_STOP; + } + } + return NS_WALK_CONTINUE; +} + +/* Map to SVI on bridge corresponding to specified VLAN. This can be one + * of two cases: + * (a) In the case of a VLAN-aware bridge, the SVI is a L3 VLAN interface + * linked to the bridge + * (b) In the case of a VLAN-unaware bridge, the SVI is the bridge interface + * itself + */ +struct interface *zvni_map_to_svi(vlanid_t vid, struct interface *br_if) +{ + struct interface *tmp_if = NULL; + struct zebra_if *zif; + struct zebra_l2info_bridge *br; + struct zebra_from_svi_param in_param; + struct interface **p_ifp; + /* Defensive check, caller expected to invoke only with valid bridge. */ + if (!br_if) + return NULL; + + /* Determine if bridge is VLAN-aware or not */ + zif = br_if->info; + assert(zif); + br = &zif->l2info.br; + in_param.bridge_vlan_aware = br->vlan_aware; + /* Check oper status of the SVI. */ + if (!in_param.bridge_vlan_aware) + return if_is_operative(br_if) ? br_if : NULL; + + in_param.vid = vid; + in_param.br_if = br_if; + in_param.zif = NULL; + p_ifp = &tmp_if; + /* Identify corresponding VLAN interface. 
*/ + ns_walk_func(zvni_map_to_svi_ns, (void *)&in_param, + (void **)p_ifp); + return tmp_if; +} + +static int zebra_evpn_vxlan_del(struct zebra_evpn *zevpn) +{ + zevpn_vxlan_if_set(zevpn, zevpn->vxlan_if, false /* set */); + + /* Remove references to the BUM mcast grp */ + zebra_vxlan_sg_deref(zevpn->local_vtep_ip, zevpn->mcast_grp); + + return zebra_evpn_del(zevpn); +} + +static int zevpn_build_hash_table_zns(struct ns *ns, + void *param_in __attribute__((unused)), + void **param_out __attribute__((unused))) +{ + struct zebra_ns *zns = ns->info; + struct route_node *rn; + struct interface *ifp; + struct zebra_vrf *zvrf; + + zvrf = zebra_vrf_get_evpn(); + + /* Walk VxLAN interfaces and create EVPN hash. */ + for (rn = route_top(zns->if_table); rn; rn = route_next(rn)) { + vni_t vni; + struct zebra_evpn *zevpn = NULL; + struct zebra_l3vni *zl3vni = NULL; + struct zebra_if *zif; + struct zebra_l2info_vxlan *vxl; + + ifp = (struct interface *)rn->info; + if (!ifp) + continue; + zif = ifp->info; + if (!zif || zif->zif_type != ZEBRA_IF_VXLAN) + continue; + + vxl = &zif->l2info.vxl; + vni = vxl->vni; + /* link of VXLAN interface should be in zebra_evpn_vrf */ + if (zvrf->zns->ns_id != vxl->link_nsid) { + if (IS_ZEBRA_DEBUG_VXLAN) + zlog_debug( + "Intf %s(%u) VNI %u, link not in same " + "namespace than BGP EVPN core instance ", + ifp->name, ifp->ifindex, vni); + continue; + } + /* L3-VNI and L2-VNI are handled seperately */ + zl3vni = zl3vni_lookup(vni); + if (zl3vni) { + + if (IS_ZEBRA_DEBUG_VXLAN) + zlog_debug( + "create L3-VNI hash for Intf %s(%u) L3-VNI %u", + ifp->name, ifp->ifindex, vni); + + /* associate with vxlan_if */ + zl3vni->local_vtep_ip = vxl->vtep_ip; + zl3vni->vxlan_if = ifp; + + /* + * we need to associate with SVI. 
+ * we can associate with svi-if only after association + * with vxlan-intf is complete + */ + zl3vni->svi_if = zl3vni_map_to_svi_if(zl3vni); + + /* Associate l3vni to mac-vlan and extract VRR MAC */ + zl3vni->mac_vlan_if = zl3vni_map_to_mac_vlan_if(zl3vni); + + if (IS_ZEBRA_DEBUG_VXLAN) + zlog_debug("create l3vni %u svi_if %s mac_vlan_if %s", + vni, zl3vni->svi_if ? zl3vni->svi_if->name + : "NIL", + zl3vni->mac_vlan_if ? + zl3vni->mac_vlan_if->name : "NIL"); + + if (is_l3vni_oper_up(zl3vni)) + zebra_vxlan_process_l3vni_oper_up(zl3vni); + + } else { + struct interface *vlan_if = NULL; + + if (IS_ZEBRA_DEBUG_VXLAN) + zlog_debug( + "Create L2-VNI hash for intf %s(%u) L2-VNI %u local IP %pI4", + ifp->name, ifp->ifindex, vni, + &vxl->vtep_ip); + + /* EVPN hash entry is expected to exist, if the BGP process is killed */ + zevpn = zebra_evpn_lookup(vni); + if (zevpn) { + zlog_debug( + "EVPN hash already present for IF %s(%u) L2-VNI %u", + ifp->name, ifp->ifindex, vni); + + /* + * Inform BGP if intf is up and mapped to + * bridge. 
+ */ + if (if_is_operative(ifp) && + zif->brslave_info.br_if) + zebra_evpn_send_add_to_client(zevpn); + + /* Send Local MAC-entries to client */ + zebra_evpn_send_mac_list_to_client(zevpn); + + /* Send Loval Neighbor entries to client */ + zebra_evpn_send_neigh_to_client(zevpn); + } else { + zevpn = zebra_evpn_add(vni); + if (!zevpn) { + zlog_debug( + "Failed to add EVPN hash, IF %s(%u) L2-VNI %u", + ifp->name, ifp->ifindex, vni); + return NS_WALK_CONTINUE; + } + + if (zevpn->local_vtep_ip.s_addr != + vxl->vtep_ip.s_addr || + zevpn->mcast_grp.s_addr != + vxl->mcast_grp.s_addr) { + zebra_vxlan_sg_deref( + zevpn->local_vtep_ip, + zevpn->mcast_grp); + zebra_vxlan_sg_ref(vxl->vtep_ip, + vxl->mcast_grp); + zevpn->local_vtep_ip = vxl->vtep_ip; + zevpn->mcast_grp = vxl->mcast_grp; + /* on local vtep-ip check if ES + * orig-ip needs to be updated + */ + zebra_evpn_es_set_base_evpn(zevpn); + } + zevpn_vxlan_if_set(zevpn, ifp, true /* set */); + vlan_if = zvni_map_to_svi( + vxl->access_vlan, + zif->brslave_info.br_if); + if (vlan_if) { + zevpn->svi_if = vlan_if; + zevpn->vrf_id = vlan_if->vrf->vrf_id; + zl3vni = zl3vni_from_vrf( + vlan_if->vrf->vrf_id); + if (zl3vni) + listnode_add_sort( + zl3vni->l2vnis, zevpn); + } + + /* + * Inform BGP if intf is up and mapped to + * bridge. + */ + if (if_is_operative(ifp) && + zif->brslave_info.br_if) + zebra_evpn_send_add_to_client(zevpn); + } + } + } + return NS_WALK_CONTINUE; +} + +/* + * Build the VNI hash table by going over the VxLAN interfaces. This + * is called when EVPN (advertise-all-vni) is enabled. 
+ */ + +static void zevpn_build_hash_table(void) +{ + ns_walk_func(zevpn_build_hash_table_zns, NULL, NULL); +} + +/* + * Cleanup EVPN/VTEP and update kernel + */ +static void zebra_evpn_vxlan_cleanup_all(struct hash_bucket *bucket, void *arg) +{ + struct zebra_evpn *zevpn = NULL; + struct zebra_l3vni *zl3vni = NULL; + + zevpn = (struct zebra_evpn *)bucket->data; + + /* remove l2vni from l2vni's tenant-vrf l3-vni list */ + zl3vni = zl3vni_from_vrf(zevpn->vrf_id); + if (zl3vni) + listnode_delete(zl3vni->l2vnis, zevpn); + + zebra_evpn_cleanup_all(bucket, arg); +} + +/* cleanup L3VNI */ +static void zl3vni_cleanup_all(struct hash_bucket *bucket, void *args) +{ + struct zebra_l3vni *zl3vni = NULL; + + zl3vni = (struct zebra_l3vni *)bucket->data; + + zebra_vxlan_process_l3vni_oper_down(zl3vni); +} + +static void rb_find_or_add_host(struct host_rb_tree_entry *hrbe, + const struct prefix *host) +{ + struct host_rb_entry lookup; + struct host_rb_entry *hle; + + memset(&lookup, 0, sizeof(lookup)); + memcpy(&lookup.p, host, sizeof(*host)); + + hle = RB_FIND(host_rb_tree_entry, hrbe, &lookup); + if (hle) + return; + + hle = XCALLOC(MTYPE_HOST_PREFIX, sizeof(struct host_rb_entry)); + memcpy(hle, &lookup, sizeof(lookup)); + + RB_INSERT(host_rb_tree_entry, hrbe, hle); +} + +static void rb_delete_host(struct host_rb_tree_entry *hrbe, struct prefix *host) +{ + struct host_rb_entry lookup; + struct host_rb_entry *hle; + + memset(&lookup, 0, sizeof(lookup)); + memcpy(&lookup.p, host, sizeof(*host)); + + hle = RB_FIND(host_rb_tree_entry, hrbe, &lookup); + if (hle) { + RB_REMOVE(host_rb_tree_entry, hrbe, hle); + XFREE(MTYPE_HOST_PREFIX, hle); + } + + return; +} + +/* + * Look up MAC hash entry. 
+ */ +static struct zebra_mac *zl3vni_rmac_lookup(struct zebra_l3vni *zl3vni, + const struct ethaddr *rmac) +{ + struct zebra_mac tmp; + struct zebra_mac *pmac; + + memset(&tmp, 0, sizeof(tmp)); + memcpy(&tmp.macaddr, rmac, ETH_ALEN); + pmac = hash_lookup(zl3vni->rmac_table, &tmp); + + return pmac; +} + +/* + * Callback to allocate RMAC hash entry. + */ +static void *zl3vni_rmac_alloc(void *p) +{ + const struct zebra_mac *tmp_rmac = p; + struct zebra_mac *zrmac; + + zrmac = XCALLOC(MTYPE_L3VNI_MAC, sizeof(struct zebra_mac)); + *zrmac = *tmp_rmac; + + return ((void *)zrmac); +} + +/* + * Add RMAC entry to l3-vni + */ +static struct zebra_mac *zl3vni_rmac_add(struct zebra_l3vni *zl3vni, + const struct ethaddr *rmac) +{ + struct zebra_mac tmp_rmac; + struct zebra_mac *zrmac = NULL; + + memset(&tmp_rmac, 0, sizeof(tmp_rmac)); + memcpy(&tmp_rmac.macaddr, rmac, ETH_ALEN); + zrmac = hash_get(zl3vni->rmac_table, &tmp_rmac, zl3vni_rmac_alloc); + zrmac->nh_list = list_new(); + zrmac->nh_list->cmp = (int (*)(void *, void *))l3vni_rmac_nh_list_cmp; + zrmac->nh_list->del = (void (*)(void *))l3vni_rmac_nh_free; + + SET_FLAG(zrmac->flags, ZEBRA_MAC_REMOTE); + SET_FLAG(zrmac->flags, ZEBRA_MAC_REMOTE_RMAC); + + return zrmac; +} + +/* + * Delete MAC entry. + */ +static int zl3vni_rmac_del(struct zebra_l3vni *zl3vni, struct zebra_mac *zrmac) +{ + struct zebra_mac *tmp_rmac; + + /* free the list of nh list*/ + list_delete(&zrmac->nh_list); + + tmp_rmac = hash_release(zl3vni->rmac_table, zrmac); + XFREE(MTYPE_L3VNI_MAC, tmp_rmac); + + return 0; +} + +/* + * Install remote RMAC into the forwarding plane. 
+ */ +static int zl3vni_rmac_install(struct zebra_l3vni *zl3vni, + struct zebra_mac *zrmac) +{ + const struct zebra_if *zif = NULL, *br_zif = NULL; + const struct zebra_l2info_vxlan *vxl = NULL; + const struct interface *br_ifp; + enum zebra_dplane_result res; + vlanid_t vid; + + if (!(CHECK_FLAG(zrmac->flags, ZEBRA_MAC_REMOTE)) + || !(CHECK_FLAG(zrmac->flags, ZEBRA_MAC_REMOTE_RMAC))) + return 0; + + zif = zl3vni->vxlan_if->info; + if (!zif) + return -1; + + br_ifp = zif->brslave_info.br_if; + if (br_ifp == NULL) + return -1; + + vxl = &zif->l2info.vxl; + + br_zif = (const struct zebra_if *)br_ifp->info; + + if (IS_ZEBRA_IF_BRIDGE_VLAN_AWARE(br_zif)) + vid = vxl->access_vlan; + else + vid = 0; + + res = dplane_rem_mac_add(zl3vni->vxlan_if, br_ifp, vid, + &zrmac->macaddr, zrmac->fwd_info.r_vtep_ip, 0, 0, + false /*was_static*/); + if (res != ZEBRA_DPLANE_REQUEST_FAILURE) + return 0; + else + return -1; +} + +/* + * Uninstall remote RMAC from the forwarding plane. + */ +static int zl3vni_rmac_uninstall(struct zebra_l3vni *zl3vni, + struct zebra_mac *zrmac) +{ + const struct zebra_if *zif = NULL, *br_zif; + const struct zebra_l2info_vxlan *vxl = NULL; + const struct interface *br_ifp; + vlanid_t vid; + enum zebra_dplane_result res; + + if (!(CHECK_FLAG(zrmac->flags, ZEBRA_MAC_REMOTE)) + || !(CHECK_FLAG(zrmac->flags, ZEBRA_MAC_REMOTE_RMAC))) + return 0; + + if (!zl3vni->vxlan_if) { + if (IS_ZEBRA_DEBUG_VXLAN) + zlog_debug( + "RMAC %pEA on L3-VNI %u hash %p couldn't be uninstalled - no vxlan_if", + &zrmac->macaddr, zl3vni->vni, zl3vni); + return -1; + } + + zif = zl3vni->vxlan_if->info; + if (!zif) + return -1; + + br_ifp = zif->brslave_info.br_if; + if (br_ifp == NULL) + return -1; + + vxl = &zif->l2info.vxl; + + br_zif = (const struct zebra_if *)br_ifp->info; + if (IS_ZEBRA_IF_BRIDGE_VLAN_AWARE(br_zif)) + vid = vxl->access_vlan; + else + vid = 0; + + res = dplane_rem_mac_del(zl3vni->vxlan_if, br_ifp, vid, + &zrmac->macaddr, zrmac->fwd_info.r_vtep_ip); + if (res != 
ZEBRA_DPLANE_REQUEST_FAILURE) + return 0; + else + return -1; +} + +/* handle rmac add */ +static int zl3vni_remote_rmac_add(struct zebra_l3vni *zl3vni, + const struct ethaddr *rmac, + const struct ipaddr *vtep_ip) +{ + struct zebra_mac *zrmac = NULL; + struct ipaddr *vtep = NULL; + + zrmac = zl3vni_rmac_lookup(zl3vni, rmac); + if (!zrmac) { + + /* Create the RMAC entry, or update its vtep, if necessary. */ + zrmac = zl3vni_rmac_add(zl3vni, rmac); + if (!zrmac) { + zlog_debug( + "Failed to add RMAC %pEA L3VNI %u Remote VTEP %pIA", + rmac, zl3vni->vni, vtep_ip); + return -1; + } + memset(&zrmac->fwd_info, 0, sizeof(zrmac->fwd_info)); + zrmac->fwd_info.r_vtep_ip = vtep_ip->ipaddr_v4; + + vtep = XCALLOC(MTYPE_EVPN_VTEP, sizeof(struct ipaddr)); + memcpy(vtep, vtep_ip, sizeof(struct ipaddr)); + if (!listnode_add_sort_nodup(zrmac->nh_list, (void *)vtep)) + XFREE(MTYPE_EVPN_VTEP, vtep); + + /* Send RMAC for FPM processing */ + hook_call(zebra_rmac_update, zrmac, zl3vni, false, + "new RMAC added"); + + /* install rmac in kernel */ + zl3vni_rmac_install(zl3vni, zrmac); + } else if (!IPV4_ADDR_SAME(&zrmac->fwd_info.r_vtep_ip, + &vtep_ip->ipaddr_v4)) { + if (IS_ZEBRA_DEBUG_VXLAN) + zlog_debug( + "L3VNI %u Remote VTEP change(%pI4 -> %pIA) for RMAC %pEA", + zl3vni->vni, &zrmac->fwd_info.r_vtep_ip, + vtep_ip, rmac); + + zrmac->fwd_info.r_vtep_ip = vtep_ip->ipaddr_v4; + + vtep = XCALLOC(MTYPE_EVPN_VTEP, sizeof(struct ipaddr)); + memcpy(vtep, vtep_ip, sizeof(struct ipaddr)); + if (!listnode_add_sort_nodup(zrmac->nh_list, (void *)vtep)) + XFREE(MTYPE_EVPN_VTEP, vtep); + + /* install rmac in kernel */ + zl3vni_rmac_install(zl3vni, zrmac); + } + + return 0; +} + + +/* handle rmac delete */ +static void zl3vni_remote_rmac_del(struct zebra_l3vni *zl3vni, + struct zebra_mac *zrmac, + struct ipaddr *vtep_ip) +{ + struct ipaddr ipv4_vtep; + + if (!zl3vni_nh_lookup(zl3vni, vtep_ip)) { + memset(&ipv4_vtep, 0, sizeof(ipv4_vtep)); + ipv4_vtep.ipa_type = IPADDR_V4; + if (vtep_ip->ipa_type == 
IPADDR_V6) + ipv4_mapped_ipv6_to_ipv4(&vtep_ip->ipaddr_v6, + &ipv4_vtep.ipaddr_v4); + else + memcpy(&(ipv4_vtep.ipaddr_v4), &vtep_ip->ipaddr_v4, + sizeof(struct in_addr)); + + /* remove nh from rmac's list */ + l3vni_rmac_nh_list_nh_delete(zl3vni, zrmac, &ipv4_vtep); + /* delete nh is same as current selected, fall back to + * one present in the list + */ + if (IPV4_ADDR_SAME(&zrmac->fwd_info.r_vtep_ip, + &ipv4_vtep.ipaddr_v4) && + listcount(zrmac->nh_list)) { + struct ipaddr *vtep; + + vtep = listgetdata(listhead(zrmac->nh_list)); + zrmac->fwd_info.r_vtep_ip = vtep->ipaddr_v4; + if (IS_ZEBRA_DEBUG_VXLAN) + zlog_debug( + "L3VNI %u Remote VTEP nh change(%pIA -> %pI4) for RMAC %pEA", + zl3vni->vni, &ipv4_vtep, + &zrmac->fwd_info.r_vtep_ip, + &zrmac->macaddr); + + /* install rmac in kernel */ + zl3vni_rmac_install(zl3vni, zrmac); + } + + if (!listcount(zrmac->nh_list)) { + /* uninstall from kernel */ + zl3vni_rmac_uninstall(zl3vni, zrmac); + + /* Send RMAC for FPM processing */ + hook_call(zebra_rmac_update, zrmac, zl3vni, true, + "RMAC deleted"); + + if (IS_ZEBRA_DEBUG_VXLAN) + zlog_debug( + "L3VNI %u RMAC %pEA vtep_ip %pIA delete", + zl3vni->vni, &zrmac->macaddr, vtep_ip); + + /* del the rmac entry */ + zl3vni_rmac_del(zl3vni, zrmac); + } + } +} + +/* + * Look up nh hash entry on a l3-vni. + */ +static struct zebra_neigh *zl3vni_nh_lookup(struct zebra_l3vni *zl3vni, + const struct ipaddr *ip) +{ + struct zebra_neigh tmp; + struct zebra_neigh *n; + + memset(&tmp, 0, sizeof(tmp)); + memcpy(&tmp.ip, ip, sizeof(struct ipaddr)); + n = hash_lookup(zl3vni->nh_table, &tmp); + + return n; +} + + +/* + * Callback to allocate NH hash entry on L3-VNI. + */ +static void *zl3vni_nh_alloc(void *p) +{ + const struct zebra_neigh *tmp_n = p; + struct zebra_neigh *n; + + n = XCALLOC(MTYPE_L3NEIGH, sizeof(struct zebra_neigh)); + *n = *tmp_n; + + return ((void *)n); +} + +/* + * Add neighbor entry. 
+ */ +static struct zebra_neigh *zl3vni_nh_add(struct zebra_l3vni *zl3vni, + const struct ipaddr *ip, + const struct ethaddr *mac) +{ + struct zebra_neigh tmp_n; + struct zebra_neigh *n = NULL; + + memset(&tmp_n, 0, sizeof(tmp_n)); + memcpy(&tmp_n.ip, ip, sizeof(struct ipaddr)); + n = hash_get(zl3vni->nh_table, &tmp_n, zl3vni_nh_alloc); + + RB_INIT(host_rb_tree_entry, &n->host_rb); + + memcpy(&n->emac, mac, ETH_ALEN); + SET_FLAG(n->flags, ZEBRA_NEIGH_REMOTE); + SET_FLAG(n->flags, ZEBRA_NEIGH_REMOTE_NH); + + return n; +} + +/* + * Delete neighbor entry. + */ +static int zl3vni_nh_del(struct zebra_l3vni *zl3vni, struct zebra_neigh *n) +{ + struct zebra_neigh *tmp_n; + struct host_rb_entry *hle; + + while (!RB_EMPTY(host_rb_tree_entry, &n->host_rb)) { + hle = RB_ROOT(host_rb_tree_entry, &n->host_rb); + + RB_REMOVE(host_rb_tree_entry, &n->host_rb, hle); + XFREE(MTYPE_HOST_PREFIX, hle); + } + + tmp_n = hash_release(zl3vni->nh_table, n); + XFREE(MTYPE_L3NEIGH, tmp_n); + + return 0; +} + +/* + * Install remote nh as neigh into the kernel. + */ +static int zl3vni_nh_install(struct zebra_l3vni *zl3vni, struct zebra_neigh *n) +{ + uint8_t flags; + int ret = 0; + + if (!is_l3vni_oper_up(zl3vni)) + return -1; + + if (!(n->flags & ZEBRA_NEIGH_REMOTE) + || !(n->flags & ZEBRA_NEIGH_REMOTE_NH)) + return 0; + + flags = DPLANE_NTF_EXT_LEARNED; + if (n->flags & ZEBRA_NEIGH_ROUTER_FLAG) + flags |= DPLANE_NTF_ROUTER; + + dplane_rem_neigh_add(zl3vni->svi_if, &n->ip, &n->emac, flags, + false /*was_static*/); + + return ret; +} + +/* + * Uninstall remote nh from the kernel. 
+ */ +static int zl3vni_nh_uninstall(struct zebra_l3vni *zl3vni, + struct zebra_neigh *n) +{ + if (!(n->flags & ZEBRA_NEIGH_REMOTE) + || !(n->flags & ZEBRA_NEIGH_REMOTE_NH)) + return 0; + + if (!zl3vni->svi_if || !if_is_operative(zl3vni->svi_if)) + return 0; + + dplane_rem_neigh_delete(zl3vni->svi_if, &n->ip); + + return 0; +} + +/* add remote vtep as a neigh entry */ +static int zl3vni_remote_nh_add(struct zebra_l3vni *zl3vni, + const struct ipaddr *vtep_ip, + const struct ethaddr *rmac, + const struct prefix *host_prefix) +{ + struct zebra_neigh *nh = NULL; + + /* Create the next hop entry, or update its mac, if necessary. */ + nh = zl3vni_nh_lookup(zl3vni, vtep_ip); + if (!nh) { + nh = zl3vni_nh_add(zl3vni, vtep_ip, rmac); + if (!nh) { + zlog_debug( + "Failed to add NH %pIA as Neigh (RMAC %pEA L3-VNI %u prefix %pFX)", + vtep_ip, rmac, zl3vni->vni, host_prefix); + return -1; + } + + /* install the nh neigh in kernel */ + zl3vni_nh_install(zl3vni, nh); + } else if (memcmp(&nh->emac, rmac, ETH_ALEN) != 0) { + if (IS_ZEBRA_DEBUG_VXLAN) + zlog_debug( + "L3VNI %u RMAC change(%pEA --> %pEA) for nexthop %pIA, prefix %pFX", + zl3vni->vni, &nh->emac, rmac, vtep_ip, + host_prefix); + + memcpy(&nh->emac, rmac, ETH_ALEN); + /* install (update) the nh neigh in kernel */ + zl3vni_nh_install(zl3vni, nh); + } + + rb_find_or_add_host(&nh->host_rb, host_prefix); + + return 0; +} + +/* handle nh neigh delete */ +static void zl3vni_remote_nh_del(struct zebra_l3vni *zl3vni, + struct zebra_neigh *nh, + struct prefix *host_prefix) +{ + rb_delete_host(&nh->host_rb, host_prefix); + + if (RB_EMPTY(host_rb_tree_entry, &nh->host_rb)) { + /* uninstall from kernel */ + zl3vni_nh_uninstall(zl3vni, nh); + + /* delete the nh entry */ + zl3vni_nh_del(zl3vni, nh); + } +} + +/* handle neigh update from kernel - the only thing of interest is to + * readd stale entries. 
+ */ +static int zl3vni_local_nh_add_update(struct zebra_l3vni *zl3vni, + struct ipaddr *ip, uint16_t state) +{ +#ifdef GNU_LINUX + struct zebra_neigh *n = NULL; + + n = zl3vni_nh_lookup(zl3vni, ip); + if (!n) + return 0; + + /* all next hop neigh are remote and installed by frr. + * If the kernel has aged this entry, re-install. + */ + if (state & NUD_STALE) + zl3vni_nh_install(zl3vni, n); +#endif + return 0; +} + +/* handle neigh delete from kernel */ +static int zl3vni_local_nh_del(struct zebra_l3vni *zl3vni, struct ipaddr *ip) +{ + struct zebra_neigh *n = NULL; + + n = zl3vni_nh_lookup(zl3vni, ip); + if (!n) + return 0; + + /* all next hop neigh are remote and installed by frr. + * If we get an age out notification for these neigh entries, we have to + * install it back + */ + zl3vni_nh_install(zl3vni, n); + + return 0; +} + +/* + * Hash function for L3 VNI. + */ +static unsigned int l3vni_hash_keymake(const void *p) +{ + const struct zebra_l3vni *zl3vni = p; + + return jhash_1word(zl3vni->vni, 0); +} + +/* + * Compare 2 L3 VNI hash entries. + */ +static bool l3vni_hash_cmp(const void *p1, const void *p2) +{ + const struct zebra_l3vni *zl3vni1 = p1; + const struct zebra_l3vni *zl3vni2 = p2; + + return (zl3vni1->vni == zl3vni2->vni); +} + +/* + * Callback to allocate L3 VNI hash entry. + */ +static void *zl3vni_alloc(void *p) +{ + struct zebra_l3vni *zl3vni = NULL; + const struct zebra_l3vni *tmp_l3vni = p; + + zl3vni = XCALLOC(MTYPE_ZL3VNI, sizeof(struct zebra_l3vni)); + zl3vni->vni = tmp_l3vni->vni; + return ((void *)zl3vni); +} + +/* + * Look up L3 VNI hash entry. + */ +struct zebra_l3vni *zl3vni_lookup(vni_t vni) +{ + struct zebra_l3vni tmp_l3vni; + struct zebra_l3vni *zl3vni = NULL; + + memset(&tmp_l3vni, 0, sizeof(tmp_l3vni)); + tmp_l3vni.vni = vni; + zl3vni = hash_lookup(zrouter.l3vni_table, &tmp_l3vni); + + return zl3vni; +} + +/* + * Add L3 VNI hash entry. 
+ */ +static struct zebra_l3vni *zl3vni_add(vni_t vni, vrf_id_t vrf_id) +{ + struct zebra_l3vni tmp_zl3vni; + struct zebra_l3vni *zl3vni = NULL; + + memset(&tmp_zl3vni, 0, sizeof(tmp_zl3vni)); + tmp_zl3vni.vni = vni; + + zl3vni = hash_get(zrouter.l3vni_table, &tmp_zl3vni, zl3vni_alloc); + + zl3vni->vrf_id = vrf_id; + zl3vni->svi_if = NULL; + zl3vni->vxlan_if = NULL; + zl3vni->l2vnis = list_new(); + zl3vni->l2vnis->cmp = zebra_evpn_list_cmp; + + /* Create hash table for remote RMAC */ + zl3vni->rmac_table = zebra_mac_db_create("Zebra L3-VNI RMAC-Table"); + + /* Create hash table for neighbors */ + zl3vni->nh_table = zebra_neigh_db_create("Zebra L3-VNI next-hop table"); + + return zl3vni; +} + +/* + * Delete L3 VNI hash entry. + */ +static int zl3vni_del(struct zebra_l3vni *zl3vni) +{ + struct zebra_l3vni *tmp_zl3vni; + + /* free the list of l2vnis */ + list_delete(&zl3vni->l2vnis); + zl3vni->l2vnis = NULL; + + /* Free the rmac table */ + hash_free(zl3vni->rmac_table); + zl3vni->rmac_table = NULL; + + /* Free the nh table */ + hash_free(zl3vni->nh_table); + zl3vni->nh_table = NULL; + + /* Free the VNI hash entry and allocated memory. 
*/ + tmp_zl3vni = hash_release(zrouter.l3vni_table, zl3vni); + XFREE(MTYPE_ZL3VNI, tmp_zl3vni); + + return 0; +} + +static int zl3vni_map_to_vxlan_if_ns(struct ns *ns, + void *_zl3vni, + void **_pifp) +{ + struct zebra_ns *zns = ns->info; + struct zebra_l3vni *zl3vni = (struct zebra_l3vni *)_zl3vni; + struct route_node *rn = NULL; + struct interface *ifp = NULL; + struct zebra_vrf *zvrf; + + zvrf = zebra_vrf_get_evpn(); + + assert(_pifp); + + /* loop through all vxlan-interface */ + for (rn = route_top(zns->if_table); rn; rn = route_next(rn)) { + + struct zebra_if *zif = NULL; + struct zebra_l2info_vxlan *vxl = NULL; + + ifp = (struct interface *)rn->info; + if (!ifp) + continue; + + zif = ifp->info; + if (!zif || zif->zif_type != ZEBRA_IF_VXLAN) + continue; + + vxl = &zif->l2info.vxl; + if (vxl->vni != zl3vni->vni) + continue; + + /* link of VXLAN interface should be in zebra_evpn_vrf */ + if (zvrf->zns->ns_id != vxl->link_nsid) { + if (IS_ZEBRA_DEBUG_VXLAN) + zlog_debug( + "Intf %s(%u) VNI %u, link not in same " + "namespace than BGP EVPN core instance ", + ifp->name, ifp->ifindex, vxl->vni); + continue; + } + + + zl3vni->local_vtep_ip = vxl->vtep_ip; + *_pifp = (void *)ifp; + return NS_WALK_STOP; + } + + return NS_WALK_CONTINUE; +} + +struct interface *zl3vni_map_to_vxlan_if(struct zebra_l3vni *zl3vni) +{ + struct interface **p_ifp; + struct interface *ifp = NULL; + + p_ifp = &ifp; + + ns_walk_func(zl3vni_map_to_vxlan_if_ns, + (void *)zl3vni, (void **)p_ifp); + return ifp; +} + +struct interface *zl3vni_map_to_svi_if(struct zebra_l3vni *zl3vni) +{ + struct zebra_if *zif = NULL; /* zebra_if for vxlan_if */ + struct zebra_l2info_vxlan *vxl = NULL; /* l2 info for vxlan_if */ + + if (!zl3vni) + return NULL; + + if (!zl3vni->vxlan_if) + return NULL; + + zif = zl3vni->vxlan_if->info; + if (!zif) + return NULL; + + vxl = &zif->l2info.vxl; + + return zvni_map_to_svi(vxl->access_vlan, zif->brslave_info.br_if); +} + +struct interface *zl3vni_map_to_mac_vlan_if(struct 
zebra_l3vni *zl3vni) +{ + struct zebra_if *zif = NULL; /* zebra_if for vxlan_if */ + + if (!zl3vni) + return NULL; + + if (!zl3vni->vxlan_if) + return NULL; + + zif = zl3vni->vxlan_if->info; + if (!zif) + return NULL; + + return zebra_evpn_map_to_macvlan(zif->brslave_info.br_if, + zl3vni->svi_if); +} + + +struct zebra_l3vni *zl3vni_from_vrf(vrf_id_t vrf_id) +{ + struct zebra_vrf *zvrf = NULL; + + zvrf = zebra_vrf_lookup_by_id(vrf_id); + if (!zvrf) + return NULL; + + return zl3vni_lookup(zvrf->l3vni); +} + +static int zl3vni_from_svi_ns(struct ns *ns, void *_in_param, void **_p_zl3vni) +{ + struct zebra_ns *zns = ns->info; + struct zebra_l3vni **p_zl3vni = (struct zebra_l3vni **)_p_zl3vni; + struct zebra_from_svi_param *in_param = + (struct zebra_from_svi_param *)_in_param; + struct route_node *rn = NULL; + struct interface *tmp_if = NULL; + struct zebra_if *zif = NULL; + struct zebra_l2info_vxlan *vxl = NULL; + + assert(in_param && p_zl3vni); + + /* loop through all vxlan-interface */ + for (rn = route_top(zns->if_table); rn; rn = route_next(rn)) { + tmp_if = (struct interface *)rn->info; + if (!tmp_if) + continue; + zif = tmp_if->info; + if (!zif || zif->zif_type != ZEBRA_IF_VXLAN) + continue; + if (!if_is_operative(tmp_if)) + continue; + vxl = &zif->l2info.vxl; + + if (zif->brslave_info.br_if != in_param->br_if) + continue; + + if (!in_param->bridge_vlan_aware + || vxl->access_vlan == in_param->vid) { + *p_zl3vni = zl3vni_lookup(vxl->vni); + return NS_WALK_STOP; + } + } + + return NS_WALK_CONTINUE; +} + +/* + * Map SVI and associated bridge to a VNI. This is invoked upon getting + * neighbor notifications, to see if they are of interest. 
+ */ +static struct zebra_l3vni *zl3vni_from_svi(struct interface *ifp, + struct interface *br_if) +{ + struct zebra_l3vni *zl3vni = NULL; + struct zebra_if *zif = NULL; + struct zebra_l2info_bridge *br = NULL; + struct zebra_from_svi_param in_param = {}; + struct zebra_l3vni **p_zl3vni; + + if (!br_if) + return NULL; + + /* Make sure the linked interface is a bridge. */ + if (!IS_ZEBRA_IF_BRIDGE(br_if)) + return NULL; + in_param.br_if = br_if; + + /* Determine if bridge is VLAN-aware or not */ + zif = br_if->info; + assert(zif); + br = &zif->l2info.br; + in_param.bridge_vlan_aware = br->vlan_aware; + if (in_param.bridge_vlan_aware) { + struct zebra_l2info_vlan *vl; + + if (!IS_ZEBRA_IF_VLAN(ifp)) + return NULL; + + zif = ifp->info; + assert(zif); + vl = &zif->l2info.vl; + in_param.vid = vl->vid; + } + + /* See if this interface (or interface plus VLAN Id) maps to a VxLAN */ + /* TODO: Optimize with a hash. */ + + p_zl3vni = &zl3vni; + + ns_walk_func(zl3vni_from_svi_ns, (void *)&in_param, (void **)p_zl3vni); + return zl3vni; +} + +vni_t vni_id_from_svi(struct interface *ifp, struct interface *br_if) +{ + vni_t vni = 0; + struct zebra_evpn *zevpn = NULL; + struct zebra_l3vni *zl3vni = NULL; + + /* Check if an L3VNI belongs to this SVI interface. + * If not, check if an L2VNI belongs to this SVI interface. + */ + zl3vni = zl3vni_from_svi(ifp, br_if); + if (zl3vni) + vni = zl3vni->vni; + else { + zevpn = zebra_evpn_from_svi(ifp, br_if); + if (zevpn) + vni = zevpn->vni; + } + + return vni; +} + +static inline void zl3vni_get_vrr_rmac(struct zebra_l3vni *zl3vni, + struct ethaddr *rmac) +{ + if (!zl3vni) + return; + + if (!is_l3vni_oper_up(zl3vni)) + return; + + if (zl3vni->mac_vlan_if && if_is_operative(zl3vni->mac_vlan_if)) + memcpy(rmac->octet, zl3vni->mac_vlan_if->hw_addr, ETH_ALEN); +} + +/* + * Inform BGP about l3-vni. 
+ */ +static int zl3vni_send_add_to_client(struct zebra_l3vni *zl3vni) +{ + struct stream *s = NULL; + struct zserv *client = NULL; + struct ethaddr svi_rmac, vrr_rmac = {.octet = {0} }; + struct zebra_vrf *zvrf; + bool is_anycast_mac = true; + + client = zserv_find_client(ZEBRA_ROUTE_BGP, 0); + /* BGP may not be running. */ + if (!client) + return 0; + + zvrf = zebra_vrf_lookup_by_id(zl3vni->vrf_id); + assert(zvrf); + + /* get the svi and vrr rmac values */ + memset(&svi_rmac, 0, sizeof(svi_rmac)); + zl3vni_get_svi_rmac(zl3vni, &svi_rmac); + zl3vni_get_vrr_rmac(zl3vni, &vrr_rmac); + + /* In absence of vrr mac use svi mac as anycast MAC value */ + if (is_zero_mac(&vrr_rmac)) { + memcpy(&vrr_rmac, &svi_rmac, ETH_ALEN); + is_anycast_mac = false; + } + + s = stream_new(ZEBRA_MAX_PACKET_SIZ); + + /* The message is used for both vni add and/or update like + * vrr mac is added for l3vni SVI. + */ + zclient_create_header(s, ZEBRA_L3VNI_ADD, zl3vni_vrf_id(zl3vni)); + stream_putl(s, zl3vni->vni); + stream_put(s, &svi_rmac, sizeof(struct ethaddr)); + stream_put_in_addr(s, &zl3vni->local_vtep_ip); + stream_put(s, &zl3vni->filter, sizeof(int)); + stream_putl(s, zl3vni->svi_if->ifindex); + stream_put(s, &vrr_rmac, sizeof(struct ethaddr)); + stream_putl(s, is_anycast_mac); + + /* Write packet size. */ + stream_putw_at(s, 0, stream_get_endp(s)); + + if (IS_ZEBRA_DEBUG_VXLAN) + zlog_debug( + "Send L3_VNI_ADD %u VRF %s RMAC %pEA VRR %pEA local-ip %pI4 filter %s to %s", + zl3vni->vni, vrf_id_to_name(zl3vni_vrf_id(zl3vni)), + &svi_rmac, &vrr_rmac, &zl3vni->local_vtep_ip, + CHECK_FLAG(zl3vni->filter, PREFIX_ROUTES_ONLY) + ? "prefix-routes-only" + : "none", + zebra_route_string(client->proto)); + + client->l3vniadd_cnt++; + return zserv_send_message(client, s); +} + +/* + * Inform BGP about local l3-VNI deletion. 
+ */ +static int zl3vni_send_del_to_client(struct zebra_l3vni *zl3vni) +{ + struct stream *s = NULL; + struct zserv *client = NULL; + + client = zserv_find_client(ZEBRA_ROUTE_BGP, 0); + /* BGP may not be running. */ + if (!client) + return 0; + + s = stream_new(ZEBRA_MAX_PACKET_SIZ); + + zclient_create_header(s, ZEBRA_L3VNI_DEL, zl3vni_vrf_id(zl3vni)); + stream_putl(s, zl3vni->vni); + + /* Write packet size. */ + stream_putw_at(s, 0, stream_get_endp(s)); + + if (IS_ZEBRA_DEBUG_VXLAN) + zlog_debug("Send L3_VNI_DEL %u VRF %s to %s", zl3vni->vni, + vrf_id_to_name(zl3vni_vrf_id(zl3vni)), + zebra_route_string(client->proto)); + + client->l3vnidel_cnt++; + return zserv_send_message(client, s); +} + +static void zebra_vxlan_process_l3vni_oper_up(struct zebra_l3vni *zl3vni) +{ + if (!zl3vni) + return; + + /* send l3vni add to BGP */ + zl3vni_send_add_to_client(zl3vni); +} + +static void zebra_vxlan_process_l3vni_oper_down(struct zebra_l3vni *zl3vni) +{ + if (!zl3vni) + return; + + /* send l3-vni del to BGP*/ + zl3vni_send_del_to_client(zl3vni); +} + +static void zevpn_add_to_l3vni_list(struct hash_bucket *bucket, void *ctxt) +{ + struct zebra_evpn *zevpn = (struct zebra_evpn *)bucket->data; + struct zebra_l3vni *zl3vni = (struct zebra_l3vni *)ctxt; + + if (zevpn->vrf_id == zl3vni_vrf_id(zl3vni)) + listnode_add_sort(zl3vni->l2vnis, zevpn); +} + +/* + * Handle transition of vni from l2 to l3 and vice versa. + * This function handles only the L2VNI add/delete part of + * the above transition. + * L3VNI add/delete is handled by the calling functions. + */ +static int zebra_vxlan_handle_vni_transition(struct zebra_vrf *zvrf, vni_t vni, + int add) +{ + struct zebra_evpn *zevpn = NULL; + struct zebra_l3vni *zl3vni = NULL; + + /* There is a possibility that VNI notification was already received + * from kernel and we programmed it as L2-VNI + * In such a case we need to delete this L2-VNI first, so + * that it can be reprogrammed as L3-VNI in the system. 
It is also + * possible that the vrf-vni mapping is removed from FRR while the vxlan + * interface is still present in kernel. In this case to keep it + * symmetric, we will delete the l3-vni and reprogram it as l2-vni + */ + if (add) { + /* Locate hash entry */ + zevpn = zebra_evpn_lookup(vni); + if (!zevpn) + return 0; + + if (IS_ZEBRA_DEBUG_VXLAN) + zlog_debug("Del L2-VNI %u - transition to L3-VNI", vni); + + /* Delete EVPN from BGP. */ + zebra_evpn_send_del_to_client(zevpn); + + zebra_evpn_neigh_del_all(zevpn, 0, 0, DEL_ALL_NEIGH); + zebra_evpn_mac_del_all(zevpn, 0, 0, DEL_ALL_MAC); + + /* Free up all remote VTEPs, if any. */ + zebra_evpn_vtep_del_all(zevpn, 1); + + zl3vni = zl3vni_from_vrf(zevpn->vrf_id); + if (zl3vni) + listnode_delete(zl3vni->l2vnis, zevpn); + + /* Delete the hash entry. */ + if (zebra_evpn_vxlan_del(zevpn)) { + flog_err(EC_ZEBRA_VNI_DEL_FAILED, + "Failed to del EVPN hash %p, VNI %u", zevpn, + zevpn->vni); + return -1; + } + } else { + struct zebra_ns *zns; + struct route_node *rn; + struct interface *ifp; + struct zebra_if *zif; + struct zebra_l2info_vxlan *vxl; + struct interface *vlan_if; + bool found = false; + + if (IS_ZEBRA_DEBUG_VXLAN) + zlog_debug("Adding L2-VNI %u - transition from L3-VNI", + vni); + + /* Find VxLAN interface for this VNI. 
*/ + zns = zebra_ns_lookup(NS_DEFAULT); + for (rn = route_top(zns->if_table); rn; rn = route_next(rn)) { + ifp = (struct interface *)rn->info; + if (!ifp) + continue; + zif = ifp->info; + if (!zif || zif->zif_type != ZEBRA_IF_VXLAN) + continue; + + vxl = &zif->l2info.vxl; + if (vxl->vni == vni) { + found = true; + break; + } + } + + if (!found) { + if (IS_ZEBRA_DEBUG_VXLAN) + zlog_err( + "Adding L2-VNI - Failed to find VxLAN interface for VNI %u", + vni); + return -1; + } + + /* Create VNI hash entry for L2VNI */ + zevpn = zebra_evpn_lookup(vni); + if (zevpn) + return 0; + + zevpn = zebra_evpn_add(vni); + + /* Find bridge interface for the VNI */ + vlan_if = zvni_map_to_svi(vxl->access_vlan, + zif->brslave_info.br_if); + if (vlan_if) { + zevpn->vrf_id = vlan_if->vrf->vrf_id; + zl3vni = zl3vni_from_vrf(vlan_if->vrf->vrf_id); + if (zl3vni) + listnode_add_sort_nodup(zl3vni->l2vnis, zevpn); + } + + zevpn->vxlan_if = ifp; + zevpn->local_vtep_ip = vxl->vtep_ip; + + /* Inform BGP if the VNI is up and mapped to a bridge. 
*/ + if (if_is_operative(ifp) && zif->brslave_info.br_if) { + zebra_evpn_send_add_to_client(zevpn); + zebra_evpn_read_mac_neigh(zevpn, ifp); + } + } + + return 0; +} + +/* delete and uninstall rmac hash entry */ +static void zl3vni_del_rmac_hash_entry(struct hash_bucket *bucket, void *ctx) +{ + struct zebra_mac *zrmac = NULL; + struct zebra_l3vni *zl3vni = NULL; + + zrmac = (struct zebra_mac *)bucket->data; + zl3vni = (struct zebra_l3vni *)ctx; + zl3vni_rmac_uninstall(zl3vni, zrmac); + + /* Send RMAC for FPM processing */ + hook_call(zebra_rmac_update, zrmac, zl3vni, true, "RMAC deleted"); + + zl3vni_rmac_del(zl3vni, zrmac); +} + +/* delete and uninstall nh hash entry */ +static void zl3vni_del_nh_hash_entry(struct hash_bucket *bucket, void *ctx) +{ + struct zebra_neigh *n = NULL; + struct zebra_l3vni *zl3vni = NULL; + + n = (struct zebra_neigh *)bucket->data; + zl3vni = (struct zebra_l3vni *)ctx; + zl3vni_nh_uninstall(zl3vni, n); + zl3vni_nh_del(zl3vni, n); +} + +/* re-add remote rmac if needed */ +static int zebra_vxlan_readd_remote_rmac(struct zebra_l3vni *zl3vni, + struct ethaddr *rmac) +{ + struct zebra_mac *zrmac = NULL; + + zrmac = zl3vni_rmac_lookup(zl3vni, rmac); + if (!zrmac) + return 0; + + if (IS_ZEBRA_DEBUG_VXLAN) + zlog_debug("Del remote RMAC %pEA L3VNI %u - readd", + rmac, zl3vni->vni); + + zl3vni_rmac_install(zl3vni, zrmac); + return 0; +} + +/* Public functions */ + +int is_l3vni_for_prefix_routes_only(vni_t vni) +{ + struct zebra_l3vni *zl3vni = NULL; + + zl3vni = zl3vni_lookup(vni); + if (!zl3vni) + return 0; + + return CHECK_FLAG(zl3vni->filter, PREFIX_ROUTES_ONLY) ? 
1 : 0; +} + +/* handle evpn route in vrf table */ +void zebra_vxlan_evpn_vrf_route_add(vrf_id_t vrf_id, const struct ethaddr *rmac, + const struct ipaddr *vtep_ip, + const struct prefix *host_prefix) +{ + struct zebra_l3vni *zl3vni = NULL; + struct ipaddr ipv4_vtep; + + zl3vni = zl3vni_from_vrf(vrf_id); + if (!zl3vni || !is_l3vni_oper_up(zl3vni)) + return; + + /* + * add the next hop neighbor - + * neigh to be installed is the ipv6 nexthop neigh + */ + zl3vni_remote_nh_add(zl3vni, vtep_ip, rmac, host_prefix); + + /* + * if the remote vtep is a ipv4 mapped ipv6 address convert it to ipv4 + * address. Rmac is programmed against the ipv4 vtep because we only + * support ipv4 tunnels in the h/w right now + */ + memset(&ipv4_vtep, 0, sizeof(ipv4_vtep)); + ipv4_vtep.ipa_type = IPADDR_V4; + if (vtep_ip->ipa_type == IPADDR_V6) + ipv4_mapped_ipv6_to_ipv4(&vtep_ip->ipaddr_v6, + &(ipv4_vtep.ipaddr_v4)); + else + memcpy(&(ipv4_vtep.ipaddr_v4), &vtep_ip->ipaddr_v4, + sizeof(struct in_addr)); + + /* + * add the rmac - remote rmac to be installed is against the ipv4 + * nexthop address + */ + zl3vni_remote_rmac_add(zl3vni, rmac, &ipv4_vtep); +} + +/* handle evpn vrf route delete */ +void zebra_vxlan_evpn_vrf_route_del(vrf_id_t vrf_id, + struct ipaddr *vtep_ip, + struct prefix *host_prefix) +{ + struct zebra_l3vni *zl3vni = NULL; + struct zebra_neigh *nh = NULL; + struct zebra_mac *zrmac = NULL; + + zl3vni = zl3vni_from_vrf(vrf_id); + if (!zl3vni) + return; + + /* find the next hop entry and rmac entry */ + nh = zl3vni_nh_lookup(zl3vni, vtep_ip); + if (!nh) + return; + zrmac = zl3vni_rmac_lookup(zl3vni, &nh->emac); + + /* delete the next hop entry */ + zl3vni_remote_nh_del(zl3vni, nh, host_prefix); + + /* delete the rmac entry */ + if (zrmac) + zl3vni_remote_rmac_del(zl3vni, zrmac, vtep_ip); +} + +void zebra_vxlan_print_specific_rmac_l3vni(struct vty *vty, vni_t l3vni, + struct ethaddr *rmac, bool use_json) +{ + struct zebra_l3vni *zl3vni = NULL; + struct zebra_mac *zrmac = NULL; 
+ json_object *json = NULL; + + if (!is_evpn_enabled()) { + if (use_json) + vty_out(vty, "{}\n"); + return; + } + + if (use_json) + json = json_object_new_object(); + + zl3vni = zl3vni_lookup(l3vni); + if (!zl3vni) { + if (use_json) + vty_out(vty, "{}\n"); + else + vty_out(vty, "%% L3-VNI %u doesn't exist\n", l3vni); + return; + } + + zrmac = zl3vni_rmac_lookup(zl3vni, rmac); + if (!zrmac) { + if (use_json) + vty_out(vty, "{}\n"); + else + vty_out(vty, + "%% Requested RMAC doesn't exist in L3-VNI %u\n", + l3vni); + return; + } + + zl3vni_print_rmac(zrmac, vty, json); + + if (use_json) + vty_json(vty, json); +} + +void zebra_vxlan_print_rmacs_l3vni(struct vty *vty, vni_t l3vni, bool use_json) +{ + struct zebra_l3vni *zl3vni; + uint32_t num_rmacs; + struct rmac_walk_ctx wctx; + json_object *json = NULL; + + if (!is_evpn_enabled()) + return; + + zl3vni = zl3vni_lookup(l3vni); + if (!zl3vni) { + if (use_json) + vty_out(vty, "{}\n"); + else + vty_out(vty, "%% L3-VNI %u does not exist\n", l3vni); + return; + } + num_rmacs = hashcount(zl3vni->rmac_table); + if (!num_rmacs) + return; + + if (use_json) + json = json_object_new_object(); + + memset(&wctx, 0, sizeof(wctx)); + wctx.vty = vty; + wctx.json = json; + if (!use_json) { + vty_out(vty, "Number of Remote RMACs known for this VNI: %u\n", + num_rmacs); + vty_out(vty, "%-17s %-21s\n", "MAC", "Remote VTEP"); + } else + json_object_int_add(json, "numRmacs", num_rmacs); + + hash_iterate(zl3vni->rmac_table, zl3vni_print_rmac_hash, &wctx); + + if (use_json) + vty_json(vty, json); +} + +void zebra_vxlan_print_rmacs_all_l3vni(struct vty *vty, bool use_json) +{ + json_object *json = NULL; + void *args[2]; + + if (!is_evpn_enabled()) { + if (use_json) + vty_out(vty, "{}\n"); + return; + } + + if (use_json) + json = json_object_new_object(); + + args[0] = vty; + args[1] = json; + hash_iterate(zrouter.l3vni_table, + (void (*)(struct hash_bucket *, + void *))zl3vni_print_rmac_hash_all_vni, + args); + + if (use_json) + vty_json(vty, 
json); +} + +void zebra_vxlan_print_specific_nh_l3vni(struct vty *vty, vni_t l3vni, + struct ipaddr *ip, bool use_json) +{ + struct zebra_l3vni *zl3vni = NULL; + struct zebra_neigh *n = NULL; + json_object *json = NULL; + + if (!is_evpn_enabled()) { + if (use_json) + vty_out(vty, "{}\n"); + return; + } + + if (use_json) + json = json_object_new_object(); + + zl3vni = zl3vni_lookup(l3vni); + if (!zl3vni) { + if (use_json) + vty_out(vty, "{}\n"); + else + vty_out(vty, "%% L3-VNI %u does not exist\n", l3vni); + return; + } + + n = zl3vni_nh_lookup(zl3vni, ip); + if (!n) { + if (use_json) + vty_out(vty, "{}\n"); + else + vty_out(vty, + "%% Requested next-hop not present for L3-VNI %u", + l3vni); + return; + } + + zl3vni_print_nh(n, vty, json); + + if (use_json) + vty_json(vty, json); +} + +void zebra_vxlan_print_nh_l3vni(struct vty *vty, vni_t l3vni, bool use_json) +{ + uint32_t num_nh; + struct nh_walk_ctx wctx; + json_object *json = NULL; + struct zebra_l3vni *zl3vni = NULL; + + if (!is_evpn_enabled()) + return; + + zl3vni = zl3vni_lookup(l3vni); + if (!zl3vni) { + if (use_json) + vty_out(vty, "{}\n"); + else + vty_out(vty, "%% L3-VNI %u does not exist\n", l3vni); + return; + } + + num_nh = hashcount(zl3vni->nh_table); + if (!num_nh) + return; + + if (use_json) + json = json_object_new_object(); + + wctx.vty = vty; + wctx.json = json; + if (!use_json) { + vty_out(vty, "Number of NH Neighbors known for this VNI: %u\n", + num_nh); + vty_out(vty, "%-15s %-17s\n", "IP", "RMAC"); + } else + json_object_int_add(json, "numNextHops", num_nh); + + hash_iterate(zl3vni->nh_table, zl3vni_print_nh_hash, &wctx); + + if (use_json) + vty_json(vty, json); +} + +void zebra_vxlan_print_nh_all_l3vni(struct vty *vty, bool use_json) +{ + json_object *json = NULL; + void *args[2]; + + if (!is_evpn_enabled()) { + if (use_json) + vty_out(vty, "{}\n"); + return; + } + + if (use_json) + json = json_object_new_object(); + + args[0] = vty; + args[1] = json; + hash_iterate(zrouter.l3vni_table, + 
(void (*)(struct hash_bucket *, + void *))zl3vni_print_nh_hash_all_vni, + args); + + if (use_json) + vty_json(vty, json); +} + +/* + * Display L3 VNI information (VTY command handler). + */ +void zebra_vxlan_print_l3vni(struct vty *vty, vni_t vni, bool use_json) +{ + void *args[2]; + json_object *json = NULL; + struct zebra_l3vni *zl3vni = NULL; + + if (!is_evpn_enabled()) { + if (use_json) + vty_out(vty, "{}\n"); + return; + } + + zl3vni = zl3vni_lookup(vni); + if (!zl3vni) { + if (use_json) + vty_out(vty, "{}\n"); + else + vty_out(vty, "%% VNI %u does not exist\n", vni); + return; + } + + if (use_json) + json = json_object_new_object(); + + args[0] = vty; + args[1] = json; + zl3vni_print(zl3vni, (void *)args); + + if (use_json) + vty_json(vty, json); +} + +void zebra_vxlan_print_vrf_vni(struct vty *vty, struct zebra_vrf *zvrf, + json_object *json_vrfs) +{ + char buf[ETHER_ADDR_STRLEN]; + struct zebra_l3vni *zl3vni = NULL; + + zl3vni = zl3vni_lookup(zvrf->l3vni); + if (!zl3vni) + return; + + if (!json_vrfs) { + vty_out(vty, "%-37s %-10u %-20s %-20s %-5s %-18s\n", + zvrf_name(zvrf), zl3vni->vni, + zl3vni_vxlan_if_name(zl3vni), + zl3vni_svi_if_name(zl3vni), zl3vni_state2str(zl3vni), + zl3vni_rmac2str(zl3vni, buf, sizeof(buf))); + } else { + json_object *json_vrf = NULL; + + json_vrf = json_object_new_object(); + json_object_string_add(json_vrf, "vrf", zvrf_name(zvrf)); + json_object_int_add(json_vrf, "vni", zl3vni->vni); + json_object_string_add(json_vrf, "vxlanIntf", + zl3vni_vxlan_if_name(zl3vni)); + json_object_string_add(json_vrf, "sviIntf", + zl3vni_svi_if_name(zl3vni)); + json_object_string_add(json_vrf, "state", + zl3vni_state2str(zl3vni)); + json_object_string_add( + json_vrf, "routerMac", + zl3vni_rmac2str(zl3vni, buf, sizeof(buf))); + json_object_array_add(json_vrfs, json_vrf); + } +} + +/* + * Display Neighbors for a VNI (VTY command handler). 
+ */ +void zebra_vxlan_print_neigh_vni(struct vty *vty, struct zebra_vrf *zvrf, + vni_t vni, bool use_json) +{ + struct zebra_evpn *zevpn; + uint32_t num_neigh; + struct neigh_walk_ctx wctx; + json_object *json = NULL; + + if (!is_evpn_enabled()) + return; + zevpn = zebra_evpn_lookup(vni); + if (!zevpn) { + if (use_json) + vty_out(vty, "{}\n"); + else + vty_out(vty, "%% VNI %u does not exist\n", vni); + return; + } + num_neigh = hashcount(zevpn->neigh_table); + if (!num_neigh) + return; + + if (use_json) + json = json_object_new_object(); + + /* Since we have IPv6 addresses to deal with which can vary widely in + * size, we try to be a bit more elegant in display by first computing + * the maximum width. + */ + memset(&wctx, 0, sizeof(wctx)); + wctx.zevpn = zevpn; + wctx.vty = vty; + wctx.addr_width = 15; + wctx.json = json; + hash_iterate(zevpn->neigh_table, zebra_evpn_find_neigh_addr_width, + &wctx); + + if (!use_json) { + vty_out(vty, + "Number of ARPs (local and remote) known for this VNI: %u\n", + num_neigh); + zebra_evpn_print_neigh_hdr(vty, &wctx); + } else + json_object_int_add(json, "numArpNd", num_neigh); + + hash_iterate(zevpn->neigh_table, zebra_evpn_print_neigh_hash, &wctx); + if (use_json) + vty_json(vty, json); +} + +/* + * Display neighbors across all VNIs (VTY command handler). + */ +void zebra_vxlan_print_neigh_all_vni(struct vty *vty, struct zebra_vrf *zvrf, + bool print_dup, bool use_json) +{ + json_object *json = NULL; + void *args[3]; + + if (!is_evpn_enabled()) + return; + + if (use_json) + json = json_object_new_object(); + + args[0] = vty; + args[1] = json; + args[2] = (void *)(ptrdiff_t)print_dup; + + hash_iterate(zvrf->evpn_table, + (void (*)(struct hash_bucket *, + void *))zevpn_print_neigh_hash_all_evpn, + args); + if (use_json) + vty_json(vty, json); +} + +/* + * Display neighbors across all VNIs in detail(VTY command handler). 
+ */ +void zebra_vxlan_print_neigh_all_vni_detail(struct vty *vty, + struct zebra_vrf *zvrf, + bool print_dup, bool use_json) +{ + json_object *json = NULL; + void *args[3]; + + if (!is_evpn_enabled()) + return; + + if (use_json) + json = json_object_new_object(); + + args[0] = vty; + args[1] = json; + args[2] = (void *)(ptrdiff_t)print_dup; + + hash_iterate(zvrf->evpn_table, + (void (*)(struct hash_bucket *, + void *))zevpn_print_neigh_hash_all_evpn_detail, + args); + if (use_json) + vty_json(vty, json); +} + +/* + * Display specific neighbor for a VNI, if present (VTY command handler). + */ +void zebra_vxlan_print_specific_neigh_vni(struct vty *vty, + struct zebra_vrf *zvrf, vni_t vni, + struct ipaddr *ip, bool use_json) +{ + struct zebra_evpn *zevpn; + struct zebra_neigh *n; + json_object *json = NULL; + + if (!is_evpn_enabled()) + return; + zevpn = zebra_evpn_lookup(vni); + if (!zevpn) { + if (use_json) + vty_out(vty, "{}\n"); + else + vty_out(vty, "%% VNI %u does not exist\n", vni); + return; + } + n = zebra_evpn_neigh_lookup(zevpn, ip); + if (!n) { + if (!use_json) + vty_out(vty, + "%% Requested neighbor does not exist in VNI %u\n", + vni); + return; + } + if (use_json) + json = json_object_new_object(); + + zebra_evpn_print_neigh(n, vty, json); + + if (use_json) + vty_json(vty, json); +} + +/* + * Display neighbors for a VNI from specific VTEP (VTY command handler). + * By definition, these are remote neighbors. 
 */
void zebra_vxlan_print_neigh_vni_vtep(struct vty *vty, struct zebra_vrf *zvrf,
				      vni_t vni, struct in_addr vtep_ip,
				      bool use_json)
{
	struct zebra_evpn *zevpn;
	uint32_t num_neigh;
	struct neigh_walk_ctx wctx;
	json_object *json = NULL;

	if (!is_evpn_enabled())
		return;
	zevpn = zebra_evpn_lookup(vni);
	if (!zevpn) {
		if (use_json)
			vty_out(vty, "{}\n");
		else
			vty_out(vty, "%% VNI %u does not exist\n", vni);
		return;
	}
	/* Nothing to print if the VNI has no neighbors at all. */
	num_neigh = hashcount(zevpn->neigh_table);
	if (!num_neigh)
		return;

	if (use_json)
		json = json_object_new_object();

	memset(&wctx, 0, sizeof(wctx));
	wctx.zevpn = zevpn;
	wctx.vty = vty;
	/* 15 = minimum width for an IPv4 dotted-quad; the first walk below
	 * widens it if longer (IPv6) addresses are present.
	 */
	wctx.addr_width = 15;
	wctx.flags = SHOW_REMOTE_NEIGH_FROM_VTEP;
	wctx.r_vtep_ip = vtep_ip;
	wctx.json = json;
	hash_iterate(zevpn->neigh_table, zebra_evpn_find_neigh_addr_width,
		     &wctx);
	hash_iterate(zevpn->neigh_table, zebra_evpn_print_neigh_hash, &wctx);

	if (use_json)
		vty_json(vty, json);
}

/*
 * Display Duplicate detected Neighbors for a VNI
 * (VTY command handler).
 */
void zebra_vxlan_print_neigh_vni_dad(struct vty *vty,
				     struct zebra_vrf *zvrf,
				     vni_t vni,
				     bool use_json)
{
	struct zebra_evpn *zevpn;
	uint32_t num_neigh;
	struct neigh_walk_ctx wctx;
	json_object *json = NULL;

	if (!is_evpn_enabled())
		return;

	zevpn = zebra_evpn_lookup(vni);
	if (!zevpn) {
		vty_out(vty, "%% VNI %u does not exist\n", vni);
		return;
	}

	num_neigh = hashcount(zevpn->neigh_table);
	if (!num_neigh)
		return;

	/* Only duplicate-detected neighbors are of interest here. */
	num_neigh = num_dup_detected_neighs(zevpn);
	if (!num_neigh)
		return;

	if (use_json)
		json = json_object_new_object();

	/* Since we have IPv6 addresses to deal with which can vary widely in
	 * size, we try to be a bit more elegant in display by first computing
	 * the maximum width.
	 */
	memset(&wctx, 0, sizeof(wctx));
	wctx.zevpn = zevpn;
	wctx.vty = vty;
	wctx.addr_width = 15;
	wctx.json = json;
	hash_iterate(zevpn->neigh_table, zebra_evpn_find_neigh_addr_width,
		     &wctx);

	if (!use_json) {
		vty_out(vty,
			"Number of ARPs (local and remote) known for this VNI: %u\n",
			num_neigh);
		vty_out(vty, "%*s %-6s %-8s %-17s %-30s\n",
			-wctx.addr_width, "IP", "Type",
			"State", "MAC", "Remote ES/VTEP");
	} else
		json_object_int_add(json, "numArpNd", num_neigh);

	hash_iterate(zevpn->neigh_table, zebra_evpn_print_dad_neigh_hash,
		     &wctx);

	if (use_json)
		vty_json(vty, json);
}

/*
 * Display MACs for a VNI (VTY command handler).
 */
void zebra_vxlan_print_macs_vni(struct vty *vty, struct zebra_vrf *zvrf,
				vni_t vni, bool use_json)
{
	struct zebra_evpn *zevpn;
	uint32_t num_macs;
	struct mac_walk_ctx wctx;
	json_object *json = NULL;
	json_object *json_mac = NULL;

	if (!is_evpn_enabled())
		return;
	zevpn = zebra_evpn_lookup(vni);
	if (!zevpn) {
		if (use_json)
			vty_out(vty, "{}\n");
		else
			vty_out(vty, "%% VNI %u does not exist\n", vni);
		return;
	}
	num_macs = num_valid_macs(zevpn);
	if (!num_macs)
		return;

	/* "macs" is a nested object; it is attached to the root below. */
	if (use_json) {
		json = json_object_new_object();
		json_mac = json_object_new_object();
	}

	memset(&wctx, 0, sizeof(wctx));
	wctx.zevpn = zevpn;
	wctx.vty = vty;
	wctx.json = json_mac;

	if (!use_json) {
		vty_out(vty,
			"Number of MACs (local and remote) known for this VNI: %u\n",
			num_macs);
		vty_out(vty,
			"Flags: N=sync-neighs, I=local-inactive, P=peer-active, X=peer-proxy\n");
		vty_out(vty, "%-17s %-6s %-5s %-30s %-5s %s\n", "MAC", "Type",
			"Flags", "Intf/Remote ES/VTEP", "VLAN", "Seq #'s");
	} else
		json_object_int_add(json, "numMacs", num_macs);

	hash_iterate(zevpn->mac_table, zebra_evpn_print_mac_hash, &wctx);

	if (use_json) {
		json_object_object_add(json, "macs", json_mac);
		vty_json(vty, json);
	}
}

/*
 * Display MACs for all VNIs (VTY command handler).
 */
void zebra_vxlan_print_macs_all_vni(struct vty *vty, struct zebra_vrf *zvrf,
				    bool print_dup, bool use_json)
{
	struct mac_walk_ctx wctx;
	json_object *json = NULL;

	/* Keep JSON output well-formed even when EVPN is disabled. */
	if (!is_evpn_enabled()) {
		if (use_json)
			vty_out(vty, "{}\n");
		return;
	}
	if (use_json)
		json = json_object_new_object();

	memset(&wctx, 0, sizeof(wctx));
	wctx.vty = vty;
	wctx.json = json;
	/* print_dup restricts output to duplicate-detected MACs. */
	wctx.print_dup = print_dup;
	hash_iterate(zvrf->evpn_table, zevpn_print_mac_hash_all_evpn, &wctx);

	if (use_json)
		vty_json(vty, json);
}

/*
 * Display MACs in detail for all VNIs (VTY command handler).
 */
void zebra_vxlan_print_macs_all_vni_detail(struct vty *vty,
					   struct zebra_vrf *zvrf,
					   bool print_dup, bool use_json)
{
	struct mac_walk_ctx wctx;
	json_object *json = NULL;

	if (!is_evpn_enabled()) {
		if (use_json)
			vty_out(vty, "{}\n");
		return;
	}
	if (use_json)
		json = json_object_new_object();

	memset(&wctx, 0, sizeof(wctx));
	wctx.vty = vty;
	wctx.json = json;
	wctx.print_dup = print_dup;
	hash_iterate(zvrf->evpn_table, zevpn_print_mac_hash_all_evpn_detail,
		     &wctx);

	if (use_json)
		vty_json(vty, json);
}

/*
 * Display MACs for all VNIs (VTY command handler).
 */
void zebra_vxlan_print_macs_all_vni_vtep(struct vty *vty,
					 struct zebra_vrf *zvrf,
					 struct in_addr vtep_ip, bool use_json)
{
	struct mac_walk_ctx wctx;
	json_object *json = NULL;

	/* NOTE(review): unlike the two functions above, this one does not
	 * print "{}" for JSON when EVPN is disabled — confirm whether that
	 * asymmetry is intentional.
	 */
	if (!is_evpn_enabled())
		return;

	if (use_json)
		json = json_object_new_object();

	memset(&wctx, 0, sizeof(wctx));
	wctx.vty = vty;
	/* Restrict the walk to remote MACs learned from this VTEP. */
	wctx.flags = SHOW_REMOTE_MAC_FROM_VTEP;
	wctx.r_vtep_ip = vtep_ip;
	wctx.json = json;
	hash_iterate(zvrf->evpn_table, zevpn_print_mac_hash_all_evpn, &wctx);

	if (use_json)
		vty_json(vty, json);
}

/*
 * Display specific MAC for a VNI, if present (VTY command handler).
+ */ +void zebra_vxlan_print_specific_mac_vni(struct vty *vty, struct zebra_vrf *zvrf, + vni_t vni, struct ethaddr *macaddr, + bool use_json) +{ + struct zebra_evpn *zevpn; + struct zebra_mac *mac; + json_object *json = NULL; + + if (!is_evpn_enabled()) + return; + + zevpn = zebra_evpn_lookup(vni); + if (!zevpn) { + if (use_json) + vty_out(vty, "{}\n"); + else + vty_out(vty, "%% VNI %u does not exist\n", vni); + return; + } + mac = zebra_evpn_mac_lookup(zevpn, macaddr); + if (!mac) { + if (use_json) + vty_out(vty, "{}\n"); + else + vty_out(vty, + "%% Requested MAC does not exist in VNI %u\n", + vni); + return; + } + + if (use_json) + json = json_object_new_object(); + + zebra_evpn_print_mac(mac, vty, json); + if (use_json) + vty_json(vty, json); +} + +/* Print Duplicate MACs per VNI */ +void zebra_vxlan_print_macs_vni_dad(struct vty *vty, + struct zebra_vrf *zvrf, + vni_t vni, bool use_json) +{ + struct zebra_evpn *zevpn; + struct mac_walk_ctx wctx; + uint32_t num_macs; + json_object *json = NULL; + json_object *json_mac = NULL; + + if (!is_evpn_enabled()) + return; + + zevpn = zebra_evpn_lookup(vni); + if (!zevpn) { + vty_out(vty, "%% VNI %u does not exist\n", vni); + return; + } + + num_macs = num_valid_macs(zevpn); + if (!num_macs) + return; + + num_macs = num_dup_detected_macs(zevpn); + if (!num_macs) + return; + + if (use_json) { + json = json_object_new_object(); + json_mac = json_object_new_object(); + } + + memset(&wctx, 0, sizeof(wctx)); + wctx.zevpn = zevpn; + wctx.vty = vty; + wctx.json = json_mac; + + if (!use_json) { + vty_out(vty, + "Number of MACs (local and remote) known for this VNI: %u\n", + num_macs); + vty_out(vty, "%-17s %-6s %-5s %-30s %-5s\n", "MAC", "Type", + "Flags", "Intf/Remote ES/VTEP", "VLAN"); + } else + json_object_int_add(json, "numMacs", num_macs); + + hash_iterate(zevpn->mac_table, zebra_evpn_print_dad_mac_hash, &wctx); + + if (use_json) { + json_object_object_add(json, "macs", json_mac); + vty_json(vty, json); + } + +} + +int 
zebra_vxlan_clear_dup_detect_vni_mac(struct zebra_vrf *zvrf, vni_t vni, + struct ethaddr *macaddr, char *errmsg, + size_t errmsg_len) +{ + struct zebra_evpn *zevpn; + struct zebra_mac *mac; + struct listnode *node = NULL; + struct zebra_neigh *nbr = NULL; + + if (!is_evpn_enabled()) + return 0; + + zevpn = zebra_evpn_lookup(vni); + if (!zevpn) { + snprintfrr(errmsg, errmsg_len, "VNI %u does not exist", vni); + return -1; + } + + mac = zebra_evpn_mac_lookup(zevpn, macaddr); + if (!mac) { + snprintf(errmsg, errmsg_len, + "Requested MAC does not exist in VNI %u\n", vni); + return -1; + } + + if (!CHECK_FLAG(mac->flags, ZEBRA_MAC_DUPLICATE)) { + snprintfrr(errmsg, errmsg_len, + "Requested MAC is not duplicate detected\n"); + return -1; + } + + /* Remove all IPs as duplicate associcated with this MAC */ + for (ALL_LIST_ELEMENTS_RO(mac->neigh_list, node, nbr)) { + /* For local neigh mark inactive so MACIP update is generated + * to BGP. This is a scenario where MAC update received + * and detected as duplicate which marked neigh as duplicate. + * Later local neigh update did not get a chance to relay + * to BGP. Similarly remote macip update, neigh needs to be + * installed locally. 
+ */ + if (zvrf->dad_freeze && + CHECK_FLAG(nbr->flags, ZEBRA_NEIGH_DUPLICATE)) { + if (CHECK_FLAG(nbr->flags, ZEBRA_NEIGH_LOCAL)) + ZEBRA_NEIGH_SET_INACTIVE(nbr); + else if (CHECK_FLAG(nbr->flags, ZEBRA_NEIGH_REMOTE)) + zebra_evpn_rem_neigh_install( + zevpn, nbr, false /*was_static*/); + } + + UNSET_FLAG(nbr->flags, ZEBRA_NEIGH_DUPLICATE); + nbr->dad_count = 0; + nbr->detect_start_time.tv_sec = 0; + nbr->dad_dup_detect_time = 0; + } + + UNSET_FLAG(mac->flags, ZEBRA_MAC_DUPLICATE); + mac->dad_count = 0; + mac->detect_start_time.tv_sec = 0; + mac->detect_start_time.tv_usec = 0; + mac->dad_dup_detect_time = 0; + THREAD_OFF(mac->dad_mac_auto_recovery_timer); + + /* warn-only action return */ + if (!zvrf->dad_freeze) + return 0; + + /* Local: Notify Peer VTEPs, Remote: Install the entry */ + if (CHECK_FLAG(mac->flags, ZEBRA_MAC_LOCAL)) { + /* Inform to BGP */ + if (zebra_evpn_mac_send_add_to_client(zevpn->vni, &mac->macaddr, + mac->flags, mac->loc_seq, + mac->es)) + return 0; + + /* Process all neighbors associated with this MAC. */ + zebra_evpn_process_neigh_on_local_mac_change(zevpn, mac, 0, + 0 /*es_change*/); + + } else if (CHECK_FLAG(mac->flags, ZEBRA_MAC_REMOTE)) { + zebra_evpn_process_neigh_on_remote_mac_add(zevpn, mac); + + /* Install the entry. 
*/ + zebra_evpn_rem_mac_install(zevpn, mac, false /* was_static */); + } + + return 0; +} + +int zebra_vxlan_clear_dup_detect_vni_ip(struct zebra_vrf *zvrf, vni_t vni, + struct ipaddr *ip, char *errmsg, + size_t errmsg_len) +{ + struct zebra_evpn *zevpn; + struct zebra_neigh *nbr; + struct zebra_mac *mac; + char buf[INET6_ADDRSTRLEN]; + char buf2[ETHER_ADDR_STRLEN]; + + if (!is_evpn_enabled()) + return 0; + + zevpn = zebra_evpn_lookup(vni); + if (!zevpn) { + snprintfrr(errmsg, errmsg_len, "VNI %u does not exist\n", vni); + return -1; + } + + nbr = zebra_evpn_neigh_lookup(zevpn, ip); + if (!nbr) { + snprintfrr(errmsg, errmsg_len, + "Requested host IP does not exist in VNI %u\n", vni); + return -1; + } + + ipaddr2str(&nbr->ip, buf, sizeof(buf)); + + if (!CHECK_FLAG(nbr->flags, ZEBRA_NEIGH_DUPLICATE)) { + snprintfrr(errmsg, errmsg_len, + "Requested host IP %s is not duplicate detected\n", + buf); + return -1; + } + + mac = zebra_evpn_mac_lookup(zevpn, &nbr->emac); + + if (CHECK_FLAG(mac->flags, ZEBRA_MAC_DUPLICATE)) { + snprintfrr( + errmsg, errmsg_len, + "Requested IP's associated MAC %s is still in duplicate state\n", + prefix_mac2str(&nbr->emac, buf2, sizeof(buf2))); + return -1; + } + + if (IS_ZEBRA_DEBUG_VXLAN) + zlog_debug("%s: clear neigh %s in dup state, flags 0x%x seq %u", + __func__, buf, nbr->flags, nbr->loc_seq); + + UNSET_FLAG(nbr->flags, ZEBRA_NEIGH_DUPLICATE); + nbr->dad_count = 0; + nbr->detect_start_time.tv_sec = 0; + nbr->detect_start_time.tv_usec = 0; + nbr->dad_dup_detect_time = 0; + THREAD_OFF(nbr->dad_ip_auto_recovery_timer); + + if (!!CHECK_FLAG(nbr->flags, ZEBRA_NEIGH_LOCAL)) { + zebra_evpn_neigh_send_add_to_client(zevpn->vni, ip, &nbr->emac, + nbr->mac, nbr->flags, + nbr->loc_seq); + } else if (!!CHECK_FLAG(nbr->flags, ZEBRA_NEIGH_REMOTE)) { + zebra_evpn_rem_neigh_install(zevpn, nbr, false /*was_static*/); + } + + return 0; +} + +static void zevpn_clear_dup_mac_hash(struct hash_bucket *bucket, void *ctxt) +{ + struct mac_walk_ctx *wctx = ctxt; 
+ struct zebra_mac *mac; + struct zebra_evpn *zevpn; + struct listnode *node = NULL; + struct zebra_neigh *nbr = NULL; + + mac = (struct zebra_mac *)bucket->data; + if (!mac) + return; + + zevpn = wctx->zevpn; + + if (!CHECK_FLAG(mac->flags, ZEBRA_MAC_DUPLICATE)) + return; + + UNSET_FLAG(mac->flags, ZEBRA_MAC_DUPLICATE); + mac->dad_count = 0; + mac->detect_start_time.tv_sec = 0; + mac->detect_start_time.tv_usec = 0; + mac->dad_dup_detect_time = 0; + THREAD_OFF(mac->dad_mac_auto_recovery_timer); + + /* Remove all IPs as duplicate associcated with this MAC */ + for (ALL_LIST_ELEMENTS_RO(mac->neigh_list, node, nbr)) { + if (CHECK_FLAG(nbr->flags, ZEBRA_NEIGH_LOCAL) + && nbr->dad_count) + ZEBRA_NEIGH_SET_INACTIVE(nbr); + + UNSET_FLAG(nbr->flags, ZEBRA_NEIGH_DUPLICATE); + nbr->dad_count = 0; + nbr->detect_start_time.tv_sec = 0; + nbr->dad_dup_detect_time = 0; + } + + /* Local: Notify Peer VTEPs, Remote: Install the entry */ + if (CHECK_FLAG(mac->flags, ZEBRA_MAC_LOCAL)) { + /* Inform to BGP */ + if (zebra_evpn_mac_send_add_to_client(zevpn->vni, &mac->macaddr, + mac->flags, mac->loc_seq, + mac->es)) + return; + + /* Process all neighbors associated with this MAC. */ + zebra_evpn_process_neigh_on_local_mac_change(zevpn, mac, 0, + 0 /*es_change*/); + + } else if (CHECK_FLAG(mac->flags, ZEBRA_MAC_REMOTE)) { + zebra_evpn_process_neigh_on_remote_mac_add(zevpn, mac); + + /* Install the entry. 
*/ + zebra_evpn_rem_mac_install(zevpn, mac, false /* was_static */); + } +} + +static void zevpn_clear_dup_detect_hash_vni_all(struct hash_bucket *bucket, + void **args) +{ + struct zebra_evpn *zevpn; + struct zebra_vrf *zvrf; + struct mac_walk_ctx m_wctx; + struct neigh_walk_ctx n_wctx; + + zevpn = (struct zebra_evpn *)bucket->data; + if (!zevpn) + return; + + zvrf = (struct zebra_vrf *)args[0]; + + if (hashcount(zevpn->neigh_table)) { + memset(&n_wctx, 0, sizeof(n_wctx)); + n_wctx.zevpn = zevpn; + n_wctx.zvrf = zvrf; + hash_iterate(zevpn->neigh_table, + zebra_evpn_clear_dup_neigh_hash, &n_wctx); + } + + if (num_valid_macs(zevpn)) { + memset(&m_wctx, 0, sizeof(m_wctx)); + m_wctx.zevpn = zevpn; + m_wctx.zvrf = zvrf; + hash_iterate(zevpn->mac_table, zevpn_clear_dup_mac_hash, &m_wctx); + } + +} + +int zebra_vxlan_clear_dup_detect_vni_all(struct zebra_vrf *zvrf) +{ + void *args[1]; + + if (!is_evpn_enabled()) + return 0; + + args[0] = zvrf; + + hash_iterate(zvrf->evpn_table, + (void (*)(struct hash_bucket *, void *)) + zevpn_clear_dup_detect_hash_vni_all, args); + + return 0; +} + +int zebra_vxlan_clear_dup_detect_vni(struct zebra_vrf *zvrf, vni_t vni) +{ + struct zebra_evpn *zevpn; + struct mac_walk_ctx m_wctx; + struct neigh_walk_ctx n_wctx; + + if (!is_evpn_enabled()) + return 0; + + zevpn = zebra_evpn_lookup(vni); + if (!zevpn) { + zlog_warn("VNI %u does not exist", vni); + return CMD_WARNING; + } + + if (hashcount(zevpn->neigh_table)) { + memset(&n_wctx, 0, sizeof(n_wctx)); + n_wctx.zevpn = zevpn; + n_wctx.zvrf = zvrf; + hash_iterate(zevpn->neigh_table, + zebra_evpn_clear_dup_neigh_hash, &n_wctx); + } + + if (num_valid_macs(zevpn)) { + memset(&m_wctx, 0, sizeof(m_wctx)); + m_wctx.zevpn = zevpn; + m_wctx.zvrf = zvrf; + hash_iterate(zevpn->mac_table, zevpn_clear_dup_mac_hash, &m_wctx); + } + + return 0; +} + +/* + * Display MACs for a VNI from specific VTEP (VTY command handler). 
 */
void zebra_vxlan_print_macs_vni_vtep(struct vty *vty, struct zebra_vrf *zvrf,
				     vni_t vni, struct in_addr vtep_ip,
				     bool use_json)
{
	struct zebra_evpn *zevpn;
	uint32_t num_macs;
	struct mac_walk_ctx wctx;
	json_object *json = NULL;
	json_object *json_mac = NULL;

	if (!is_evpn_enabled())
		return;
	zevpn = zebra_evpn_lookup(vni);
	if (!zevpn) {
		if (use_json)
			vty_out(vty, "{}\n");
		else
			vty_out(vty, "%% VNI %u does not exist\n", vni);
		return;
	}
	num_macs = num_valid_macs(zevpn);
	if (!num_macs)
		return;

	if (use_json) {
		json = json_object_new_object();
		json_mac = json_object_new_object();
	}

	memset(&wctx, 0, sizeof(wctx));
	wctx.zevpn = zevpn;
	wctx.vty = vty;
	/* Only remote MACs learned from the given VTEP are printed. */
	wctx.flags = SHOW_REMOTE_MAC_FROM_VTEP;
	wctx.r_vtep_ip = vtep_ip;
	wctx.json = json_mac;
	hash_iterate(zevpn->mac_table, zebra_evpn_print_mac_hash, &wctx);

	if (use_json) {
		/* wctx.count is the number of MACs that matched the walk;
		 * attach the nested object only when non-empty.
		 */
		json_object_int_add(json, "numMacs", wctx.count);
		if (wctx.count)
			json_object_object_add(json, "macs", json_mac);
		vty_json(vty, json);
	}
}


/*
 * Display VNI information (VTY command handler).
 *
 * use_json flag indicates that output should be in JSON format.
 * json_array is non NULL when JSON output needs to be aggregated (by the
 * caller) and then printed, otherwise, JSON evpn vni info is printed
 * right away.
 */
void zebra_vxlan_print_vni(struct vty *vty, struct zebra_vrf *zvrf, vni_t vni,
			   bool use_json, json_object *json_array)
{
	json_object *json = NULL;
	void *args[2];
	struct zebra_l3vni *zl3vni = NULL;
	struct zebra_evpn *zevpn = NULL;

	if (!is_evpn_enabled())
		return;

	if (use_json)
		json = json_object_new_object();

	args[0] = vty;
	args[1] = json;

	/* A VNI is either an L3-VNI or an L2-VNI (EVPN); try L3 first. */
	zl3vni = zl3vni_lookup(vni);
	if (zl3vni) {
		zl3vni_print(zl3vni, (void *)args);
	} else {
		zevpn = zebra_evpn_lookup(vni);
		if (zevpn)
			zebra_evpn_print(zevpn, (void *)args);
		else if (!json)
			vty_out(vty, "%% VNI %u does not exist\n", vni);
	}

	if (use_json) {
		/*
		 * Each "json" object contains info about 1 VNI.
		 * When "json_array" is non-null, we aggreggate the json output
		 * into json_array and print it as a JSON array.
		 */
		if (json_array)
			json_object_array_add(json_array, json);
		else
			vty_json(vty, json);
	}
}

/* Display all global details for EVPN */
void zebra_vxlan_print_evpn(struct vty *vty, bool uj)
{
	int num_l2vnis = 0;
	int num_l3vnis = 0;
	int num_vnis = 0;
	json_object *json = NULL;
	struct zebra_vrf *zvrf = NULL;

	if (!is_evpn_enabled())
		return;

	/* Global EVPN state lives in the EVPN VRF. */
	zvrf = zebra_vrf_get_evpn();

	num_l3vnis = hashcount(zrouter.l3vni_table);
	num_l2vnis = hashcount(zvrf->evpn_table);
	num_vnis = num_l2vnis + num_l3vnis;

	if (uj) {
		json = json_object_new_object();
		json_object_string_add(json, "advertiseGatewayMacip",
				       zvrf->advertise_gw_macip ?
				       "Yes" : "No");
		json_object_int_add(json, "numVnis", num_vnis);
		json_object_int_add(json, "numL2Vnis", num_l2vnis);
		json_object_int_add(json, "numL3Vnis", num_l3vnis);
		if (zebra_evpn_do_dup_addr_detect(zvrf))
			json_object_boolean_true_add(json,
						     "isDuplicateAddrDetection");
		else
			json_object_boolean_false_add(json,
						      "isDuplicateAddrDetection");
		json_object_int_add(json, "maxMoves", zvrf->dad_max_moves);
		json_object_int_add(json, "detectionTime", zvrf->dad_time);
		json_object_int_add(json, "detectionFreezeTime",
				    zvrf->dad_freeze_time);
		zebra_evpn_mh_json(json);
	} else {
		vty_out(vty, "L2 VNIs: %u\n", num_l2vnis);
		vty_out(vty, "L3 VNIs: %u\n", num_l3vnis);
		vty_out(vty, "Advertise gateway mac-ip: %s\n",
			zvrf->advertise_gw_macip ? "Yes" : "No");
		vty_out(vty, "Advertise svi mac-ip: %s\n",
			zvrf->advertise_svi_macip ? "Yes" : "No");
		vty_out(vty, "Advertise svi mac: %s\n",
			zebra_evpn_mh_do_adv_svi_mac() ? "Yes" : "No");
		vty_out(vty, "Duplicate address detection: %s\n",
			zebra_evpn_do_dup_addr_detect(zvrf) ? "Enable"
							    : "Disable");
		vty_out(vty, "  Detection max-moves %u, time %d\n",
			zvrf->dad_max_moves, zvrf->dad_time);
		if (zvrf->dad_freeze) {
			if (zvrf->dad_freeze_time)
				vty_out(vty, "  Detection freeze %u\n",
					zvrf->dad_freeze_time);
			else
				vty_out(vty, "  Detection freeze %s\n",
					"permanent");
		}
		zebra_evpn_mh_print(vty);
	}

	if (uj)
		vty_json(vty, json);
}

/*
 * Display VNI hash table (VTY command handler).
 */
void zebra_vxlan_print_vnis(struct vty *vty, struct zebra_vrf *zvrf,
			    bool use_json)
{
	json_object *json = NULL;
	void *args[2];

	if (!is_evpn_enabled())
		return;

	if (use_json)
		json = json_object_new_object();
	else
		vty_out(vty, "%-10s %-4s %-21s %-8s %-8s %-15s %-37s\n", "VNI",
			"Type", "VxLAN IF", "# MACs", "# ARPs",
			"# Remote VTEPs", "Tenant VRF");

	args[0] = vty;
	args[1] = json;

	/* Display all L2-VNIs */
	hash_iterate(
		zvrf->evpn_table,
		(void (*)(struct hash_bucket *, void *))zebra_evpn_print_hash,
		args);

	/* Display all L3-VNIs */
	hash_iterate(zrouter.l3vni_table,
		     (void (*)(struct hash_bucket *, void *))zl3vni_print_hash,
		     args);

	if (use_json)
		vty_json(vty, json);
}

/*
 * ZAPI handler: update this VRF's duplicate-address-detection (DAD)
 * configuration from the client's message, clearing all previously
 * detected duplicates when detection transitions enabled -> disabled.
 */
void zebra_vxlan_dup_addr_detection(ZAPI_HANDLER_ARGS)
{
	struct stream *s;
	int time = 0;
	uint32_t max_moves = 0;
	uint32_t freeze_time = 0;
	bool dup_addr_detect = false;
	bool freeze = false;
	bool old_addr_detect;

	/* Field order here must match the sender's encoding. */
	s = msg;
	STREAM_GETL(s, dup_addr_detect);
	STREAM_GETL(s, time);
	STREAM_GETL(s, max_moves);
	STREAM_GETL(s, freeze);
	STREAM_GETL(s, freeze_time);

	old_addr_detect = zebra_evpn_do_dup_addr_detect(zvrf);
	zvrf->dup_addr_detect = dup_addr_detect;
	/* Re-evaluate: the effective setting may depend on more than the
	 * flag just stored (as computed by zebra_evpn_do_dup_addr_detect).
	 */
	dup_addr_detect = zebra_evpn_do_dup_addr_detect(zvrf);

	/* DAD previous state was enabled, and new state is disable,
	 * clear all duplicate detected addresses.
	 */
	if (old_addr_detect && !dup_addr_detect)
		zebra_vxlan_clear_dup_detect_vni_all(zvrf);

	zvrf->dad_time = time;
	zvrf->dad_max_moves = max_moves;
	zvrf->dad_freeze = freeze;
	zvrf->dad_freeze_time = freeze_time;

	if (IS_ZEBRA_DEBUG_VXLAN)
		zlog_debug(
			"VRF %s duplicate detect %s max_moves %u timeout %u freeze %s freeze_time %u",
			vrf_id_to_name(zvrf->vrf->vrf_id),
			dup_addr_detect ? "enable" : "disable",
			zvrf->dad_max_moves, zvrf->dad_time,
			zvrf->dad_freeze ? "enable" : "disable",
			zvrf->dad_freeze_time);

stream_failure:
	return;
}

/*
 * Display VNI hash table in detail(VTY command handler).
 */
void zebra_vxlan_print_vnis_detail(struct vty *vty, struct zebra_vrf *zvrf,
				   bool use_json)
{
	json_object *json_array = NULL;
	struct zebra_ns *zns = NULL;
	struct zebra_evpn_show zes;

	if (!is_evpn_enabled())
		return;

	zns = zebra_ns_lookup(NS_DEFAULT);
	if (!zns)
		return;

	if (use_json)
		json_array = json_object_new_array();

	zes.vty = vty;
	zes.json = json_array;
	zes.zvrf = zvrf;
	zes.use_json = use_json;

	/* Display all L2-VNIs */
	hash_iterate(zvrf->evpn_table,
		     (void (*)(struct hash_bucket *,
			       void *))zebra_evpn_print_hash_detail,
		     &zes);

	/* Display all L3-VNIs */
	hash_iterate(zrouter.l3vni_table,
		     (void (*)(struct hash_bucket *,
			       void *))zl3vni_print_hash_detail,
		     &zes);

	if (use_json)
		vty_json(vty, json_array);
}

/*
 * Handle neighbor delete notification from the kernel (on a VLAN device
 * / L3 interface). This may result in either the neighbor getting deleted
 * from our database or being re-added to the kernel (if it is a valid
 * remote neighbor).
 */
int zebra_vxlan_handle_kernel_neigh_del(struct interface *ifp,
					struct interface *link_if,
					struct ipaddr *ip)
{
	struct zebra_evpn *zevpn = NULL;
	struct zebra_l3vni *zl3vni = NULL;

	/* check if this is a remote neigh entry corresponding to remote
	 * next-hop
	 */
	zl3vni = zl3vni_from_svi(ifp, link_if);
	if (zl3vni)
		return zl3vni_local_nh_del(zl3vni, ip);

	/* We are only interested in neighbors on an SVI that resides on top
	 * of a VxLAN bridge.
	 */
	zevpn = zebra_evpn_from_svi(ifp, link_if);
	if (!zevpn) {
		if (IS_ZEBRA_DEBUG_VXLAN)
			zlog_debug(
				"%s: Del neighbor %pIA EVPN is not present for interface %s",
				__func__, ip, ifp->name);
		return 0;
	}

	if (!zevpn->vxlan_if) {
		zlog_debug(
			"VNI %u hash %p doesn't have intf upon local neighbor DEL",
			zevpn->vni, zevpn);
		return -1;
	}

	if (IS_ZEBRA_DEBUG_VXLAN)
		zlog_debug("Del neighbor %pIA intf %s(%u) -> L2-VNI %u",
			   ip, ifp->name, ifp->ifindex, zevpn->vni);

	return zebra_evpn_neigh_del_ip(zevpn, ip);
}

/*
 * Handle neighbor add or update notification from the kernel (on a VLAN
 * device / L3 interface). This is typically for a local neighbor but can
 * also be for a remote neighbor (e.g., ageout notification). It could
 * also be a "move" scenario.
 */
int zebra_vxlan_handle_kernel_neigh_update(struct interface *ifp,
					   struct interface *link_if,
					   struct ipaddr *ip,
					   struct ethaddr *macaddr,
					   uint16_t state,
					   bool is_ext,
					   bool is_router,
					   bool local_inactive, bool dp_static)
{
	struct zebra_evpn *zevpn = NULL;
	struct zebra_l3vni *zl3vni = NULL;

	/* check if this is a remote neigh entry corresponding to remote
	 * next-hop
	 */
	zl3vni = zl3vni_from_svi(ifp, link_if);
	if (zl3vni)
		return zl3vni_local_nh_add_update(zl3vni, ip, state);

	/* We are only interested in neighbors on an SVI that resides on top
	 * of a VxLAN bridge.
	 */
	zevpn = zebra_evpn_from_svi(ifp, link_if);
	if (!zevpn)
		return 0;

	if (IS_ZEBRA_DEBUG_VXLAN || IS_ZEBRA_DEBUG_EVPN_MH_NEIGH)
		zlog_debug(
			"Add/Update neighbor %pIA MAC %pEA intf %s(%u) state 0x%x %s%s%s%s-> L2-VNI %u",
			ip, macaddr, ifp->name,
			ifp->ifindex, state, is_ext ? "ext-learned " : "",
			is_router ? "router " : "",
			local_inactive ? "local_inactive " : "",
			dp_static ? "peer_sync " : "", zevpn->vni);

	/* Is this about a local neighbor or a remote one?
	 */
	if (!is_ext)
		return zebra_evpn_local_neigh_update(zevpn, ifp, ip, macaddr,
						     is_router, local_inactive,
						     dp_static);

	return zebra_evpn_remote_neigh_update(zevpn, ifp, ip, macaddr, state);
}

/*
 * Decode one remote MACIP entry from the ZAPI stream. Returns the number
 * of bytes consumed, or -1 on a malformed message. The flags/seq/esi
 * fields are only present (and only read) for "add" messages.
 */
static int32_t
zebra_vxlan_remote_macip_helper(bool add, struct stream *s, vni_t *vni,
				struct ethaddr *macaddr, uint16_t *ipa_len,
				struct ipaddr *ip, struct in_addr *vtep_ip,
				uint8_t *flags, uint32_t *seq, esi_t *esi)
{
	uint16_t l = 0;

	/*
	 * Obtain each remote MACIP and process.
	 * Message contains VNI, followed by MAC followed by IP (if any)
	 * followed by remote VTEP IP.
	 */
	memset(ip, 0, sizeof(*ip));
	STREAM_GETL(s, *vni);
	STREAM_GET(macaddr->octet, s, ETH_ALEN);
	STREAM_GETW(s, *ipa_len);

	if (*ipa_len) {
		/* The address family is inferred from the encoded length. */
		if (*ipa_len == IPV4_MAX_BYTELEN)
			ip->ipa_type = IPADDR_V4;
		else if (*ipa_len == IPV6_MAX_BYTELEN)
			ip->ipa_type = IPADDR_V6;
		else {
			if (IS_ZEBRA_DEBUG_VXLAN)
				zlog_debug(
					"ipa_len *must* be %d or %d bytes in length not %d",
					IPV4_MAX_BYTELEN, IPV6_MAX_BYTELEN,
					*ipa_len);
			goto stream_failure;
		}

		STREAM_GET(&ip->ip.addr, s, *ipa_len);
	}
	/* 4 (VNI) + MAC + 2+2 (ipa_len word padded to 4) + address bytes. */
	l += 4 + ETH_ALEN + 4 + *ipa_len;
	STREAM_GET(&vtep_ip->s_addr, s, IPV4_MAX_BYTELEN);
	l += IPV4_MAX_BYTELEN;

	if (add) {
		STREAM_GETC(s, *flags);
		STREAM_GETL(s, *seq);
		l += 5;
		STREAM_GET(esi, s, sizeof(esi_t));
		l += sizeof(esi_t);
	}

	return l;

stream_failure:
	return -1;
}

/*
 * Handle message from client to delete a remote MACIP for a VNI.
 */
void zebra_vxlan_remote_macip_del(ZAPI_HANDLER_ARGS)
{
	struct stream *s;
	vni_t vni;
	struct ethaddr macaddr;
	struct ipaddr ip;
	struct in_addr vtep_ip;
	uint16_t l = 0, ipa_len;
	char buf1[INET6_ADDRSTRLEN];

	s = msg;

	/* The message may batch multiple MACIP entries; decode until the
	 * declared header length is consumed.
	 */
	while (l < hdr->length) {
		int res_length = zebra_vxlan_remote_macip_helper(
			false, s, &vni, &macaddr, &ipa_len, &ip, &vtep_ip, NULL,
			NULL, NULL);

		if (res_length == -1)
			goto stream_failure;

		l += res_length;
		if (IS_ZEBRA_DEBUG_VXLAN)
			zlog_debug(
				"Recv MACIP DEL VNI %u MAC %pEA%s%s Remote VTEP %pI4 from %s",
				vni, &macaddr,
				ipa_len ? " IP " : "",
				ipa_len ?
				ipaddr2str(&ip, buf1, sizeof(buf1)) : "",
				&vtep_ip, zebra_route_string(client->proto));

		/* Enqueue to workqueue for processing */
		zebra_rib_queue_evpn_rem_macip_del(vni, &macaddr, &ip, vtep_ip);
	}

stream_failure:
	return;
}

/*
 * Handle message from client to add a remote MACIP for a VNI. This
 * could be just the add of a MAC address or the add of a neighbor
 * (IP+MAC).
 */
void zebra_vxlan_remote_macip_add(ZAPI_HANDLER_ARGS)
{
	struct stream *s;
	vni_t vni;
	struct ethaddr macaddr;
	struct ipaddr ip;
	struct in_addr vtep_ip;
	uint16_t l = 0, ipa_len;
	uint8_t flags = 0;
	uint32_t seq;
	char buf1[INET6_ADDRSTRLEN];
	esi_t esi;
	char esi_buf[ESI_STR_LEN];

	if (!EVPN_ENABLED(zvrf)) {
		zlog_debug("EVPN not enabled, ignoring remote MACIP ADD");
		return;
	}

	s = msg;

	/* The message may batch multiple MACIP entries. */
	while (l < hdr->length) {

		int res_length = zebra_vxlan_remote_macip_helper(
			true, s, &vni, &macaddr, &ipa_len, &ip, &vtep_ip,
			&flags, &seq, &esi);

		if (res_length == -1)
			goto stream_failure;

		l += res_length;
		if (IS_ZEBRA_DEBUG_VXLAN) {
			/* An all-zero ESI prints as "-". */
			if (memcmp(&esi, zero_esi, sizeof(esi_t)))
				esi_to_str(&esi, esi_buf, sizeof(esi_buf));
			else
				strlcpy(esi_buf, "-", ESI_STR_LEN);
			zlog_debug(
				"Recv %sMACIP ADD VNI %u MAC %pEA%s%s flags 0x%x seq %u VTEP %pI4 ESI %s from %s",
				(flags & ZEBRA_MACIP_TYPE_SYNC_PATH) ?
				"sync-" : "",
				vni, &macaddr,
				ipa_len ? " IP " : "",
				ipa_len ?
				ipaddr2str(&ip, buf1, sizeof(buf1)) : "",
				flags, seq, &vtep_ip, esi_buf,
				zebra_route_string(client->proto));
		}

		/* Enqueue to workqueue for processing */
		zebra_rib_queue_evpn_rem_macip_add(vni, &macaddr, &ip, flags,
						   seq, vtep_ip, &esi);
	}

stream_failure:
	return;
}

/*
 * Handle remote vtep delete by kernel; re-add the vtep if we have it
 */
int zebra_vxlan_check_readd_vtep(struct interface *ifp,
				 struct in_addr vtep_ip)
{
	struct zebra_if *zif;
	struct zebra_vrf *zvrf = NULL;
	struct zebra_l2info_vxlan *vxl;
	vni_t vni;
	struct zebra_evpn *zevpn = NULL;
	struct zebra_vtep *zvtep = NULL;

	zif = ifp->info;
	assert(zif);
	vxl = &zif->l2info.vxl;
	vni = vxl->vni;

	/* If EVPN is not enabled, nothing to do. */
	if (!is_evpn_enabled())
		return 0;

	/* Locate VRF corresponding to interface. */
	zvrf = ifp->vrf->info;
	if (!zvrf)
		return -1;

	/* Locate hash entry; it is expected to exist. */
	zevpn = zebra_evpn_lookup(vni);
	if (!zevpn)
		return 0;

	/* If the remote vtep entry doesn't exists nothing to do */
	zvtep = zebra_evpn_vtep_find(zevpn, &vtep_ip);
	if (!zvtep)
		return 0;

	if (IS_ZEBRA_DEBUG_VXLAN)
		zlog_debug(
			"Del MAC for remote VTEP %pI4 intf %s(%u) VNI %u - readd",
			&vtep_ip, ifp->name, ifp->ifindex, vni);

	/* We still know this VTEP, so push it back to the kernel. */
	zebra_evpn_vtep_install(zevpn, zvtep);
	return 0;
}

/*
 * Handle notification of MAC add/update over VxLAN. If the kernel is notifying
 * us, this must involve a multihoming scenario. Treat this as implicit delete
 * of any prior local MAC.
 */
static int zebra_vxlan_check_del_local_mac(struct interface *ifp,
					   struct interface *br_if,
					   struct ethaddr *macaddr,
					   vlanid_t vid)
{
	struct zebra_if *zif;
	struct zebra_l2info_vxlan *vxl;
	vni_t vni;
	struct zebra_evpn *zevpn;
	struct zebra_mac *mac;

	zif = ifp->info;
	assert(zif);
	vxl = &zif->l2info.vxl;
	vni = vxl->vni;

	/* Check if EVPN is enabled. */
	if (!is_evpn_enabled())
		return 0;

	/* Locate hash entry; it is expected to exist. */
	zevpn = zebra_evpn_lookup(vni);
	if (!zevpn)
		return 0;

	/* If entry doesn't exist, nothing to do. */
	mac = zebra_evpn_mac_lookup(zevpn, macaddr);
	if (!mac)
		return 0;

	/* Is it a local entry? */
	if (!CHECK_FLAG(mac->flags, ZEBRA_MAC_LOCAL))
		return 0;

	if (IS_ZEBRA_DEBUG_VXLAN)
		zlog_debug(
			"Add/update remote MAC %pEA intf %s(%u) VNI %u flags 0x%x - del local",
			macaddr, ifp->name, ifp->ifindex, vni, mac->flags);

	/* Remove MAC from BGP. */
	zebra_evpn_mac_send_del_to_client(zevpn->vni, macaddr, mac->flags,
					  false /* force */);

	/*
	 * If there are no neigh associated with the mac delete the mac
	 * else mark it as AUTO for forward reference
	 */
	if (!listcount(mac->neigh_list)) {
		zebra_evpn_mac_del(zevpn, mac);
	} else {
		zebra_evpn_mac_clear_fwd_info(mac);
		UNSET_FLAG(mac->flags, ZEBRA_MAC_ALL_LOCAL_FLAGS);
		UNSET_FLAG(mac->flags, ZEBRA_MAC_STICKY);
		SET_FLAG(mac->flags, ZEBRA_MAC_AUTO);
	}

	return 0;
}

/* MAC notification from the dataplane with a network dest port -
 * 1. This can be a local MAC on a down ES (if fast-failover is not possible
 * 2. Or it can be a remote MAC
 */
int zebra_vxlan_dp_network_mac_add(struct interface *ifp,
				   struct interface *br_if,
				   struct ethaddr *macaddr, vlanid_t vid,
				   uint32_t nhg_id, bool sticky, bool dp_static)
{
	struct zebra_evpn_es *es;
	struct interface *acc_ifp;

	/* If netlink message is with vid, it will have no nexthop.
	 * So skip it.
	 */
	if (vid) {
		if (IS_ZEBRA_DEBUG_VXLAN || IS_ZEBRA_DEBUG_EVPN_MH_MAC)
			zlog_debug("dpAdd MAC %pEA VID %u - ignore as no nhid",
				   macaddr, vid);
		return 0;
	}

	/* Get vxlan's vid for netlink message has no it. */
	vid = ((struct zebra_if *)ifp->info)->l2info.vxl.access_vlan;

	/* if remote mac delete the local entry */
	if (!nhg_id || !zebra_evpn_nhg_is_local_es(nhg_id, &es)
	    || !zebra_evpn_es_local_mac_via_network_port(es)) {
		if (IS_ZEBRA_DEBUG_VXLAN || IS_ZEBRA_DEBUG_EVPN_MH_MAC)
			zlog_debug("dpAdd remote MAC %pEA VID %u", macaddr,
				   vid);
		return zebra_vxlan_check_del_local_mac(ifp, br_if, macaddr,
						       vid);
	}

	/* If local MAC on a down local ES translate the network-mac-add
	 * to a local-active-mac-add
	 */
	if (IS_ZEBRA_DEBUG_VXLAN || IS_ZEBRA_DEBUG_EVPN_MH_MAC)
		zlog_debug("dpAdd local-nw-MAC %pEA VID %u", macaddr, vid);
	/* Re-enter through the access-port path of the ES. */
	acc_ifp = es->zif->ifp;
	return zebra_vxlan_local_mac_add_update(
		acc_ifp, br_if, macaddr, vid, sticky,
		false /* local_inactive */, dp_static);
}

/*
 * Handle network MAC delete by kernel -
 * 1. readd the remote MAC if we have it
 * 2. local MAC with does ES may also need to be re-installed
 */
int zebra_vxlan_dp_network_mac_del(struct interface *ifp,
				   struct interface *br_if,
				   struct ethaddr *macaddr, vlanid_t vid)
{
	struct zebra_if *zif = NULL;
	struct zebra_l2info_vxlan *vxl = NULL;
	vni_t vni;
	struct zebra_evpn *zevpn = NULL;
	struct zebra_l3vni *zl3vni = NULL;
	struct zebra_mac *mac = NULL;

	zif = ifp->info;
	assert(zif);
	vxl = &zif->l2info.vxl;
	vni = vxl->vni;

	/* Check if EVPN is enabled. */
	if (!is_evpn_enabled())
		return 0;

	/* check if this is a remote RMAC and readd simillar to remote macs */
	zl3vni = zl3vni_lookup(vni);
	if (zl3vni)
		return zebra_vxlan_readd_remote_rmac(zl3vni, macaddr);

	/* Locate hash entry; it is expected to exist. */
	zevpn = zebra_evpn_lookup(vni);
	if (!zevpn)
		return 0;

	/* If entry doesn't exist, nothing to do.
	 */
	mac = zebra_evpn_mac_lookup(zevpn, macaddr);
	if (!mac)
		return 0;

	if (CHECK_FLAG(mac->flags, ZEBRA_MAC_REMOTE)) {
		/* If remote entry simply re-install */
		if (IS_ZEBRA_DEBUG_VXLAN || IS_ZEBRA_DEBUG_EVPN_MH_MAC)
			zlog_debug(
				"dpDel remote MAC %pEA intf %s(%u) VNI %u - readd",
				macaddr, ifp->name, ifp->ifindex, vni);
		zebra_evpn_rem_mac_install(zevpn, mac, false /* was_static */);
	} else if (CHECK_FLAG(mac->flags, ZEBRA_MAC_LOCAL) && mac->es
		   && zebra_evpn_es_local_mac_via_network_port(mac->es)) {
		/* If local entry via nw-port call local-del which will
		 * re-install entry in the dataplane is needed
		 */
		if (IS_ZEBRA_DEBUG_VXLAN || IS_ZEBRA_DEBUG_EVPN_MH_MAC)
			zlog_debug("dpDel local-nw-MAC %pEA VNI %u", macaddr,
				   vni);

		zebra_evpn_del_local_mac(zevpn, mac, false);
	}

	return 0;
}

/*
 * Handle local MAC delete (on a port or VLAN corresponding to this VNI).
 */
int zebra_vxlan_local_mac_del(struct interface *ifp, struct interface *br_if,
			      struct ethaddr *macaddr, vlanid_t vid)
{
	struct zebra_evpn *zevpn;
	struct zebra_mac *mac;

	/* We are interested in MACs only on ports or (port, VLAN) that
	 * map to a VNI.
	 */
	zevpn = zebra_evpn_map_vlan(ifp, br_if, vid);
	if (!zevpn)
		return 0;
	if (!zevpn->vxlan_if) {
		zlog_debug(
			"VNI %u hash %p doesn't have intf upon local MAC DEL",
			zevpn->vni, zevpn);
		return -1;
	}

	/* If entry doesn't exist, nothing to do. */
	mac = zebra_evpn_mac_lookup(zevpn, macaddr);
	if (!mac)
		return 0;

	/* Is it a local entry? */
	if (!CHECK_FLAG(mac->flags, ZEBRA_MAC_LOCAL))
		return 0;

	return zebra_evpn_del_local_mac(zevpn, mac, false);
}

/*
 * Handle local MAC add (on a port or VLAN corresponding to this VNI).
 */
int zebra_vxlan_local_mac_add_update(struct interface *ifp,
				     struct interface *br_if,
				     struct ethaddr *macaddr, vlanid_t vid,
				     bool sticky, bool local_inactive,
				     bool dp_static)
{
	struct zebra_evpn *zevpn;
	struct zebra_vrf *zvrf;

	assert(ifp);

	/* We are interested in MACs only on ports or (port, VLAN) that
	 * map to an EVPN.
	 */
	zevpn = zebra_evpn_map_vlan(ifp, br_if, vid);
	if (!zevpn) {
		if (IS_ZEBRA_DEBUG_VXLAN)
			zlog_debug(
				" Add/Update %sMAC %pEA intf %s(%u) VID %u, could not find EVPN",
				sticky ? "sticky " : "", macaddr,
				ifp->name, ifp->ifindex, vid);
		return 0;
	}

	if (!zevpn->vxlan_if) {
		if (IS_ZEBRA_DEBUG_VXLAN)
			zlog_debug(
				" VNI %u hash %p doesn't have intf upon local MAC ADD",
				zevpn->vni, zevpn);
		return -1;
	}

	/* Delegate the actual add/update to the common EVPN MAC code. */
	zvrf = zebra_vrf_get_evpn();
	return zebra_evpn_add_update_local_mac(zvrf, zevpn, ifp, macaddr, vid,
					       sticky, local_inactive,
					       dp_static, NULL);
}

/*
 * Handle message from client to delete a remote VTEP for an EVPN.
 */
void zebra_vxlan_remote_vtep_del_zapi(ZAPI_HANDLER_ARGS)
{
	struct stream *s;
	unsigned short l = 0;
	vni_t vni;
	struct in_addr vtep_ip;

	if (!is_evpn_enabled()) {
		zlog_debug(
			"%s: EVPN is not enabled yet we have received a VTEP DEL msg",
			__func__);
		return;
	}

	if (!EVPN_ENABLED(zvrf)) {
		zlog_debug("Recv VTEP DEL zapi for non-EVPN VRF %u",
			   zvrf_id(zvrf));
		return;
	}

	s = msg;

	/* The message may carry multiple (VNI, VTEP-IP, flood) tuples;
	 * 'l' tracks how many bytes have been consumed. */
	while (l < hdr->length) {
		int flood_control __attribute__((unused));

		/* Obtain each remote VTEP and process. */
		STREAM_GETL(s, vni);
		l += 4;
		STREAM_GET(&vtep_ip.s_addr, s, IPV4_MAX_BYTELEN);
		l += IPV4_MAX_BYTELEN;

		/* Flood control is intentionally ignored right now */
		STREAM_GETL(s, flood_control);
		l += 4;

		if (IS_ZEBRA_DEBUG_VXLAN)
			zlog_debug("Recv VTEP DEL %pI4 VNI %u from %s",
				   &vtep_ip, vni,
				   zebra_route_string(client->proto));

		/* Enqueue for processing */
		zebra_rib_queue_evpn_rem_vtep_del(zvrf_id(zvrf), vni, vtep_ip);
	}

stream_failure:
	return;
}

/*
 * Handle message from client to delete a remote VTEP for an EVPN.
 */
void zebra_vxlan_remote_vtep_del(vrf_id_t vrf_id, vni_t vni,
				 struct in_addr vtep_ip)
{
	struct zebra_evpn *zevpn;
	struct zebra_vtep *zvtep;
	struct interface *ifp;
	struct zebra_if *zif;
	struct zebra_vrf *zvrf;

	if (!is_evpn_enabled()) {
		zlog_debug("%s: Can't process vtep del: EVPN is not enabled",
			   __func__);
		return;
	}

	zvrf = zebra_vrf_lookup_by_id(vrf_id);
	if (!zvrf)
		return;

	if (!EVPN_ENABLED(zvrf)) {
		zlog_debug("Can't process VTEP DEL for non-EVPN VRF %u",
			   zvrf_id(zvrf));
		return;
	}

	/* Locate VNI hash entry - expected to exist. */
	zevpn = zebra_evpn_lookup(vni);
	if (!zevpn) {
		if (IS_ZEBRA_DEBUG_VXLAN)
			zlog_debug(
				"Failed to locate VNI hash for remote VTEP DEL, VNI %u",
				vni);
		return;
	}

	ifp = zevpn->vxlan_if;
	if (!ifp) {
		zlog_debug(
			"VNI %u hash %p doesn't have intf upon remote VTEP DEL",
			zevpn->vni, zevpn);
		return;
	}
	zif = ifp->info;

	/* If down or not mapped to a bridge, we're done. */
	if (!if_is_operative(ifp) || !zif->brslave_info.br_if)
		return;

	/* If the remote VTEP does not exist, there's nothing more to
	 * do.
	 * Otherwise, uninstall any remote MACs pointing to this VTEP
	 * and then, the VTEP entry itself and remove it.
	 */
	zvtep = zebra_evpn_vtep_find(zevpn, &vtep_ip);
	if (!zvtep)
		return;

	zebra_evpn_vtep_uninstall(zevpn, &vtep_ip);
	zebra_evpn_vtep_del(zevpn, zvtep);
}

/*
 * Handle message from client to add a remote VTEP for an EVPN.
 */
void zebra_vxlan_remote_vtep_add(vrf_id_t vrf_id, vni_t vni,
				 struct in_addr vtep_ip, int flood_control)
{
	struct zebra_evpn *zevpn;
	struct interface *ifp;
	struct zebra_if *zif;
	struct zebra_vtep *zvtep;
	struct zebra_vrf *zvrf;

	if (!is_evpn_enabled()) {
		zlog_debug("%s: EVPN not enabled: can't process a VTEP ADD",
			   __func__);
		return;
	}

	zvrf = zebra_vrf_lookup_by_id(vrf_id);
	if (!zvrf)
		return;

	if (!EVPN_ENABLED(zvrf)) {
		zlog_debug("Can't process VTEP ADD for non-EVPN VRF %u",
			   zvrf_id(zvrf));
		return;
	}

	/* Locate VNI hash entry - expected to exist. */
	zevpn = zebra_evpn_lookup(vni);
	if (!zevpn) {
		flog_err(
			EC_ZEBRA_VTEP_ADD_FAILED,
			"Failed to locate EVPN hash upon remote VTEP ADD, VNI %u",
			vni);
		return;
	}

	ifp = zevpn->vxlan_if;
	if (!ifp) {
		flog_err(
			EC_ZEBRA_VTEP_ADD_FAILED,
			"VNI %u hash %p doesn't have intf upon remote VTEP ADD",
			zevpn->vni, zevpn);
		return;
	}

	zif = ifp->info;

	/* If down or not mapped to a bridge, we're done. */
	if (!if_is_operative(ifp) || !zif->brslave_info.br_if)
		return;

	zvtep = zebra_evpn_vtep_find(zevpn, &vtep_ip);
	if (zvtep) {
		/* If the remote VTEP already exists check if
		 * the flood mode has changed
		 */
		if (zvtep->flood_control != flood_control) {
			if (zvtep->flood_control == VXLAN_FLOOD_DISABLED)
				/* old mode was head-end-replication but
				 * is no longer; get rid of the HER fdb
				 * entry installed before
				 */
				zebra_evpn_vtep_uninstall(zevpn, &vtep_ip);
			zvtep->flood_control = flood_control;
			zebra_evpn_vtep_install(zevpn, zvtep);
		}
	} else {
		zvtep = zebra_evpn_vtep_add(zevpn, &vtep_ip, flood_control);
		if (zvtep)
			zebra_evpn_vtep_install(zevpn, zvtep);
		else
			flog_err(EC_ZEBRA_VTEP_ADD_FAILED,
				 "Failed to add remote VTEP, VNI %u zevpn %p",
				 vni, zevpn);
	}
}

/*
 * Handle message from client to add a remote VTEP for an EVPN.
 */
void zebra_vxlan_remote_vtep_add_zapi(ZAPI_HANDLER_ARGS)
{
	struct stream *s;
	unsigned short l = 0;
	vni_t vni;
	struct in_addr vtep_ip;
	int flood_control;

	if (!is_evpn_enabled()) {
		zlog_debug(
			"%s: EVPN not enabled yet we received a VTEP ADD zapi msg",
			__func__);
		return;
	}

	if (!EVPN_ENABLED(zvrf)) {
		zlog_debug("Recv VTEP ADD zapi for non-EVPN VRF %u",
			   zvrf_id(zvrf));
		return;
	}

	s = msg;

	/* The message may carry multiple tuples; 'l' tracks consumed bytes. */
	while (l < hdr->length) {
		/* Obtain each remote VTEP and process. */
		STREAM_GETL(s, vni);
		l += 4;
		STREAM_GET(&vtep_ip.s_addr, s, IPV4_MAX_BYTELEN);
		STREAM_GETL(s, flood_control);
		l += IPV4_MAX_BYTELEN + 4;

		if (IS_ZEBRA_DEBUG_VXLAN)
			zlog_debug("Recv VTEP ADD %pI4 VNI %u flood %d from %s",
				   &vtep_ip, vni, flood_control,
				   zebra_route_string(client->proto));

		/* Enqueue for processing */
		zebra_rib_queue_evpn_rem_vtep_add(zvrf_id(zvrf), vni, vtep_ip,
						  flood_control);
	}

stream_failure:
	return;
}

/*
 * Add/Del gateway macip to evpn
 * g/w can be:
 * 1. SVI interface on a vlan aware bridge
 * 2. SVI interface on a vlan unaware bridge
 * 3.
 *    vrr interface (MACVLAN) associated to a SVI
 * We advertise macip routes for an interface if it is associated to VxLan vlan
 */
int zebra_vxlan_add_del_gw_macip(struct interface *ifp, const struct prefix *p,
				 int add)
{
	struct ipaddr ip;
	struct ethaddr macaddr;
	struct zebra_evpn *zevpn = NULL;

	memset(&ip, 0, sizeof(ip));
	memset(&macaddr, 0, sizeof(macaddr));

	/* Check if EVPN is enabled. */
	if (!is_evpn_enabled())
		return 0;

	/* Resolve the EVPN for the interface; the lookup path depends on
	 * whether this is a MACVLAN (VRR), a VLAN sub-interface (SVI on a
	 * vlan-aware bridge) or the bridge itself (vlan-unaware). */
	if (IS_ZEBRA_IF_MACVLAN(ifp)) {
		struct interface *svi_if =
			NULL; /* SVI corresponding to the MACVLAN */
		struct zebra_if *ifp_zif =
			NULL; /* Zebra daemon specific info for MACVLAN */
		struct zebra_if *svi_if_zif =
			NULL; /* Zebra daemon specific info for SVI*/

		ifp_zif = ifp->info;
		if (!ifp_zif)
			return -1;

		/*
		 * for a MACVLAN interface the link represents the svi_if
		 */
		svi_if = if_lookup_by_index_per_ns(zebra_ns_lookup(NS_DEFAULT),
						   ifp_zif->link_ifindex);
		if (!svi_if) {
			zlog_debug("MACVLAN %s(%u) without link information",
				   ifp->name, ifp->ifindex);
			return -1;
		}

		if (IS_ZEBRA_IF_VLAN(svi_if)) {
			/*
			 * If it is a vlan aware bridge then the link gives the
			 * bridge information
			 */
			struct interface *svi_if_link = NULL;

			svi_if_zif = svi_if->info;
			if (svi_if_zif) {
				svi_if_link = if_lookup_by_index_per_ns(
					zebra_ns_lookup(NS_DEFAULT),
					svi_if_zif->link_ifindex);
				zevpn = zebra_evpn_from_svi(svi_if,
							    svi_if_link);
			}
		} else if (IS_ZEBRA_IF_BRIDGE(svi_if)) {
			/*
			 * If it is a vlan unaware bridge then svi is the bridge
			 * itself
			 */
			zevpn = zebra_evpn_from_svi(svi_if, svi_if);
		}
	} else if (IS_ZEBRA_IF_VLAN(ifp)) {
		struct zebra_if *svi_if_zif =
			NULL; /* Zebra daemon specific info for SVI */
		struct interface *svi_if_link =
			NULL; /* link info for the SVI = bridge info */

		svi_if_zif = ifp->info;
		if (svi_if_zif) {
			svi_if_link = if_lookup_by_index_per_ns(
				zebra_ns_lookup(NS_DEFAULT),
				svi_if_zif->link_ifindex);
			if (svi_if_link)
				zevpn = zebra_evpn_from_svi(ifp, svi_if_link);
		}
	} else if (IS_ZEBRA_IF_BRIDGE(ifp)) {
		zevpn = zebra_evpn_from_svi(ifp, ifp);
	}

	if (!zevpn)
		return 0;

	if (!zevpn->vxlan_if) {
		zlog_debug("VNI %u hash %p doesn't have intf upon MACVLAN up",
			   zevpn->vni, zevpn);
		return -1;
	}

	/* VRR IP is advertised only if gw-macip-adv-enabled */
	if (IS_ZEBRA_IF_MACVLAN(ifp)) {
		if (!advertise_gw_macip_enabled(zevpn))
			return 0;
	} else {
		/* SVI IP is advertised if gw or svi macip-adv-enabled */
		if (!advertise_svi_macip_enabled(zevpn)
		    && !advertise_gw_macip_enabled(zevpn))
			return 0;
	}

	memcpy(&macaddr.octet, ifp->hw_addr, ETH_ALEN);

	if (p->family == AF_INET) {
		ip.ipa_type = IPADDR_V4;
		memcpy(&(ip.ipaddr_v4), &(p->u.prefix4),
		       sizeof(struct in_addr));
	} else if (p->family == AF_INET6) {
		ip.ipa_type = IPADDR_V6;
		memcpy(&(ip.ipaddr_v6), &(p->u.prefix6),
		       sizeof(struct in6_addr));
	}


	if (add)
		zebra_evpn_gw_macip_add(ifp, zevpn, &macaddr, &ip);
	else
		zebra_evpn_gw_macip_del(ifp, zevpn, &ip);

	return 0;
}

/*
 * Handle SVI interface going down.
 * SVI can be associated to either L3-VNI or L2-VNI.
 * For L2-VNI: At this point, this is a NOP since
 * the kernel deletes the neighbor entries on this SVI (if any).
 * We only need to update the vrf corresponding to zevpn.
 * For L3-VNI: L3-VNI is operationally down, update mac-ip routes and delete
 * from bgp
 */
int zebra_vxlan_svi_down(struct interface *ifp, struct interface *link_if)
{
	struct zebra_l3vni *zl3vni = NULL;

	zl3vni = zl3vni_from_svi(ifp, link_if);
	if (zl3vni) {

		/* process l3-vni down */
		zebra_vxlan_process_l3vni_oper_down(zl3vni);

		/* remove association with svi-if */
		zl3vni->svi_if = NULL;
	} else {
		struct zebra_evpn *zevpn = NULL;

		/* Unlink the SVI from the access VLAN */
		zebra_evpn_acc_bd_svi_set(ifp->info, link_if->info, false);

		/* since we dont have svi corresponding to zevpn, we associate it
		 * to default vrf.
		 * Note: the corresponding neigh entries on the
		 * SVI would have already been deleted */
		zevpn = zebra_evpn_from_svi(ifp, link_if);

		if (zevpn) {
			/* remove from l3-vni list */
			zl3vni = zl3vni_from_vrf(zevpn->vrf_id);
			if (zl3vni)
				listnode_delete(zl3vni->l2vnis, zevpn);

			zevpn->svi_if = NULL;
			zevpn->vrf_id = VRF_DEFAULT;

			/* update the tenant vrf in BGP */
			if (if_is_operative(zevpn->vxlan_if))
				zebra_evpn_send_add_to_client(zevpn);
		}
	}
	return 0;
}

/*
 * Handle SVI interface coming up.
 * SVI can be associated to L3-VNI (l3vni vxlan interface) or L2-VNI (l2-vni
 * vxlan intf).
 * For L2-VNI: we need to install any remote neighbors entried (used for
 * arp-suppression)
 * For L3-VNI: SVI will be used to get the rmac to be used with L3-VNI
 */
int zebra_vxlan_svi_up(struct interface *ifp, struct interface *link_if)
{
	struct zebra_evpn *zevpn = NULL;
	struct zebra_l3vni *zl3vni = NULL;

	zl3vni = zl3vni_from_svi(ifp, link_if);
	if (zl3vni) {

		/* associate with svi */
		zl3vni->svi_if = ifp;

		/* process oper-up */
		if (is_l3vni_oper_up(zl3vni))
			zebra_vxlan_process_l3vni_oper_up(zl3vni);
	} else {

		/* process SVI up for l2-vni */
		struct neigh_walk_ctx n_wctx;

		zevpn = zebra_evpn_from_svi(ifp, link_if);
		if (!zevpn)
			return 0;

		if (!zevpn->vxlan_if) {
			zlog_debug(
				"VNI %u hash %p doesn't have intf upon SVI up",
				zevpn->vni, zevpn);
			return -1;
		}

		if (IS_ZEBRA_DEBUG_VXLAN)
			zlog_debug(
				"SVI %s(%u) VNI %u VRF %s is UP, installing neighbors",
				ifp->name, ifp->ifindex, zevpn->vni,
				ifp->vrf->name);

		/* update the vrf information for l2-vni and inform bgp */
		zevpn->svi_if = ifp;
		zevpn->vrf_id = ifp->vrf->vrf_id;

		zl3vni = zl3vni_from_vrf(zevpn->vrf_id);
		if (zl3vni)
			listnode_add_sort_nodup(zl3vni->l2vnis, zevpn);

		if (if_is_operative(zevpn->vxlan_if))
			zebra_evpn_send_add_to_client(zevpn);

		/* Install any remote neighbors for this VNI. */
		memset(&n_wctx, 0, sizeof(n_wctx));
		n_wctx.zevpn = zevpn;
		hash_iterate(zevpn->neigh_table, zebra_evpn_install_neigh_hash,
			     &n_wctx);

		/* Link the SVI from the access VLAN */
		zebra_evpn_acc_bd_svi_set(ifp->info, link_if->info, true);

		/* Update MACIP routes created by advertise-svi-ip */
		if (advertise_svi_macip_enabled(zevpn)) {
			/* del+add forces a refresh of the SVI MAC-IP route */
			zebra_evpn_del_macip_for_intf(ifp, zevpn);
			zebra_evpn_add_macip_for_intf(ifp, zevpn);
		}
	}

	return 0;
}

/*
 * Handle MAC-VLAN interface going down.
 * L3VNI: When MAC-VLAN interface goes down,
 * find its associated SVI and update type2/type-5 routes
 * with SVI as RMAC
 */
void zebra_vxlan_macvlan_down(struct interface *ifp)
{
	struct zebra_l3vni *zl3vni = NULL;
	struct zebra_if *zif, *link_zif;
	struct interface *link_ifp, *link_if;

	zif = ifp->info;
	assert(zif);
	link_ifp = zif->link;
	if (!link_ifp) {
		if (IS_ZEBRA_DEBUG_VXLAN)
			zlog_debug(
				"macvlan parent link is not found. Parent index %d ifp %s",
				zif->link_ifindex,
				ifindex2ifname(zif->link_ifindex,
					       ifp->vrf->vrf_id));
		return;
	}
	link_zif = link_ifp->info;
	assert(link_zif);

	link_if = if_lookup_by_index_per_ns(zebra_ns_lookup(NS_DEFAULT),
					    link_zif->link_ifindex);

	zl3vni = zl3vni_from_svi(link_ifp, link_if);
	if (zl3vni) {
		zl3vni->mac_vlan_if = NULL;
		if (is_l3vni_oper_up(zl3vni))
			zebra_vxlan_process_l3vni_oper_up(zl3vni);
	}
}

/*
 * Handle MAC-VLAN interface going up.
 * L3VNI: When MAC-VLAN interface comes up,
 * find its associated SVI and update type-2 routes
 * with MAC-VLAN's MAC as RMAC and for type-5 routes
 * use SVI's MAC as RMAC.
+ */ +void zebra_vxlan_macvlan_up(struct interface *ifp) +{ + struct zebra_l3vni *zl3vni = NULL; + struct zebra_if *zif, *link_zif; + struct interface *link_ifp, *link_if; + + zif = ifp->info; + assert(zif); + link_ifp = zif->link; + link_zif = link_ifp->info; + assert(link_zif); + + link_if = if_lookup_by_index_per_ns(zebra_ns_lookup(NS_DEFAULT), + link_zif->link_ifindex); + zl3vni = zl3vni_from_svi(link_ifp, link_if); + if (zl3vni) { + /* associate with macvlan (VRR) interface */ + zl3vni->mac_vlan_if = ifp; + + /* process oper-up */ + if (is_l3vni_oper_up(zl3vni)) + zebra_vxlan_process_l3vni_oper_up(zl3vni); + } +} + +/* + * Handle VxLAN interface down + */ +int zebra_vxlan_if_down(struct interface *ifp) +{ + vni_t vni; + struct zebra_if *zif = NULL; + struct zebra_l2info_vxlan *vxl = NULL; + struct zebra_l3vni *zl3vni = NULL; + struct zebra_evpn *zevpn; + + /* Check if EVPN is enabled. */ + if (!is_evpn_enabled()) + return 0; + + zif = ifp->info; + assert(zif); + vxl = &zif->l2info.vxl; + vni = vxl->vni; + + zl3vni = zl3vni_lookup(vni); + if (zl3vni) { + /* process-if-down for l3-vni */ + if (IS_ZEBRA_DEBUG_VXLAN) + zlog_debug("Intf %s(%u) L3-VNI %u is DOWN", ifp->name, + ifp->ifindex, vni); + + zebra_vxlan_process_l3vni_oper_down(zl3vni); + } else { + /* process if-down for l2-vni */ + if (IS_ZEBRA_DEBUG_VXLAN) + zlog_debug("Intf %s(%u) L2-VNI %u is DOWN", ifp->name, + ifp->ifindex, vni); + + /* Locate hash entry; it is expected to exist. */ + zevpn = zebra_evpn_lookup(vni); + if (!zevpn) { + zlog_debug( + "Failed to locate VNI hash at DOWN, IF %s(%u) VNI %u", + ifp->name, ifp->ifindex, vni); + return -1; + } + + assert(zevpn->vxlan_if == ifp); + + /* remove from l3-vni list */ + zl3vni = zl3vni_from_vrf(zevpn->vrf_id); + if (zl3vni) + listnode_delete(zl3vni->l2vnis, zevpn); + + /* Delete this VNI from BGP. */ + zebra_evpn_send_del_to_client(zevpn); + + /* Free up all neighbors and MACs, if any. 
		 */
		zebra_evpn_neigh_del_all(zevpn, 1, 0, DEL_ALL_NEIGH);
		zebra_evpn_mac_del_all(zevpn, 1, 0, DEL_ALL_MAC);

		/* Free up all remote VTEPs, if any. */
		zebra_evpn_vtep_del_all(zevpn, 1);
	}
	return 0;
}

/*
 * Handle VxLAN interface up - update BGP if required.
 */
int zebra_vxlan_if_up(struct interface *ifp)
{
	vni_t vni;
	struct zebra_if *zif = NULL;
	struct zebra_l2info_vxlan *vxl = NULL;
	struct zebra_evpn *zevpn = NULL;
	struct zebra_l3vni *zl3vni = NULL;

	/* Check if EVPN is enabled. */
	if (!is_evpn_enabled())
		return 0;

	zif = ifp->info;
	assert(zif);
	vxl = &zif->l2info.vxl;
	vni = vxl->vni;

	zl3vni = zl3vni_lookup(vni);
	if (zl3vni) {
		/* we need to associate with SVI, if any, we can associate with
		 * svi-if only after association with vxlan-intf is complete
		 */
		zl3vni->svi_if = zl3vni_map_to_svi_if(zl3vni);
		zl3vni->mac_vlan_if = zl3vni_map_to_mac_vlan_if(zl3vni);

		if (IS_ZEBRA_DEBUG_VXLAN)
			zlog_debug("Intf %s(%u) L3-VNI %u is UP svi_if %s mac_vlan_if %s"
				   , ifp->name, ifp->ifindex, vni,
				   zl3vni->svi_if ? zl3vni->svi_if->name : "NIL",
				   zl3vni->mac_vlan_if ?
				   zl3vni->mac_vlan_if->name : "NIL");

		if (is_l3vni_oper_up(zl3vni))
			zebra_vxlan_process_l3vni_oper_up(zl3vni);
	} else {
		/* Handle L2-VNI add */
		struct interface *vlan_if = NULL;

		if (IS_ZEBRA_DEBUG_VXLAN)
			zlog_debug("Intf %s(%u) L2-VNI %u is UP", ifp->name,
				   ifp->ifindex, vni);

		/* Locate hash entry; it is expected to exist. */
		zevpn = zebra_evpn_lookup(vni);
		if (!zevpn) {
			zlog_debug(
				"Failed to locate EVPN hash at UP, IF %s(%u) VNI %u",
				ifp->name, ifp->ifindex, vni);
			return -1;
		}

		assert(zevpn->vxlan_if == ifp);
		vlan_if = zvni_map_to_svi(vxl->access_vlan,
					  zif->brslave_info.br_if);
		if (vlan_if) {
			zevpn->svi_if = vlan_if;
			zevpn->vrf_id = vlan_if->vrf->vrf_id;
			zl3vni = zl3vni_from_vrf(vlan_if->vrf->vrf_id);
			if (zl3vni)
				listnode_add_sort_nodup(zl3vni->l2vnis, zevpn);
		}

		/* If part of a bridge, inform BGP about this VNI. */
		/* Also, read and populate local MACs and neighbors. */
		if (zif->brslave_info.br_if) {
			zebra_evpn_send_add_to_client(zevpn);
			zebra_evpn_read_mac_neigh(zevpn, ifp);
		}
	}

	return 0;
}

/*
 * Handle VxLAN interface delete. Locate and remove entry in hash table
 * and update BGP, if required.
 */
int zebra_vxlan_if_del(struct interface *ifp)
{
	vni_t vni;
	struct zebra_if *zif = NULL;
	struct zebra_l2info_vxlan *vxl = NULL;
	struct zebra_evpn *zevpn = NULL;
	struct zebra_l3vni *zl3vni = NULL;

	/* Check if EVPN is enabled. */
	if (!is_evpn_enabled())
		return 0;

	zif = ifp->info;
	assert(zif);
	vxl = &zif->l2info.vxl;
	vni = vxl->vni;

	zl3vni = zl3vni_lookup(vni);
	if (zl3vni) {

		if (IS_ZEBRA_DEBUG_VXLAN)
			zlog_debug("Del L3-VNI %u intf %s(%u)", vni, ifp->name,
				   ifp->ifindex);

		/* process oper-down for l3-vni */
		zebra_vxlan_process_l3vni_oper_down(zl3vni);

		/* remove the association with vxlan_if */
		memset(&zl3vni->local_vtep_ip, 0, sizeof(struct in_addr));
		zl3vni->vxlan_if = NULL;
	} else {

		/* process if-del for l2-vni*/
		if (IS_ZEBRA_DEBUG_VXLAN)
			zlog_debug("Del L2-VNI %u intf %s(%u)", vni, ifp->name,
				   ifp->ifindex);

		/* Locate hash entry; it is expected to exist. */
		zevpn = zebra_evpn_lookup(vni);
		if (!zevpn) {
			zlog_debug(
				"Failed to locate VNI hash at del, IF %s(%u) VNI %u",
				ifp->name, ifp->ifindex, vni);
			return 0;
		}

		/* remove from l3-vni list */
		zl3vni = zl3vni_from_vrf(zevpn->vrf_id);
		if (zl3vni)
			listnode_delete(zl3vni->l2vnis, zevpn);
		/* Delete VNI from BGP. */
		zebra_evpn_send_del_to_client(zevpn);

		/* Free up all neighbors and MAC, if any. */
		zebra_evpn_neigh_del_all(zevpn, 0, 0, DEL_ALL_NEIGH);
		zebra_evpn_mac_del_all(zevpn, 0, 0, DEL_ALL_MAC);

		/* Free up all remote VTEPs, if any. */
		zebra_evpn_vtep_del_all(zevpn, 0);

		/* Delete the hash entry. */
		if (zebra_evpn_vxlan_del(zevpn)) {
			flog_err(EC_ZEBRA_VNI_DEL_FAILED,
				 "Failed to del EVPN hash %p, IF %s(%u) VNI %u",
				 zevpn, ifp->name, ifp->ifindex, zevpn->vni);
			return -1;
		}
	}
	return 0;
}

/*
 * Handle VxLAN interface update - change to tunnel IP, master or VLAN.
 */
int zebra_vxlan_if_update(struct interface *ifp, uint16_t chgflags)
{
	vni_t vni;
	struct zebra_if *zif = NULL;
	struct zebra_l2info_vxlan *vxl = NULL;
	struct zebra_evpn *zevpn = NULL;
	struct zebra_l3vni *zl3vni = NULL;
	struct interface *vlan_if = NULL;

	/* Check if EVPN is enabled. */
	if (!is_evpn_enabled())
		return 0;

	zif = ifp->info;
	assert(zif);
	vxl = &zif->l2info.vxl;
	vni = vxl->vni;

	zl3vni = zl3vni_lookup(vni);
	if (zl3vni) {

		if (IS_ZEBRA_DEBUG_VXLAN)
			zlog_debug(
				"Update L3-VNI %u intf %s(%u) VLAN %u local IP %pI4 master %u chg 0x%x",
				vni, ifp->name, ifp->ifindex, vxl->access_vlan,
				&vxl->vtep_ip,
				zif->brslave_info.bridge_ifindex, chgflags);

		/* Removed from bridge?
		   Cleanup and return */
		if ((chgflags & ZEBRA_VXLIF_MASTER_CHANGE)
		    && (zif->brslave_info.bridge_ifindex == IFINDEX_INTERNAL)) {
			zebra_vxlan_process_l3vni_oper_down(zl3vni);
			return 0;
		}

		/* Master-MAC change: bounce the L3-VNI so the new RMAC is
		 * picked up. */
		if ((chgflags & ZEBRA_VXLIF_MASTER_MAC_CHANGE)
		    && if_is_operative(ifp) && is_l3vni_oper_up(zl3vni)) {
			zebra_vxlan_process_l3vni_oper_down(zl3vni);
			zebra_vxlan_process_l3vni_oper_up(zl3vni);
			return 0;
		}

		/* access-vlan change - process oper down, associate with new
		 * svi_if and then process oper up again
		 */
		if (chgflags & ZEBRA_VXLIF_VLAN_CHANGE) {
			if (if_is_operative(ifp)) {
				zebra_vxlan_process_l3vni_oper_down(zl3vni);
				zl3vni->svi_if = NULL;
				zl3vni->svi_if = zl3vni_map_to_svi_if(zl3vni);
				zl3vni->mac_vlan_if =
					zl3vni_map_to_mac_vlan_if(zl3vni);
				zl3vni->local_vtep_ip = vxl->vtep_ip;
				if (is_l3vni_oper_up(zl3vni))
					zebra_vxlan_process_l3vni_oper_up(
						zl3vni);
			}
		}

		/*
		 * local-ip change - process oper down, associate with new
		 * local-ip and then process oper up again
		 */
		if (chgflags & ZEBRA_VXLIF_LOCAL_IP_CHANGE) {
			if (if_is_operative(ifp)) {
				zebra_vxlan_process_l3vni_oper_down(zl3vni);
				zl3vni->local_vtep_ip = vxl->vtep_ip;
				if (is_l3vni_oper_up(zl3vni))
					zebra_vxlan_process_l3vni_oper_up(
						zl3vni);
			}
		}

		/* Update local tunnel IP. */
		zl3vni->local_vtep_ip = vxl->vtep_ip;

		/* if we have a valid new master, process l3-vni oper up */
		if (chgflags & ZEBRA_VXLIF_MASTER_CHANGE) {
			if (if_is_operative(ifp) && is_l3vni_oper_up(zl3vni))
				zebra_vxlan_process_l3vni_oper_up(zl3vni);
		}
	} else {

		/* Update VNI hash. */
		zevpn = zebra_evpn_lookup(vni);
		if (!zevpn) {
			zlog_debug(
				"Failed to find EVPN hash on update, IF %s(%u) VNI %u",
				ifp->name, ifp->ifindex, vni);
			return -1;
		}

		if (IS_ZEBRA_DEBUG_VXLAN)
			zlog_debug(
				"Update L2-VNI %u intf %s(%u) VLAN %u local IP %pI4 master %u chg 0x%x",
				vni, ifp->name, ifp->ifindex, vxl->access_vlan,
				&vxl->vtep_ip,
				zif->brslave_info.bridge_ifindex, chgflags);

		/* Removed from bridge? Cleanup and return */
		if ((chgflags & ZEBRA_VXLIF_MASTER_CHANGE)
		    && (zif->brslave_info.bridge_ifindex == IFINDEX_INTERNAL)) {
			/* Delete from client, remove all remote VTEPs */
			/* Also, free up all MACs and neighbors. */
			zevpn->svi_if = NULL;
			zebra_evpn_send_del_to_client(zevpn);
			zebra_evpn_neigh_del_all(zevpn, 1, 0, DEL_ALL_NEIGH);
			zebra_evpn_mac_del_all(zevpn, 1, 0, DEL_ALL_MAC);
			zebra_evpn_vtep_del_all(zevpn, 1);
			return 0;
		}

		/* Handle other changes. */
		if (chgflags & ZEBRA_VXLIF_VLAN_CHANGE) {
			/* Remove all existing local neigh and MACs for this VNI
			 * (including from BGP)
			 */
			zebra_evpn_neigh_del_all(zevpn, 0, 1, DEL_LOCAL_MAC);
			zebra_evpn_mac_del_all(zevpn, 0, 1, DEL_LOCAL_MAC);
		}

		if (zevpn->local_vtep_ip.s_addr != vxl->vtep_ip.s_addr ||
		    zevpn->mcast_grp.s_addr != vxl->mcast_grp.s_addr) {
			zebra_vxlan_sg_deref(zevpn->local_vtep_ip,
					     zevpn->mcast_grp);
			zebra_vxlan_sg_ref(vxl->vtep_ip, vxl->mcast_grp);
			zevpn->local_vtep_ip = vxl->vtep_ip;
			zevpn->mcast_grp = vxl->mcast_grp;
			/* on local vtep-ip check if ES orig-ip
			 * needs to be updated
			 */
			zebra_evpn_es_set_base_evpn(zevpn);
		}
		zevpn_vxlan_if_set(zevpn, ifp, true /* set */);
		vlan_if = zvni_map_to_svi(vxl->access_vlan,
					  zif->brslave_info.br_if);
		if (vlan_if) {
			zevpn->svi_if = vlan_if;
			zevpn->vrf_id = vlan_if->vrf->vrf_id;
			zl3vni = zl3vni_from_vrf(vlan_if->vrf->vrf_id);
			if (zl3vni)
				listnode_add_sort_nodup(zl3vni->l2vnis, zevpn);
		}

		/* Take further actions needed.
		 * Note that if we are here, there is a change of interest.
		 */
		/* If down or not mapped to a bridge, we're done. */
		if (!if_is_operative(ifp) || !zif->brslave_info.br_if)
			return 0;

		/* Inform BGP, if there is a change of interest. */
		if (chgflags &
		    (ZEBRA_VXLIF_MASTER_CHANGE | ZEBRA_VXLIF_LOCAL_IP_CHANGE |
		     ZEBRA_VXLIF_MCAST_GRP_CHANGE | ZEBRA_VXLIF_VLAN_CHANGE))
			zebra_evpn_send_add_to_client(zevpn);

		/* If there is a valid new master or a VLAN mapping change,
		 * read and populate local MACs and neighbors.
		 * Also, reinstall any remote MACs and neighbors
		 * for this VNI (based on new VLAN).
		 */
		if (chgflags & ZEBRA_VXLIF_MASTER_CHANGE)
			zebra_evpn_read_mac_neigh(zevpn, ifp);
		else if (chgflags & ZEBRA_VXLIF_VLAN_CHANGE) {
			struct mac_walk_ctx m_wctx;
			struct neigh_walk_ctx n_wctx;

			zebra_evpn_read_mac_neigh(zevpn, ifp);

			memset(&m_wctx, 0, sizeof(m_wctx));
			m_wctx.zevpn = zevpn;
			hash_iterate(zevpn->mac_table,
				     zebra_evpn_install_mac_hash, &m_wctx);

			memset(&n_wctx, 0, sizeof(n_wctx));
			n_wctx.zevpn = zevpn;
			hash_iterate(zevpn->neigh_table,
				     zebra_evpn_install_neigh_hash, &n_wctx);
		}
	}

	return 0;
}

/*
 * Handle VxLAN interface add.
 */
int zebra_vxlan_if_add(struct interface *ifp)
{
	vni_t vni;
	struct zebra_if *zif = NULL;
	struct zebra_l2info_vxlan *vxl = NULL;
	struct zebra_evpn *zevpn = NULL;
	struct zebra_l3vni *zl3vni = NULL;

	/* Check if EVPN is enabled.
	 */
	if (!is_evpn_enabled())
		return 0;

	zif = ifp->info;
	assert(zif);
	vxl = &zif->l2info.vxl;
	vni = vxl->vni;

	zl3vni = zl3vni_lookup(vni);
	if (zl3vni) {

		/* process if-add for l3-vni*/
		if (IS_ZEBRA_DEBUG_VXLAN)
			zlog_debug(
				"Add L3-VNI %u intf %s(%u) VLAN %u local IP %pI4 master %u",
				vni, ifp->name, ifp->ifindex, vxl->access_vlan,
				&vxl->vtep_ip,
				zif->brslave_info.bridge_ifindex);

		/* associate with vxlan_if */
		zl3vni->local_vtep_ip = vxl->vtep_ip;
		zl3vni->vxlan_if = ifp;

		/* Associate with SVI, if any. We can associate with svi-if only
		 * after association with vxlan_if is complete */
		zl3vni->svi_if = zl3vni_map_to_svi_if(zl3vni);

		zl3vni->mac_vlan_if = zl3vni_map_to_mac_vlan_if(zl3vni);

		if (is_l3vni_oper_up(zl3vni))
			zebra_vxlan_process_l3vni_oper_up(zl3vni);
	} else {

		/* process if-add for l2-vni */
		struct interface *vlan_if = NULL;

		/* Create or update EVPN hash. */
		zevpn = zebra_evpn_lookup(vni);
		if (!zevpn)
			zevpn = zebra_evpn_add(vni);

		if (zevpn->local_vtep_ip.s_addr != vxl->vtep_ip.s_addr ||
		    zevpn->mcast_grp.s_addr != vxl->mcast_grp.s_addr) {
			zebra_vxlan_sg_deref(zevpn->local_vtep_ip,
					     zevpn->mcast_grp);
			zebra_vxlan_sg_ref(vxl->vtep_ip, vxl->mcast_grp);
			zevpn->local_vtep_ip = vxl->vtep_ip;
			zevpn->mcast_grp = vxl->mcast_grp;
			/* on local vtep-ip check if ES orig-ip
			 * needs to be updated
			 */
			zebra_evpn_es_set_base_evpn(zevpn);
		}
		zevpn_vxlan_if_set(zevpn, ifp, true /* set */);
		vlan_if = zvni_map_to_svi(vxl->access_vlan,
					  zif->brslave_info.br_if);
		if (vlan_if) {
			zevpn->svi_if = vlan_if;
			zevpn->vrf_id = vlan_if->vrf->vrf_id;
			zl3vni = zl3vni_from_vrf(vlan_if->vrf->vrf_id);
			if (zl3vni)
				listnode_add_sort_nodup(zl3vni->l2vnis, zevpn);
		}

		if (IS_ZEBRA_DEBUG_VXLAN)
			zlog_debug(
				"Add L2-VNI %u VRF %s intf %s(%u) VLAN %u local IP %pI4 mcast_grp %pI4 master %u",
				vni,
				vlan_if ? vlan_if->vrf->name
					: VRF_DEFAULT_NAME,
				ifp->name, ifp->ifindex, vxl->access_vlan,
				&vxl->vtep_ip, &vxl->mcast_grp,
				zif->brslave_info.bridge_ifindex);

		/* If down or not mapped to a bridge, we're done. */
		if (!if_is_operative(ifp) || !zif->brslave_info.br_if)
			return 0;

		/* Inform BGP */
		zebra_evpn_send_add_to_client(zevpn);

		/* Read and populate local MACs and neighbors */
		zebra_evpn_read_mac_neigh(zevpn, ifp);
	}

	return 0;
}

/* Handle "vni X [prefix-routes-only]" config under a VRF: add/del the
 * L3-VNI association. On failure a message is written into 'err'
 * (of size err_str_sz) and -1 is returned.
 */
int zebra_vxlan_process_vrf_vni_cmd(struct zebra_vrf *zvrf, vni_t vni,
				    char *err, int err_str_sz, int filter,
				    int add)
{
	struct zebra_l3vni *zl3vni = NULL;
	struct zebra_vrf *zvrf_evpn = NULL;

	zvrf_evpn = zebra_vrf_get_evpn();

	if (IS_ZEBRA_DEBUG_VXLAN)
		zlog_debug("vrf %s vni %u %s", zvrf_name(zvrf), vni,
			   add ? "ADD" : "DEL");

	if (add) {
		/* check if the vni is already present under zvrf */
		if (zvrf->l3vni) {
			snprintf(err, err_str_sz,
				 "VNI is already configured under the vrf");
			return -1;
		}

		/* check if this VNI is already present in the system */
		zl3vni = zl3vni_lookup(vni);
		if (zl3vni) {
			snprintf(err, err_str_sz,
				 "VNI is already configured as L3-VNI");
			return -1;
		}

		/* Remove L2VNI if present */
		zebra_vxlan_handle_vni_transition(zvrf, vni, add);

		/* add the L3-VNI to the global table */
		zl3vni = zl3vni_add(vni, zvrf_id(zvrf));

		/* associate the vrf with vni */
		zvrf->l3vni = vni;

		/* set the filter in l3vni to denote if we are using l3vni only
		 * for prefix routes
		 */
		if (filter)
			SET_FLAG(zl3vni->filter, PREFIX_ROUTES_ONLY);

		/* associate with vxlan-intf;
		 * we need to associate with the vxlan-intf first
		 */
		zl3vni->vxlan_if = zl3vni_map_to_vxlan_if(zl3vni);

		/* associate with corresponding SVI interface, we can associate
		 * with svi-if only after vxlan interface association is
		 * complete
		 */
		zl3vni->svi_if = zl3vni_map_to_svi_if(zl3vni);

		zl3vni->mac_vlan_if = zl3vni_map_to_mac_vlan_if(zl3vni);

		if
(IS_ZEBRA_DEBUG_VXLAN) + zlog_debug( + "%s: l3vni %u svi_if %s mac_vlan_if %s", + __func__, vni, + zl3vni->svi_if ? zl3vni->svi_if->name : "NIL", + zl3vni->mac_vlan_if ? zl3vni->mac_vlan_if->name + : "NIL"); + + /* formulate l2vni list */ + hash_iterate(zvrf_evpn->evpn_table, zevpn_add_to_l3vni_list, + zl3vni); + + if (is_l3vni_oper_up(zl3vni)) + zebra_vxlan_process_l3vni_oper_up(zl3vni); + + } else { + zl3vni = zl3vni_lookup(vni); + if (!zl3vni) { + snprintf(err, err_str_sz, "VNI doesn't exist"); + return -1; + } + + if (zvrf->l3vni != vni) { + snprintf(err, err_str_sz, + "VNI %d doesn't exist in VRF: %s", + vni, zvrf->vrf->name); + return -1; + } + + if (filter && !CHECK_FLAG(zl3vni->filter, PREFIX_ROUTES_ONLY)) { + snprintf(err, ERR_STR_SZ, + "prefix-routes-only is not set for the vni"); + return -1; + } + + zebra_vxlan_process_l3vni_oper_down(zl3vni); + + /* delete and uninstall all rmacs */ + hash_iterate(zl3vni->rmac_table, zl3vni_del_rmac_hash_entry, + zl3vni); + + /* delete and uninstall all next-hops */ + hash_iterate(zl3vni->nh_table, zl3vni_del_nh_hash_entry, + zl3vni); + + zvrf->l3vni = 0; + zl3vni_del(zl3vni); + + /* Add L2VNI for this VNI */ + zebra_vxlan_handle_vni_transition(zvrf, vni, add); + } + return 0; +} + +int zebra_vxlan_vrf_enable(struct zebra_vrf *zvrf) +{ + struct zebra_l3vni *zl3vni = NULL; + + if (zvrf->l3vni) + zl3vni = zl3vni_lookup(zvrf->l3vni); + if (!zl3vni) + return 0; + + zl3vni->vrf_id = zvrf_id(zvrf); + if (is_l3vni_oper_up(zl3vni)) + zebra_vxlan_process_l3vni_oper_up(zl3vni); + return 0; +} + +int zebra_vxlan_vrf_disable(struct zebra_vrf *zvrf) +{ + struct zebra_l3vni *zl3vni = NULL; + + if (zvrf->l3vni) + zl3vni = zl3vni_lookup(zvrf->l3vni); + if (!zl3vni) + return 0; + + zebra_vxlan_process_l3vni_oper_down(zl3vni); + + /* delete and uninstall all rmacs */ + hash_iterate(zl3vni->rmac_table, zl3vni_del_rmac_hash_entry, zl3vni); + /* delete and uninstall all next-hops */ + hash_iterate(zl3vni->nh_table, 
zl3vni_del_nh_hash_entry, zl3vni); + + zl3vni->vrf_id = VRF_UNKNOWN; + + return 0; +} + +int zebra_vxlan_vrf_delete(struct zebra_vrf *zvrf) +{ + struct zebra_l3vni *zl3vni = NULL; + vni_t vni; + + if (zvrf->l3vni) + zl3vni = zl3vni_lookup(zvrf->l3vni); + if (!zl3vni) + return 0; + + vni = zl3vni->vni; + zl3vni_del(zl3vni); + zebra_vxlan_handle_vni_transition(zvrf, vni, 0); + + return 0; +} + +/* + * Handle message from client to specify the flooding mechanism for + * BUM packets. The default is to do head-end (ingress) replication + * and the other supported option is to disable it. This applies to + * all BUM traffic and disabling it applies to both the transmit and + * receive direction. + */ +void zebra_vxlan_flood_control(ZAPI_HANDLER_ARGS) +{ + struct stream *s; + enum vxlan_flood_control flood_ctrl; + + if (!EVPN_ENABLED(zvrf)) { + zlog_err("EVPN flood control for non-EVPN VRF %u", + zvrf_id(zvrf)); + return; + } + + s = msg; + STREAM_GETC(s, flood_ctrl); + + if (IS_ZEBRA_DEBUG_VXLAN) + zlog_debug("EVPN flood control %u, currently %u", + flood_ctrl, zvrf->vxlan_flood_ctrl); + + if (zvrf->vxlan_flood_ctrl == flood_ctrl) + return; + + zvrf->vxlan_flood_ctrl = flood_ctrl; + + /* Install or uninstall flood entries corresponding to + * remote VTEPs. + */ + hash_iterate(zvrf->evpn_table, zebra_evpn_handle_flooding_remote_vteps, + zvrf); + +stream_failure: + return; +} + +/* + * Handle message from client to enable/disable advertisement of svi macip + * routes + */ +void zebra_vxlan_advertise_svi_macip(ZAPI_HANDLER_ARGS) +{ + struct stream *s; + int advertise; + vni_t vni = 0; + struct zebra_evpn *zevpn = NULL; + struct interface *ifp = NULL; + + if (!EVPN_ENABLED(zvrf)) { + zlog_debug("EVPN SVI-MACIP Adv for non-EVPN VRF %u", + zvrf_id(zvrf)); + return; + } + + s = msg; + STREAM_GETC(s, advertise); + STREAM_GETL(s, vni); + + if (!vni) { + if (IS_ZEBRA_DEBUG_VXLAN) + zlog_debug("EVPN SVI-MACIP Adv %s, currently %s", + advertise ? 
"enabled" : "disabled", + advertise_svi_macip_enabled(NULL) + ? "enabled" + : "disabled"); + + if (zvrf->advertise_svi_macip == advertise) + return; + + + if (advertise) { + zvrf->advertise_svi_macip = advertise; + hash_iterate(zvrf->evpn_table, + zebra_evpn_gw_macip_add_for_evpn_hash, + NULL); + } else { + hash_iterate(zvrf->evpn_table, + zebra_evpn_svi_macip_del_for_evpn_hash, + NULL); + zvrf->advertise_svi_macip = advertise; + } + + } else { + struct zebra_if *zif = NULL; + struct zebra_l2info_vxlan zl2_info; + struct interface *vlan_if = NULL; + + zevpn = zebra_evpn_lookup(vni); + if (!zevpn) + return; + + if (IS_ZEBRA_DEBUG_VXLAN) + zlog_debug( + "EVPN SVI macip Adv %s on VNI %d , currently %s", + advertise ? "enabled" : "disabled", vni, + advertise_svi_macip_enabled(zevpn) + ? "enabled" + : "disabled"); + + if (zevpn->advertise_svi_macip == advertise) + return; + + /* Store flag even though SVI is not present. + * Once SVI comes up triggers self MAC-IP route add. + */ + zevpn->advertise_svi_macip = advertise; + + ifp = zevpn->vxlan_if; + if (!ifp) + return; + + zif = ifp->info; + + /* If down or not mapped to a bridge, we're done. 
*/ + if (!if_is_operative(ifp) || !zif->brslave_info.br_if) + return; + + zl2_info = zif->l2info.vxl; + vlan_if = zvni_map_to_svi(zl2_info.access_vlan, + zif->brslave_info.br_if); + if (!vlan_if) + return; + + if (advertise) { + /* Add primary SVI MAC-IP */ + zebra_evpn_add_macip_for_intf(vlan_if, zevpn); + } else { + /* Del primary SVI MAC-IP */ + zebra_evpn_del_macip_for_intf(vlan_if, zevpn); + } + } + +stream_failure: + return; +} + +/* + * Handle message from client to enable/disable advertisement of g/w macip + * routes + */ +void zebra_vxlan_advertise_subnet(ZAPI_HANDLER_ARGS) +{ + struct stream *s; + int advertise; + vni_t vni = 0; + struct zebra_evpn *zevpn = NULL; + struct interface *ifp = NULL; + struct zebra_if *zif = NULL; + struct zebra_l2info_vxlan zl2_info; + struct interface *vlan_if = NULL; + + if (!EVPN_ENABLED(zvrf)) { + zlog_debug("EVPN GW-MACIP Adv for non-EVPN VRF %u", + zvrf_id(zvrf)); + return; + } + + s = msg; + STREAM_GETC(s, advertise); + STREAM_GET(&vni, s, 3); + + zevpn = zebra_evpn_lookup(vni); + if (!zevpn) + return; + + if (zevpn->advertise_subnet == advertise) + return; + + if (IS_ZEBRA_DEBUG_VXLAN) + zlog_debug("EVPN subnet Adv %s on VNI %d , currently %s", + advertise ? "enabled" : "disabled", vni, + zevpn->advertise_subnet ? "enabled" : "disabled"); + + + zevpn->advertise_subnet = advertise; + + ifp = zevpn->vxlan_if; + if (!ifp) + return; + + zif = ifp->info; + + /* If down or not mapped to a bridge, we're done. 
*/ + if (!if_is_operative(ifp) || !zif->brslave_info.br_if) + return; + + zl2_info = zif->l2info.vxl; + + vlan_if = + zvni_map_to_svi(zl2_info.access_vlan, zif->brslave_info.br_if); + if (!vlan_if) + return; + + if (zevpn->advertise_subnet) + zebra_evpn_advertise_subnet(zevpn, vlan_if, 1); + else + zebra_evpn_advertise_subnet(zevpn, vlan_if, 0); + +stream_failure: + return; +} + +/* + * Handle message from client to enable/disable advertisement of g/w macip + * routes + */ +void zebra_vxlan_advertise_gw_macip(ZAPI_HANDLER_ARGS) +{ + struct stream *s; + int advertise; + vni_t vni = 0; + struct zebra_evpn *zevpn = NULL; + struct interface *ifp = NULL; + + if (!EVPN_ENABLED(zvrf)) { + zlog_debug("EVPN GW-MACIP Adv for non-EVPN VRF %u", + zvrf_id(zvrf)); + return; + } + + s = msg; + STREAM_GETC(s, advertise); + STREAM_GETL(s, vni); + + if (!vni) { + if (IS_ZEBRA_DEBUG_VXLAN) + zlog_debug("EVPN gateway macip Adv %s, currently %s", + advertise ? "enabled" : "disabled", + advertise_gw_macip_enabled(NULL) + ? "enabled" + : "disabled"); + + if (zvrf->advertise_gw_macip == advertise) + return; + + zvrf->advertise_gw_macip = advertise; + + if (advertise_gw_macip_enabled(zevpn)) + hash_iterate(zvrf->evpn_table, + zebra_evpn_gw_macip_add_for_evpn_hash, + NULL); + else + hash_iterate(zvrf->evpn_table, + zebra_evpn_gw_macip_del_for_evpn_hash, + NULL); + + } else { + struct zebra_if *zif = NULL; + struct zebra_l2info_vxlan zl2_info; + struct interface *vlan_if = NULL; + struct interface *vrr_if = NULL; + + zevpn = zebra_evpn_lookup(vni); + if (!zevpn) + return; + + if (IS_ZEBRA_DEBUG_VXLAN) + zlog_debug( + "EVPN gateway macip Adv %s on VNI %d , currently %s", + advertise ? "enabled" : "disabled", vni, + advertise_gw_macip_enabled(zevpn) ? 
"enabled" + : "disabled"); + + if (zevpn->advertise_gw_macip == advertise) + return; + + zevpn->advertise_gw_macip = advertise; + + ifp = zevpn->vxlan_if; + if (!ifp) + return; + + zif = ifp->info; + + /* If down or not mapped to a bridge, we're done. */ + if (!if_is_operative(ifp) || !zif->brslave_info.br_if) + return; + + zl2_info = zif->l2info.vxl; + + vlan_if = zvni_map_to_svi(zl2_info.access_vlan, + zif->brslave_info.br_if); + if (!vlan_if) + return; + + if (advertise_gw_macip_enabled(zevpn)) { + /* Add primary SVI MAC-IP */ + zebra_evpn_add_macip_for_intf(vlan_if, zevpn); + + /* Add VRR MAC-IP - if any*/ + vrr_if = zebra_get_vrr_intf_for_svi(vlan_if); + if (vrr_if) + zebra_evpn_add_macip_for_intf(vrr_if, zevpn); + } else { + /* Del primary MAC-IP */ + zebra_evpn_del_macip_for_intf(vlan_if, zevpn); + + /* Del VRR MAC-IP - if any*/ + vrr_if = zebra_get_vrr_intf_for_svi(vlan_if); + if (vrr_if) + zebra_evpn_del_macip_for_intf(vrr_if, zevpn); + } + } + +stream_failure: + return; +} + +static int macfdb_read_ns(struct ns *ns, + void *_in_param __attribute__((unused)), + void **out_param __attribute__((unused))) +{ + struct zebra_ns *zns = ns->info; + + macfdb_read(zns); + return NS_WALK_CONTINUE; +} + +static int neigh_read_ns(struct ns *ns, + void *_in_param __attribute__((unused)), + void **out_param __attribute__((unused))) +{ + struct zebra_ns *zns = ns->info; + + neigh_read(zns); + return NS_WALK_CONTINUE; +} + +/* + * Handle message from client to learn (or stop learning) about VNIs and MACs. + * When enabled, the VNI hash table will be built and MAC FDB table read; + * when disabled, the entries should be deleted and remote VTEPs and MACs + * uninstalled from the kernel. + * This also informs the setting for BUM handling at the time this change + * occurs; it is relevant only when specifying "learn". 
+ */ +void zebra_vxlan_advertise_all_vni(ZAPI_HANDLER_ARGS) +{ + struct stream *s = NULL; + int advertise = 0; + enum vxlan_flood_control flood_ctrl; + + /* Mismatch between EVPN VRF and current VRF (should be prevented by + * bgpd's cli) */ + if (is_evpn_enabled() && !EVPN_ENABLED(zvrf)) + return; + + s = msg; + STREAM_GETC(s, advertise); + STREAM_GETC(s, flood_ctrl); + + if (IS_ZEBRA_DEBUG_VXLAN) + zlog_debug("EVPN VRF %s(%u) VNI Adv %s, currently %s, flood control %u", + zvrf_name(zvrf), zvrf_id(zvrf), + advertise ? "enabled" : "disabled", + is_evpn_enabled() ? "enabled" : "disabled", + flood_ctrl); + + if (zvrf->advertise_all_vni == advertise) + return; + + zvrf->advertise_all_vni = advertise; + if (EVPN_ENABLED(zvrf)) { + zrouter.evpn_vrf = zvrf; + + /* Note BUM handling */ + zvrf->vxlan_flood_ctrl = flood_ctrl; + + /* Replay all ESs */ + zebra_evpn_es_send_all_to_client(true /* add */); + + /* Build EVPN hash table and inform BGP. */ + zevpn_build_hash_table(); + + /* Add all SVI (L3 GW) MACs to BGP*/ + hash_iterate(zvrf->evpn_table, + zebra_evpn_gw_macip_add_for_evpn_hash, NULL); + + /* Read the MAC FDB */ + ns_walk_func(macfdb_read_ns, NULL, NULL); + + /* Read neighbors */ + ns_walk_func(neigh_read_ns, NULL, NULL); + } else { + /* Cleanup VTEPs for all EVPNs - uninstall from + * kernel and free entries. + */ + hash_iterate(zvrf->evpn_table, zebra_evpn_vxlan_cleanup_all, + zvrf); + + /* Delete all ESs in BGP */ + zebra_evpn_es_send_all_to_client(false /* add */); + + /* cleanup all l3vnis */ + hash_iterate(zrouter.l3vni_table, zl3vni_cleanup_all, NULL); + + /* Mark as "no EVPN VRF" */ + zrouter.evpn_vrf = NULL; + } + +stream_failure: + return; +} + +/* + * Allocate EVPN hash table for this VRF and do other initialization. + * NOTE: Currently supported only for default VRF. 
+ */ +void zebra_vxlan_init_tables(struct zebra_vrf *zvrf) +{ + char buffer[80]; + + if (!zvrf) + return; + + snprintf(buffer, sizeof(buffer), "Zebra VRF EVPN Table: %s", + zvrf->vrf->name); + zvrf->evpn_table = hash_create_size(8, zebra_evpn_hash_keymake, + zebra_evpn_hash_cmp, buffer); + + snprintf(buffer, sizeof(buffer), "Zebra VxLAN SG Table: %s", + zvrf->vrf->name); + zvrf->vxlan_sg_table = hash_create_size(8, zebra_vxlan_sg_hash_key_make, + zebra_vxlan_sg_hash_eq, buffer); +} + +/* Cleanup EVPN info, but don't free the table. */ +void zebra_vxlan_cleanup_tables(struct zebra_vrf *zvrf) +{ + struct zebra_vrf *evpn_zvrf = zebra_vrf_get_evpn(); + + hash_iterate(zvrf->evpn_table, zebra_evpn_vxlan_cleanup_all, zvrf); + zebra_vxlan_cleanup_sg_table(zvrf); + + if (zvrf == evpn_zvrf) + zebra_evpn_es_cleanup(); +} + +/* Close all EVPN handling */ +void zebra_vxlan_close_tables(struct zebra_vrf *zvrf) +{ + if (!zvrf) + return; + hash_iterate(zvrf->evpn_table, zebra_evpn_vxlan_cleanup_all, zvrf); + hash_free(zvrf->evpn_table); + if (zvrf->vxlan_sg_table) { + zebra_vxlan_cleanup_sg_table(zvrf); + hash_free(zvrf->vxlan_sg_table); + zvrf->vxlan_sg_table = NULL; + } +} + +/* init the l3vni table */ +void zebra_vxlan_init(void) +{ + zrouter.l3vni_table = hash_create(l3vni_hash_keymake, l3vni_hash_cmp, + "Zebra VRF L3 VNI table"); + zrouter.evpn_vrf = NULL; + zebra_evpn_mh_init(); +} + +/* free l3vni table */ +void zebra_vxlan_disable(void) +{ + hash_free(zrouter.l3vni_table); + zebra_evpn_mh_terminate(); +} + +/* get the l3vni svi ifindex */ +ifindex_t get_l3vni_svi_ifindex(vrf_id_t vrf_id) +{ + struct zebra_l3vni *zl3vni = NULL; + + zl3vni = zl3vni_from_vrf(vrf_id); + if (!zl3vni || !is_l3vni_oper_up(zl3vni)) + return 0; + + return zl3vni->svi_if->ifindex; +} + +/************************** vxlan SG cache management ************************/ +/* Inform PIM about the mcast group */ +static int zebra_vxlan_sg_send(struct zebra_vrf *zvrf, + struct prefix_sg *sg, + char *sg_str, 
uint16_t cmd) +{ + struct zserv *client = NULL; + struct stream *s = NULL; + + client = zserv_find_client(ZEBRA_ROUTE_PIM, 0); + if (!client) + return 0; + + if (!CHECK_FLAG(zvrf->flags, ZEBRA_PIM_SEND_VXLAN_SG)) + return 0; + + s = stream_new(ZEBRA_MAX_PACKET_SIZ); + + zclient_create_header(s, cmd, VRF_DEFAULT); + stream_putl(s, IPV4_MAX_BYTELEN); + stream_put(s, &sg->src.s_addr, IPV4_MAX_BYTELEN); + stream_put(s, &sg->grp.s_addr, IPV4_MAX_BYTELEN); + + /* Write packet size. */ + stream_putw_at(s, 0, stream_get_endp(s)); + + if (IS_ZEBRA_DEBUG_VXLAN) + zlog_debug( + "Send %s %s to %s", + (cmd == ZEBRA_VXLAN_SG_ADD) ? "add" : "del", sg_str, + zebra_route_string(client->proto)); + + if (cmd == ZEBRA_VXLAN_SG_ADD) + client->vxlan_sg_add_cnt++; + else + client->vxlan_sg_del_cnt++; + + return zserv_send_message(client, s); +} + +static unsigned int zebra_vxlan_sg_hash_key_make(const void *p) +{ + const struct zebra_vxlan_sg *vxlan_sg = p; + + return (jhash_2words(vxlan_sg->sg.src.s_addr, + vxlan_sg->sg.grp.s_addr, 0)); +} + +static bool zebra_vxlan_sg_hash_eq(const void *p1, const void *p2) +{ + const struct zebra_vxlan_sg *sg1 = p1; + const struct zebra_vxlan_sg *sg2 = p2; + + return ((sg1->sg.src.s_addr == sg2->sg.src.s_addr) + && (sg1->sg.grp.s_addr == sg2->sg.grp.s_addr)); +} + +static struct zebra_vxlan_sg *zebra_vxlan_sg_new(struct zebra_vrf *zvrf, + struct prefix_sg *sg) +{ + struct zebra_vxlan_sg *vxlan_sg; + + vxlan_sg = XCALLOC(MTYPE_ZVXLAN_SG, sizeof(*vxlan_sg)); + + vxlan_sg->zvrf = zvrf; + vxlan_sg->sg = *sg; + prefix_sg2str(sg, vxlan_sg->sg_str); + + vxlan_sg = hash_get(zvrf->vxlan_sg_table, vxlan_sg, hash_alloc_intern); + + if (IS_ZEBRA_DEBUG_VXLAN) + zlog_debug("vxlan SG %s created", vxlan_sg->sg_str); + + return vxlan_sg; +} + +static struct zebra_vxlan_sg *zebra_vxlan_sg_find(struct zebra_vrf *zvrf, + struct prefix_sg *sg) +{ + struct zebra_vxlan_sg lookup; + + lookup.sg = *sg; + return hash_lookup(zvrf->vxlan_sg_table, &lookup); +} + +static struct 
zebra_vxlan_sg *zebra_vxlan_sg_add(struct zebra_vrf *zvrf, + struct prefix_sg *sg) +{ + struct zebra_vxlan_sg *vxlan_sg; + struct zebra_vxlan_sg *parent = NULL; + struct in_addr sip; + + vxlan_sg = zebra_vxlan_sg_find(zvrf, sg); + if (vxlan_sg) + return vxlan_sg; + + /* create a *G entry for every BUM group implicitly - + * 1. The SG entry is used by pimd to setup the vxlan-origination-mroute + * 2. the XG entry is used by pimd to setup the + * vxlan-termination-mroute + */ + if (sg->src.s_addr != INADDR_ANY) { + memset(&sip, 0, sizeof(sip)); + parent = zebra_vxlan_sg_do_ref(zvrf, sip, sg->grp); + if (!parent) + return NULL; + } + + vxlan_sg = zebra_vxlan_sg_new(zvrf, sg); + + zebra_vxlan_sg_send(zvrf, sg, vxlan_sg->sg_str, + ZEBRA_VXLAN_SG_ADD); + + return vxlan_sg; +} + +static void zebra_vxlan_sg_del(struct zebra_vxlan_sg *vxlan_sg) +{ + struct in_addr sip; + struct zebra_vrf *zvrf; + + zvrf = vrf_info_lookup(VRF_DEFAULT); + if (!zvrf) + return; + + /* On SG entry deletion remove the reference to its parent XG + * entry + */ + if (vxlan_sg->sg.src.s_addr != INADDR_ANY) { + memset(&sip, 0, sizeof(sip)); + zebra_vxlan_sg_do_deref(zvrf, sip, vxlan_sg->sg.grp); + } + + zebra_vxlan_sg_send(zvrf, &vxlan_sg->sg, + vxlan_sg->sg_str, ZEBRA_VXLAN_SG_DEL); + + hash_release(vxlan_sg->zvrf->vxlan_sg_table, vxlan_sg); + + if (IS_ZEBRA_DEBUG_VXLAN) + zlog_debug("VXLAN SG %s deleted", vxlan_sg->sg_str); + + XFREE(MTYPE_ZVXLAN_SG, vxlan_sg); +} + +static void zebra_vxlan_sg_do_deref(struct zebra_vrf *zvrf, + struct in_addr sip, struct in_addr mcast_grp) +{ + struct zebra_vxlan_sg *vxlan_sg; + struct prefix_sg sg; + + sg.family = AF_INET; + sg.prefixlen = IPV4_MAX_BYTELEN; + sg.src = sip; + sg.grp = mcast_grp; + vxlan_sg = zebra_vxlan_sg_find(zvrf, &sg); + if (!vxlan_sg) + return; + + if (vxlan_sg->ref_cnt) + --vxlan_sg->ref_cnt; + + if (!vxlan_sg->ref_cnt) + zebra_vxlan_sg_del(vxlan_sg); +} + +static struct zebra_vxlan_sg *zebra_vxlan_sg_do_ref(struct zebra_vrf *zvrf, + struct 
in_addr sip, + struct in_addr mcast_grp) +{ + struct zebra_vxlan_sg *vxlan_sg; + struct prefix_sg sg; + + sg.family = AF_INET; + sg.prefixlen = IPV4_MAX_BYTELEN; + sg.src = sip; + sg.grp = mcast_grp; + vxlan_sg = zebra_vxlan_sg_add(zvrf, &sg); + if (vxlan_sg) + ++vxlan_sg->ref_cnt; + + return vxlan_sg; +} + +static void zebra_vxlan_sg_deref(struct in_addr local_vtep_ip, + struct in_addr mcast_grp) +{ + struct zebra_vrf *zvrf; + + if (local_vtep_ip.s_addr == INADDR_ANY + || mcast_grp.s_addr == INADDR_ANY) + return; + + zvrf = vrf_info_lookup(VRF_DEFAULT); + if (!zvrf) + return; + + zebra_vxlan_sg_do_deref(zvrf, local_vtep_ip, mcast_grp); +} + +static void zebra_vxlan_sg_ref(struct in_addr local_vtep_ip, + struct in_addr mcast_grp) +{ + struct zebra_vrf *zvrf; + + if (local_vtep_ip.s_addr == INADDR_ANY + || mcast_grp.s_addr == INADDR_ANY) + return; + + zvrf = vrf_info_lookup(VRF_DEFAULT); + if (!zvrf) + return; + zebra_vxlan_sg_do_ref(zvrf, local_vtep_ip, mcast_grp); +} + +static void zebra_vxlan_xg_pre_cleanup(struct hash_bucket *bucket, void *arg) +{ + struct zebra_vxlan_sg *vxlan_sg = (struct zebra_vxlan_sg *)bucket->data; + + /* increment the ref count against (*,G) to prevent them from being + * deleted + */ + if (vxlan_sg->sg.src.s_addr == INADDR_ANY) + ++vxlan_sg->ref_cnt; +} + +static void zebra_vxlan_xg_post_cleanup(struct hash_bucket *bucket, void *arg) +{ + struct zebra_vxlan_sg *vxlan_sg = (struct zebra_vxlan_sg *)bucket->data; + + /* decrement the dummy ref count against (*,G) to delete them */ + if (vxlan_sg->sg.src.s_addr == INADDR_ANY) { + if (vxlan_sg->ref_cnt) + --vxlan_sg->ref_cnt; + if (!vxlan_sg->ref_cnt) + zebra_vxlan_sg_del(vxlan_sg); + } +} + +static void zebra_vxlan_sg_cleanup(struct hash_bucket *bucket, void *arg) +{ + struct zebra_vxlan_sg *vxlan_sg = (struct zebra_vxlan_sg *)bucket->data; + + zebra_vxlan_sg_del(vxlan_sg); +} + +static void zebra_vxlan_cleanup_sg_table(struct zebra_vrf *zvrf) +{ + /* increment the ref count against (*,G) to 
prevent them from being + * deleted + */ + hash_iterate(zvrf->vxlan_sg_table, zebra_vxlan_xg_pre_cleanup, NULL); + + hash_iterate(zvrf->vxlan_sg_table, zebra_vxlan_sg_cleanup, NULL); + + /* decrement the dummy ref count against the XG entries */ + hash_iterate(zvrf->vxlan_sg_table, zebra_vxlan_xg_post_cleanup, NULL); +} + +static void zebra_vxlan_sg_replay_send(struct hash_bucket *bucket, void *arg) +{ + struct zebra_vxlan_sg *vxlan_sg = (struct zebra_vxlan_sg *)bucket->data; + + zebra_vxlan_sg_send(vxlan_sg->zvrf, &vxlan_sg->sg, + vxlan_sg->sg_str, ZEBRA_VXLAN_SG_ADD); +} + +/* Handle message from client to replay vxlan SG entries */ +void zebra_vxlan_sg_replay(ZAPI_HANDLER_ARGS) +{ + if (IS_ZEBRA_DEBUG_VXLAN) + zlog_debug("VxLAN SG updates to PIM, start"); + + SET_FLAG(zvrf->flags, ZEBRA_PIM_SEND_VXLAN_SG); + + if (!EVPN_ENABLED(zvrf)) { + if (IS_ZEBRA_DEBUG_VXLAN) + zlog_debug("VxLAN SG replay request on unexpected vrf %d", + zvrf->vrf->vrf_id); + return; + } + + hash_iterate(zvrf->vxlan_sg_table, zebra_vxlan_sg_replay_send, NULL); +} + + +/* Cleanup EVPN configuration of a specific VRF */ +static void zebra_evpn_vrf_cfg_cleanup(struct zebra_vrf *zvrf) +{ + struct zebra_l3vni *zl3vni = NULL; + + zvrf->advertise_all_vni = 0; + zvrf->advertise_gw_macip = 0; + zvrf->advertise_svi_macip = 0; + zvrf->vxlan_flood_ctrl = VXLAN_FLOOD_HEAD_END_REPL; + + hash_iterate(zvrf->evpn_table, zebra_evpn_cfg_cleanup, NULL); + + if (zvrf->l3vni) + zl3vni = zl3vni_lookup(zvrf->l3vni); + if (zl3vni) { + /* delete and uninstall all rmacs */ + hash_iterate(zl3vni->rmac_table, zl3vni_del_rmac_hash_entry, + zl3vni); + /* delete and uninstall all next-hops */ + hash_iterate(zl3vni->nh_table, zl3vni_del_nh_hash_entry, + zl3vni); + } +} + +/* Cleanup BGP EVPN configuration upon client disconnect */ +static int zebra_evpn_bgp_cfg_clean_up(struct zserv *client) +{ + struct vrf *vrf; + struct zebra_vrf *zvrf; + + RB_FOREACH (vrf, vrf_id_head, &vrfs_by_id) { + zvrf = vrf->info; + if (zvrf) + 
zebra_evpn_vrf_cfg_cleanup(zvrf); + } + + return 0; +} + +static int zebra_evpn_pim_cfg_clean_up(struct zserv *client) +{ + struct zebra_vrf *zvrf = zebra_vrf_get_evpn(); + + if (CHECK_FLAG(zvrf->flags, ZEBRA_PIM_SEND_VXLAN_SG)) { + if (IS_ZEBRA_DEBUG_VXLAN) + zlog_debug("VxLAN SG updates to PIM, stop"); + UNSET_FLAG(zvrf->flags, ZEBRA_PIM_SEND_VXLAN_SG); + } + + return 0; +} + +static int zebra_evpn_cfg_clean_up(struct zserv *client) +{ + if (client->proto == ZEBRA_ROUTE_BGP) + return zebra_evpn_bgp_cfg_clean_up(client); + + if (client->proto == ZEBRA_ROUTE_PIM) + return zebra_evpn_pim_cfg_clean_up(client); + + return 0; +} + +/* + * Handle results for vxlan dataplane operations. + */ +extern void zebra_vxlan_handle_result(struct zebra_dplane_ctx *ctx) +{ + return; +} + +/* Cleanup BGP EVPN configuration upon client disconnect */ +extern void zebra_evpn_init(void) +{ + hook_register(zserv_client_close, zebra_evpn_cfg_clean_up); +} diff --git a/zebra/zebra_vxlan.h b/zebra/zebra_vxlan.h new file mode 100644 index 0000000..757c65d --- /dev/null +++ b/zebra/zebra_vxlan.h @@ -0,0 +1,232 @@ +/* + * Zebra VxLAN (EVPN) Data structures and definitions + * These are public definitions referenced by other files. + * Copyright (C) 2016, 2017 Cumulus Networks, Inc. + * + * This file is part of FRR. + * + * FRR is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License as published by the + * Free Software Foundation; either version 2, or (at your option) any + * later version. + * + * FRR is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with FRR; see the file COPYING. 
If not, write to the Free + * Software Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA + * 02111-1307, USA. + */ + +#ifndef _ZEBRA_VXLAN_H +#define _ZEBRA_VXLAN_H + +#include <zebra.h> +#include <zebra/zebra_router.h> + +#include "linklist.h" +#include "if.h" +#include "vlan.h" +#include "vxlan.h" + +#include "lib/json.h" +#include "zebra/zebra_vrf.h" +#include "zebra/zserv.h" +#include "zebra/zebra_dplane.h" + +#ifdef __cplusplus +extern "C" { +#endif + +/* Is EVPN enabled? */ +#define EVPN_ENABLED(zvrf) (zvrf)->advertise_all_vni +static inline int is_evpn_enabled(void) +{ + return EVPN_ENABLED(zebra_vrf_get_evpn()); +} + +static inline int +is_vxlan_flooding_head_end(void) +{ + struct zebra_vrf *zvrf = zebra_vrf_get_evpn(); + return (zvrf->vxlan_flood_ctrl == VXLAN_FLOOD_HEAD_END_REPL); +} + +/* VxLAN interface change flags of interest. */ +#define ZEBRA_VXLIF_LOCAL_IP_CHANGE (1 << 0) +#define ZEBRA_VXLIF_MASTER_CHANGE (1 << 1) +#define ZEBRA_VXLIF_VLAN_CHANGE (1 << 2) +#define ZEBRA_VXLIF_MCAST_GRP_CHANGE (1 << 3) +#define ZEBRA_VXLIF_MASTER_MAC_CHANGE (1 << 4) + + +#define VNI_STR_LEN 32 + +/* ZAPI message handlers */ +extern void zebra_vxlan_remote_macip_add(ZAPI_HANDLER_ARGS); +extern void zebra_vxlan_remote_macip_del(ZAPI_HANDLER_ARGS); +extern void zebra_vxlan_remote_vtep_add_zapi(ZAPI_HANDLER_ARGS); +extern void zebra_vxlan_remote_vtep_del_zapi(ZAPI_HANDLER_ARGS); +void zebra_vxlan_remote_vtep_add(vrf_id_t vrf_id, vni_t vni, + struct in_addr vtep_ip, int flood_control); +extern void zebra_vxlan_remote_vtep_del(vrf_id_t vrf_id, vni_t vni, + struct in_addr vtep_ip); +extern void zebra_vxlan_flood_control(ZAPI_HANDLER_ARGS); +extern void zebra_vxlan_advertise_subnet(ZAPI_HANDLER_ARGS); +extern void zebra_vxlan_advertise_svi_macip(ZAPI_HANDLER_ARGS); +extern void zebra_vxlan_advertise_gw_macip(ZAPI_HANDLER_ARGS); +extern void zebra_vxlan_advertise_all_vni(ZAPI_HANDLER_ARGS); +extern void zebra_vxlan_dup_addr_detection(ZAPI_HANDLER_ARGS); +extern void 
zebra_vxlan_sg_replay(ZAPI_HANDLER_ARGS); + +extern int is_l3vni_for_prefix_routes_only(vni_t vni); +extern ifindex_t get_l3vni_svi_ifindex(vrf_id_t vrf_id); +extern int zebra_vxlan_vrf_delete(struct zebra_vrf *zvrf); +extern int zebra_vxlan_vrf_enable(struct zebra_vrf *zvrf); +extern int zebra_vxlan_vrf_disable(struct zebra_vrf *zvrf); +extern int zebra_vxlan_vrf_delete(struct zebra_vrf *zvrf); +extern void zebra_vxlan_print_specific_nh_l3vni(struct vty *vty, vni_t l3vni, + struct ipaddr *ip, bool uj); +extern void zebra_vxlan_print_evpn(struct vty *vty, bool uj); +extern void zebra_vxlan_print_specific_rmac_l3vni(struct vty *vty, vni_t l3vni, + struct ethaddr *rmac, + bool use_json); +extern void zebra_vxlan_print_macs_vni(struct vty *vty, struct zebra_vrf *zvrf, + vni_t vni, bool use_json); +extern void zebra_vxlan_print_macs_all_vni(struct vty *vty, + struct zebra_vrf *zvrf, + bool print_dup, + bool use_json); +extern void zebra_vxlan_print_macs_all_vni_detail(struct vty *vty, + struct zebra_vrf *zvrf, + bool print_dup, + bool use_json); +extern void zebra_vxlan_print_macs_all_vni_vtep(struct vty *vty, + struct zebra_vrf *zvrf, + struct in_addr vtep_ip, + bool use_json); +extern void zebra_vxlan_print_specific_mac_vni(struct vty *vty, + struct zebra_vrf *zvrf, + vni_t vni, struct ethaddr *mac, + bool use_json); +extern void zebra_vxlan_print_macs_vni_vtep(struct vty *vty, + struct zebra_vrf *zvrf, vni_t vni, + struct in_addr vtep_ip, + bool use_json); +extern void zebra_vxlan_print_macs_vni_dad(struct vty *vty, + struct zebra_vrf *zvrf, vni_t vni, + bool use_json); +extern void zebra_vxlan_print_neigh_vni(struct vty *vty, struct zebra_vrf *zvrf, + vni_t vni, bool use_json); +extern void zebra_vxlan_print_neigh_all_vni(struct vty *vty, + struct zebra_vrf *zvrf, + bool print_dup, + bool use_json); +extern void zebra_vxlan_print_neigh_all_vni_detail(struct vty *vty, + struct zebra_vrf *zvrf, + bool print_dup, + bool use_json); +extern void 
zebra_vxlan_print_specific_neigh_vni(struct vty *vty, + struct zebra_vrf *zvrf, + vni_t vni, struct ipaddr *ip, + bool use_json); +extern void zebra_vxlan_print_neigh_vni_vtep(struct vty *vty, + struct zebra_vrf *zvrf, vni_t vni, + struct in_addr vtep_ip, + bool use_json); +extern void zebra_vxlan_print_neigh_vni_dad(struct vty *vty, + struct zebra_vrf *zvrf, vni_t vni, + bool use_json); +extern void zebra_vxlan_print_vni(struct vty *vty, struct zebra_vrf *zvrf, + vni_t vni, bool use_json, + json_object *json_array); +extern void zebra_vxlan_print_vnis(struct vty *vty, struct zebra_vrf *zvrf, + bool use_json); +extern void zebra_vxlan_print_vnis_detail(struct vty *vty, + struct zebra_vrf *zvrf, + bool use_json); +extern void zebra_vxlan_print_rmacs_l3vni(struct vty *vty, vni_t vni, + bool use_json); +extern void zebra_vxlan_print_rmacs_all_l3vni(struct vty *vty, bool use_json); +extern void zebra_vxlan_print_nh_l3vni(struct vty *vty, vni_t vni, + bool use_json); +extern void zebra_vxlan_print_nh_all_l3vni(struct vty *vty, bool use_json); +extern void zebra_vxlan_print_l3vni(struct vty *vty, vni_t vni, bool use_json); +extern void zebra_vxlan_print_vrf_vni(struct vty *vty, struct zebra_vrf *zvrf, + json_object *json_vrfs); +extern int zebra_vxlan_add_del_gw_macip(struct interface *ifp, + const struct prefix *p, int add); +extern int zebra_vxlan_svi_up(struct interface *ifp, struct interface *link_if); +extern int zebra_vxlan_svi_down(struct interface *ifp, + struct interface *link_if); +extern int zebra_vxlan_handle_kernel_neigh_update( + struct interface *ifp, struct interface *link_if, struct ipaddr *ip, + struct ethaddr *macaddr, uint16_t state, bool is_ext, + bool is_router, bool local_inactive, bool dp_static); +extern int zebra_vxlan_handle_kernel_neigh_del(struct interface *ifp, + struct interface *link_if, + struct ipaddr *ip); +extern int zebra_vxlan_local_mac_add_update(struct interface *ifp, + struct interface *br_if, + struct ethaddr *mac, vlanid_t vid, 
+ bool sticky, bool local_inactive, + bool dp_static); +extern int zebra_vxlan_local_mac_del(struct interface *ifp, + struct interface *br_if, + struct ethaddr *mac, vlanid_t vid); +extern int zebra_vxlan_check_readd_vtep(struct interface *ifp, + struct in_addr vtep_ip); +extern int zebra_vxlan_if_up(struct interface *ifp); +extern int zebra_vxlan_if_down(struct interface *ifp); +extern int zebra_vxlan_if_add(struct interface *ifp); +extern int zebra_vxlan_if_update(struct interface *ifp, uint16_t chgflags); +extern int zebra_vxlan_if_del(struct interface *ifp); +extern int zebra_vxlan_process_vrf_vni_cmd(struct zebra_vrf *zvrf, vni_t vni, + char *err, int err_str_sz, + int filter, int add); +extern void zebra_vxlan_init_tables(struct zebra_vrf *zvrf); +extern void zebra_vxlan_close_tables(struct zebra_vrf *); +extern void zebra_vxlan_cleanup_tables(struct zebra_vrf *); +extern void zebra_vxlan_init(void); +extern void zebra_vxlan_disable(void); +extern void zebra_vxlan_evpn_vrf_route_add(vrf_id_t vrf_id, + const struct ethaddr *rmac, + const struct ipaddr *ip, + const struct prefix *host_prefix); +extern void zebra_vxlan_evpn_vrf_route_del(vrf_id_t vrf_id, + struct ipaddr *vtep_ip, + struct prefix *host_prefix); +extern int zebra_vxlan_clear_dup_detect_vni_mac(struct zebra_vrf *zvrf, + vni_t vni, + struct ethaddr *macaddr, + char *errmsg, + size_t errmsg_len); +extern int zebra_vxlan_clear_dup_detect_vni_ip(struct zebra_vrf *zvrf, + vni_t vni, struct ipaddr *ip, + char *errmsg, size_t errmsg_len); +extern int zebra_vxlan_clear_dup_detect_vni_all(struct zebra_vrf *zvrf); +extern int zebra_vxlan_clear_dup_detect_vni(struct zebra_vrf *zvrf, vni_t vni); +extern void zebra_vxlan_handle_result(struct zebra_dplane_ctx *ctx); + +extern void zebra_evpn_init(void); +extern void zebra_vxlan_macvlan_up(struct interface *ifp); +extern void zebra_vxlan_macvlan_down(struct interface *ifp); +extern int vni_list_cmp(void *p1, void *p2); +extern int 
zebra_vxlan_dp_network_mac_add(struct interface *ifp, + struct interface *br_if, + struct ethaddr *macaddr, vlanid_t vid, + uint32_t nhg_id, bool sticky, + bool dp_static); +extern int zebra_vxlan_dp_network_mac_del(struct interface *ifp, + struct interface *br_if, + struct ethaddr *macaddr, + vlanid_t vid); + +#ifdef __cplusplus +} +#endif + +#endif /* _ZEBRA_VXLAN_H */ diff --git a/zebra/zebra_vxlan_private.h b/zebra/zebra_vxlan_private.h new file mode 100644 index 0000000..fb17dac --- /dev/null +++ b/zebra/zebra_vxlan_private.h @@ -0,0 +1,264 @@ +/* + * Zebra VxLAN (EVPN) Data structures and definitions + * These are "internal" to this function. + * Copyright (C) 2016, 2017 Cumulus Networks, Inc. + * + * This file is part of FRR. + * + * FRR is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License as published by the + * Free Software Foundation; either version 2, or (at your option) any + * later version. + * + * FRR is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with FRR; see the file COPYING. If not, write to the Free + * Software Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA + * 02111-1307, USA. 
+ */ + +#ifndef _ZEBRA_VXLAN_PRIVATE_H +#define _ZEBRA_VXLAN_PRIVATE_H + +#include <zebra.h> + +#include "if.h" +#include "linklist.h" +#include "zebra_vxlan.h" +#include "zebra_evpn.h" +#include "zebra_evpn_mac.h" + +#ifdef __cplusplus +extern "C" { +#endif + +#define ERR_STR_SZ 256 + +/* L3 VNI hash table */ +struct zebra_l3vni { + + /* VNI key */ + vni_t vni; + + /* vrf_id */ + vrf_id_t vrf_id; + + uint32_t filter; +#define PREFIX_ROUTES_ONLY (1 << 0) /* l3-vni used for prefix routes only */ + + /* Local IP */ + struct in_addr local_vtep_ip; + + /* kernel interface for l3vni */ + struct interface *vxlan_if; + + /* SVI interface corresponding to the l3vni */ + struct interface *svi_if; + + struct interface *mac_vlan_if; + + /* list of L2 VNIs associated with the L3 VNI */ + struct list *l2vnis; + + /* list of remote router-macs */ + struct hash *rmac_table; + + /* list of remote vtep-ip neigh */ + struct hash *nh_table; +}; + +/* get the vx-intf name for l3vni */ +static inline const char *zl3vni_vxlan_if_name(struct zebra_l3vni *zl3vni) +{ + return zl3vni->vxlan_if ? zl3vni->vxlan_if->name : "None"; +} + +/* get the svi intf name for l3vni */ +static inline const char *zl3vni_svi_if_name(struct zebra_l3vni *zl3vni) +{ + return zl3vni->svi_if ? 
zl3vni->svi_if->name : "None"; +} + +/* get the vrf name for l3vni */ +static inline const char *zl3vni_vrf_name(struct zebra_l3vni *zl3vni) +{ + return vrf_id_to_name(zl3vni->vrf_id); +} + +/* get the rmac string */ +static inline const char *zl3vni_rmac2str(struct zebra_l3vni *zl3vni, char *buf, + int size) +{ + char *ptr; + + if (!buf) + ptr = XMALLOC(MTYPE_TMP, ETHER_ADDR_STRLEN * sizeof(char)); + else { + assert(size >= ETHER_ADDR_STRLEN); + ptr = buf; + } + + if (zl3vni->mac_vlan_if) + snprintf(ptr, (ETHER_ADDR_STRLEN), + "%02x:%02x:%02x:%02x:%02x:%02x", + (uint8_t)zl3vni->mac_vlan_if->hw_addr[0], + (uint8_t)zl3vni->mac_vlan_if->hw_addr[1], + (uint8_t)zl3vni->mac_vlan_if->hw_addr[2], + (uint8_t)zl3vni->mac_vlan_if->hw_addr[3], + (uint8_t)zl3vni->mac_vlan_if->hw_addr[4], + (uint8_t)zl3vni->mac_vlan_if->hw_addr[5]); + else if (zl3vni->svi_if) + snprintf(ptr, (ETHER_ADDR_STRLEN), + "%02x:%02x:%02x:%02x:%02x:%02x", + (uint8_t)zl3vni->svi_if->hw_addr[0], + (uint8_t)zl3vni->svi_if->hw_addr[1], + (uint8_t)zl3vni->svi_if->hw_addr[2], + (uint8_t)zl3vni->svi_if->hw_addr[3], + (uint8_t)zl3vni->svi_if->hw_addr[4], + (uint8_t)zl3vni->svi_if->hw_addr[5]); + else + snprintf(ptr, ETHER_ADDR_STRLEN, "None"); + + return ptr; +} + +/* get the sys mac string */ +static inline const char *zl3vni_sysmac2str(struct zebra_l3vni *zl3vni, + char *buf, int size) +{ + char *ptr; + + if (!buf) + ptr = XMALLOC(MTYPE_TMP, ETHER_ADDR_STRLEN * sizeof(char)); + else { + assert(size >= ETHER_ADDR_STRLEN); + ptr = buf; + } + + if (zl3vni->svi_if) + snprintf(ptr, (ETHER_ADDR_STRLEN), + "%02x:%02x:%02x:%02x:%02x:%02x", + (uint8_t)zl3vni->svi_if->hw_addr[0], + (uint8_t)zl3vni->svi_if->hw_addr[1], + (uint8_t)zl3vni->svi_if->hw_addr[2], + (uint8_t)zl3vni->svi_if->hw_addr[3], + (uint8_t)zl3vni->svi_if->hw_addr[4], + (uint8_t)zl3vni->svi_if->hw_addr[5]); + else + snprintf(ptr, ETHER_ADDR_STRLEN, "None"); + + return ptr; +} + +/* + * l3-vni is oper up when: + * 0. 
if EVPN is enabled (advertise-all-vni cfged) + * 1. it is associated to a vxlan-intf + * 2. Associated vxlan-intf is oper up + * 3. it is associated to an SVI + * 4. associated SVI is oper up + */ +static inline int is_l3vni_oper_up(struct zebra_l3vni *zl3vni) +{ + return (is_evpn_enabled() && zl3vni && (zl3vni->vrf_id != VRF_UNKNOWN) + && zl3vni->vxlan_if && if_is_operative(zl3vni->vxlan_if) + && zl3vni->svi_if && if_is_operative(zl3vni->svi_if)); +} + +static inline const char *zl3vni_state2str(struct zebra_l3vni *zl3vni) +{ + if (!zl3vni) + return NULL; + + if (is_l3vni_oper_up(zl3vni)) + return "Up"; + else + return "Down"; + + return NULL; +} + +static inline vrf_id_t zl3vni_vrf_id(struct zebra_l3vni *zl3vni) +{ + return zl3vni->vrf_id; +} + +static inline void zl3vni_get_svi_rmac(struct zebra_l3vni *zl3vni, + struct ethaddr *rmac) +{ + if (!zl3vni) + return; + + if (!is_l3vni_oper_up(zl3vni)) + return; + + if (zl3vni->svi_if && if_is_operative(zl3vni->svi_if)) + memcpy(rmac->octet, zl3vni->svi_if->hw_addr, ETH_ALEN); +} + + +/* context for neigh hash walk - update l3vni and rmac */ +struct neigh_l3info_walk_ctx { + + struct zebra_evpn *zevpn; + struct zebra_l3vni *zl3vni; + int add; +}; + +struct nh_walk_ctx { + + struct vty *vty; + struct json_object *json; +}; + +extern struct zebra_l3vni *zl3vni_from_vrf(vrf_id_t vrf_id); +extern struct interface *zl3vni_map_to_vxlan_if(struct zebra_l3vni *zl3vni); +extern struct interface *zl3vni_map_to_svi_if(struct zebra_l3vni *zl3vni); +extern struct interface *zl3vni_map_to_mac_vlan_if(struct zebra_l3vni *zl3vni); +extern struct zebra_l3vni *zl3vni_lookup(vni_t vni); +extern vni_t vni_id_from_svi(struct interface *ifp, struct interface *br_if); + +DECLARE_HOOK(zebra_rmac_update, + (struct zebra_mac * rmac, struct zebra_l3vni *zl3vni, bool delete, + const char *reason), + (rmac, zl3vni, delete, reason)); + + +#ifdef __cplusplus +} +#endif + +/* + * Multicast hash table. + * + * This table contains - + * 1. 
The (S, G) entries used for encapsulating and forwarding BUM traffic. + * S is the local VTEP-IP and G is a BUM mcast group address. + * 2. The (X, G) entries used for terminating a BUM flow. + * Multiple L2-VNIs can share the same MDT hence the need to maintain + * an aggregated table that pimd can consume without much + * re-interpretation. + */ +struct zebra_vxlan_sg { + struct zebra_vrf *zvrf; + + struct prefix_sg sg; + char sg_str[PREFIX_SG_STR_LEN]; + + /* For SG - num of L2 VNIs using this entry for sending BUM traffic */ + /* For XG - num of SG using this as parent */ + uint32_t ref_cnt; +}; + +extern struct zebra_evpn *zevpn_lookup(vni_t vni); +extern void zebra_vxlan_sync_mac_dp_install(struct zebra_mac *mac, + bool set_inactive, + bool force_clear_static, + const char *caller); +extern bool zebra_evpn_do_dup_addr_detect(struct zebra_vrf *zvrf); + +#endif /* _ZEBRA_VXLAN_PRIVATE_H */ diff --git a/zebra/zserv.c b/zebra/zserv.c new file mode 100644 index 0000000..a99dce0 --- /dev/null +++ b/zebra/zserv.c @@ -0,0 +1,1324 @@ +/* + * Zebra API server. + * Portions: + * Copyright (C) 1997-1999 Kunihiro Ishiguro + * Copyright (C) 2015-2018 Cumulus Networks, Inc. + * et al. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License as published by the Free + * Software Foundation; either version 2 of the License, or (at your option) + * any later version. + * + * This program is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for + * more details. 
+ * + * You should have received a copy of the GNU General Public License along + * with this program; see the file COPYING; if not, write to the Free Software + * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA + */ + +#include <zebra.h> + +/* clang-format off */ +#include <errno.h> /* for errno */ +#include <netinet/in.h> /* for sockaddr_in */ +#include <stdint.h> /* for uint8_t */ +#include <stdio.h> /* for snprintf */ +#include <sys/socket.h> /* for sockaddr_storage, AF_UNIX, accept... */ +#include <sys/stat.h> /* for umask, mode_t */ +#include <sys/un.h> /* for sockaddr_un */ +#include <time.h> /* for NULL, tm, gmtime, time_t */ +#include <unistd.h> /* for close, unlink, ssize_t */ + +#include "lib/buffer.h" /* for BUFFER_EMPTY, BUFFER_ERROR, BUFFE... */ +#include "lib/command.h" /* for vty, install_element, CMD_SUCCESS... */ +#include "lib/hook.h" /* for DEFINE_HOOK, DEFINE_KOOH, hook_call */ +#include "lib/linklist.h" /* for ALL_LIST_ELEMENTS_RO, ALL_LIST_EL... */ +#include "lib/libfrr.h" /* for frr_zclient_addr */ +#include "lib/log.h" /* for zlog_warn, zlog_debug, safe_strerror */ +#include "lib/memory.h" /* for MTYPE_TMP, XCALLOC, XFREE */ +#include "lib/monotime.h" /* for monotime, ONE_DAY_SECOND, ONE_WEE... */ +#include "lib/network.h" /* for set_nonblocking */ +#include "lib/privs.h" /* for zebra_privs_t, ZPRIVS_LOWER, ZPRI... */ +#include "lib/route_types.h" /* for ZEBRA_ROUTE_MAX */ +#include "lib/sockopt.h" /* for setsockopt_so_recvbuf, setsockopt... */ +#include "lib/sockunion.h" /* for sockopt_reuseaddr, sockopt_reuseport */ +#include "lib/stream.h" /* for STREAM_SIZE, stream (ptr only), ... */ +#include "lib/thread.h" /* for thread (ptr only), THREAD_ARG, ... */ +#include "lib/vrf.h" /* for vrf_info_lookup, VRF_DEFAULT */ +#include "lib/vty.h" /* for vty_out, vty (ptr only) */ +#include "lib/zclient.h" /* for zmsghdr, ZEBRA_HEADER_SIZE, ZEBRA... */ +#include "lib/frr_pthread.h" /* for frr_pthread_new, frr_pthread_stop... 
*/ +#include "lib/frratomic.h" /* for atomic_load_explicit, atomic_stor... */ +#include "lib/lib_errors.h" /* for generic ferr ids */ +#include "lib/printfrr.h" /* for string functions */ + +#include "zebra/debug.h" /* for various debugging macros */ +#include "zebra/rib.h" /* for rib_score_proto */ +#include "zebra/zapi_msg.h" /* for zserv_handle_commands */ +#include "zebra/zebra_vrf.h" /* for zebra_vrf_lookup_by_id, zvrf */ +#include "zebra/zserv.h" /* for zserv */ +#include "zebra/zebra_router.h" +#include "zebra/zebra_errors.h" /* for error messages */ +/* clang-format on */ + +/* privileges */ +extern struct zebra_privs_t zserv_privs; + +/* The listener socket for clients connecting to us */ +static int zsock; + +/* The lock that protects access to zapi client objects */ +static pthread_mutex_t client_mutex; + +static struct zserv *find_client_internal(uint8_t proto, + unsigned short instance, + uint32_t session_id); + + +/* + * Client thread events. + * + * These are used almost exclusively by client threads to drive their own event + * loops. The only exception is in zserv_client_create(), which pushes an + * initial ZSERV_CLIENT_READ event to start the API handler loop. + */ +enum zserv_client_event { + /* Schedule a socket read */ + ZSERV_CLIENT_READ, + /* Schedule a buffer write */ + ZSERV_CLIENT_WRITE, +}; + +/* + * Main thread events. + * + * These are used by client threads to notify the main thread about various + * events and to make processing requests. + */ +enum zserv_event { + /* Schedule listen job on Zebra API socket */ + ZSERV_ACCEPT, + /* The calling client has packets on its input buffer */ + ZSERV_PROCESS_MESSAGES, + /* The calling client wishes to be killed */ + ZSERV_HANDLE_CLIENT_FAIL, +}; + +/* + * Zebra server event driver for all client threads. + * + * This is essentially a wrapper around thread_add_event() that centralizes + * those scheduling calls into one place. 
+ * + * All calls to this function schedule an event on the pthread running the + * provided client. + * + * client + * the client in question, and thread target + * + * event + * the event to notify them about + */ +static void zserv_client_event(struct zserv *client, + enum zserv_client_event event); + +/* + * Zebra server event driver for the main thread. + * + * This is essentially a wrapper around thread_add_event() that centralizes + * those scheduling calls into one place. + * + * All calls to this function schedule an event on Zebra's main pthread. + * + * client + * the client in question + * + * event + * the event to notify the main thread about + */ +static void zserv_event(struct zserv *client, enum zserv_event event); + + +/* Client thread lifecycle -------------------------------------------------- */ + +/* + * Log zapi message to zlog. + * + * errmsg (optional) + * Debugging message + * + * msg + * The message + * + * hdr (optional) + * The message header + */ +void zserv_log_message(const char *errmsg, struct stream *msg, + struct zmsghdr *hdr) +{ + zlog_debug("Rx'd ZAPI message"); + if (errmsg) + zlog_debug("%s", errmsg); + if (hdr) { + zlog_debug(" Length: %d", hdr->length); + zlog_debug("Command: %s", zserv_command_string(hdr->command)); + zlog_debug(" VRF: %u", hdr->vrf_id); + } + stream_hexdump(msg); +} + +/* + * Gracefuly shut down a client connection. + * + * Cancel any pending tasks for the client's thread. Then schedule a task on + * the main thread to shut down the calling thread. + * + * It is not safe to close the client socket in this function. The socket is + * owned by the main thread. + * + * Must be called from the client pthread, never the main thread. 
+ */ +static void zserv_client_fail(struct zserv *client) +{ + flog_warn(EC_ZEBRA_CLIENT_IO_ERROR, + "Client '%s' encountered an error and is shutting down.", + zebra_route_string(client->proto)); + + atomic_store_explicit(&client->pthread->running, false, + memory_order_relaxed); + + THREAD_OFF(client->t_read); + THREAD_OFF(client->t_write); + zserv_event(client, ZSERV_HANDLE_CLIENT_FAIL); +} + +/* + * Write all pending messages to client socket. + * + * This function first attempts to flush any buffered data. If unsuccessful, + * the function reschedules itself and returns. If successful, it pops all + * available messages from the output queue and continues to write data + * directly to the socket until the socket would block. If the socket never + * blocks and all data is written, the function returns without rescheduling + * itself. If the socket ends up throwing EWOULDBLOCK, the remaining data is + * buffered and the function reschedules itself. + * + * The utility of the buffer is that it allows us to vastly reduce lock + * contention by allowing us to pop *all* messages off the output queue at once + * instead of locking and unlocking each time we want to pop a single message + * off the queue. The same thing could arguably be accomplished faster by + * allowing the main thread to write directly into the buffer instead of + * enqueuing packets onto an intermediary queue, but the intermediary queue + * allows us to expose information about input and output queues to the user in + * terms of number of packets rather than size of data. 
/*
 * Flush buffered data and all queued outgoing messages to the client
 * socket.  Runs on the client's own pthread (see the long explanation
 * in the comment preceding this function).
 */
static void zserv_write(struct thread *thread)
{
	struct zserv *client = THREAD_ARG(thread);
	struct stream *msg;
	uint32_t wcmd = 0; /* command code of the newest message written */
	struct stream_fifo *cache;
	uint64_t time_now = monotime(NULL);

	/* If we have any data pending, try to flush it first */
	switch (buffer_flush_all(client->wb, client->sock)) {
	case BUFFER_ERROR:
		goto zwrite_fail;
	case BUFFER_PENDING:
		/* Socket would block; note the attempt and retry later. */
		frr_with_mutex (&client->stats_mtx) {
			client->last_write_time = time_now;
		}
		zserv_client_event(client, ZSERV_CLIENT_WRITE);
		return;
	case BUFFER_EMPTY:
		break;
	}

	/* Drain the shared output queue into a private fifo in a single
	 * locked pass, to minimize contention with the producer thread. */
	cache = stream_fifo_new();

	frr_with_mutex (&client->obuf_mtx) {
		while (stream_fifo_head(client->obuf_fifo))
			stream_fifo_push(cache,
					 stream_fifo_pop(client->obuf_fifo));
	}

	/* Peek at the newest (tail) message's command for statistics. */
	if (cache->tail) {
		msg = cache->tail;
		stream_set_getp(msg, 0);
		wcmd = stream_getw_from(msg, ZAPI_HEADER_CMD_LOCATION);
	}

	/* Copy every cached message into the write buffer and free it. */
	while (stream_fifo_head(cache)) {
		msg = stream_fifo_pop(cache);
		buffer_put(client->wb, STREAM_DATA(msg), stream_get_endp(msg));
		stream_free(msg);
	}

	stream_fifo_free(cache);

	/* If we have any data pending, try to flush it first */
	switch (buffer_flush_all(client->wb, client->sock)) {
	case BUFFER_ERROR:
		goto zwrite_fail;
	case BUFFER_PENDING:
		frr_with_mutex (&client->stats_mtx) {
			client->last_write_time = time_now;
		}
		zserv_client_event(client, ZSERV_CLIENT_WRITE);
		return;
	case BUFFER_EMPTY:
		break;
	}

	/* Everything written; record stats.  Not rescheduled — the next
	 * zserv_send_message()/zserv_send_batch() will schedule us again. */
	frr_with_mutex (&client->stats_mtx) {
		client->last_write_cmd = wcmd;
		client->last_write_time = time_now;
	}
	return;

zwrite_fail:
	flog_warn(EC_ZEBRA_CLIENT_WRITE_FAILED,
		  "%s: could not write to %s [fd = %d], closing.", __func__,
		  zebra_route_string(client->proto), client->sock);
	zserv_client_fail(client);
}
/*
 * Read up to zrouter.packets_to_process complete ZAPI messages from the
 * client socket, push them onto the client's input queue and notify the
 * main thread.  Runs on the client's pthread; see the long explanation
 * in the comment preceding this function.
 */
static void zserv_read(struct thread *thread)
{
	struct zserv *client = THREAD_ARG(thread);
	int sock;
	size_t already;
	struct stream_fifo *cache;
	uint32_t p2p_orig; /* read quota for this invocation */

	uint32_t p2p; /* remaining quota; counts down per message */
	struct zmsghdr hdr;

	p2p_orig = atomic_load_explicit(&zrouter.packets_to_process,
					memory_order_relaxed);
	cache = stream_fifo_new();
	p2p = p2p_orig;
	sock = THREAD_FD(thread);

	while (p2p) {
		ssize_t nb;
		bool hdrvalid;
		char errmsg[256];

		/* Non-zero endp means a partial message is carried over
		 * from a previous invocation; resume where we stopped. */
		already = stream_get_endp(client->ibuf_work);

		/* Read length and command (if we don't have it already). */
		if (already < ZEBRA_HEADER_SIZE) {
			nb = stream_read_try(client->ibuf_work, sock,
					     ZEBRA_HEADER_SIZE - already);
			if ((nb == 0 || nb == -1)) {
				if (IS_ZEBRA_DEBUG_EVENT)
					zlog_debug("connection closed socket [%d]",
						   sock);
				goto zread_fail;
			}
			if (nb != (ssize_t)(ZEBRA_HEADER_SIZE - already)) {
				/* Try again later. */
				break;
			}
			already = ZEBRA_HEADER_SIZE;
		}

		/* Reset to read from the beginning of the incoming packet. */
		stream_set_getp(client->ibuf_work, 0);

		/* Fetch header values */
		hdrvalid = zapi_parse_header(client->ibuf_work, &hdr);

		if (!hdrvalid) {
			snprintf(errmsg, sizeof(errmsg),
				 "%s: Message has corrupt header", __func__);
			zserv_log_message(errmsg, client->ibuf_work, NULL);
			goto zread_fail;
		}

		/* Validate header */
		if (hdr.marker != ZEBRA_HEADER_MARKER
		    || hdr.version != ZSERV_VERSION) {
			snprintf(
				errmsg, sizeof(errmsg),
				"Message has corrupt header\n%s: socket %d version mismatch, marker %d, version %d",
				__func__, sock, hdr.marker, hdr.version);
			zserv_log_message(errmsg, client->ibuf_work, &hdr);
			goto zread_fail;
		}
		if (hdr.length < ZEBRA_HEADER_SIZE) {
			snprintf(
				errmsg, sizeof(errmsg),
				"Message has corrupt header\n%s: socket %d message length %u is less than header size %d",
				__func__, sock, hdr.length, ZEBRA_HEADER_SIZE);
			zserv_log_message(errmsg, client->ibuf_work, &hdr);
			goto zread_fail;
		}
		if (hdr.length > STREAM_SIZE(client->ibuf_work)) {
			snprintf(
				errmsg, sizeof(errmsg),
				"Message has corrupt header\n%s: socket %d message length %u exceeds buffer size %lu",
				__func__, sock, hdr.length,
				(unsigned long)STREAM_SIZE(client->ibuf_work));
			zserv_log_message(errmsg, client->ibuf_work, &hdr);
			goto zread_fail;
		}

		/* Read rest of data. */
		if (already < hdr.length) {
			nb = stream_read_try(client->ibuf_work, sock,
					     hdr.length - already);
			if ((nb == 0 || nb == -1)) {
				if (IS_ZEBRA_DEBUG_EVENT)
					zlog_debug(
						"connection closed [%d] when reading zebra data",
						sock);
				goto zread_fail;
			}
			if (nb != (ssize_t)(hdr.length - already)) {
				/* Try again later. */
				break;
			}
		}

		/* Debug packet information. */
		if (IS_ZEBRA_DEBUG_PACKET)
			zlog_debug("zebra message[%s:%u:%u] comes from socket [%d]",
				   zserv_command_string(hdr.command),
				   hdr.vrf_id, hdr.length,
				   sock);

		/* Message complete; duplicate it into the cache and clear
		 * the working buffer for the next message. */
		stream_set_getp(client->ibuf_work, 0);
		struct stream *msg = stream_dup(client->ibuf_work);

		stream_fifo_push(cache, msg);
		stream_reset(client->ibuf_work);
		p2p--;
	}

	/* p2p < p2p_orig implies at least one full message was parsed, so
	 * hdr holds the header of the last message read. */
	if (p2p < p2p_orig) {
		uint64_t time_now = monotime(NULL);

		/* update session statistics */
		frr_with_mutex (&client->stats_mtx) {
			client->last_read_time = time_now;
			client->last_read_cmd = hdr.command;
		}

		/* publish read packets on client's input queue */
		frr_with_mutex (&client->ibuf_mtx) {
			while (cache->head)
				stream_fifo_push(client->ibuf_fifo,
						 stream_fifo_pop(cache));
		}

		/* Schedule job to process those packets */
		zserv_event(client, ZSERV_PROCESS_MESSAGES);

	}

	if (IS_ZEBRA_DEBUG_PACKET)
		zlog_debug("Read %d packets from client: %s", p2p_orig - p2p,
			   zebra_route_string(client->proto));

	/* Reschedule ourselves */
	zserv_client_event(client, ZSERV_CLIENT_READ);

	stream_fifo_free(cache);

	return;

zread_fail:
	/* Any read/validation failure terminates the client session. */
	stream_fifo_free(cache);
	zserv_client_fail(client);
}

/*
 * Schedule a read or write task on the pthread that owns this client.
 */
static void zserv_client_event(struct zserv *client,
			       enum zserv_client_event event)
{
	switch (event) {
	case ZSERV_CLIENT_READ:
		thread_add_read(client->pthread->master, zserv_read, client,
				client->sock, &client->t_read);
		break;
	case ZSERV_CLIENT_WRITE:
		thread_add_write(client->pthread->master, zserv_write, client,
				 client->sock, &client->t_write);
		break;
	}
}
/*
 * Pop up to zrouter.packets_to_process messages off a client's input
 * queue and execute their handlers.  Runs on the main pthread; scheduled
 * by the client's I/O pthread (see the comment preceding this function).
 */
static void zserv_process_messages(struct thread *thread)
{
	struct zserv *client = THREAD_ARG(thread);
	struct stream *msg;
	struct stream_fifo *cache = stream_fifo_new();
	/* NOTE(review): read without atomic_load here, unlike zserv_read —
	 * presumably benign since this only bounds batch size; confirm. */
	uint32_t p2p = zrouter.packets_to_process;
	bool need_resched = false;

	/* Move a bounded batch off the shared input queue under the lock. */
	frr_with_mutex (&client->ibuf_mtx) {
		uint32_t i;
		for (i = 0; i < p2p && stream_fifo_head(client->ibuf_fifo);
		     ++i) {
			msg = stream_fifo_pop(client->ibuf_fifo);
			stream_fifo_push(cache, msg);
		}

		msg = NULL;

		/* Need to reschedule processing work if there are still
		 * packets in the fifo.
		 */
		if (stream_fifo_head(client->ibuf_fifo))
			need_resched = true;
	}

	/* Process the batch of messages */
	if (stream_fifo_head(cache))
		zserv_handle_commands(client, cache);

	stream_fifo_free(cache);

	/* Reschedule ourselves if necessary */
	if (need_resched)
		zserv_event(client, ZSERV_PROCESS_MESSAGES);
}

/*
 * Enqueue a single message on a client's output queue and schedule the
 * client's pthread to write it.  Takes ownership of msg.  Returns 0.
 */
int zserv_send_message(struct zserv *client, struct stream *msg)
{
	frr_with_mutex (&client->obuf_mtx) {
		stream_fifo_push(client->obuf_fifo, msg);
	}

	zserv_client_event(client, ZSERV_CLIENT_WRITE);

	return 0;
}

/*
 * Send a batch of messages to a connected Zebra API client.
 * Drains fifo (taking ownership of each stream) onto the client's
 * output queue under a single lock acquisition.  Returns 0.
 */
int zserv_send_batch(struct zserv *client, struct stream_fifo *fifo)
{
	struct stream *msg;

	frr_with_mutex (&client->obuf_mtx) {
		msg = stream_fifo_pop(fifo);
		while (msg) {
			stream_fifo_push(client->obuf_fifo, msg);
			msg = stream_fifo_pop(fifo);
		}
	}

	zserv_client_event(client, ZSERV_CLIENT_WRITE);

	return 0;
}

/* Hooks for client connect / disconnect */
DEFINE_HOOK(zserv_client_connect, (struct zserv *client), (client));
DEFINE_KOOH(zserv_client_close, (struct zserv *client), (client));
*/ + if (client->sock) { + unsigned long nroutes; + unsigned long nnhgs; + + close(client->sock); + + if (DYNAMIC_CLIENT_GR_DISABLED(client)) { + zebra_mpls_client_cleanup_vrf_label(client->proto); + + nroutes = rib_score_proto(client->proto, + client->instance); + zlog_notice( + "client %d disconnected %lu %s routes removed from the rib", + client->sock, nroutes, + zebra_route_string(client->proto)); + + /* Not worrying about instance for now */ + nnhgs = zebra_nhg_score_proto(client->proto); + zlog_notice( + "client %d disconnected %lu %s nhgs removed from the rib", + client->sock, nnhgs, + zebra_route_string(client->proto)); + } + client->sock = -1; + } + + /* Free stream buffers. */ + if (client->ibuf_work) + stream_free(client->ibuf_work); + if (client->obuf_work) + stream_free(client->obuf_work); + if (client->ibuf_fifo) + stream_fifo_free(client->ibuf_fifo); + if (client->obuf_fifo) + stream_fifo_free(client->obuf_fifo); + if (client->wb) + buffer_free(client->wb); + + /* Free buffer mutexes */ + pthread_mutex_destroy(&client->stats_mtx); + pthread_mutex_destroy(&client->obuf_mtx); + pthread_mutex_destroy(&client->ibuf_mtx); + + /* Free bitmaps. */ + for (afi_t afi = AFI_IP; afi < AFI_MAX; afi++) { + for (int i = 0; i < ZEBRA_ROUTE_MAX; i++) { + vrf_bitmap_free(client->redist[afi][i]); + redist_del_all_instances(&client->mi_redist[afi][i]); + } + + vrf_bitmap_free(client->redist_default[afi]); + vrf_bitmap_free(client->ridinfo[afi]); + vrf_bitmap_free(client->nhrp_neighinfo[afi]); + } + + /* + * If any instance are graceful restart enabled, + * client is not deleted + */ + if (DYNAMIC_CLIENT_GR_DISABLED(client)) { + if (IS_ZEBRA_DEBUG_EVENT) + zlog_debug("%s: Deleting client %s", __func__, + zebra_route_string(client->proto)); + XFREE(MTYPE_TMP, client); + } else { + /* Handle cases where client has GR instance. 
/*
 * Tear down a client session: stop and destroy its I/O pthread, cancel
 * any main-thread tasks for it, and free the client object unless it is
 * still referenced (busy) from another pthread, in which case final
 * cleanup is deferred to zserv_release_client().  Runs on the main
 * pthread.
 */
void zserv_close_client(struct zserv *client)
{
	bool free_p = true;

	if (client->pthread) {
		/* synchronously stop and join pthread */
		frr_pthread_stop(client->pthread, NULL);

		if (IS_ZEBRA_DEBUG_EVENT)
			zlog_debug("Closing client '%s'",
				   zebra_route_string(client->proto));

		/* Drop any tasks the dead session still has scheduled on
		 * the main event loop. */
		thread_cancel_event(zrouter.master, client);
		THREAD_OFF(client->t_cleanup);
		THREAD_OFF(client->t_process);

		/* destroy pthread */
		frr_pthread_destroy(client->pthread);
		client->pthread = NULL;
	}

	/*
	 * Final check in case the client struct is in use in another
	 * pthread: if not in-use, continue and free the client
	 */
	frr_with_mutex (&client_mutex) {
		if (client->busy_count <= 0) {
			/* remove from client list */
			listnode_delete(zrouter.client_list, client);
		} else {
			/*
			 * The client session object may be in use, although
			 * the associated pthread is gone. Defer final
			 * cleanup.
			 */
			client->is_closed = true;
			free_p = false;
		}
	}

	/* delete client */
	if (free_p)
		zserv_client_free(client);
}
/*
 * Allocate and initialize a client object for a freshly accept()'d
 * connection on fd 'sock': buffers, mutexes, redistribution bitmaps and
 * the client's dedicated I/O pthread.  Registers the client on
 * zrouter.client_list, fires the zserv_client_connect hook and starts
 * the pthread.  Returns the new client.
 */
static struct zserv *zserv_client_create(int sock)
{
	struct zserv *client;
	/* Working buffers must hold the larger of a max ZAPI packet and a
	 * zapi_route structure. */
	size_t stream_size =
		MAX(ZEBRA_MAX_PACKET_SIZ, sizeof(struct zapi_route));
	int i;
	afi_t afi;

	client = XCALLOC(MTYPE_TMP, sizeof(struct zserv));

	/* Make client input/output buffer. */
	client->sock = sock;
	client->ibuf_fifo = stream_fifo_new();
	client->obuf_fifo = stream_fifo_new();
	client->ibuf_work = stream_new(stream_size);
	client->obuf_work = stream_new(stream_size);
	client->connect_time = monotime(NULL);
	pthread_mutex_init(&client->ibuf_mtx, NULL);
	pthread_mutex_init(&client->obuf_mtx, NULL);
	pthread_mutex_init(&client->stats_mtx, NULL);
	client->wb = buffer_new(0);
	TAILQ_INIT(&(client->gr_info_queue));

	/* Initialize flags */
	for (afi = AFI_IP; afi < AFI_MAX; afi++) {
		for (i = 0; i < ZEBRA_ROUTE_MAX; i++)
			client->redist[afi][i] = vrf_bitmap_init();
		client->redist_default[afi] = vrf_bitmap_init();
		client->ridinfo[afi] = vrf_bitmap_init();
		client->nhrp_neighinfo[afi] = vrf_bitmap_init();
	}

	/* Add this client to linked list. */
	frr_with_mutex (&client_mutex) {
		listnode_add(zrouter.client_list, client);
	}

	struct frr_pthread_attr zclient_pthr_attrs = {
		.start = frr_pthread_attr_default.start,
		.stop = frr_pthread_attr_default.stop
	};
	client->pthread =
		frr_pthread_new(&zclient_pthr_attrs, "Zebra API client thread",
				"zebra_apic");

	/* start read loop */
	zserv_client_event(client, ZSERV_CLIENT_READ);

	/* call callbacks */
	hook_call(zserv_client_connect, client);

	/* start pthread */
	frr_pthread_run(client->pthread, NULL);

	return client;
}
/*
 * Look up a client by {proto, instance, session_id} and mark it in-use
 * (busy_count++) under the client-list lock, so it cannot be freed by
 * another pthread while the caller holds it.  Returns NULL if no match
 * exists or the matching session is already closed.  Must be paired
 * with zserv_release_client().
 */
struct zserv *zserv_acquire_client(uint8_t proto, unsigned short instance,
				   uint32_t session_id)
{
	struct zserv *client = NULL;

	frr_with_mutex (&client_mutex) {
		client = find_client_internal(proto, instance, session_id);
		if (client) {
			/* Don't return a dead/closed client object */
			if (client->is_closed)
				client = NULL;
			else
				client->busy_count++;
		}
	}

	return client;
}

/*
 * Drop the in-use reference taken by zserv_acquire_client().  If this
 * was the last reference and the session has been closed meanwhile,
 * schedule final cleanup on the main pthread.  The caller must not
 * touch the client pointer afterwards.
 */
void zserv_release_client(struct zserv *client)
{
	/*
	 * Once we've decremented the client object's refcount, it's possible
	 * for it to be deleted as soon as we release the lock, so we won't
	 * touch the object again.
	 */
	frr_with_mutex (&client_mutex) {
		client->busy_count--;

		if (client->busy_count <= 0) {
			/*
			 * No more users of the client object. If the client
			 * session is closed, schedule cleanup on the zebra
			 * main pthread.
			 */
			if (client->is_closed)
				thread_add_event(zrouter.master,
						 zserv_handle_client_fail,
						 client, 0, &client->t_cleanup);
		}
	}

	/*
	 * Cleanup must take place on the zebra main pthread, so we've
	 * scheduled an event.
	 */
}
*/ + set_nonblocking(client_sock); + + /* Create new zebra client. */ + zserv_client_create(client_sock); +} + +void zserv_close(void) +{ + /* + * On shutdown, let's close the socket down + * so that long running processes of killing the + * routing table doesn't leave us in a bad + * state where a client tries to reconnect + */ + close(zsock); + zsock = -1; + + /* Free client list's mutex */ + pthread_mutex_destroy(&client_mutex); +} + +void zserv_start(char *path) +{ + int ret; + mode_t old_mask; + struct sockaddr_storage sa; + socklen_t sa_len; + + if (!frr_zclient_addr(&sa, &sa_len, path)) + /* should be caught in zebra main() */ + return; + + /* Set umask */ + old_mask = umask(0077); + + /* Make UNIX domain socket. */ + zsock = socket(sa.ss_family, SOCK_STREAM, 0); + if (zsock < 0) { + flog_err_sys(EC_LIB_SOCKET, "Can't create zserv socket: %s", + safe_strerror(errno)); + return; + } + + if (sa.ss_family != AF_UNIX) { + sockopt_reuseaddr(zsock); + sockopt_reuseport(zsock); + } else { + struct sockaddr_un *suna = (struct sockaddr_un *)&sa; + if (suna->sun_path[0]) + unlink(suna->sun_path); + } + + setsockopt_so_recvbuf(zsock, 1048576); + setsockopt_so_sendbuf(zsock, 1048576); + + frr_with_privs((sa.ss_family != AF_UNIX) ? 
&zserv_privs : NULL) { + ret = bind(zsock, (struct sockaddr *)&sa, sa_len); + } + if (ret < 0) { + flog_err_sys(EC_LIB_SOCKET, "Can't bind zserv socket on %s: %s", + path, safe_strerror(errno)); + close(zsock); + zsock = -1; + return; + } + + ret = listen(zsock, 5); + if (ret < 0) { + flog_err_sys(EC_LIB_SOCKET, + "Can't listen to zserv socket %s: %s", path, + safe_strerror(errno)); + close(zsock); + zsock = -1; + return; + } + + umask(old_mask); + + zserv_event(NULL, ZSERV_ACCEPT); +} + +void zserv_event(struct zserv *client, enum zserv_event event) +{ + switch (event) { + case ZSERV_ACCEPT: + thread_add_read(zrouter.master, zserv_accept, NULL, zsock, + NULL); + break; + case ZSERV_PROCESS_MESSAGES: + thread_add_event(zrouter.master, zserv_process_messages, client, + 0, &client->t_process); + break; + case ZSERV_HANDLE_CLIENT_FAIL: + thread_add_event(zrouter.master, zserv_handle_client_fail, + client, 0, &client->t_cleanup); + } +} + + +/* General purpose ---------------------------------------------------------- */ + +#define ZEBRA_TIME_BUF 32 +static char *zserv_time_buf(time_t *time1, char *buf, int buflen) +{ + time_t now; + + assert(buf != NULL); + assert(buflen >= ZEBRA_TIME_BUF); + assert(time1 != NULL); + + if (!*time1) { + snprintf(buf, buflen, "never "); + return (buf); + } + + now = monotime(NULL); + now -= *time1; + + frrtime_to_interval(now, buf, buflen); + + return buf; +} + +/* Display client info details */ +static void zebra_show_client_detail(struct vty *vty, struct zserv *client) +{ + char cbuf[ZEBRA_TIME_BUF], rbuf[ZEBRA_TIME_BUF]; + char wbuf[ZEBRA_TIME_BUF], nhbuf[ZEBRA_TIME_BUF], mbuf[ZEBRA_TIME_BUF]; + time_t connect_time, last_read_time, last_write_time; + uint32_t last_read_cmd, last_write_cmd; + + vty_out(vty, "Client: %s", zebra_route_string(client->proto)); + if (client->instance) + vty_out(vty, " Instance: %u", client->instance); + if (client->session_id) + vty_out(vty, " [%u]", client->session_id); + vty_out(vty, "\n"); + + 
vty_out(vty, "------------------------ \n"); + vty_out(vty, "FD: %d \n", client->sock); + + frr_with_mutex (&client->stats_mtx) { + connect_time = client->connect_time; + last_read_time = client->last_read_time; + last_write_time = client->last_write_time; + + last_read_cmd = client->last_read_cmd; + last_write_cmd = client->last_write_cmd; + } + + vty_out(vty, "Connect Time: %s \n", + zserv_time_buf(&connect_time, cbuf, ZEBRA_TIME_BUF)); + if (client->nh_reg_time) { + vty_out(vty, "Nexthop Registry Time: %s \n", + zserv_time_buf(&client->nh_reg_time, nhbuf, + ZEBRA_TIME_BUF)); + if (client->nh_last_upd_time) + vty_out(vty, "Nexthop Last Update Time: %s \n", + zserv_time_buf(&client->nh_last_upd_time, mbuf, + ZEBRA_TIME_BUF)); + else + vty_out(vty, "No Nexthop Update sent\n"); + } else + vty_out(vty, "Not registered for Nexthop Updates\n"); + + vty_out(vty, "Client will %sbe notified about it's routes status\n", + client->notify_owner ? "" : "Not "); + + vty_out(vty, "Last Msg Rx Time: %s \n", + zserv_time_buf(&last_read_time, rbuf, ZEBRA_TIME_BUF)); + vty_out(vty, "Last Msg Tx Time: %s \n", + zserv_time_buf(&last_write_time, wbuf, ZEBRA_TIME_BUF)); + if (last_read_cmd) + vty_out(vty, "Last Rcvd Cmd: %s \n", + zserv_command_string(last_read_cmd)); + if (last_write_cmd) + vty_out(vty, "Last Sent Cmd: %s \n", + zserv_command_string(last_write_cmd)); + vty_out(vty, "\n"); + + vty_out(vty, "Type Add Update Del \n"); + vty_out(vty, "================================================== \n"); + vty_out(vty, "IPv4 %-12u%-12u%-12u\n", client->v4_route_add_cnt, + client->v4_route_upd8_cnt, client->v4_route_del_cnt); + vty_out(vty, "IPv6 %-12u%-12u%-12u\n", client->v6_route_add_cnt, + client->v6_route_upd8_cnt, client->v6_route_del_cnt); + vty_out(vty, "Redist:v4 %-12u%-12u%-12u\n", client->redist_v4_add_cnt, + 0, client->redist_v4_del_cnt); + vty_out(vty, "Redist:v6 %-12u%-12u%-12u\n", client->redist_v6_add_cnt, + 0, client->redist_v6_del_cnt); + vty_out(vty, "VRF 
%-12u%-12u%-12u\n", client->vrfadd_cnt, 0, + client->vrfdel_cnt); + vty_out(vty, "Connected %-12u%-12u%-12u\n", client->ifadd_cnt, 0, + client->ifdel_cnt); + vty_out(vty, "Interface %-12u%-12u%-12u\n", client->ifup_cnt, 0, + client->ifdown_cnt); + vty_out(vty, "Intf Addr %-12u%-12u%-12u\n", + client->connected_rt_add_cnt, 0, client->connected_rt_del_cnt); + vty_out(vty, "BFD peer %-12u%-12u%-12u\n", client->bfd_peer_add_cnt, + client->bfd_peer_upd8_cnt, client->bfd_peer_del_cnt); + vty_out(vty, "NHT v4 %-12u%-12u%-12u\n", + client->v4_nh_watch_add_cnt, 0, client->v4_nh_watch_rem_cnt); + vty_out(vty, "NHT v6 %-12u%-12u%-12u\n", + client->v6_nh_watch_add_cnt, 0, client->v6_nh_watch_rem_cnt); + vty_out(vty, "VxLAN SG %-12u%-12u%-12u\n", client->vxlan_sg_add_cnt, + 0, client->vxlan_sg_del_cnt); + vty_out(vty, "VNI %-12u%-12u%-12u\n", client->vniadd_cnt, 0, + client->vnidel_cnt); + vty_out(vty, "L3-VNI %-12u%-12u%-12u\n", client->l3vniadd_cnt, 0, + client->l3vnidel_cnt); + vty_out(vty, "MAC-IP %-12u%-12u%-12u\n", client->macipadd_cnt, 0, + client->macipdel_cnt); + vty_out(vty, "ES %-12u%-12u%-12u\n", client->local_es_add_cnt, + 0, client->local_es_del_cnt); + vty_out(vty, "ES-EVI %-12u%-12u%-12u\n", + client->local_es_evi_add_cnt, 0, client->local_es_evi_del_cnt); + vty_out(vty, "Errors: %u\n", client->error_cnt); + +#if defined DEV_BUILD + vty_out(vty, "Input Fifo: %zu:%zu Output Fifo: %zu:%zu\n", + client->ibuf_fifo->count, client->ibuf_fifo->max_count, + client->obuf_fifo->count, client->obuf_fifo->max_count); +#endif + vty_out(vty, "\n"); +} + +/* Display stale client information */ +static void zebra_show_stale_client_detail(struct vty *vty, + struct zserv *client) +{ + char buf[PREFIX2STR_BUFFER]; + time_t uptime; + struct client_gr_info *info = NULL; + struct zserv *s = NULL; + bool first_p = true; + + TAILQ_FOREACH (info, &client->gr_info_queue, gr_info) { + if (first_p) { + vty_out(vty, "Stale Client Information\n"); + vty_out(vty, 
"------------------------\n"); + + if (client->instance) + vty_out(vty, " Instance: %u", client->instance); + if (client->session_id) + vty_out(vty, " [%u]", client->session_id); + + first_p = false; + } + + vty_out(vty, "VRF : %s\n", vrf_id_to_name(info->vrf_id)); + vty_out(vty, "Capabilities : "); + switch (info->capabilities) { + case ZEBRA_CLIENT_GR_CAPABILITIES: + vty_out(vty, "Graceful Restart(%u seconds)\n", + info->stale_removal_time); + break; + case ZEBRA_CLIENT_ROUTE_UPDATE_COMPLETE: + case ZEBRA_CLIENT_ROUTE_UPDATE_PENDING: + case ZEBRA_CLIENT_GR_DISABLE: + case ZEBRA_CLIENT_RIB_STALE_TIME: + vty_out(vty, "None\n"); + break; + } + + if (ZEBRA_CLIENT_GR_ENABLED(info->capabilities)) { + if (info->stale_client_ptr) { + s = (struct zserv *)(info->stale_client_ptr); + uptime = monotime(NULL); + uptime -= s->restart_time; + + frrtime_to_interval(uptime, buf, sizeof(buf)); + + vty_out(vty, "Last restart time : %s ago\n", + buf); + + vty_out(vty, "Stalepath removal time: %d sec\n", + info->stale_removal_time); + if (info->t_stale_removal) { + vty_out(vty, + "Stale delete timer: %ld sec\n", + thread_timer_remain_second( + info->t_stale_removal)); + } + } + vty_out(vty, "Current AFI : %d\n", info->current_afi); + if (info->current_prefix) + vty_out(vty, "Current prefix : %pFX\n", + info->current_prefix); + } + } + vty_out(vty, "\n"); + return; +} + +static void zebra_show_client_brief(struct vty *vty, struct zserv *client) +{ + char client_string[80]; + char cbuf[ZEBRA_TIME_BUF], rbuf[ZEBRA_TIME_BUF]; + char wbuf[ZEBRA_TIME_BUF]; + time_t connect_time, last_read_time, last_write_time; + + frr_with_mutex (&client->stats_mtx) { + connect_time = client->connect_time; + last_read_time = client->last_read_time; + last_write_time = client->last_write_time; + } + + if (client->instance || client->session_id) + snprintfrr(client_string, sizeof(client_string), "%s[%u:%u]", + zebra_route_string(client->proto), client->instance, + client->session_id); + else + 
snprintfrr(client_string, sizeof(client_string), "%s", + zebra_route_string(client->proto)); + + vty_out(vty, "%-10s%12s %12s%12s %10d/%-10d %10d/%-10d\n", + client_string, + zserv_time_buf(&connect_time, cbuf, ZEBRA_TIME_BUF), + zserv_time_buf(&last_read_time, rbuf, ZEBRA_TIME_BUF), + zserv_time_buf(&last_write_time, wbuf, ZEBRA_TIME_BUF), + client->v4_route_add_cnt + client->v4_route_upd8_cnt, + client->v4_route_del_cnt, + client->v6_route_add_cnt + client->v6_route_upd8_cnt, + client->v6_route_del_cnt); +} + +/* + * Common logic that searches the client list for a zapi client; this + * MUST be called holding the client list mutex. + */ +static struct zserv *find_client_internal(uint8_t proto, + unsigned short instance, + uint32_t session_id) +{ + struct listnode *node, *nnode; + struct zserv *client = NULL; + + for (ALL_LIST_ELEMENTS(zrouter.client_list, node, nnode, client)) { + if (client->proto == proto && client->instance == instance && + client->session_id == session_id) + break; + } + + return client; +} + +/* + * Public api that searches for a client session; this version is + * used from the zebra main pthread. + */ +struct zserv *zserv_find_client(uint8_t proto, unsigned short instance) +{ + struct zserv *client; + + frr_with_mutex (&client_mutex) { + client = find_client_internal(proto, instance, 0); + } + + return client; +} + +/* + * Retrieve a client by its protocol, instance number, and session id. + */ +struct zserv *zserv_find_client_session(uint8_t proto, unsigned short instance, + uint32_t session_id) +{ + struct zserv *client; + + frr_with_mutex (&client_mutex) { + client = find_client_internal(proto, instance, session_id); + } + + return client; + +} + +/* This command is for debugging purpose. 
 */
DEFUN (show_zebra_client,
       show_zebra_client_cmd,
       "show zebra client",
       SHOW_STR
       ZEBRA_STR
       "Client information\n")
{
	struct listnode *node;
	struct zserv *client;

	for (ALL_LIST_ELEMENTS_RO(zrouter.client_list, node, client)) {
		zebra_show_client_detail(vty, client);
		/* Show GR info if present */
		zebra_show_stale_client_detail(vty, client);
	}

	return CMD_SUCCESS;
}

/* This command is for debugging purpose. */
DEFUN (show_zebra_client_summary,
       show_zebra_client_summary_cmd,
       "show zebra client summary",
       SHOW_STR
       ZEBRA_STR
       "Client information brief\n"
       "Brief Summary\n")
{
	struct listnode *node;
	struct zserv *client;

	vty_out(vty,
		"Name Connect Time Last Read Last Write IPv4 Routes IPv6 Routes\n");
	vty_out(vty,
		"------------------------------------------------------------------------------------------\n");

	for (ALL_LIST_ELEMENTS_RO(zrouter.client_list, node, client))
		zebra_show_client_brief(vty, client);

	vty_out(vty, "Routes column shows (added+updated)/deleted\n");
	return CMD_SUCCESS;
}

/* Hook callback: tell every other client that 'closed_client' went away. */
static int zserv_client_close_cb(struct zserv *closed_client)
{
	struct listnode *node, *nnode;
	struct zserv *client = NULL;

	for (ALL_LIST_ELEMENTS(zrouter.client_list, node, nnode, client)) {
		/* Skip clients of the same protocol as the closing one
		 * (this includes the closing client itself).
		 * NOTE(review): only 'proto' is compared, so with multiple
		 * instances of one protocol none of the siblings is
		 * notified - confirm whether 'instance'/'session_id'
		 * should be part of the comparison.
		 */
		if (client->proto == closed_client->proto)
			continue;

		zsend_client_close_notify(client, closed_client);
	}

	return 0;
}

/* One-time module initialization: lists, mutex, CLI, close hook. */
void zserv_init(void)
{
	/* Client list init. */
	zrouter.client_list = list_new();
	zrouter.stale_client_list = list_new();

	/* Misc init. */
	zsock = -1;
	pthread_mutex_init(&client_mutex, NULL);

	install_element(ENABLE_NODE, &show_zebra_client_cmd);
	install_element(ENABLE_NODE, &show_zebra_client_summary_cmd);

	hook_register(zserv_client_close, zserv_client_close_cb);
}
diff --git a/zebra/zserv.h b/zebra/zserv.h
new file mode 100644
index 0000000..36030d9
--- /dev/null
+++ b/zebra/zserv.h
@@ -0,0 +1,403 @@
/*
 * Zebra API server.
 * Portions:
 *   Copyright (C) 1997-1999  Kunihiro Ishiguro
 *   Copyright (C) 2015-2018  Cumulus Networks, Inc.
 *   et al.
 *
 * This program is free software; you can redistribute it and/or modify it
 * under the terms of the GNU General Public License as published by the Free
 * Software Foundation; either version 2 of the License, or (at your option)
 * any later version.
 *
 * This program is distributed in the hope that it will be useful, but WITHOUT
 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
 * more details.
 *
 * You should have received a copy of the GNU General Public License along
 * with this program; see the file COPYING; if not, write to the Free Software
 * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
 */

#ifndef _ZEBRA_ZSERV_H
#define _ZEBRA_ZSERV_H

/* clang-format off */
#include <stdint.h>           /* for uint32_t, uint8_t */
#include <time.h>             /* for time_t */

#include "lib/route_types.h"  /* for ZEBRA_ROUTE_MAX */
#include "lib/zebra.h"        /* for AFI_MAX */
#include "lib/vrf.h"          /* for vrf_bitmap_t */
#include "lib/zclient.h"      /* for redist_proto */
#include "lib/stream.h"       /* for stream, stream_fifo */
#include "lib/thread.h"       /* for thread, thread_master */
#include "lib/linklist.h"     /* for list */
#include "lib/workqueue.h"    /* for work_queue */
#include "lib/hook.h"         /* for DECLARE_HOOK, DECLARE_KOOH */
/* clang-format on */

#ifdef __cplusplus
extern "C" {
#endif

struct zebra_vrf;

/* Default port information. */
#define ZEBRA_VTY_PORT 2601

/* Default configuration filename.
 */
#define DEFAULT_CONFIG_FILE "zebra.conf"

#define ZEBRA_RMAP_DEFAULT_UPDATE_TIMER 5 /* disabled by default */


/* Stale route marker timer */
#define ZEBRA_DEFAULT_STALE_UPDATE_DELAY 1

/* Count of stale routes processed in timer context */
#define ZEBRA_MAX_STALE_ROUTE_COUNT 50000

/* Graceful Restart information, tracked per client and per VRF */
struct client_gr_info {
	/* VRF for which GR enabled */
	vrf_id_t vrf_id;

	/* AFI */
	afi_t current_afi;

	/* Stale time and GR cap */
	uint32_t stale_removal_time;
	enum zserv_client_capabilities capabilities;

	/* GR commands */
	bool do_delete;
	bool gr_enable;
	bool stale_client;

	/* Route sync and enable flags for AFI/SAFI */
	bool af_enabled[AFI_MAX][SAFI_MAX];
	bool route_sync[AFI_MAX][SAFI_MAX];

	/* Book keeping */
	struct prefix *current_prefix;
	void *stale_client_ptr;
	struct thread *t_stale_removal;

	TAILQ_ENTRY(client_gr_info) gr_info;
};

/* Client structure: one per connected zapi session. */
struct zserv {
	/* Client pthread */
	struct frr_pthread *pthread;

	/* Client file descriptor. */
	int sock;

	/* Attributes used to permit access to zapi clients from
	 * other pthreads: the client has a busy counter, and a
	 * 'closed' flag. These attributes are managed using a
	 * lock, via the acquire_client() and release_client() apis.
	 */
	int busy_count;
	bool is_closed;

	/* Input/output buffer to the client. */
	pthread_mutex_t ibuf_mtx;
	struct stream_fifo *ibuf_fifo;
	pthread_mutex_t obuf_mtx;
	struct stream_fifo *obuf_fifo;

	/* Private I/O buffers */
	struct stream *ibuf_work;
	struct stream *obuf_work;

	/* Buffer of data waiting to be written to client. */
	struct buffer *wb;

	/* Threads for read/write. */
	struct thread *t_read;
	struct thread *t_write;

	/* Event for message processing, for the main pthread */
	struct thread *t_process;

	/* Event for the main pthread */
	struct thread *t_cleanup;

	/* This client's redistribute flag.
 */
	struct redist_proto mi_redist[AFI_MAX][ZEBRA_ROUTE_MAX];
	vrf_bitmap_t redist[AFI_MAX][ZEBRA_ROUTE_MAX];

	/* Redistribute default route flag. */
	vrf_bitmap_t redist_default[AFI_MAX];

	/* Router-id information. */
	vrf_bitmap_t ridinfo[AFI_MAX];

	/* Per-AFI interest bitmap for neighbor updates.
	 * NOTE(review): the original comment here duplicated "Router-id
	 * information."; presumably this tracks NHRP neighbor-table
	 * registration - confirm against the zapi neighbor handlers.
	 */
	vrf_bitmap_t nhrp_neighinfo[AFI_MAX];

	bool notify_owner;

	/* Indicates if client is synchronous. */
	bool synchronous;

	/* client's protocol and session info */
	uint8_t proto;
	uint16_t instance;
	uint32_t session_id;

	/*
	 * Interested for MLAG Updates, and also stores the client
	 * interested message mask
	 */
	bool mlag_updates_interested;
	uint32_t mlag_reg_mask1;

	/* Statistics */
	uint32_t redist_v4_add_cnt;
	uint32_t redist_v4_del_cnt;
	uint32_t redist_v6_add_cnt;
	uint32_t redist_v6_del_cnt;
	uint32_t v4_route_add_cnt;
	uint32_t v4_route_upd8_cnt;
	uint32_t v4_route_del_cnt;
	uint32_t v6_route_add_cnt;
	uint32_t v6_route_del_cnt;
	uint32_t v6_route_upd8_cnt;
	uint32_t connected_rt_add_cnt;
	uint32_t connected_rt_del_cnt;
	uint32_t ifup_cnt;
	uint32_t ifdown_cnt;
	uint32_t ifadd_cnt;
	uint32_t ifdel_cnt;
	uint32_t if_bfd_cnt;
	uint32_t bfd_peer_add_cnt;
	uint32_t bfd_peer_upd8_cnt;
	uint32_t bfd_peer_del_cnt;
	uint32_t bfd_peer_replay_cnt;
	uint32_t vrfadd_cnt;
	uint32_t vrfdel_cnt;
	uint32_t if_vrfchg_cnt;
	uint32_t bfd_client_reg_cnt;
	uint32_t vniadd_cnt;
	uint32_t vnidel_cnt;
	uint32_t l3vniadd_cnt;
	uint32_t l3vnidel_cnt;
	uint32_t macipadd_cnt;
	uint32_t macipdel_cnt;
	uint32_t prefixadd_cnt;
	uint32_t prefixdel_cnt;
	uint32_t v4_nh_watch_add_cnt;
	uint32_t v4_nh_watch_rem_cnt;
	uint32_t v6_nh_watch_add_cnt;
	uint32_t v6_nh_watch_rem_cnt;
	uint32_t vxlan_sg_add_cnt;
	uint32_t vxlan_sg_del_cnt;
	uint32_t local_es_add_cnt;
	uint32_t local_es_del_cnt;
	uint32_t local_es_evi_add_cnt;
	uint32_t local_es_evi_del_cnt;
	uint32_t error_cnt;

	time_t nh_reg_time;
	time_t nh_dereg_time;
	time_t nh_last_upd_time;

	/*
	 * Session information.
	 *
	 * These are not synchronous with respect to each other. For instance,
	 * last_read_cmd may contain a value that has been read in the future
	 * relative to last_read_time.
	 */

	pthread_mutex_t stats_mtx;
	/* BEGIN covered by stats_mtx */

	/* monotime of client creation */
	uint64_t connect_time;
	/* monotime of last message received */
	uint64_t last_read_time;
	/* monotime of last message sent */
	uint64_t last_write_time;
	/* command code of last message read */
	uint64_t last_read_cmd;
	/* command code of last message written */
	uint64_t last_write_cmd;

	/* END covered by stats_mtx */

	/*
	 * Number of instances configured with
	 * graceful restart
	 */
	uint32_t gr_instance_count;
	time_t restart_time;

	/*
	 * Graceful restart information for
	 * each instance
	 */
	TAILQ_HEAD(info_list, client_gr_info) gr_info_queue;
};

#define ZAPI_HANDLER_ARGS                                                      \
	struct zserv *client, struct zmsghdr *hdr, struct stream *msg,         \
		struct zebra_vrf *zvrf

/* Hooks for client connect / disconnect */
DECLARE_HOOK(zserv_client_connect, (struct zserv *client), (client));
DECLARE_KOOH(zserv_client_close, (struct zserv *client), (client));

#define DYNAMIC_CLIENT_GR_DISABLED(_client)                                    \
	((_client->proto <= ZEBRA_ROUTE_CONNECT)                               \
	 || !(_client->gr_instance_count))

/*
 * Initialize Zebra API server.
 *
 * Installs CLI commands and creates the client list.
 */
extern void zserv_init(void);

/*
 * Stop the Zebra API server.
 *
 * closes the socket
 */
extern void zserv_close(void);

/*
 * Start Zebra API server.
 *
 * Allocates resources, creates the server socket and begins listening on the
 * socket.
 *
 * path
 *    where to place the Unix domain socket
 */
extern void zserv_start(char *path);

/*
 * Send a message to a connected Zebra API client.
 *
 * client
 *    the client to send to
 *
 * msg
 *    the message to send
 */
extern int zserv_send_message(struct zserv *client, struct stream *msg);

/*
 * Send a batch of messages to a connected Zebra API client.
 *
 * client
 *    the client to send to
 *
 * fifo
 *    the list of messages to send
 */
extern int zserv_send_batch(struct zserv *client, struct stream_fifo *fifo);

/*
 * Retrieve a client by its protocol and instance number.
 *
 * proto
 *    protocol number
 *
 * instance
 *    instance number
 *
 * Returns:
 *    The Zebra API client.
 */
extern struct zserv *zserv_find_client(uint8_t proto, unsigned short instance);

/*
 * Retrieve a client by its protocol, instance number, and session id.
 *
 * proto
 *    protocol number
 *
 * instance
 *    instance number
 *
 * session_id
 *    session id
 *
 * Returns:
 *    The Zebra API client.
 */
struct zserv *zserv_find_client_session(uint8_t proto, unsigned short instance,
					uint32_t session_id);

/*
 * Retrieve a client object by the complete tuple of
 * {protocol, instance, session}. This version supports use
 * from a different pthread: the object will be returned marked
 * in-use. The caller *must* release the client object with the
 * release_client() api, to ensure that the in-use marker is cleared properly.
 *
 * Returns:
 *    The Zebra API client.
 */
extern struct zserv *zserv_acquire_client(uint8_t proto,
					  unsigned short instance,
					  uint32_t session_id);

/*
 * Release a client object that was acquired with the acquire_client() api.
 * After this has been called, the pointer must not be used - it may be freed
 * in another pthread if the client has closed.
 */
extern void zserv_release_client(struct zserv *client);

/*
 * Close a client.
 *
 * Kills a client's thread, removes the client from the client list and cleans
 * up its resources.
 *
 * client
 *    the client to close
 */
extern void zserv_close_client(struct zserv *client);

/*
 * Log a ZAPI message hexdump.
 *
 * errmsg
 *    Error message to include with packet hexdump
 *
 * msg
 *    Message to log
 *
 * hdr
 *    Message header
 */
void zserv_log_message(const char *errmsg, struct stream *msg,
		       struct zmsghdr *hdr);

/* TODO */
__attribute__((__noreturn__)) void zebra_finalize(struct thread *event);

/*
 * Graceful restart functions.
 */
extern int zebra_gr_client_disconnect(struct zserv *client);
extern void zebra_gr_client_reconnect(struct zserv *client);
extern void zebra_gr_stale_client_cleanup(struct list *client_list);
extern void zread_client_capabilities(struct zserv *client, struct zmsghdr *hdr,
				      struct stream *msg,
				      struct zebra_vrf *zvrf);

#ifdef __cplusplus
}
#endif

/* fix: comment previously said _ZEBRA_ZEBRA_H, mismatching the guard above */
#endif /* _ZEBRA_ZSERV_H */