Adding upstream version 9.1.upstream/9.1

Signed-off-by: Daniel Baumann <daniel.baumann@progress-linux.org>
author: Daniel Baumann <daniel.baumann@progress-linux.org> 2024-04-09 13:16:35 +0000
committer: Daniel Baumann <daniel.baumann@progress-linux.org> 2024-04-09 13:16:35 +0000
commit: e2bbf175a2184bd76f6c54ccf8456babeb1a46fc (patch)
tree: f0b76550d6e6f500ada964a3a4ee933a45e5a6f1 /bgpd/bgp_nht.c
parent: Initial commit. (diff)
download: frr-e2bbf175a2184bd76f6c54ccf8456babeb1a46fc.tar.xz
frr-e2bbf175a2184bd76f6c54ccf8456babeb1a46fc.zip
1 files changed, 1647 insertions, 0 deletions
diff --git a/bgpd/bgp_nht.c b/bgpd/bgp_nht.c
new file mode 100644
index 0000000..60d6f74
--- /dev/null
+++ b/bgpd/bgp_nht.c
@@ -0,0 +1,1647 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
+/* BGP Nexthop tracking
+ * Copyright (C) 2013 Cumulus Networks, Inc.
+ */
+
+#include <zebra.h>
+
+#include "command.h"
+#include "frrevent.h"
+#include "prefix.h"
+#include "zclient.h"
+#include "stream.h"
+#include "network.h"
+#include "log.h"
+#include "memory.h"
+#include "nexthop.h"
+#include "vrf.h"
+#include "filter.h"
+#include "nexthop_group.h"
+
+#include "bgpd/bgpd.h"
+#include "bgpd/bgp_table.h"
+#include "bgpd/bgp_route.h"
+#include "bgpd/bgp_attr.h"
+#include "bgpd/bgp_nexthop.h"
+#include "bgpd/bgp_debug.h"
+#include "bgpd/bgp_errors.h"
+#include "bgpd/bgp_nht.h"
+#include "bgpd/bgp_fsm.h"
+#include "bgpd/bgp_zebra.h"
+#include "bgpd/bgp_flowspec_util.h"
+#include "bgpd/bgp_evpn.h"
+#include "bgpd/bgp_rd.h"
+#include "bgpd/bgp_mplsvpn.h"
+#include "bgpd/bgp_ecommunity.h"
+
+extern struct zclient *zclient;
+
+static void register_zebra_rnh(struct bgp_nexthop_cache *bnc);
+static void unregister_zebra_rnh(struct bgp_nexthop_cache *bnc);
+static int make_prefix(int afi, struct bgp_path_info *pi, struct prefix *p);
+static void bgp_nht_ifp_initial(struct event *thread);
+
+/*
+ * Generic nexthop validity check.
+ *
+ * Returns non-zero when bgp_zebra_num_connects() reports zero, or when the
+ * cache entry exists, carries BGP_NEXTHOP_VALID and holds at least one
+ * nexthop. Returns zero otherwise.
+ */
+static int bgp_isvalid_nexthop(struct bgp_nexthop_cache *bnc)
+{
+	if (bgp_zebra_num_connects() == 0)
+		return 1;
+
+	if (!bnc)
+		return 0;
+
+	if (!CHECK_FLAG(bnc->flags, BGP_NEXTHOP_VALID))
+		return 0;
+
+	return bnc->nexthop_num > 0;
+}
+
+/*
+ * Validity check based on the peer a leaked path was originally learnt from
+ * (path->extra->vrfleak->peer_orig).
+ *
+ * The originating peer must be eBGP with the default TTL, and neither the
+ * peer nor its instance may have the connected check disabled. When that
+ * holds, returns true if any interface-type nexthop of bnc resolves to an
+ * interface flagged BGP_INTERFACE_MPLS_BGP_FORWARDING; false otherwise.
+ */
+static int bgp_isvalid_nexthop_for_ebgp(struct bgp_nexthop_cache *bnc,
+					struct bgp_path_info *path)
+{
+	struct nexthop *nh;
+	struct peer *origin;
+
+	/* Without leak information there is no originating peer to inspect. */
+	if (!path->extra || !path->extra->vrfleak)
+		return false;
+
+	origin = path->extra->vrfleak->peer_orig;
+	if (!origin)
+		return false;
+
+	/* only connected ebgp peers are valid */
+	if (origin->sort != BGP_PEER_EBGP)
+		return false;
+	if (origin->ttl != BGP_DEFAULT_TTL)
+		return false;
+	if (CHECK_FLAG(origin->flags, PEER_FLAG_DISABLE_CONNECTED_CHECK))
+		return false;
+	if (CHECK_FLAG(origin->bgp->flags, BGP_FLAG_DISABLE_NH_CONNECTED_CHK))
+		return false;
+
+	for (nh = bnc->nexthop; nh; nh = nh->next) {
+		struct interface *ifp;
+		struct bgp_interface *bifp;
+
+		/* Only nexthops that name an interface can be matched. */
+		if (nh->type != NEXTHOP_TYPE_IFINDEX &&
+		    nh->type != NEXTHOP_TYPE_IPV4_IFINDEX &&
+		    nh->type != NEXTHOP_TYPE_IPV6_IFINDEX)
+			continue;
+
+		/* The link-local ifindex of the entry wins when it is set. */
+		ifp = if_lookup_by_index(bnc->ifindex_ipv6_ll
+						 ? bnc->ifindex_ipv6_ll
+						 : nh->ifindex,
+					 bnc->bgp->vrf_id);
+		if (!ifp)
+			continue;
+
+		bifp = ifp->info;
+		if (CHECK_FLAG(bifp->flags, BGP_INTERFACE_MPLS_BGP_FORWARDING))
+			return true;
+	}
+
+	return false;
+}
+
+/*
+ * Validity check for MPLS-over-GRE: the nexthop is accepted when one of its
+ * (non-blackhole) nexthops resolves to a GRE interface (ZEBRA_LLT_IPGRE or
+ * ZEBRA_LLT_IP6GRE) and the path's attributes carry
+ * BATTR_RMAP_L3VPN_ACCEPT_GRE in rmap_change_flags.
+ *
+ * Returns true when both conditions hold, false otherwise.
+ */
+static int bgp_isvalid_nexthop_for_mplsovergre(struct bgp_nexthop_cache *bnc,
+					       struct bgp_path_info *path)
+{
+	struct interface *ifp;
+	struct nexthop *nexthop;
+	bool gre_found = false;
+
+	for (nexthop = bnc->nexthop; nexthop; nexthop = nexthop->next) {
+		if (nexthop->type == NEXTHOP_TYPE_BLACKHOLE)
+			continue;
+
+		/* The link-local ifindex of the entry wins when it is set. */
+		ifp = if_lookup_by_index(bnc->ifindex_ipv6_ll
+						 ? bnc->ifindex_ipv6_ll
+						 : nexthop->ifindex,
+					 bnc->bgp->vrf_id);
+		if (ifp && (ifp->ll_type == ZEBRA_LLT_IPGRE ||
+			    ifp->ll_type == ZEBRA_LLT_IP6GRE)) {
+			gre_found = true;
+			break;
+		}
+	}
+
+	/*
+	 * Require an actual GRE match. Testing the last looked-up interface
+	 * for NULL is not enough: a resolvable non-GRE interface would
+	 * otherwise be accepted as if it were a GRE tunnel.
+	 */
+	if (!gre_found)
+		return false;
+
+	if (CHECK_FLAG(path->attr->rmap_change_flags,
+		       BATTR_RMAP_L3VPN_ACCEPT_GRE))
+		return true;
+
+	return false;
+}
+
+/*
+ * Validity check for paths that need a labelled (or equivalent) transport
+ * towards the nexthop.
+ *
+ * - In the case of MPLS-VPN, the label is learned from LDP or other
+ *   protocols and nexthop tracking is enabled for the label; this is
+ *   recorded as BGP_NEXTHOP_LABELED_VALID.
+ * - In the case of SRv6-VPN, reachability of the SID (in other words, an
+ *   IPv6 address) would have to be tracked and recorded as
+ *   BGP_NEXTHOP_SID_VALID. That is currently not implemented, so every
+ *   transit route is assumed valid when srv6_enabled is set on the
+ *   nexthop's instance.
+ * - Otherwise fall back to the eBGP and MPLS-over-GRE acceptance checks.
+ *
+ * Returns 1 when the nexthop is usable, 0 otherwise.
+ */
+static int bgp_isvalid_nexthop_for_mpls(struct bgp_nexthop_cache *bnc,
+					struct bgp_path_info *path)
+{
+	if (bgp_zebra_num_connects() == 0)
+		return 1;
+
+	/* An entry with at least one nexthop is mandatory from here on. */
+	if (!bnc || !(bnc->nexthop_num > 0))
+		return 0;
+
+	if (CHECK_FLAG(path->flags, BGP_PATH_ACCEPT_OWN))
+		return 1;
+
+	if (CHECK_FLAG(bnc->flags, BGP_NEXTHOP_LABELED_VALID))
+		return 1;
+
+	if (bnc->bgp->srv6_enabled)
+		return 1;
+
+	if (bgp_isvalid_nexthop_for_ebgp(bnc, path))
+		return 1;
+
+	if (bgp_isvalid_nexthop_for_mplsovergre(bnc, path))
+		return 1;
+
+	return 0;
+}
+
+/*
+ * Release a nexthop cache entry once nothing refers to it any more, i.e. its
+ * path list is empty and no peer is attached through nht_info.
+ */
+static void bgp_unlink_nexthop_check(struct bgp_nexthop_cache *bnc)
+{
+	/* Still referenced by a path or by a peer: keep the entry. */
+	if (!LIST_EMPTY(&(bnc->paths)) || bnc->nht_info)
+		return;
+
+	if (BGP_DEBUG(nht, NHT))
+		zlog_debug("%s: freeing bnc %pFX(%d)(%u)(%s)", __func__,
+			   &bnc->prefix, bnc->ifindex_ipv6_ll, bnc->srte_color,
+			   bnc->bgp->name_pretty);
+
+	/* only unregister if this is the last nh for this prefix */
+	if (!bnc_existing_for_prefix(bnc))
+		unregister_zebra_rnh(bnc);
+
+	bnc_free(bnc);
+}
+
+/*
+ * Detach a path from the nexthop cache entry it is linked to (if any) and
+ * release that entry when it became unreferenced. The path's MPLS VPN
+ * nexthop-label links are always dropped first.
+ */
+void bgp_unlink_nexthop(struct bgp_path_info *path)
+{
+	/* Remember the entry: path_nh_map() below is asked to unmap it. */
+	struct bgp_nexthop_cache *cached = path->nexthop;
+
+	bgp_mplsvpn_path_nh_label_unlink(path);
+	bgp_mplsvpn_path_nh_label_bind_unlink(path);
+
+	if (cached) {
+		path_nh_map(path, NULL, false);
+		bgp_unlink_nexthop_check(cached);
+	}
+}
+
+/*
+ * Hand the peer reference of a nexthop cache entry over from one peer
+ * structure to another: when both peers resolve to the very same cache
+ * entry, that entry's nht_info is pointed at 'to'.
+ */
+void bgp_replace_nexthop_by_peer(struct peer *from, struct peer *to)
+{
+	struct prefix from_pfx;
+	struct prefix to_pfx;
+	struct bgp_nexthop_cache *from_bnc;
+	struct bgp_nexthop_cache *to_bnc;
+	ifindex_t from_ifindex = 0;
+	ifindex_t to_ifindex = 0;
+	afi_t afi;
+
+	if (!sockunion2hostprefix(&from->connection->su, &from_pfx))
+		return;
+
+	/*
+	 * Interface-based link-local peers are keyed by the interface index
+	 * as well, so fetch it from the socket address scope.
+	 */
+	if (from->conf_if &&
+	    IN6_IS_ADDR_LINKLOCAL(&from->connection->su.sin6.sin6_addr))
+		from_ifindex = from->connection->su.sin6.sin6_scope_id;
+
+	afi = family2afi(from_pfx.family);
+	from_bnc = bnc_find(&from->bgp->nexthop_cache_table[afi], &from_pfx, 0,
+			    from_ifindex);
+
+	if (!sockunion2hostprefix(&to->connection->su, &to_pfx))
+		return;
+
+	/* Same ifindex derivation for the replacement peer. */
+	if (to->conf_if &&
+	    IN6_IS_ADDR_LINKLOCAL(&to->connection->su.sin6.sin6_addr))
+		to_ifindex = to->connection->su.sin6.sin6_scope_id;
+
+	/* The table is selected with the address family of 'from'. */
+	to_bnc = bnc_find(&to->bgp->nexthop_cache_table[afi], &to_pfx, 0,
+			  to_ifindex);
+
+	/* Only swap the reference when both peers share one entry. */
+	if (to_bnc && to_bnc == from_bnc)
+		to_bnc->nht_info = to;
+}
+
+/*
+ * Walk the IPv6 nexthop cache table of the peer's instance and return the
+ * entry whose nht_info points at this peer, or NULL when there is none.
+ */
+static struct bgp_nexthop_cache *
+bgp_find_ipv6_nexthop_matching_peer(struct peer *peer)
+{
+	struct bgp_nexthop_cache *candidate;
+
+	frr_each (bgp_nexthop_cache, &peer->bgp->nexthop_cache_table[AFI_IP6],
+		  candidate) {
+		if (candidate->nht_info != peer)
+			continue;
+
+		if (BGP_DEBUG(nht, NHT))
+			zlog_debug(
+				"Found bnc: %pFX(%u)(%u)(%p) for peer: %s(%s) %p",
+				&candidate->prefix, candidate->ifindex_ipv6_ll,
+				candidate->srte_color, candidate, peer->host,
+				peer->bgp->name_pretty, peer);
+
+		return candidate;
+	}
+
+	if (BGP_DEBUG(nht, NHT))
+		zlog_debug(
+			"Could not find bnc for peer %s(%s) %p in v6 nexthop table",
+			peer->host, peer->bgp->name_pretty, peer);
+
+	return NULL;
+}
+
+/*
+ * Remove the peer reference from the nexthop cache entry tracking this
+ * peer's address, then release the entry if nothing else refers to it.
+ */
+void bgp_unlink_nexthop_by_peer(struct peer *peer)
+{
+	struct bgp_nexthop_cache *entry;
+	struct prefix peer_pfx;
+	ifindex_t ll_ifindex = 0;
+	afi_t afi = family2afi(peer->connection->su.sa.sa_family);
+
+	if (sockunion2hostprefix(&peer->connection->su, &peer_pfx)) {
+		/*
+		 * Link-local IPv6 peers are keyed by interface index as
+		 * well; take it from the socket address scope.
+		 */
+		if (afi == AFI_IP6 &&
+		    IN6_IS_ADDR_LINKLOCAL(&peer->connection->su.sin6.sin6_addr))
+			ll_ifindex = peer->connection->su.sin6.sin6_scope_id;
+
+		entry = bnc_find(&peer->bgp->nexthop_cache_table[afi],
+				 &peer_pfx, 0, ll_ifindex);
+	} else {
+		/*
+		 * When an unnumbered BGP session is brought down by shutting
+		 * the interface before the neighbor is unconfigured, the
+		 * neighbor address in the peer is cleared at interface
+		 * shutdown and no prefix can be built from it. In that case
+		 * look for the entry whose nht_info matches the peer being
+		 * deleted instead.
+		 */
+		entry = bgp_find_ipv6_nexthop_matching_peer(peer);
+	}
+
+	if (!entry)
+		return;
+
+	/* cleanup the peer reference */
+	entry->nht_info = NULL;
+
+	bgp_unlink_nexthop_check(entry);
+}
+
+/*
+ * Find (or create) the nexthop cache entry for either a path (pi) or a peer
+ * and make sure it is registered for tracking.
+ *
+ * A route and its nexthop might belong to different VRFs. Therefore,
+ * we need both the bgp_route and bgp_nexthop pointers.
+ *
+ * bgp_route    - instance owning the route; consulted for the import-check
+ *                flag and for the "view" instance type.
+ * bgp_nexthop  - instance whose cache tables hold the entry.
+ * afi          - address family; overridden from the path's nexthop for
+ *                non-static paths.
+ * safi         - only used to pick the final validity check.
+ * pi           - path to link to the entry; takes precedence over peer.
+ * peer         - peer to track when no path is given.
+ * connected    - non-zero when 'connected' resolution is required.
+ * orig_prefix  - when set, a nexthop prefix equal to it is rejected as a
+ *                route resolving through itself.
+ *
+ * Returns non-zero when the nexthop is to be treated as valid, 0 otherwise
+ * (including when neither pi nor peer is given).
+ */
+int bgp_find_or_add_nexthop(struct bgp *bgp_route, struct bgp *bgp_nexthop,
+			    afi_t afi, safi_t safi, struct bgp_path_info *pi,
+			    struct peer *peer, int connected,
+			    const struct prefix *orig_prefix)
+{
+	struct bgp_nexthop_cache_head *tree = NULL;
+	struct bgp_nexthop_cache *bnc;
+	struct bgp_path_info *bpi_ultimate;
+	struct prefix p;
+	uint32_t srte_color = 0;
+	int is_bgp_static_route = 0;
+	ifindex_t ifindex = 0;
+
+	if (pi) {
+		is_bgp_static_route = ((pi->type == ZEBRA_ROUTE_BGP)
+				       && (pi->sub_type == BGP_ROUTE_STATIC))
+					      ? 1
+					      : 0;
+
+		/* Since Extended Next-hop Encoding (RFC5549) support, we want
+		   to derive
+		   address-family from the next-hop. */
+		if (!is_bgp_static_route)
+			afi = BGP_ATTR_MP_NEXTHOP_LEN_IP6(pi->attr) ? AFI_IP6
+								    : AFI_IP;
+
+		/* Validation for the ipv4 mapped ipv6 nexthop. */
+		if (IS_MAPPED_IPV6(&pi->attr->mp_nexthop_global)) {
+			afi = AFI_IP;
+		}
+
+		/* Build the prefix to track; when make_prefix() reports an
+		 * error (negative return) the path is reported as valid. */
+		if (make_prefix(afi, pi, &p) < 0)
+			return 1;
+
+		/*
+		 * If it's a V6 nexthop, path is learnt from a v6 LL peer,
+		 * and if the NH prefix matches peer's LL address then
+		 * set the ifindex to peer's interface index so that
+		 * correct nexthop can be found in nexthop tree.
+		 *
+		 * NH could be set to different v6 LL address (compared to
+		 * peer's LL) using route-map. In such a scenario, do not set
+		 * the ifindex.
+		 */
+		if (afi == AFI_IP6 &&
+		    IN6_IS_ADDR_LINKLOCAL(
+			    &pi->peer->connection->su.sin6.sin6_addr) &&
+		    IPV6_ADDR_SAME(&pi->peer->connection->su.sin6.sin6_addr,
+				   &p.u.prefix6))
+			ifindex = pi->peer->connection->su.sin6.sin6_scope_id;
+
+		/* A non-static route whose nexthop prefix equals its own
+		 * prefix would resolve through itself: reject it. */
+		if (!is_bgp_static_route && orig_prefix
+		    && prefix_same(&p, orig_prefix)) {
+			if (BGP_DEBUG(nht, NHT)) {
+				zlog_debug(
+					"%s(%pFX): prefix loops through itself",
+					__func__, &p);
+			}
+			return 0;
+		}
+
+		/* The SR-TE color is part of the cache lookup key. */
+		if (CHECK_FLAG(pi->attr->flag,
+			       ATTR_FLAG_BIT(BGP_ATTR_SRTE_COLOR)))
+			srte_color = bgp_attr_get_color(pi->attr);
+
+	} else if (peer) {
+		/*
+		 * Gather the ifindex so that interface up/down events
+		 * can be matched against this entry.
+		 */
+		if (afi == AFI_IP6 && peer->conf_if &&
+		    IN6_IS_ADDR_LINKLOCAL(&peer->connection->su.sin6.sin6_addr)) {
+			ifindex = peer->connection->su.sin6.sin6_scope_id;
+			if (ifindex == 0) {
+				if (BGP_DEBUG(nht, NHT)) {
+					zlog_debug(
+						"%s: Unable to locate ifindex, waiting till we have one",
+						peer->conf_if);
+				}
+				return 0;
+			}
+		}
+
+		if (!sockunion2hostprefix(&peer->connection->su, &p)) {
+			if (BGP_DEBUG(nht, NHT)) {
+				zlog_debug(
+					"%s: Attempting to register with unknown AFI %d (not %d or %d)",
+					__func__, afi, AFI_IP, AFI_IP6);
+			}
+			return 0;
+		}
+	} else
+		return 0;
+
+	/* Static routes live in the import-check table, everything else in
+	 * the nexthop cache table. */
+	if (is_bgp_static_route)
+		tree = &bgp_nexthop->import_check_table[afi];
+	else
+		tree = &bgp_nexthop->nexthop_cache_table[afi];
+
+	bnc = bnc_find(tree, &p, srte_color, ifindex);
+	if (!bnc) {
+		bnc = bnc_new(tree, &p, srte_color, ifindex);
+		bnc->bgp = bgp_nexthop;
+		if (BGP_DEBUG(nht, NHT))
+			zlog_debug("Allocated bnc %pFX(%d)(%u)(%s) peer %p",
+				   &bnc->prefix, bnc->ifindex_ipv6_ll,
+				   bnc->srte_color, bnc->bgp->name_pretty,
+				   peer);
+	} else {
+		if (BGP_DEBUG(nht, NHT))
+			zlog_debug(
+				"Found existing bnc %pFX(%d)(%s) flags 0x%x ifindex %d #paths %d peer %p",
+				&bnc->prefix, bnc->ifindex_ipv6_ll,
+				bnc->bgp->name_pretty, bnc->flags,
+				bnc->ifindex_ipv6_ll, bnc->path_count,
+				bnc->nht_info);
+	}
+
+	if (pi && is_route_parent_evpn(pi))
+		bnc->is_evpn_gwip_nexthop = true;
+
+	if (is_bgp_static_route) {
+		SET_FLAG(bnc->flags, BGP_STATIC_ROUTE);
+
+		/* If we're toggling the type, re-register */
+		if ((CHECK_FLAG(bgp_route->flags, BGP_FLAG_IMPORT_CHECK))
+		    && !CHECK_FLAG(bnc->flags, BGP_STATIC_ROUTE_EXACT_MATCH)) {
+			SET_FLAG(bnc->flags, BGP_STATIC_ROUTE_EXACT_MATCH);
+			UNSET_FLAG(bnc->flags, BGP_NEXTHOP_REGISTERED);
+			UNSET_FLAG(bnc->flags, BGP_NEXTHOP_VALID);
+		} else if ((!CHECK_FLAG(bgp_route->flags,
+					BGP_FLAG_IMPORT_CHECK))
+			   && CHECK_FLAG(bnc->flags,
+					 BGP_STATIC_ROUTE_EXACT_MATCH)) {
+			UNSET_FLAG(bnc->flags, BGP_STATIC_ROUTE_EXACT_MATCH);
+			UNSET_FLAG(bnc->flags, BGP_NEXTHOP_REGISTERED);
+			UNSET_FLAG(bnc->flags, BGP_NEXTHOP_VALID);
+		}
+	}
+	/* When nexthop is already known, but now requires 'connected'
+	 * resolution,
+	 * re-register it. The reverse scenario where the nexthop currently
+	 * requires
+	 * 'connected' resolution does not need a re-register (i.e., we treat
+	 * 'connected-required' as an override) except in the scenario where
+	 * this
+	 * is actually a case of tracking a peer for connectivity (e.g., after
+	 * disable connected-check).
+	 * NOTE: We don't track the number of paths separately for 'connected-
+	 * required' vs 'connected-not-required' as this change is not a common
+	 * scenario.
+	 */
+	else if (connected && !CHECK_FLAG(bnc->flags, BGP_NEXTHOP_CONNECTED)) {
+		SET_FLAG(bnc->flags, BGP_NEXTHOP_CONNECTED);
+		UNSET_FLAG(bnc->flags, BGP_NEXTHOP_REGISTERED);
+		UNSET_FLAG(bnc->flags, BGP_NEXTHOP_VALID);
+	} else if (peer && !connected
+		   && CHECK_FLAG(bnc->flags, BGP_NEXTHOP_CONNECTED)) {
+		UNSET_FLAG(bnc->flags, BGP_NEXTHOP_CONNECTED);
+		UNSET_FLAG(bnc->flags, BGP_NEXTHOP_REGISTERED);
+		UNSET_FLAG(bnc->flags, BGP_NEXTHOP_VALID);
+	}
+	/* A changed link-local ifindex also forces a re-registration. */
+	if (peer && (bnc->ifindex_ipv6_ll != ifindex)) {
+		UNSET_FLAG(bnc->flags, BGP_NEXTHOP_REGISTERED);
+		UNSET_FLAG(bnc->flags, BGP_NEXTHOP_VALID);
+		bnc->ifindex_ipv6_ll = ifindex;
+	}
+	/* Views are marked registered and valid without asking zebra;
+	 * otherwise register unless already done or the prefix is a default
+	 * host route. */
+	if (bgp_route->inst_type == BGP_INSTANCE_TYPE_VIEW) {
+		SET_FLAG(bnc->flags, BGP_NEXTHOP_REGISTERED);
+		SET_FLAG(bnc->flags, BGP_NEXTHOP_VALID);
+	} else if (!CHECK_FLAG(bnc->flags, BGP_NEXTHOP_REGISTERED)
+		   && !is_default_host_route(&bnc->prefix))
+		register_zebra_rnh(bnc);
+
+	if (pi && pi->nexthop != bnc) {
+		/* Unlink from existing nexthop cache, if any. This will also
+		 * free
+		 * the nexthop cache entry, if appropriate.
+		 */
+		bgp_unlink_nexthop(pi);
+
+		/* updates NHT pi list reference */
+		path_nh_map(pi, bnc, true);
+
+		/* Copy the IGP metric onto the ultimate (pre-import) path,
+		 * or clear it when the entry has no usable metric. */
+		bpi_ultimate = bgp_get_imported_bpi_ultimate(pi);
+		if (CHECK_FLAG(bnc->flags, BGP_NEXTHOP_VALID) && bnc->metric)
+			(bgp_path_info_extra_get(bpi_ultimate))->igpmetric =
+				bnc->metric;
+		else if (bpi_ultimate->extra)
+			bpi_ultimate->extra->igpmetric = 0;
+	} else if (peer) {
+		/*
+		 * Let's not accidentally save the peer data for a peer
+		 * we are going to throw away in a second or so.
+		 * When we come back around we'll fix up this
+		 * data properly in replace_nexthop_by_peer
+		 */
+		if (CHECK_FLAG(peer->flags, PEER_FLAG_CONFIG_NODE))
+			bnc->nht_info = (void *)peer; /* NHT peer reference */
+	}
+
+	/*
+	 * We are cheating here.  Views have no associated underlying
+	 * ability to detect nexthops.  So when we have a view
+	 * just tell everyone the nexthop is valid
+	 */
+	if (bgp_route->inst_type == BGP_INSTANCE_TYPE_VIEW)
+		return 1;
+	else if (safi == SAFI_UNICAST && pi &&
+		 pi->sub_type == BGP_ROUTE_IMPORTED && pi->extra &&
+		 pi->extra->num_labels && !bnc->is_evpn_gwip_nexthop)
+		/* imported unicast path carrying labels */
+		return bgp_isvalid_nexthop_for_mpls(bnc, pi);
+	else if (safi == SAFI_MPLS_VPN && pi &&
+		 pi->sub_type != BGP_ROUTE_IMPORTED)
+		/* avoid not redistributing mpls vpn routes */
+		return 1;
+	else
+		/* mpls-vpn routes with BGP_ROUTE_IMPORTED subtype */
+		return (bgp_isvalid_nexthop(bnc));
+}
+
+/*
+ * Drop the peer reference held by the nexthop cache entry that tracks this
+ * peer's address, and free the entry when no path uses it any more.
+ *
+ * afi   - address family of the session; only used to decide whether the
+ *         link-local interface index has to be part of the lookup key.
+ * peer  - peer being removed; NULL is tolerated and ignored.
+ *
+ * Nothing is done when the entry cannot be found or when it refers to a
+ * different peer.
+ */
+void bgp_delete_connected_nexthop(afi_t afi, struct peer *peer)
+{
+	struct bgp_nexthop_cache *bnc;
+	struct prefix p;
+	ifindex_t ifindex = 0;
+
+	if (!peer)
+		return;
+
+	/*
+	 * In case the below check evaluates true and if
+	 * the bnc has not been freed at this point, then
+	 * we might have to do something similar to what's
+	 * done in bgp_unlink_nexthop_by_peer(). Since
+	 * bgp_unlink_nexthop_by_peer() loops through the
+	 * nodes of V6 nexthop cache to find the bnc, it is
+	 * currently not being called here.
+	 */
+	if (!sockunion2hostprefix(&peer->connection->su, &p))
+		return;
+	/*
+	 * Gather the ifindex so that the entry created for a
+	 * link-local peer (keyed by interface) can be found.
+	 */
+	if (afi == AFI_IP6 &&
+	    IN6_IS_ADDR_LINKLOCAL(&peer->connection->su.sin6.sin6_addr))
+		ifindex = peer->connection->su.sin6.sin6_scope_id;
+	bnc = bnc_find(&peer->bgp->nexthop_cache_table[family2afi(p.family)],
+		       &p, 0, ifindex);
+	if (!bnc) {
+		if (BGP_DEBUG(nht, NHT))
+			zlog_debug(
+				"Cannot find connected NHT node for peer %s(%s)",
+				peer->host, peer->bgp->name_pretty);
+		return;
+	}
+
+	/* The entry is owned by another peer structure: leave it alone. */
+	if (bnc->nht_info != peer) {
+		if (BGP_DEBUG(nht, NHT))
+			zlog_debug(
+				"Connected NHT %p node for peer %s(%s) points to %p",
+				bnc, peer->host, bnc->bgp->name_pretty,
+				bnc->nht_info);
+		return;
+	}
+
+	bnc->nht_info = NULL;
+
+	if (LIST_EMPTY(&(bnc->paths))) {
+		if (BGP_DEBUG(nht, NHT))
+			zlog_debug(
+				"Freeing connected NHT node %p for peer %s(%s)",
+				bnc, peer->host, bnc->bgp->name_pretty);
+		/*
+		 * Only unregister if this is the last nh for this prefix,
+		 * exactly as bgp_unlink_nexthop_check() does: another entry
+		 * for the same prefix still relies on the registration.
+		 */
+		if (!bnc_existing_for_prefix(bnc))
+			unregister_zebra_rnh(bnc);
+		bnc_free(bnc);
+	}
+}
+
+/*
+ * Apply a decoded nexthop update to one cache entry and re-evaluate the
+ * paths depending on it.
+ *
+ * bnc           - cache entry to update.
+ * nhr           - decoded update (prefix, metric, nexthops).
+ * import_check  - true when bnc belongs to the import-check table; the
+ *                 update then only counts when it resolves to exactly the
+ *                 entry's prefix through a non-BGP route.
+ *
+ * Records what changed in bnc->change_flags, rebuilds bnc->nexthop from the
+ * update and finishes by calling evaluate_paths().
+ */
+static void bgp_process_nexthop_update(struct bgp_nexthop_cache *bnc,
+				       struct zapi_route *nhr,
+				       bool import_check)
+{
+	struct nexthop *nexthop;
+	struct nexthop *oldnh;
+	struct nexthop *nhlist_head = NULL;
+	struct nexthop *nhlist_tail = NULL;
+	int i;
+	bool evpn_resolved = false;
+
+	bnc->last_update = monotime(NULL);
+	bnc->change_flags = 0;
+
+	/* debug print the input */
+	if (BGP_DEBUG(nht, NHT)) {
+		char bnc_buf[BNC_FLAG_DUMP_SIZE];
+
+		zlog_debug(
+			"%s(%u): Rcvd NH update %pFX(%u)(%u) - metric %d/%d #nhops %d/%d flags %s",
+			bnc->bgp->name_pretty, bnc->bgp->vrf_id, &nhr->prefix,
+			bnc->ifindex_ipv6_ll, bnc->srte_color, nhr->metric,
+			bnc->metric, nhr->nexthop_num, bnc->nexthop_num,
+			bgp_nexthop_dump_bnc_flags(bnc, bnc_buf,
+						   sizeof(bnc_buf)));
+	}
+
+	if (nhr->metric != bnc->metric)
+		bnc->change_flags |= BGP_NEXTHOP_METRIC_CHANGED;
+
+	if (nhr->nexthop_num != bnc->nexthop_num)
+		bnc->change_flags |= BGP_NEXTHOP_CHANGED;
+
+	if (import_check && (nhr->type == ZEBRA_ROUTE_BGP ||
+			     !prefix_same(&bnc->prefix, &nhr->prefix))) {
+		/* Import check failed: invalidate and drop the nexthops. */
+		SET_FLAG(bnc->change_flags, BGP_NEXTHOP_CHANGED);
+		UNSET_FLAG(bnc->flags, BGP_NEXTHOP_VALID);
+		UNSET_FLAG(bnc->flags, BGP_NEXTHOP_LABELED_VALID);
+		UNSET_FLAG(bnc->flags, BGP_NEXTHOP_EVPN_INCOMPLETE);
+
+		bnc_nexthop_free(bnc);
+		bnc->nexthop = NULL;
+
+		if (BGP_DEBUG(nht, NHT))
+			zlog_debug(
+				"%s: Import Check does not resolve to the same prefix for %pFX received %pFX or matching route is BGP",
+				__func__, &bnc->prefix, &nhr->prefix);
+	} else if (nhr->nexthop_num) {
+		struct peer *peer = bnc->nht_info;
+
+		/* notify bgp fsm if nbr ip goes from invalid->valid */
+		if (!bnc->nexthop_num)
+			UNSET_FLAG(bnc->flags, BGP_NEXTHOP_PEER_NOTIFIED);
+
+		/* EVPN gateway-IP nexthops are validated further below. */
+		if (!bnc->is_evpn_gwip_nexthop)
+			bnc->flags |= BGP_NEXTHOP_VALID;
+		bnc->metric = nhr->metric;
+		bnc->nexthop_num = nhr->nexthop_num;
+
+		bnc->flags &= ~BGP_NEXTHOP_LABELED_VALID; /* check below */
+
+		for (i = 0; i < nhr->nexthop_num; i++) {
+			int num_labels = 0;
+
+			nexthop = nexthop_from_zapi_nexthop(&nhr->nexthops[i]);
+
+			/*
+			 * Turn on RA for the v6 nexthops
+			 * we receive in the update.  This is to allow us
+			 * to work with v4 routing over v6 nexthops
+			 */
+			if (peer && !peer->ifp
+			    && CHECK_FLAG(peer->flags,
+					  PEER_FLAG_CAPABILITY_ENHE)
+			    && nhr->prefix.family == AF_INET6
+			    && nexthop->type != NEXTHOP_TYPE_BLACKHOLE) {
+				struct interface *ifp;
+
+				ifp = if_lookup_by_index(nexthop->ifindex,
+							 nexthop->vrf_id);
+				if (ifp)
+					zclient_send_interface_radv_req(
+						zclient, nexthop->vrf_id, ifp,
+						true,
+						BGP_UNNUM_DEFAULT_RA_INTERVAL);
+			}
+			/* There is at least one label-switched path */
+			if (nexthop->nh_label &&
+				nexthop->nh_label->num_labels) {
+
+				bnc->flags |= BGP_NEXTHOP_LABELED_VALID;
+				num_labels = nexthop->nh_label->num_labels;
+			}
+
+			if (BGP_DEBUG(nht, NHT)) {
+				char buf[NEXTHOP_STRLEN];
+				zlog_debug(
+					"    nhop via %s (%d labels)",
+					nexthop2str(nexthop, buf, sizeof(buf)),
+					num_labels);
+			}
+
+			/* Append to the new list, preserving update order. */
+			if (nhlist_tail) {
+				nhlist_tail->next = nexthop;
+				nhlist_tail = nexthop;
+			} else {
+				nhlist_tail = nexthop;
+				nhlist_head = nexthop;
+			}
+
+			/* No need to evaluate the nexthop if we have already
+			 * determined
+			 * that there has been a change.
+			 */
+			if (bnc->change_flags & BGP_NEXTHOP_CHANGED)
+				continue;
+
+			/* A nexthop absent from the old list is a change. */
+			for (oldnh = bnc->nexthop; oldnh; oldnh = oldnh->next)
+				if (nexthop_same(oldnh, nexthop))
+					break;
+
+			if (!oldnh)
+				bnc->change_flags |= BGP_NEXTHOP_CHANGED;
+		}
+		/* Swap the old nexthop list for the freshly built one. */
+		bnc_nexthop_free(bnc);
+		bnc->nexthop = nhlist_head;
+
+		/*
+		 * Gateway IP nexthop is L3 reachable. Mark it as
+		 * BGP_NEXTHOP_VALID only if it is recursively resolved with a
+		 * remote EVPN RT-2.
+		 * Else, mark it as BGP_NEXTHOP_EVPN_INCOMPLETE.
+		 * When its mapping with EVPN RT-2 is established, unset
+		 * BGP_NEXTHOP_EVPN_INCOMPLETE and set BGP_NEXTHOP_VALID.
+		 */
+		if (bnc->is_evpn_gwip_nexthop) {
+			evpn_resolved = bgp_evpn_is_gateway_ip_resolved(bnc);
+
+			if (BGP_DEBUG(nht, NHT))
+				zlog_debug(
+					"EVPN gateway IP %pFX recursive MAC/IP lookup %s",
+					&bnc->prefix,
+					(evpn_resolved ? "successful"
+						       : "failed"));
+
+			if (evpn_resolved) {
+				bnc->flags |= BGP_NEXTHOP_VALID;
+				bnc->flags &= ~BGP_NEXTHOP_EVPN_INCOMPLETE;
+				bnc->change_flags |= BGP_NEXTHOP_MACIP_CHANGED;
+			} else {
+				bnc->flags |= BGP_NEXTHOP_EVPN_INCOMPLETE;
+				bnc->flags &= ~BGP_NEXTHOP_VALID;
+			}
+		}
+	} else {
+		/* The update carries no nexthop: the entry becomes invalid. */
+		bnc->flags &= ~BGP_NEXTHOP_EVPN_INCOMPLETE;
+		bnc->flags &= ~BGP_NEXTHOP_VALID;
+		bnc->flags &= ~BGP_NEXTHOP_LABELED_VALID;
+		bnc->nexthop_num = nhr->nexthop_num;
+
+		/* notify bgp fsm if nbr ip goes from valid->invalid */
+		UNSET_FLAG(bnc->flags, BGP_NEXTHOP_PEER_NOTIFIED);
+
+		bnc_nexthop_free(bnc);
+		bnc->nexthop = NULL;
+	}
+
+	evaluate_paths(bnc);
+}
+
+/*
+ * Apply an interface up/down event to every entry of one nexthop cache
+ * table.
+ *
+ * bgp    - instance owning the table (currently unused here).
+ * table  - cache table to walk.
+ * ifp    - interface the event is about; ignored (with a warning) while its
+ *          ifindex is still IFINDEX_INTERNAL.
+ * up     - true for an up event, false for a down event.
+ *
+ * An entry is affected when its link-local ifindex equals the interface, or
+ * when one of its nexthops uses the interface in the interface's VRF.
+ * Entries matched only through a nexthop are simply re-evaluated; entries
+ * matched through the link-local ifindex get their validity, metric and
+ * nexthop count rewritten from the event before being re-evaluated.
+ */
+static void bgp_nht_ifp_table_handle(struct bgp *bgp,
+				     struct bgp_nexthop_cache_head *table,
+				     struct interface *ifp, bool up)
+{
+	struct bgp_nexthop_cache *bnc;
+	struct nexthop *nhop;
+	/* Plain counter: a narrow type could wrap back to 0 and defeat the
+	 * "> 0" test below. */
+	unsigned int other_nh_count;
+	bool nhop_ll_found = false;
+	bool nhop_found = false;
+
+	if (ifp->ifindex == IFINDEX_INTERNAL) {
+		zlog_warn("%s: The interface %s ignored", __func__, ifp->name);
+		return;
+	}
+
+	frr_each (bgp_nexthop_cache, table, bnc) {
+		/*
+		 * All match state is per cache entry. In particular
+		 * nhop_found must be cleared here, otherwise a match on an
+		 * earlier entry would make every later entry look affected.
+		 */
+		other_nh_count = 0;
+		nhop_found = false;
+		nhop_ll_found = bnc->ifindex_ipv6_ll == ifp->ifindex;
+		for (nhop = bnc->nexthop; nhop; nhop = nhop->next) {
+			/* Covered by the link-local match above. */
+			if (nhop->ifindex == bnc->ifindex_ipv6_ll)
+				continue;
+
+			if (nhop->ifindex != ifp->ifindex) {
+				other_nh_count++;
+				continue;
+			}
+			if (nhop->vrf_id != ifp->vrf->vrf_id) {
+				other_nh_count++;
+				continue;
+			}
+			nhop_found = true;
+		}
+
+		if (!nhop_found && !nhop_ll_found)
+			/* The event interface does not match the nexthop cache
+			 * entry */
+			continue;
+
+		if (!up && other_nh_count > 0)
+			/* Down event ignored in case of multiple next-hop
+			 * interfaces. The other interfaces might be still
+			 * up. The cases where all interfaces are down or a bnc
+			 * is invalid are processed by separate zebra rnh
+			 * messages.
+			 */
+			continue;
+
+		if (!nhop_ll_found) {
+			evaluate_paths(bnc);
+			continue;
+		}
+
+		bnc->last_update = monotime(NULL);
+		bnc->change_flags = 0;
+
+		/*
+		 * For interface based routes ( ala the v6 LL routes
+		 * that this was written for ) the metric received
+		 * for the connected route is 0 not 1.
+		 */
+		bnc->metric = 0;
+		if (up) {
+			SET_FLAG(bnc->flags, BGP_NEXTHOP_VALID);
+			SET_FLAG(bnc->change_flags, BGP_NEXTHOP_CHANGED);
+			/* change nexthop number only for ll */
+			bnc->nexthop_num = 1;
+		} else {
+			UNSET_FLAG(bnc->flags, BGP_NEXTHOP_PEER_NOTIFIED);
+			UNSET_FLAG(bnc->flags, BGP_NEXTHOP_VALID);
+			SET_FLAG(bnc->change_flags, BGP_NEXTHOP_CHANGED);
+			bnc->nexthop_num = 0;
+		}
+
+		evaluate_paths(bnc);
+	}
+}
+/*
+ * Dispatch an interface up/down event to all four cache tables (nexthop
+ * cache and import check, IPv4 then IPv6) of the instance attached to the
+ * interface's VRF. Does nothing when the VRF has no instance attached.
+ */
+static void bgp_nht_ifp_handle(struct interface *ifp, bool up)
+{
+	static const afi_t families[] = { AFI_IP, AFI_IP6 };
+	struct bgp *instance = ifp->vrf->info;
+	size_t idx;
+
+	if (!instance)
+		return;
+
+	for (idx = 0; idx < sizeof(families) / sizeof(families[0]); idx++) {
+		afi_t afi = families[idx];
+
+		bgp_nht_ifp_table_handle(instance,
+					 &instance->nexthop_cache_table[afi],
+					 ifp, up);
+		bgp_nht_ifp_table_handle(instance,
+					 &instance->import_check_table[afi],
+					 ifp, up);
+	}
+}
+
+/* Interface-up notification: process the nexthop cache entries using ifp. */
+void bgp_nht_ifp_up(struct interface *ifp)
+{
+	bgp_nht_ifp_handle(ifp, true);
+}
+
+/* Interface-down notification: process the nexthop cache entries using ifp. */
+void bgp_nht_ifp_down(struct interface *ifp)
+{
+	bgp_nht_ifp_handle(ifp, false);
+}
+
+/*
+ * Event callback replaying the current state of one interface into the
+ * nexthop tracking code. The event value is the interface index, the event
+ * argument is the instance whose VRF the interface is looked up in.
+ */
+static void bgp_nht_ifp_initial(struct event *thread)
+{
+	struct bgp *instance = EVENT_ARG(thread);
+	ifindex_t idx = EVENT_VAL(thread);
+	struct interface *ifp;
+	bool up;
+
+	ifp = if_lookup_by_index(idx, instance->vrf_id);
+	if (!ifp)
+		return;
+
+	up = if_is_up(ifp);
+
+	if (BGP_DEBUG(nht, NHT))
+		zlog_debug(
+			"Handle NHT initial update for Intf %s(%d) status %s",
+			ifp->name, ifp->ifindex, up ? "up" : "down");
+
+	if (up) {
+		bgp_nht_ifp_up(ifp);
+		return;
+	}
+
+	bgp_nht_ifp_down(ifp);
+}
+
+/*
+ * The bnc code is able to handle interface up/down events in order to
+ * properly support v6 LL peering.
+ *
+ * The peering event system expects the nht code to report tracking events
+ * after the session moved to active, so give it that report in the expected
+ * manner: for a link-local peer with a tracked entry bound to an interface,
+ * schedule bgp_nht_ifp_initial() for that interface.
+ */
+void bgp_nht_interface_events(struct peer *peer)
+{
+	struct bgp_nexthop_cache *entry;
+	struct prefix peer_pfx;
+	ifindex_t ll_ifindex = 0;
+
+	/* Only link-local peers are of interest. */
+	if (!IN6_IS_ADDR_LINKLOCAL(&peer->connection->su.sin6.sin6_addr))
+		return;
+
+	if (!sockunion2hostprefix(&peer->connection->su, &peer_pfx))
+		return;
+
+	/*
+	 * The address is known to be link-local at this point, so only the
+	 * interface-based configuration decides whether the ifindex is part
+	 * of the lookup key.
+	 */
+	if (peer->conf_if)
+		ll_ifindex = peer->connection->su.sin6.sin6_scope_id;
+
+	entry = bnc_find(&peer->bgp->nexthop_cache_table[AFI_IP6], &peer_pfx,
+			 0, ll_ifindex);
+	if (!entry || !entry->ifindex_ipv6_ll)
+		return;
+
+	event_add_event(bm->master, bgp_nht_ifp_initial, entry->bgp,
+			entry->ifindex_ipv6_ll, NULL);
+}
+
+/*
+ * Decode a nexthop update from the zclient input buffer and apply it to the
+ * matching entries of the instance bound to vrf_id: first the nexthop cache
+ * table, then the import-check table.
+ *
+ * command  - message type; not used by this function.
+ * vrf_id   - VRF the update belongs to.
+ */
+void bgp_parse_nexthop_update(int command, vrf_id_t vrf_id)
+{
+	struct bgp_nexthop_cache *nhc_entry;
+	struct bgp_nexthop_cache *import_entry;
+	struct bgp_nexthop_cache *colored;
+	struct bgp *bgp = bgp_lookup_by_vrf_id(vrf_id);
+	struct prefix match;
+	struct zapi_route nhr;
+	afi_t afi;
+
+	if (!bgp) {
+		flog_err(
+			EC_BGP_NH_UPD,
+			"parse nexthop update: instance not found for vrf_id %u",
+			vrf_id);
+		return;
+	}
+
+	if (!zapi_nexthop_update_decode(zclient->ibuf, &match, &nhr)) {
+		zlog_err("%s[%s]: Failure to decode nexthop update", __func__,
+			 bgp->name_pretty);
+		return;
+	}
+
+	afi = family2afi(match.family);
+
+	/* Regular nexthop cache first ... */
+	nhc_entry = bnc_find(&bgp->nexthop_cache_table[afi], &match,
+			     nhr.srte_color, 0);
+	if (nhc_entry)
+		bgp_process_nexthop_update(nhc_entry, &nhr, false);
+	else if (BGP_DEBUG(nht, NHT))
+		zlog_debug(
+			"parse nexthop update %pFX(%u)(%s): bnc info not found for nexthop cache",
+			&nhr.prefix, nhr.srte_color, bgp->name_pretty);
+
+	/* ... then the import-check table. */
+	import_entry = bnc_find(&bgp->import_check_table[afi], &match,
+				nhr.srte_color, 0);
+	if (import_entry)
+		bgp_process_nexthop_update(import_entry, &nhr, true);
+	else if (BGP_DEBUG(nht, NHT))
+		zlog_debug(
+			"parse nexthop update %pFX(%u)(%s): bnc info not found for import check",
+			&nhr.prefix, nhr.srte_color, bgp->name_pretty);
+
+	/*
+	 * HACK: if any BGP route is dependent on an SR-policy that doesn't
+	 * exist, zebra will never send NH updates relative to that policy. In
+	 * that case, whenever we receive an update about a colorless NH, update
+	 * the corresponding colorful NHs that share the same endpoint but that
+	 * are inactive. This ugly hack should work around the problem at the
+	 * cost of a performance penalty. Long term, what should be done is to
+	 * make zebra's RNH subsystem aware of SR-TE colors (like bgpd is),
+	 * which should provide a better infrastructure to solve this issue in
+	 * a more efficient and elegant way.
+	 */
+	if (nhr.srte_color != 0 || !nhc_entry)
+		return;
+
+	frr_each (bgp_nexthop_cache, &bgp->nexthop_cache_table[afi], colored) {
+		/* Colorless and already valid entries are left alone. */
+		if (colored->srte_color == 0)
+			continue;
+		if (CHECK_FLAG(colored->flags, BGP_NEXTHOP_VALID))
+			continue;
+		if (!prefix_same(&nhc_entry->prefix, &colored->prefix))
+			continue;
+
+		bgp_process_nexthop_update(colored, &nhr, false);
+	}
+}
+
+/*
+ * Reset the registration and status flags of every entry in the nexthop
+ * cache tables of this instance. This is invoked upon VRF deletion.
+ */
+void bgp_cleanup_nexthops(struct bgp *bgp)
+{
+	struct bgp_nexthop_cache *entry;
+	afi_t afi;
+
+	for (afi = AFI_IP; afi < AFI_MAX; afi++) {
+		struct bgp_nexthop_cache_head *table =
+			&bgp->nexthop_cache_table[afi];
+
+		frr_each (bgp_nexthop_cache, table, entry) {
+			/* Forget validity, registration and notification. */
+			UNSET_FLAG(entry->flags, BGP_NEXTHOP_VALID);
+			UNSET_FLAG(entry->flags, BGP_NEXTHOP_REGISTERED);
+			UNSET_FLAG(entry->flags, BGP_NEXTHOP_PEER_NOTIFIED);
+			UNSET_FLAG(entry->flags, BGP_NEXTHOP_EVPN_INCOMPLETE);
+		}
+	}
+}
+
+/**
+ * make_prefix - build the prefix to be tracked for a path.
+ *
+ * For BGP static routes this is the path's own prefix (the prefix of its
+ * node); for every other path it is a host prefix built from the nexthop
+ * found in the path's attributes.
+ *
+ * ARGUMENTS:
+ *   int afi -- address family the prefix is built for.
+ *   struct bgp_path_info *pi -- path to take the prefix or nexthop from.
+ *   struct prefix *p -- output prefix.
+ * RETURNS:
+ *   -1 for a flowspec path without a peer; the result of
+ *   bgp_flowspec_get_first_nh() for other flowspec paths; 0 otherwise,
+ *   including for an unknown afi, where p is left zeroed.
+ */
+static int make_prefix(int afi, struct bgp_path_info *pi, struct prefix *p)
+{
+
+	int is_bgp_static = ((pi->type == ZEBRA_ROUTE_BGP)
+			     && (pi->sub_type == BGP_ROUTE_STATIC))
+				    ? 1
+				    : 0;
+	struct bgp_dest *net = pi->net;
+	const struct prefix *p_orig = bgp_dest_get_prefix(net);
+	struct in_addr ipv4;
+
+	/* Flowspec entries get their nexthop from the flowspec helper. */
+	if (p_orig->family == AF_FLOWSPEC) {
+		if (!pi->peer)
+			return -1;
+		return bgp_flowspec_get_first_nh(pi->peer->bgp,
+						 pi, p, afi);
+	}
+	memset(p, 0, sizeof(struct prefix));
+	switch (afi) {
+	case AFI_IP:
+		p->family = AF_INET;
+		if (is_bgp_static) {
+			p->u.prefix4 = p_orig->u.prefix4;
+			p->prefixlen = p_orig->prefixlen;
+		} else {
+			/* An IPv4-mapped IPv6 nexthop is tracked as IPv4. */
+			if (IS_MAPPED_IPV6(&pi->attr->mp_nexthop_global)) {
+				ipv4_mapped_ipv6_to_ipv4(
+					&pi->attr->mp_nexthop_global, &ipv4);
+				p->u.prefix4 = ipv4;
+				p->prefixlen = IPV4_MAX_BITLEN;
+			} else {
+				/* EVPN paths use mp_nexthop_global_in,
+				 * all others the plain nexthop attribute. */
+				if (p_orig->family == AF_EVPN)
+					p->u.prefix4 =
+						pi->attr->mp_nexthop_global_in;
+				else
+					p->u.prefix4 = pi->attr->nexthop;
+				p->prefixlen = IPV4_MAX_BITLEN;
+			}
+		}
+		break;
+	case AFI_IP6:
+		p->family = AF_INET6;
+
+		if (is_bgp_static) {
+			p->u.prefix6 = p_orig->u.prefix6;
+			p->prefixlen = p_orig->prefixlen;
+		} else {
+			/* If we receive MP_REACH nexthop with ::(LL)
+			 * or LL(LL), use LL address as nexthop cache.
+			 */
+			if (pi->attr->mp_nexthop_len
+				    == BGP_ATTR_NHLEN_IPV6_GLOBAL_AND_LL
+			    && (IN6_IS_ADDR_UNSPECIFIED(
+					&pi->attr->mp_nexthop_global)
+				|| IN6_IS_ADDR_LINKLOCAL(
+					&pi->attr->mp_nexthop_global)))
+				p->u.prefix6 = pi->attr->mp_nexthop_local;
+			/* If we receive MP_REACH with (GA)::(LL)
+			 * then check for route-map to choose GA or LL
+			 */
+			else if (pi->attr->mp_nexthop_len
+				 == BGP_ATTR_NHLEN_IPV6_GLOBAL_AND_LL) {
+				if (pi->attr->mp_nexthop_prefer_global)
+					p->u.prefix6 =
+						pi->attr->mp_nexthop_global;
+				else
+					p->u.prefix6 =
+						pi->attr->mp_nexthop_local;
+			} else
+				p->u.prefix6 = pi->attr->mp_nexthop_global;
+			p->prefixlen = IPV6_MAX_BITLEN;
+		}
+		break;
+	default:
+		/* Unknown afi: p stays zeroed and 0 is still returned. */
+		if (BGP_DEBUG(nht, NHT)) {
+			zlog_debug(
+				"%s: Attempting to make prefix with unknown AFI %d (not %d or %d)",
+				__func__, afi, AFI_IP, AFI_IP6);
+		}
+		break;
+	}
+	return 0;
+}
+
+/**
+ * sendmsg_zebra_rnh -- Format and send a nexthop register/unregister
+ *   command to Zebra.
+ *
+ * On a successful send the BGP_NEXTHOP_REGISTERED flag of the entry is
+ * updated to match the command; on failure the flag is left untouched and
+ * a warning is logged.
+ *
+ * ARGUMENTS:
+ *   struct bgp_nexthop_cache *bnc -- the nexthop structure.
+ *   int command -- command to send to zebra
+ *                  (ZEBRA_NEXTHOP_REGISTER or ZEBRA_NEXTHOP_UNREGISTER)
+ * RETURNS:
+ *   void.
+ */
+static void sendmsg_zebra_rnh(struct bgp_nexthop_cache *bnc, int command)
+{
+	bool exact_match = false;
+	bool resolve_via_default = false;
+	int ret;
+
+	if (!zclient)
+		return;
+
+	/* Don't try to register if Zebra doesn't know of this instance. */
+	if (!IS_BGP_INST_KNOWN_TO_ZEBRA(bnc->bgp)) {
+		if (BGP_DEBUG(zebra, ZEBRA))
+			zlog_debug(
+				"%s: No zebra instance to talk to, not installing NHT entry",
+				__func__);
+		return;
+	}
+
+	/* NOTE(review): this branch only logs; the send below is still
+	 * attempted even when no zebra connection has been made yet.
+	 */
+	if (!bgp_zebra_num_connects()) {
+		if (BGP_DEBUG(zebra, ZEBRA))
+			zlog_debug(
+				"%s: We have not connected yet, cannot send nexthops",
+				__func__);
+	}
+
+	/* The match options are only relevant when registering. */
+	if (command == ZEBRA_NEXTHOP_REGISTER) {
+		if (CHECK_FLAG(bnc->flags, BGP_NEXTHOP_CONNECTED))
+			exact_match = true;
+		if (CHECK_FLAG(bnc->flags, BGP_STATIC_ROUTE_EXACT_MATCH))
+			resolve_via_default = true;
+	}
+
+	if (BGP_DEBUG(zebra, ZEBRA))
+		zlog_debug("%s: sending cmd %s for %pFX (vrf %s)", __func__,
+			   zserv_command_string(command), &bnc->prefix,
+			   bnc->bgp->name_pretty);
+
+	ret = zclient_send_rnh(zclient, command, &bnc->prefix, SAFI_UNICAST,
+			       exact_match, resolve_via_default,
+			       bnc->bgp->vrf_id);
+	if (ret == ZCLIENT_SEND_FAILURE) {
+		/* Name the function and call that actually failed. */
+		flog_warn(EC_BGP_ZEBRA_SEND, "%s: zclient_send_rnh() failed",
+			  __func__);
+		return;
+	}
+
+	/* Track the registration state only once the message went out. */
+	if (command == ZEBRA_NEXTHOP_REGISTER)
+		SET_FLAG(bnc->flags, BGP_NEXTHOP_REGISTERED);
+	else if (command == ZEBRA_NEXTHOP_UNREGISTER)
+		UNSET_FLAG(bnc->flags, BGP_NEXTHOP_REGISTERED);
+}
+
+/**
+ * register_zebra_rnh - register a NH/route with Zebra for notification
+ *    when the route or the route to the nexthop changes.
+ *
+ * Does nothing when the entry is already flagged as registered.
+ *
+ * ARGUMENTS:
+ *   struct bgp_nexthop_cache *bnc
+ * RETURNS:
+ *   void.
+ */
+static void register_zebra_rnh(struct bgp_nexthop_cache *bnc)
+{
+	if (CHECK_FLAG(bnc->flags, BGP_NEXTHOP_REGISTERED))
+		return;
+
+	if (!bnc->ifindex_ipv6_ll) {
+		sendmsg_zebra_rnh(bnc, ZEBRA_NEXTHOP_REGISTER);
+		return;
+	}
+
+	/* Entries with an IPv6 link-local ifindex are only flagged as
+	 * registered; no message is sent to zebra for them.
+	 */
+	SET_FLAG(bnc->flags, BGP_NEXTHOP_REGISTERED);
+}
+
+/**
+ * unregister_zebra_rnh -- Unregister the route/nexthop from Zebra.
+ *
+ * Does nothing when the entry is not flagged as registered.
+ *
+ * ARGUMENTS:
+ *   struct bgp_nexthop_cache *bnc
+ * RETURNS:
+ *   void.
+ */
+static void unregister_zebra_rnh(struct bgp_nexthop_cache *bnc)
+{
+	/* Nothing to undo if we never registered. */
+	if (!CHECK_FLAG(bnc->flags, BGP_NEXTHOP_REGISTERED))
+		return;
+
+	if (!bnc->ifindex_ipv6_ll) {
+		sendmsg_zebra_rnh(bnc, ZEBRA_NEXTHOP_UNREGISTER);
+		return;
+	}
+
+	/* Entries with an IPv6 link-local ifindex only have the flag
+	 * cleared; no message is sent to zebra for them.
+	 */
+	UNSET_FLAG(bnc->flags, BGP_NEXTHOP_REGISTERED);
+}
+
+/**
+ * evaluate_paths - Evaluate the paths/nets associated with a nexthop.
+ *
+ * Walks every path attached to the nexthop cache entry, updates the
+ * path's IGP metric, IGP-changed flag and validity to reflect the current
+ * state of the nexthop, and schedules the path's destination for
+ * processing. If a peer is attached to the entry (bnc->nht_info) and has
+ * not been notified yet, the peer's FSM is told whether the nexthop is
+ * usable. The entry's change flags are reset before returning.
+ *
+ * ARGUMENTS:
+ *   struct bgp_nexthop_cache *bnc -- the nexthop structure.
+ * RETURNS:
+ *   void.
+ */
+void evaluate_paths(struct bgp_nexthop_cache *bnc)
+{
+	struct bgp_dest *dest;
+	struct bgp_path_info *path;
+	struct bgp_path_info *bpi_ultimate;
+	int afi;
+	struct peer *peer = (struct peer *)bnc->nht_info;
+	struct bgp_table *table;
+	safi_t safi;
+	struct bgp *bgp_path;
+	const struct prefix *p;
+
+	if (BGP_DEBUG(nht, NHT)) {
+		char bnc_buf[BNC_FLAG_DUMP_SIZE];
+		char chg_buf[BNC_FLAG_DUMP_SIZE];
+
+		/* Each dump helper is given the size of its own buffer. */
+		zlog_debug(
+			"NH update for %pFX(%d)(%u)(%s) - flags %s chgflags %s- evaluate paths",
+			&bnc->prefix, bnc->ifindex_ipv6_ll, bnc->srte_color,
+			bnc->bgp->name_pretty,
+			bgp_nexthop_dump_bnc_flags(bnc, bnc_buf,
+						   sizeof(bnc_buf)),
+			bgp_nexthop_dump_bnc_change_flags(bnc, chg_buf,
+							  sizeof(chg_buf)));
+	}
+
+	LIST_FOREACH (path, &(bnc->paths), nh_thread) {
+		if (path->type == ZEBRA_ROUTE_BGP &&
+		    (path->sub_type == BGP_ROUTE_NORMAL ||
+		     path->sub_type == BGP_ROUTE_STATIC ||
+		     path->sub_type == BGP_ROUTE_IMPORTED))
+			/* evaluate the path */
+			;
+		else if (path->sub_type == BGP_ROUTE_REDISTRIBUTE) {
+			/* evaluate the path for redistributed routes
+			 * except those from VNC
+			 */
+			if ((path->type == ZEBRA_ROUTE_VNC) ||
+			    (path->type == ZEBRA_ROUTE_VNC_DIRECT))
+				continue;
+		} else
+			/* don't evaluate the path */
+			continue;
+
+		dest = path->net;
+		assert(dest && bgp_dest_table(dest));
+		p = bgp_dest_get_prefix(dest);
+		afi = family2afi(p->family);
+		table = bgp_dest_table(dest);
+		safi = table->safi;
+
+		/*
+		 * handle routes from other VRFs (they can have a
+		 * nexthop in THIS VRF). bgp_path is the bgp instance
+		 * that owns the route referencing this nexthop.
+		 */
+		bgp_path = table->bgp;
+
+		/*
+		 * Path becomes valid/invalid depending on whether the nexthop
+		 * reachable/unreachable.
+		 *
+		 * In case of unicast routes that were imported from vpn
+		 * and that have labels, they are valid only if there are
+		 * nexthops with labels
+		 *
+		 * If the nexthop is EVPN gateway-IP,
+		 * do not check for a valid label.
+		 */
+
+		bool bnc_is_valid_nexthop = false;
+		bool path_valid = false;
+
+		if (safi == SAFI_UNICAST && path->sub_type == BGP_ROUTE_IMPORTED
+		    && path->extra && path->extra->num_labels
+		    && (path->attr->evpn_overlay.type
+			!= OVERLAY_INDEX_GATEWAY_IP)) {
+			bnc_is_valid_nexthop =
+				bgp_isvalid_nexthop_for_mpls(bnc, path) ? true
+									: false;
+		} else if (safi == SAFI_MPLS_VPN &&
+			   path->sub_type != BGP_ROUTE_IMPORTED) {
+			/* avoid not redistributing mpls vpn routes */
+			bnc_is_valid_nexthop = true;
+		} else {
+			/* mpls-vpn routes with BGP_ROUTE_IMPORTED subtype */
+			if (bgp_update_martian_nexthop(
+				    bnc->bgp, afi, safi, path->type,
+				    path->sub_type, path->attr, dest)) {
+				/* name_pretty, like every other log line in
+				 * this function.
+				 */
+				if (BGP_DEBUG(nht, NHT))
+					zlog_debug(
+						"%s: prefix %pBD (vrf %s), ignoring path due to martian or self-next-hop",
+						__func__, dest,
+						bgp_path->name_pretty);
+			} else
+				bnc_is_valid_nexthop =
+					bgp_isvalid_nexthop(bnc) ? true : false;
+		}
+
+		if (BGP_DEBUG(nht, NHT)) {
+
+			if (dest->pdest) {
+				char rd_buf[RD_ADDRSTRLEN];
+
+				prefix_rd2str(
+					(struct prefix_rd *)bgp_dest_get_prefix(
+						dest->pdest),
+					rd_buf, sizeof(rd_buf),
+					bgp_get_asnotation(bnc->bgp));
+				zlog_debug(
+					"... eval path %d/%d %pBD RD %s %s flags 0x%x",
+					afi, safi, dest, rd_buf,
+					bgp_path->name_pretty, path->flags);
+			} else
+				zlog_debug(
+					"... eval path %d/%d %pBD %s flags 0x%x",
+					afi, safi, dest, bgp_path->name_pretty,
+					path->flags);
+		}
+
+		/* Skip paths marked for removal or as history. */
+		if (CHECK_FLAG(path->flags, BGP_PATH_REMOVED)
+		    || CHECK_FLAG(path->flags, BGP_PATH_HISTORY))
+			continue;
+
+		/* Copy the metric to the path. Will be used for bestpath
+		 * computation */
+		bpi_ultimate = bgp_get_imported_bpi_ultimate(path);
+		if (bgp_isvalid_nexthop(bnc) && bnc->metric)
+			(bgp_path_info_extra_get(bpi_ultimate))->igpmetric =
+				bnc->metric;
+		else if (bpi_ultimate->extra)
+			bpi_ultimate->extra->igpmetric = 0;
+
+		if (CHECK_FLAG(bnc->change_flags, BGP_NEXTHOP_METRIC_CHANGED) ||
+		    CHECK_FLAG(bnc->change_flags, BGP_NEXTHOP_CHANGED) ||
+		    bgp_attr_get_color(path->attr))
+			SET_FLAG(path->flags, BGP_PATH_IGP_CHANGED);
+
+		path_valid = CHECK_FLAG(path->flags, BGP_PATH_VALID);
+		if (path->type == ZEBRA_ROUTE_BGP &&
+		    path->sub_type == BGP_ROUTE_STATIC &&
+		    !CHECK_FLAG(bgp_path->flags, BGP_FLAG_IMPORT_CHECK))
+			/* static routes with 'no bgp network import-check' are
+			 * always valid. if nht is called with static routes,
+			 * the vpn exportation needs to be triggered
+			 */
+			vpn_leak_from_vrf_update(bgp_get_default(), bgp_path,
+						 path);
+		else if (path->sub_type == BGP_ROUTE_REDISTRIBUTE &&
+			 safi == SAFI_UNICAST &&
+			 (bgp_path->inst_type == BGP_INSTANCE_TYPE_VRF ||
+			  bgp_path->inst_type == BGP_INSTANCE_TYPE_DEFAULT))
+			/* redistribute routes are always valid
+			 * if nht is called with redistribute routes, the vpn
+			 * exportation needs to be triggered
+			 */
+			vpn_leak_from_vrf_update(bgp_get_default(), bgp_path,
+						 path);
+		else if (path_valid != bnc_is_valid_nexthop) {
+			if (path_valid) {
+				/* No longer valid, clear flag; also for EVPN
+				 * routes, unimport from VRFs if needed.
+				 */
+				bgp_aggregate_decrement(bgp_path, p, path, afi,
+							safi);
+				bgp_path_info_unset_flag(dest, path,
+							 BGP_PATH_VALID);
+				if (safi == SAFI_EVPN &&
+				    bgp_evpn_is_prefix_nht_supported(bgp_dest_get_prefix(dest)))
+					bgp_evpn_unimport_route(bgp_path,
+						afi, safi, bgp_dest_get_prefix(dest), path);
+				if (safi == SAFI_UNICAST &&
+				    (bgp_path->inst_type !=
+				     BGP_INSTANCE_TYPE_VIEW))
+					vpn_leak_from_vrf_withdraw(
+						bgp_get_default(), bgp_path,
+						path);
+			} else {
+				/* Path becomes valid, set flag; also for EVPN
+				 * routes, import from VRFs if needed.
+				 */
+				bgp_path_info_set_flag(dest, path,
+						       BGP_PATH_VALID);
+				bgp_aggregate_increment(bgp_path, p, path, afi,
+							safi);
+				if (safi == SAFI_EVPN &&
+				    bgp_evpn_is_prefix_nht_supported(bgp_dest_get_prefix(dest)))
+					bgp_evpn_import_route(bgp_path,
+						afi, safi, bgp_dest_get_prefix(dest), path);
+				if (safi == SAFI_UNICAST &&
+				    (bgp_path->inst_type !=
+				     BGP_INSTANCE_TYPE_VIEW))
+					vpn_leak_from_vrf_update(
+						bgp_get_default(), bgp_path,
+						path);
+			}
+		}
+
+		bgp_process(bgp_path, dest, afi, safi);
+	}
+
+	if (peer) {
+		int valid_nexthops = bgp_isvalid_nexthop(bnc);
+
+		if (valid_nexthops) {
+			/*
+			 * Peering cannot occur across a blackhole nexthop
+			 */
+			if (bnc->nexthop_num == 1 && bnc->nexthop
+			    && bnc->nexthop->type == NEXTHOP_TYPE_BLACKHOLE) {
+				peer->last_reset = PEER_DOWN_WAITING_NHT;
+				valid_nexthops = 0;
+			} else
+				peer->last_reset = PEER_DOWN_WAITING_OPEN;
+		} else
+			peer->last_reset = PEER_DOWN_WAITING_NHT;
+
+		/* Notify the peer's FSM only once per entry; the flag is
+		 * set right after the notification.
+		 */
+		if (!CHECK_FLAG(bnc->flags, BGP_NEXTHOP_PEER_NOTIFIED)) {
+			if (BGP_DEBUG(nht, NHT))
+				zlog_debug(
+					"%s: Updating peer (%s(%s)) status with NHT nexthops %d",
+					__func__, peer->host,
+					peer->bgp->name_pretty,
+					!!valid_nexthops);
+			bgp_fsm_nht_update(peer->connection, peer,
+					   !!valid_nexthops);
+			SET_FLAG(bnc->flags, BGP_NEXTHOP_PEER_NOTIFIED);
+		}
+	}
+
+	RESET_FLAG(bnc->change_flags);
+}
+
+/**
+ * path_nh_map - make or break path-to-nexthop association.
+ *
+ * Any association the path already has is always removed first, with the
+ * old nexthop's path count decremented accordingly.
+ *
+ * ARGUMENTS:
+ *   path - pointer to the path structure
+ *   bnc - pointer to the nexthop structure; only used when make is set
+ *   make - if set, make the association. if unset, just break the existing
+ *          association.
+ */
+void path_nh_map(struct bgp_path_info *path, struct bgp_nexthop_cache *bnc,
+		 bool make)
+{
+	/* Detach the path from the nexthop it currently hangs off. */
+	if (path->nexthop) {
+		path->nexthop->path_count--;
+		LIST_REMOVE(path, nh_thread);
+		path->nexthop = NULL;
+	}
+
+	if (!make)
+		return;
+
+	/* Attach the path at the head of the new nexthop's path list. */
+	bnc->path_count++;
+	path->nexthop = bnc;
+	LIST_INSERT_HEAD(&(bnc->paths), path, nh_thread);
+}
+
+/*
+ * Register every nexthop cache entry of this instance with zebra.
+ * Needed because we may have tried to install nexthops before we
+ * actually had a zebra connection.
+ */
+void bgp_nht_register_nexthops(struct bgp *bgp)
+{
+	struct bgp_nexthop_cache *bnc;
+	afi_t afi;
+
+	for (afi = AFI_IP; afi < AFI_MAX; afi++)
+		frr_each (bgp_nexthop_cache, &bgp->nexthop_cache_table[afi],
+			  bnc)
+			register_zebra_rnh(bnc);
+}
+
+/*
+ * Shared body of bgp_nht_reg_enhe_cap_intfs() and
+ * bgp_nht_dereg_enhe_cap_intfs(): send a router-advertisement request to
+ * zebra for every interface behind the nexthops of the peer's IPv6
+ * nexthop cache entry.
+ *
+ * Nothing is done when the peer has an interface of its own (peer->ifp),
+ * when its address is not IPv6, or when no cache entry owned by this peer
+ * exists.
+ *
+ * ARGUMENTS:
+ *   peer - peer whose nexthop interfaces are updated
+ *   enable - value passed as the request's enable argument; when set the
+ *            interval is BGP_UNNUM_DEFAULT_RA_INTERVAL, otherwise 0
+ *   caller - name of the public entry point, used in the warning log
+ */
+static void bgp_nht_enhe_cap_intfs_radv(struct peer *peer, bool enable,
+					const char *caller)
+{
+	struct bgp *bgp;
+	struct bgp_nexthop_cache *bnc;
+	struct nexthop *nhop;
+	struct interface *ifp;
+	struct prefix p;
+	ifindex_t ifindex = 0;
+
+	if (peer->ifp)
+		return;
+
+	bgp = peer->bgp;
+	if (!sockunion2hostprefix(&peer->connection->su, &p)) {
+		zlog_warn("%s: Unable to convert sockunion to prefix for %s",
+			  caller, peer->host);
+		return;
+	}
+
+	if (p.family != AF_INET6)
+		return;
+	/*
+	 * Gather the ifindex for if up/down events to be
+	 * tagged into this function
+	 */
+	if (peer->conf_if &&
+	    IN6_IS_ADDR_LINKLOCAL(&peer->connection->su.sin6.sin6_addr))
+		ifindex = peer->connection->su.sin6.sin6_scope_id;
+
+	bnc = bnc_find(&bgp->nexthop_cache_table[AFI_IP6], &p, 0, ifindex);
+	if (!bnc)
+		return;
+
+	/* Only act on an entry that belongs to this peer. */
+	if (peer != bnc->nht_info)
+		return;
+
+	for (nhop = bnc->nexthop; nhop; nhop = nhop->next) {
+		ifp = if_lookup_by_index(nhop->ifindex, nhop->vrf_id);
+
+		if (!ifp)
+			continue;
+
+		zclient_send_interface_radv_req(
+			zclient, nhop->vrf_id, ifp, enable,
+			enable ? BGP_UNNUM_DEFAULT_RA_INTERVAL : 0);
+	}
+}
+
+/*
+ * Request router advertisements (interval BGP_UNNUM_DEFAULT_RA_INTERVAL)
+ * on the interfaces behind the peer's IPv6 nexthops.
+ */
+void bgp_nht_reg_enhe_cap_intfs(struct peer *peer)
+{
+	bgp_nht_enhe_cap_intfs_radv(peer, true, __func__);
+}
+
+/*
+ * Counterpart of bgp_nht_reg_enhe_cap_intfs(): send the same request with
+ * enable unset and a zero interval.
+ */
+void bgp_nht_dereg_enhe_cap_intfs(struct peer *peer)
+{
+	bgp_nht_enhe_cap_intfs_radv(peer, false, __func__);
+}
+
+/****************************************************************************
+ * L3 NHGs are used for fast failover of nexthops in the dplane. These are
+ * the APIs for allocating L3 NHG ids. Management of the L3 NHG itself is
+ * left to the application using it.
+ * PS: Currently EVPN host routes is the only app using L3 NHG for fast
+ * failover of remote ES links.
+ ***************************************************************************/
+/* Bitmap of allocated id indices: set up in bgp_l3nhg_init(), indices are
+ * taken in bgp_l3nhg_id_alloc() and given back in bgp_l3nhg_id_free().
+ */
+static bitfield_t bgp_nh_id_bitmap;
+/* Base value added to a bitmap index to form the NHG id handed out; set
+ * from zclient_get_nhg_start(ZEBRA_ROUTE_BGP) in bgp_l3nhg_zebra_init().
+ */
+static uint32_t bgp_l3nhg_start;
+
+/* XXX - currently we do nothing on the callbacks.
+ *
+ * The five stubs below are the handlers passed to nexthop_group_init() in
+ * bgp_l3nhg_zebra_init(); all of them are intentionally empty.
+ */
+
+/* Stub for the nexthop-group "add" callback. */
+static void bgp_l3nhg_add_cb(const char *name)
+{
+}
+
+/* Stub for the nexthop-group "modify" callback. */
+static void bgp_l3nhg_modify_cb(const struct nexthop_group_cmd *nhgc)
+{
+}
+
+/* Stub for the nexthop-group "add nexthop" callback. */
+static void bgp_l3nhg_add_nexthop_cb(const struct nexthop_group_cmd *nhgc,
+				     const struct nexthop *nhop)
+{
+}
+
+/* Stub for the nexthop-group "delete nexthop" callback. */
+static void bgp_l3nhg_del_nexthop_cb(const struct nexthop_group_cmd *nhgc,
+				     const struct nexthop *nhop)
+{
+}
+
+/* Stub for the nexthop-group "delete" callback. */
+static void bgp_l3nhg_del_cb(const char *name)
+{
+}
+
+/*
+ * One-time setup for L3 NHG id allocation: fetch the start of BGP's NHG id
+ * range and register the (empty) nexthop-group callbacks. Calls after the
+ * first one are no-ops.
+ */
+static void bgp_l3nhg_zebra_init(void)
+{
+	static bool done;
+
+	if (!done) {
+		done = true;
+		bgp_l3nhg_start = zclient_get_nhg_start(ZEBRA_ROUTE_BGP);
+		nexthop_group_init(bgp_l3nhg_add_cb, bgp_l3nhg_modify_cb,
+				   bgp_l3nhg_add_nexthop_cb,
+				   bgp_l3nhg_del_nexthop_cb, bgp_l3nhg_del_cb);
+	}
+}
+
+
+/*
+ * Set up the L3 NHG id bitmap, sized to the smaller of
+ * ZEBRA_NHG_PROTO_SPACING - 1 and 16K entries, and take index zero out of
+ * circulation.
+ *
+ * NOTE(review): the debug line prints the range from bgp_l3nhg_start,
+ * which is only assigned in bgp_l3nhg_zebra_init() -- confirm it is
+ * meaningful at this point.
+ */
+void bgp_l3nhg_init(void)
+{
+	uint32_t id_max = MIN(ZEBRA_NHG_PROTO_SPACING - 1, 16 * 1024);
+
+	bf_init(bgp_nh_id_bitmap, id_max);
+	bf_assign_zero_index(bgp_nh_id_bitmap);
+
+	if (!BGP_DEBUG(nht, NHT) && !BGP_DEBUG(evpn_mh, EVPN_MH_ES))
+		return;
+
+	zlog_debug("bgp l3_nhg range %u - %u", bgp_l3nhg_start + 1,
+		   bgp_l3nhg_start + id_max);
+}
+
+/* Release the L3 NHG id bitmap set up by bgp_l3nhg_init(). */
+void bgp_l3nhg_finish(void)
+{
+	bf_free(bgp_nh_id_bitmap);
+}
+
+/*
+ * Allocate an L3 NHG id.
+ *
+ * RETURNS:
+ *   The assigned bitmap index offset by bgp_l3nhg_start, or 0 when the
+ *   assigned index is 0.
+ */
+uint32_t bgp_l3nhg_id_alloc(void)
+{
+	uint32_t idx = 0;
+
+	/* Make sure the start of the id range is known before using it. */
+	bgp_l3nhg_zebra_init();
+	bf_assign_index(bgp_nh_id_bitmap, idx);
+
+	return idx ? idx + bgp_l3nhg_start : 0;
+}
+
+/*
+ * Give an id obtained from bgp_l3nhg_id_alloc() back to the bitmap.
+ * Ids that are 0 or not above bgp_l3nhg_start are ignored.
+ */
+void bgp_l3nhg_id_free(uint32_t nhg_id)
+{
+	uint32_t idx;
+
+	/* nhg_id is unsigned, so this comparison also rejects 0. */
+	if (nhg_id <= bgp_l3nhg_start)
+		return;
+
+	idx = nhg_id - bgp_l3nhg_start;
+	bf_release_index(bgp_nh_id_bitmap, idx);
+}
author	Daniel Baumann <daniel.baumann@progress-linux.org>	2024-04-09 13:16:35 +0000
committer	Daniel Baumann <daniel.baumann@progress-linux.org>	2024-04-09 13:16:35 +0000
commit	e2bbf175a2184bd76f6c54ccf8456babeb1a46fc (patch)
tree	f0b76550d6e6f500ada964a3a4ee933a45e5a6f1 /bgpd/bgp_nht.c
parent	Initial commit. (diff)
download	frr-e2bbf175a2184bd76f6c54ccf8456babeb1a46fc.tar.xz frr-e2bbf175a2184bd76f6c54ccf8456babeb1a46fc.zip