summaryrefslogtreecommitdiffstats
path: root/bgpd/bgp_evpn_mh.c
diff options
context:
space:
mode:
Diffstat (limited to 'bgpd/bgp_evpn_mh.c')
-rw-r--r--bgpd/bgp_evpn_mh.c4979
1 files changed, 4979 insertions, 0 deletions
diff --git a/bgpd/bgp_evpn_mh.c b/bgpd/bgp_evpn_mh.c
new file mode 100644
index 0000000..27a5bc9
--- /dev/null
+++ b/bgpd/bgp_evpn_mh.c
@@ -0,0 +1,4979 @@
+/* EVPN Multihoming procedures
+ *
+ * Copyright (C) 2019 Cumulus Networks, Inc.
+ * Anuradha Karuppiah
+ *
+ * This file is part of FRR.
+ *
+ * FRRouting is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License as published by the
+ * Free Software Foundation; either version 2, or (at your option) any
+ * later version.
+ *
+ * FRRouting is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * General Public License for more details.
+ *
+ */
+
+#include <zebra.h>
+
+#include "command.h"
+#include "filter.h"
+#include "prefix.h"
+#include "log.h"
+#include "memory.h"
+#include "stream.h"
+#include "hash.h"
+#include "jhash.h"
+#include "zclient.h"
+
+#include "lib/printfrr.h"
+
+#include "bgpd/bgp_attr_evpn.h"
+#include "bgpd/bgpd.h"
+#include "bgpd/bgp_table.h"
+#include "bgpd/bgp_route.h"
+#include "bgpd/bgp_attr.h"
+#include "bgpd/bgp_mplsvpn.h"
+#include "bgpd/bgp_evpn.h"
+#include "bgpd/bgp_evpn_private.h"
+#include "bgpd/bgp_evpn_mh.h"
+#include "bgpd/bgp_ecommunity.h"
+#include "bgpd/bgp_encap_types.h"
+#include "bgpd/bgp_debug.h"
+#include "bgpd/bgp_errors.h"
+#include "bgpd/bgp_aspath.h"
+#include "bgpd/bgp_zebra.h"
+#include "bgpd/bgp_addpath.h"
+#include "bgpd/bgp_label.h"
+#include "bgpd/bgp_nht.h"
+#include "bgpd/bgp_mpath.h"
+#include "bgpd/bgp_trace.h"
+
+static void bgp_evpn_local_es_down(struct bgp *bgp,
+ struct bgp_evpn_es *es);
+static void bgp_evpn_local_type1_evi_route_del(struct bgp *bgp,
+ struct bgp_evpn_es *es);
+static struct bgp_evpn_es_vtep *bgp_evpn_es_vtep_add(struct bgp *bgp,
+ struct bgp_evpn_es *es,
+ struct in_addr vtep_ip,
+ bool esr, uint8_t df_alg,
+ uint16_t df_pref);
+static void bgp_evpn_es_vtep_del(struct bgp *bgp,
+ struct bgp_evpn_es *es, struct in_addr vtep_ip, bool esr);
+static void bgp_evpn_es_cons_checks_pend_add(struct bgp_evpn_es *es);
+static void bgp_evpn_es_cons_checks_pend_del(struct bgp_evpn_es *es);
+static struct bgp_evpn_es_evi *
+bgp_evpn_local_es_evi_do_del(struct bgp_evpn_es_evi *es_evi);
+static uint32_t bgp_evpn_es_get_active_vtep_cnt(struct bgp_evpn_es *es);
+static void bgp_evpn_l3nhg_update_on_vtep_chg(struct bgp_evpn_es *es);
+static struct bgp_evpn_es *bgp_evpn_es_new(struct bgp *bgp, const esi_t *esi);
+static void bgp_evpn_es_free(struct bgp_evpn_es *es, const char *caller);
+static void bgp_evpn_path_es_unlink(struct bgp_path_es_info *es_info);
+static void bgp_evpn_mac_update_on_es_local_chg(struct bgp_evpn_es *es,
+ bool is_local);
+
+esi_t zero_esi_buf, *zero_esi = &zero_esi_buf;
+static void bgp_evpn_run_consistency_checks(struct thread *t);
+static void bgp_evpn_path_nh_info_free(struct bgp_path_evpn_nh_info *nh_info);
+static void bgp_evpn_path_nh_unlink(struct bgp_path_evpn_nh_info *nh_info);
+
+/******************************************************************************
+ * per-ES (Ethernet Segment) routing table
+ *
+ * Following routes are added to the ES's routing table -
+ * 1. Local and remote ESR (Type-4)
+ * 2. Local EAD-per-ES (Type-1).
+ *
+ * Key for these routes is {ESI, VTEP-IP} so the path selection is practically
+ * a no-op i.e. all paths lead to same VTEP-IP (i.e. result in the same VTEP
+ * being added to same ES).
+ *
+ * Note the following routes go into the VNI routing table (instead of the
+ * ES routing table) -
+ * 1. Remote EAD-per-ES
+ * 2. Local and remote EAD-per-EVI
+ */
+
+/* Calculate the best path for a multi-homing (Type-1 or Type-4) route
+ * installed in the ES's routing table.
+ */
+static int bgp_evpn_es_route_select_install(struct bgp *bgp,
+ struct bgp_evpn_es *es,
+ struct bgp_dest *dest)
+{
+ int ret = 0;
+ afi_t afi = AFI_L2VPN;
+ safi_t safi = SAFI_EVPN;
+ struct bgp_path_info *old_select; /* old best */
+ struct bgp_path_info *new_select; /* new best */
+ struct bgp_path_info_pair old_and_new;
+
+ /* Compute the best path. */
+ bgp_best_selection(bgp, dest, &bgp->maxpaths[afi][safi], &old_and_new,
+ afi, safi);
+ old_select = old_and_new.old;
+ new_select = old_and_new.new;
+
+ /*
+ * If the best path hasn't changed - see if something needs to be
+ * updated
+ */
+ if (old_select && old_select == new_select
+ && old_select->type == ZEBRA_ROUTE_BGP
+ && old_select->sub_type == BGP_ROUTE_IMPORTED
+ && !CHECK_FLAG(dest->flags, BGP_NODE_USER_CLEAR)
+ && !CHECK_FLAG(old_select->flags, BGP_PATH_ATTR_CHANGED)
+ && !bgp_addpath_is_addpath_used(&bgp->tx_addpath, afi, safi)) {
+ if (bgp_zebra_has_route_changed(old_select)) {
+ bgp_evpn_es_vtep_add(bgp, es, old_select->attr->nexthop,
+ true /*esr*/,
+ old_select->attr->df_alg,
+ old_select->attr->df_pref);
+ }
+ UNSET_FLAG(old_select->flags, BGP_PATH_MULTIPATH_CHG);
+ bgp_zebra_clear_route_change_flags(dest);
+ return ret;
+ }
+
+ /* If the user did a "clear" this flag will be set */
+ UNSET_FLAG(dest->flags, BGP_NODE_USER_CLEAR);
+
+ /* bestpath has changed; update relevant fields and install or uninstall
+ * into the zebra RIB.
+ */
+ if (old_select || new_select)
+ bgp_bump_version(dest);
+
+ if (old_select)
+ bgp_path_info_unset_flag(dest, old_select, BGP_PATH_SELECTED);
+ if (new_select) {
+ bgp_path_info_set_flag(dest, new_select, BGP_PATH_SELECTED);
+ bgp_path_info_unset_flag(dest, new_select,
+ BGP_PATH_ATTR_CHANGED);
+ UNSET_FLAG(new_select->flags, BGP_PATH_MULTIPATH_CHG);
+ }
+
+ if (new_select && new_select->type == ZEBRA_ROUTE_BGP
+ && new_select->sub_type == BGP_ROUTE_IMPORTED) {
+ bgp_evpn_es_vtep_add(bgp, es, new_select->attr->nexthop,
+ true /*esr */, new_select->attr->df_alg,
+ new_select->attr->df_pref);
+ } else {
+ if (old_select && old_select->type == ZEBRA_ROUTE_BGP
+ && old_select->sub_type == BGP_ROUTE_IMPORTED)
+ bgp_evpn_es_vtep_del(
+ bgp, es, old_select->attr->nexthop,
+ true /*esr*/);
+ }
+
+ /* Clear any route change flags. */
+ bgp_zebra_clear_route_change_flags(dest);
+
+ /* Reap old select bgp_path_info, if it has been removed */
+ if (old_select && CHECK_FLAG(old_select->flags, BGP_PATH_REMOVED))
+ bgp_path_info_reap(dest, old_select);
+
+ return ret;
+}
+
+/* Install Type-1/Type-4 route entry in the per-ES routing table */
+static int bgp_evpn_es_route_install(struct bgp *bgp,
+ struct bgp_evpn_es *es, struct prefix_evpn *p,
+ struct bgp_path_info *parent_pi)
+{
+ int ret = 0;
+ struct bgp_dest *dest = NULL;
+ struct bgp_path_info *pi = NULL;
+ struct attr *attr_new = NULL;
+
+ /* Create (or fetch) route within the VNI.
+ * NOTE: There is no RD here.
+ */
+ dest = bgp_node_get(es->route_table, (struct prefix *)p);
+
+ /* Check if route entry is already present. */
+ for (pi = bgp_dest_get_bgp_path_info(dest); pi; pi = pi->next)
+ if (pi->extra &&
+ (struct bgp_path_info *)pi->extra->parent == parent_pi)
+ break;
+
+ if (!pi) {
+ /* Add (or update) attribute to hash. */
+ attr_new = bgp_attr_intern(parent_pi->attr);
+
+ /* Create new route with its attribute. */
+ pi = info_make(parent_pi->type, BGP_ROUTE_IMPORTED, 0,
+ parent_pi->peer, attr_new, dest);
+ SET_FLAG(pi->flags, BGP_PATH_VALID);
+ bgp_path_info_extra_get(pi);
+ pi->extra->parent = bgp_path_info_lock(parent_pi);
+ bgp_dest_lock_node((struct bgp_dest *)parent_pi->net);
+ bgp_path_info_add(dest, pi);
+ } else {
+ if (attrhash_cmp(pi->attr, parent_pi->attr)
+ && !CHECK_FLAG(pi->flags, BGP_PATH_REMOVED)) {
+ bgp_dest_unlock_node(dest);
+ return 0;
+ }
+ /* The attribute has changed. */
+ /* Add (or update) attribute to hash. */
+ attr_new = bgp_attr_intern(parent_pi->attr);
+
+ /* Restore route, if needed. */
+ if (CHECK_FLAG(pi->flags, BGP_PATH_REMOVED))
+ bgp_path_info_restore(dest, pi);
+
+ /* Mark if nexthop has changed. */
+ if (!IPV4_ADDR_SAME(&pi->attr->nexthop, &attr_new->nexthop))
+ SET_FLAG(pi->flags, BGP_PATH_IGP_CHANGED);
+
+ /* Unintern existing, set to new. */
+ bgp_attr_unintern(&pi->attr);
+ pi->attr = attr_new;
+ pi->uptime = monotime(NULL);
+ }
+
+ /* Perform route selection and update zebra, if required. */
+ ret = bgp_evpn_es_route_select_install(bgp, es, dest);
+
+ bgp_dest_unlock_node(dest);
+
+ return ret;
+}
+
+/* Uninstall Type-1/Type-4 route entry from the ES routing table */
+static int bgp_evpn_es_route_uninstall(struct bgp *bgp, struct bgp_evpn_es *es,
+ struct prefix_evpn *p, struct bgp_path_info *parent_pi)
+{
+ int ret;
+ struct bgp_dest *dest;
+ struct bgp_path_info *pi;
+
+ if (!es->route_table)
+ return 0;
+
+ /* Locate route within the ESI.
+ * NOTE: There is no RD here.
+ */
+ dest = bgp_node_lookup(es->route_table, (struct prefix *)p);
+ if (!dest)
+ return 0;
+
+ /* Find matching route entry. */
+ for (pi = bgp_dest_get_bgp_path_info(dest); pi; pi = pi->next)
+ if (pi->extra
+ && (struct bgp_path_info *)pi->extra->parent ==
+ parent_pi)
+ break;
+
+ if (!pi) {
+ bgp_dest_unlock_node(dest);
+ return 0;
+ }
+
+ /* Mark entry for deletion */
+ bgp_path_info_delete(dest, pi);
+
+ /* Perform route selection and update zebra, if required. */
+ ret = bgp_evpn_es_route_select_install(bgp, es, dest);
+
+ /* Unlock route node. */
+ bgp_dest_unlock_node(dest);
+
+ return ret;
+}
+
+/* Install or unistall a Type-4 route in the per-ES routing table */
+int bgp_evpn_es_route_install_uninstall(struct bgp *bgp, struct bgp_evpn_es *es,
+ afi_t afi, safi_t safi, struct prefix_evpn *evp,
+ struct bgp_path_info *pi, int install)
+{
+ int ret = 0;
+
+ if (install)
+ ret = bgp_evpn_es_route_install(bgp, es, evp, pi);
+ else
+ ret = bgp_evpn_es_route_uninstall(bgp, es, evp, pi);
+
+ if (ret) {
+ flog_err(
+ EC_BGP_EVPN_FAIL,
+ "%u: Failed to %s EVPN %s route in ESI %s",
+ bgp->vrf_id,
+ install ? "install" : "uninstall",
+ "ES", es->esi_str);
+ return ret;
+ }
+ return 0;
+}
+
+/* Delete (and withdraw) local routes for specified ES from global and ES table.
+ * Also remove all remote routes from the per ES table. Invoked when ES
+ * is deleted.
+ */
+static void bgp_evpn_es_route_del_all(struct bgp *bgp, struct bgp_evpn_es *es)
+{
+ struct bgp_dest *dest;
+ struct bgp_path_info *pi, *nextpi;
+
+ /* de-activate the ES */
+ bgp_evpn_local_es_down(bgp, es);
+ bgp_evpn_local_type1_evi_route_del(bgp, es);
+
+ /* Walk this ES's routing table and delete all routes. */
+ for (dest = bgp_table_top(es->route_table); dest;
+ dest = bgp_route_next(dest)) {
+ for (pi = bgp_dest_get_bgp_path_info(dest);
+ (pi != NULL) && (nextpi = pi->next, 1); pi = nextpi) {
+ bgp_path_info_delete(dest, pi);
+ bgp_path_info_reap(dest, pi);
+ }
+ }
+}
+
+/*****************************************************************************
+ * Base APIs for creating MH routes (Type-1 or Type-4) on local ethernet
+ * segment updates.
+ */
+
+/* create or update local EVPN type1/type4 route entry.
+ *
+ * This could be in -
+ * the ES table if ESR/EAD-ES (or)
+ * the VNI table if EAD-EVI (or)
+ * the global table if ESR/EAD-ES/EAD-EVI
+ *
+ * Note: vpn is applicable only to EAD-EVI routes (NULL for EAD-ES and
+ * ESR).
+ */
+int bgp_evpn_mh_route_update(struct bgp *bgp, struct bgp_evpn_es *es,
+ struct bgpevpn *vpn, afi_t afi, safi_t safi,
+ struct bgp_dest *dest, struct attr *attr,
+ struct bgp_path_info **ri, int *route_changed)
+{
+ struct bgp_path_info *tmp_pi = NULL;
+ struct bgp_path_info *local_pi = NULL; /* local route entry if any */
+ struct bgp_path_info *remote_pi = NULL; /* remote route entry if any */
+ struct attr *attr_new = NULL;
+ struct prefix_evpn *evp;
+
+ *ri = NULL;
+ evp = (struct prefix_evpn *)bgp_dest_get_prefix(dest);
+ *route_changed = 1;
+
+ /* locate the local and remote entries if any */
+ for (tmp_pi = bgp_dest_get_bgp_path_info(dest); tmp_pi;
+ tmp_pi = tmp_pi->next) {
+ if (tmp_pi->peer == bgp->peer_self
+ && tmp_pi->type == ZEBRA_ROUTE_BGP
+ && tmp_pi->sub_type == BGP_ROUTE_STATIC)
+ local_pi = tmp_pi;
+ if (tmp_pi->type == ZEBRA_ROUTE_BGP
+ && tmp_pi->sub_type == BGP_ROUTE_IMPORTED
+ && CHECK_FLAG(tmp_pi->flags, BGP_PATH_VALID))
+ remote_pi = tmp_pi;
+ }
+
+ /* we don't expect to see a remote_pi at this point as
+ * an ES route has {esi, vtep_ip} as the key in the ES-rt-table
+ * in the VNI-rt-table.
+ */
+ if (remote_pi) {
+ flog_err(
+ EC_BGP_ES_INVALID,
+ "%u ERROR: local es route for ESI: %s vtep %pI4 also learnt from remote",
+ bgp->vrf_id, es ? es->esi_str : "Null",
+ es ? &es->originator_ip : NULL);
+ return -1;
+ }
+
+ /* create or update the entry */
+ if (!local_pi) {
+
+ /* Add or update attribute to hash */
+ attr_new = bgp_attr_intern(attr);
+
+ /* Create new route with its attribute. */
+ tmp_pi = info_make(ZEBRA_ROUTE_BGP, BGP_ROUTE_STATIC, 0,
+ bgp->peer_self, attr_new, dest);
+ SET_FLAG(tmp_pi->flags, BGP_PATH_VALID);
+
+ if (evp->prefix.route_type == BGP_EVPN_AD_ROUTE) {
+ bgp_path_info_extra_get(tmp_pi);
+ tmp_pi->extra->num_labels = 1;
+ if (vpn)
+ vni2label(vpn->vni, &tmp_pi->extra->label[0]);
+ else
+ tmp_pi->extra->label[0] = 0;
+ }
+
+ /* add the newly created path to the route-node */
+ bgp_path_info_add(dest, tmp_pi);
+ } else {
+ tmp_pi = local_pi;
+ if (attrhash_cmp(tmp_pi->attr, attr)
+ && !CHECK_FLAG(tmp_pi->flags, BGP_PATH_REMOVED))
+ *route_changed = 0;
+ else {
+ /* The attribute has changed.
+ * Add (or update) attribute to hash.
+ */
+ attr_new = bgp_attr_intern(attr);
+ bgp_path_info_set_flag(dest, tmp_pi,
+ BGP_PATH_ATTR_CHANGED);
+
+ /* Restore route, if needed. */
+ if (CHECK_FLAG(tmp_pi->flags, BGP_PATH_REMOVED))
+ bgp_path_info_restore(dest, tmp_pi);
+
+ /* Unintern existing, set to new. */
+ bgp_attr_unintern(&tmp_pi->attr);
+ tmp_pi->attr = attr_new;
+ tmp_pi->uptime = monotime(NULL);
+ }
+ }
+
+ if (*route_changed) {
+ if (BGP_DEBUG(evpn_mh, EVPN_MH_RT))
+ zlog_debug(
+ "local ES %s vni %u route-type %s nexthop %pI4 updated",
+ es ? es->esi_str : "Null", vpn ? vpn->vni : 0,
+ evp->prefix.route_type == BGP_EVPN_ES_ROUTE
+ ? "esr"
+ : (vpn ? "ead-evi" : "ead-es"),
+ &attr->mp_nexthop_global_in);
+ }
+
+ /* Return back the route entry. */
+ *ri = tmp_pi;
+ return 0;
+}
+
+/* Delete local EVPN ESR (type-4) and EAD (type-1) route
+ *
+ * Note: vpn is applicable only to EAD-EVI routes (NULL for EAD-ES and
+ * ESR).
+ */
+static int bgp_evpn_mh_route_delete(struct bgp *bgp, struct bgp_evpn_es *es,
+ struct bgpevpn *vpn,
+ struct bgp_evpn_es_frag *es_frag,
+ struct prefix_evpn *p)
+{
+ afi_t afi = AFI_L2VPN;
+ safi_t safi = SAFI_EVPN;
+ struct bgp_path_info *pi;
+ struct bgp_dest *dest = NULL; /* dest in esi table */
+ struct bgp_dest *global_dest = NULL; /* dest in global table */
+ struct bgp_table *rt_table;
+ struct prefix_rd *prd;
+
+ if (vpn) {
+ rt_table = vpn->route_table;
+ prd = &vpn->prd;
+ } else {
+ rt_table = es->route_table;
+ prd = &es_frag->prd;
+ }
+
+ /* First, locate the route node within the ESI or VNI.
+ * If it doesn't exist, ther is nothing to do.
+ * Note: there is no RD here.
+ */
+ dest = bgp_node_lookup(rt_table, (struct prefix *)p);
+ if (!dest)
+ return 0;
+
+ if (BGP_DEBUG(evpn_mh, EVPN_MH_RT))
+ zlog_debug(
+ "local ES %s vni %u route-type %s nexthop %pI4 delete",
+ es->esi_str, vpn ? vpn->vni : 0,
+ p->prefix.route_type == BGP_EVPN_ES_ROUTE
+ ? "esr"
+ : (vpn ? "ead-evi" : "ead-es"),
+ &es->originator_ip);
+
+ /* Next, locate route node in the global EVPN routing table.
+ * Note that this table is a 2-level tree (RD-level + Prefix-level)
+ */
+ global_dest =
+ bgp_global_evpn_node_lookup(bgp->rib[afi][safi], afi, safi,
+ (const struct prefix_evpn *)p, prd);
+ if (global_dest) {
+
+ /* Delete route entry in the global EVPN table. */
+ delete_evpn_route_entry(bgp, afi, safi, global_dest, &pi);
+
+ /* Schedule for processing - withdraws to peers happen from
+ * this table.
+ */
+ if (pi)
+ bgp_process(bgp, global_dest, afi, safi);
+ bgp_dest_unlock_node(global_dest);
+ }
+
+ /*
+ * Delete route entry in the ESI or VNI routing table.
+ * This can just be removed.
+ */
+ delete_evpn_route_entry(bgp, afi, safi, dest, &pi);
+ if (pi)
+ bgp_path_info_reap(dest, pi);
+ bgp_dest_unlock_node(dest);
+ return 0;
+}
+
+/*
+ * This function is called when the VNI RD changes.
+ * Delete all EAD/EVI local routes for this VNI from the global routing table.
+ * These routes are scheduled for withdraw from peers.
+ */
+int delete_global_ead_evi_routes(struct bgp *bgp, struct bgpevpn *vpn)
+{
+ afi_t afi;
+ safi_t safi;
+ struct bgp_dest *rdrn, *rn;
+ struct bgp_table *table;
+ struct bgp_path_info *pi;
+
+ afi = AFI_L2VPN;
+ safi = SAFI_EVPN;
+
+ /* Find the RD node for the VNI in the global table */
+ rdrn = bgp_node_lookup(bgp->rib[afi][safi], (struct prefix *)&vpn->prd);
+ if (rdrn && bgp_dest_has_bgp_path_info_data(rdrn)) {
+ table = bgp_dest_get_bgp_table_info(rdrn);
+
+ /*
+ * Iterate over all the routes in this table and delete EAD/EVI
+ * routes
+ */
+ for (rn = bgp_table_top(table); rn; rn = bgp_route_next(rn)) {
+ struct prefix_evpn *evp = (struct prefix_evpn *)&rn->p;
+
+ if (evp->prefix.route_type != BGP_EVPN_AD_ROUTE)
+ continue;
+
+ delete_evpn_route_entry(bgp, afi, safi, rn, &pi);
+ if (pi)
+ bgp_process(bgp, rn, afi, safi);
+ }
+ }
+
+ /* Unlock RD node. */
+ if (rdrn)
+ bgp_dest_unlock_node(rdrn);
+
+ return 0;
+}
+
+/*****************************************************************************
+ * Ethernet Segment (Type-4) Routes
+ * ESRs are used for DF election. Currently service-carving described in
+ * RFC 7432 is NOT supported. Instead preference based DF election is
+ * used by default.
+ * Reference: draft-ietf-bess-evpn-pref-df
+ */
+/* Build extended community for EVPN ES (type-4) route */
+static void bgp_evpn_type4_route_extcomm_build(struct bgp_evpn_es *es,
+ struct attr *attr)
+{
+ struct ecommunity ecom_encap;
+ struct ecommunity ecom_es_rt;
+ struct ecommunity ecom_df;
+ struct ecommunity_val eval;
+ struct ecommunity_val eval_es_rt;
+ struct ecommunity_val eval_df;
+ bgp_encap_types tnl_type;
+ struct ethaddr mac;
+
+ /* Encap */
+ tnl_type = BGP_ENCAP_TYPE_VXLAN;
+ memset(&ecom_encap, 0, sizeof(ecom_encap));
+ encode_encap_extcomm(tnl_type, &eval);
+ ecom_encap.size = 1;
+ ecom_encap.unit_size = ECOMMUNITY_SIZE;
+ ecom_encap.val = (uint8_t *)eval.val;
+ bgp_attr_set_ecommunity(attr, ecommunity_dup(&ecom_encap));
+
+ /* ES import RT */
+ memset(&mac, 0, sizeof(mac));
+ memset(&ecom_es_rt, 0, sizeof(ecom_es_rt));
+ es_get_system_mac(&es->esi, &mac);
+ encode_es_rt_extcomm(&eval_es_rt, &mac);
+ ecom_es_rt.size = 1;
+ ecom_es_rt.unit_size = ECOMMUNITY_SIZE;
+ ecom_es_rt.val = (uint8_t *)eval_es_rt.val;
+ bgp_attr_set_ecommunity(
+ attr,
+ ecommunity_merge(bgp_attr_get_ecommunity(attr), &ecom_es_rt));
+
+ /* DF election extended community */
+ memset(&ecom_df, 0, sizeof(ecom_df));
+ encode_df_elect_extcomm(&eval_df, es->df_pref);
+ ecom_df.size = 1;
+ ecom_df.val = (uint8_t *)eval_df.val;
+ bgp_attr_set_ecommunity(
+ attr,
+ ecommunity_merge(bgp_attr_get_ecommunity(attr), &ecom_df));
+}
+
+/* Create or update local type-4 route */
+static int bgp_evpn_type4_route_update(struct bgp *bgp,
+ struct bgp_evpn_es *es, struct prefix_evpn *p)
+{
+ int ret = 0;
+ int route_changed = 0;
+ afi_t afi = AFI_L2VPN;
+ safi_t safi = SAFI_EVPN;
+ struct attr attr;
+ struct attr *attr_new = NULL;
+ struct bgp_dest *dest = NULL;
+ struct bgp_path_info *pi = NULL;
+
+ memset(&attr, 0, sizeof(attr));
+
+ /* Build path-attribute for this route. */
+ bgp_attr_default_set(&attr, bgp, BGP_ORIGIN_IGP);
+ attr.nexthop = es->originator_ip;
+ attr.mp_nexthop_global_in = es->originator_ip;
+ attr.mp_nexthop_len = BGP_ATTR_NHLEN_IPV4;
+
+ /* Set up extended community. */
+ bgp_evpn_type4_route_extcomm_build(es, &attr);
+
+ /* First, create (or fetch) route node within the ESI. */
+ /* NOTE: There is no RD here. */
+ dest = bgp_node_get(es->route_table, (struct prefix *)p);
+
+ /* Create or update route entry. */
+ ret = bgp_evpn_mh_route_update(bgp, es, NULL, afi, safi, dest, &attr,
+ &pi, &route_changed);
+ if (ret != 0)
+ flog_err(
+ EC_BGP_ES_INVALID,
+ "%u ERROR: Failed to updated ES route ESI: %s VTEP %pI4",
+ bgp->vrf_id, es->esi_str, &es->originator_ip);
+
+ assert(pi);
+ attr_new = pi->attr;
+
+ /* Perform route selection;
+ * this is just to set the flags correctly
+ * as local route in the ES always wins.
+ */
+ bgp_evpn_es_route_select_install(bgp, es, dest);
+ bgp_dest_unlock_node(dest);
+
+ /* If this is a new route or some attribute has changed, export the
+ * route to the global table. The route will be advertised to peers
+ * from there. Note that this table is a 2-level tree (RD-level +
+ * Prefix-level) similar to L3VPN routes.
+ */
+ if (route_changed) {
+ struct bgp_path_info *global_pi;
+
+ dest = bgp_global_evpn_node_get(bgp->rib[afi][safi], afi, safi,
+ p, &es->es_base_frag->prd);
+ bgp_evpn_mh_route_update(bgp, es, NULL, afi, safi, dest,
+ attr_new, &global_pi, &route_changed);
+
+ /* Schedule for processing and unlock node. */
+ bgp_process(bgp, dest, afi, safi);
+ bgp_dest_unlock_node(dest);
+ }
+
+ /* Unintern temporary. */
+ aspath_unintern(&attr.aspath);
+ return 0;
+}
+
+/* Delete local type-4 route */
+static int bgp_evpn_type4_route_delete(struct bgp *bgp,
+ struct bgp_evpn_es *es, struct prefix_evpn *p)
+{
+ if (!es->es_base_frag)
+ return -1;
+
+ return bgp_evpn_mh_route_delete(bgp, es, NULL /* l2vni */,
+ es->es_base_frag, p);
+}
+
+/* Process remote/received EVPN type-4 route (advertise or withdraw) */
+int bgp_evpn_type4_route_process(struct peer *peer, afi_t afi, safi_t safi,
+ struct attr *attr, uint8_t *pfx, int psize,
+ uint32_t addpath_id)
+{
+ int ret;
+ esi_t esi;
+ uint8_t ipaddr_len;
+ struct in_addr vtep_ip;
+ struct prefix_rd prd;
+ struct prefix_evpn p;
+
+ /* Type-4 route should be either 23 or 35 bytes
+ * RD (8), ESI (10), ip-len (1), ip (4 or 16)
+ */
+ if (psize != BGP_EVPN_TYPE4_V4_PSIZE &&
+ psize != BGP_EVPN_TYPE4_V6_PSIZE) {
+ flog_err(EC_BGP_EVPN_ROUTE_INVALID,
+ "%u:%s - Rx EVPN Type-4 NLRI with invalid length %d",
+ peer->bgp->vrf_id, peer->host, psize);
+ return -1;
+ }
+
+ /* Make prefix_rd */
+ prd.family = AF_UNSPEC;
+ prd.prefixlen = 64;
+ memcpy(&prd.val, pfx, RD_BYTES);
+ pfx += RD_BYTES;
+
+ /* get the ESI */
+ memcpy(&esi, pfx, ESI_BYTES);
+ pfx += ESI_BYTES;
+
+
+ /* Get the IP. */
+ ipaddr_len = *pfx++;
+ if (ipaddr_len == IPV4_MAX_BITLEN) {
+ memcpy(&vtep_ip, pfx, IPV4_MAX_BYTELEN);
+ } else {
+ flog_err(
+ EC_BGP_EVPN_ROUTE_INVALID,
+ "%u:%s - Rx EVPN Type-4 NLRI with unsupported IP address length %d",
+ peer->bgp->vrf_id, peer->host, ipaddr_len);
+ return -1;
+ }
+
+ build_evpn_type4_prefix(&p, &esi, vtep_ip);
+ /* Process the route. */
+ if (attr) {
+ ret = bgp_update(peer, (struct prefix *)&p, addpath_id, attr,
+ afi, safi, ZEBRA_ROUTE_BGP, BGP_ROUTE_NORMAL,
+ &prd, NULL, 0, 0, NULL);
+ } else {
+ ret = bgp_withdraw(peer, (struct prefix *)&p, addpath_id, attr,
+ afi, safi, ZEBRA_ROUTE_BGP, BGP_ROUTE_NORMAL,
+ &prd, NULL, 0, NULL);
+ }
+ return ret;
+}
+
+/* Check if a prefix belongs to the local ES */
+static bool bgp_evpn_type4_prefix_match(struct prefix_evpn *p,
+ struct bgp_evpn_es *es)
+{
+ return (p->prefix.route_type == BGP_EVPN_ES_ROUTE) &&
+ !memcmp(&p->prefix.es_addr.esi, &es->esi, sizeof(esi_t));
+}
+
+/* Import remote ESRs on local ethernet segment add */
+static int bgp_evpn_type4_remote_routes_import(struct bgp *bgp,
+ struct bgp_evpn_es *es, bool install)
+{
+ int ret;
+ afi_t afi;
+ safi_t safi;
+ struct bgp_dest *rd_dest, *dest;
+ struct bgp_table *table;
+ struct bgp_path_info *pi;
+
+ afi = AFI_L2VPN;
+ safi = SAFI_EVPN;
+
+ /* Walk entire global routing table and evaluate routes which could be
+ * imported into this Ethernet Segment.
+ */
+ for (rd_dest = bgp_table_top(bgp->rib[afi][safi]); rd_dest;
+ rd_dest = bgp_route_next(rd_dest)) {
+ table = bgp_dest_get_bgp_table_info(rd_dest);
+ if (!table)
+ continue;
+
+ for (dest = bgp_table_top(table); dest;
+ dest = bgp_route_next(dest)) {
+ struct prefix_evpn *evp =
+ (struct prefix_evpn *)bgp_dest_get_prefix(dest);
+
+ for (pi = bgp_dest_get_bgp_path_info(dest); pi;
+ pi = pi->next) {
+ /*
+ * Consider "valid" remote routes applicable for
+ * this ES.
+ */
+ if (!(CHECK_FLAG(pi->flags, BGP_PATH_VALID)
+ && pi->type == ZEBRA_ROUTE_BGP
+ && pi->sub_type == BGP_ROUTE_NORMAL))
+ continue;
+
+ if (!bgp_evpn_type4_prefix_match(evp, es))
+ continue;
+
+ if (install)
+ ret = bgp_evpn_es_route_install(
+ bgp, es, evp, pi);
+ else
+ ret = bgp_evpn_es_route_uninstall(
+ bgp, es, evp, pi);
+
+ if (ret) {
+ flog_err(
+ EC_BGP_EVPN_FAIL,
+ "Failed to %s EVPN %pFX route in ESI %s",
+ install ? "install"
+ : "uninstall",
+ evp, es->esi_str);
+
+ bgp_dest_unlock_node(rd_dest);
+ bgp_dest_unlock_node(dest);
+ return ret;
+ }
+ }
+ }
+ }
+ return 0;
+}
+
+/*****************************************************************************
+ * Ethernet Auto Discovery (EAD/Type-1) route handling
+ * There are two types of EAD routes -
+ * 1. EAD-per-ES - Key: {ESI, ET=0xffffffff}
+ * 2. EAD-per-EVI - Key: {ESI, ET=0}
+ */
+
+/* Extended communities associated with EAD-per-ES */
+static void
+bgp_evpn_type1_es_route_extcomm_build(struct bgp_evpn_es_frag *es_frag,
+ struct attr *attr)
+{
+ struct ecommunity ecom_encap;
+ struct ecommunity ecom_esi_label;
+ struct ecommunity_val eval;
+ struct ecommunity_val eval_esi_label;
+ bgp_encap_types tnl_type;
+ struct listnode *evi_node, *rt_node;
+ struct ecommunity *ecom;
+ struct bgp_evpn_es_evi *es_evi;
+
+ /* Encap */
+ tnl_type = BGP_ENCAP_TYPE_VXLAN;
+ memset(&ecom_encap, 0, sizeof(ecom_encap));
+ encode_encap_extcomm(tnl_type, &eval);
+ ecom_encap.size = 1;
+ ecom_encap.unit_size = ECOMMUNITY_SIZE;
+ ecom_encap.val = (uint8_t *)eval.val;
+ bgp_attr_set_ecommunity(attr, ecommunity_dup(&ecom_encap));
+
+ /* ESI label */
+ encode_esi_label_extcomm(&eval_esi_label,
+ false /*single_active*/);
+ ecom_esi_label.size = 1;
+ ecom_esi_label.unit_size = ECOMMUNITY_SIZE;
+ ecom_esi_label.val = (uint8_t *)eval_esi_label.val;
+ bgp_attr_set_ecommunity(attr,
+ ecommunity_merge(bgp_attr_get_ecommunity(attr),
+ &ecom_esi_label));
+
+ /* Add export RTs for all L2-VNIs associated with this ES */
+ /* XXX - suppress EAD-ES advertisment if there are no EVIs associated
+ * with it.
+ */
+ if (listcount(bgp_mh_info->ead_es_export_rtl)) {
+ for (ALL_LIST_ELEMENTS_RO(bgp_mh_info->ead_es_export_rtl,
+ rt_node, ecom))
+ bgp_attr_set_ecommunity(
+ attr, ecommunity_merge(attr->ecommunity, ecom));
+ } else {
+ for (ALL_LIST_ELEMENTS_RO(es_frag->es_evi_frag_list, evi_node,
+ es_evi)) {
+ if (!CHECK_FLAG(es_evi->flags, BGP_EVPNES_EVI_LOCAL))
+ continue;
+ for (ALL_LIST_ELEMENTS_RO(es_evi->vpn->export_rtl,
+ rt_node, ecom))
+ bgp_attr_set_ecommunity(
+ attr, ecommunity_merge(attr->ecommunity,
+ ecom));
+ }
+ }
+}
+
+/* Extended communities associated with EAD-per-EVI */
+static void bgp_evpn_type1_evi_route_extcomm_build(struct bgp_evpn_es *es,
+ struct bgpevpn *vpn, struct attr *attr)
+{
+ struct ecommunity ecom_encap;
+ struct ecommunity_val eval;
+ bgp_encap_types tnl_type;
+ struct listnode *rt_node;
+ struct ecommunity *ecom;
+
+ /* Encap */
+ tnl_type = BGP_ENCAP_TYPE_VXLAN;
+ memset(&ecom_encap, 0, sizeof(ecom_encap));
+ encode_encap_extcomm(tnl_type, &eval);
+ ecom_encap.size = 1;
+ ecom_encap.unit_size = ECOMMUNITY_SIZE;
+ ecom_encap.val = (uint8_t *)eval.val;
+ bgp_attr_set_ecommunity(attr, ecommunity_dup(&ecom_encap));
+
+ /* Add export RTs for the L2-VNI */
+ for (ALL_LIST_ELEMENTS_RO(vpn->export_rtl, rt_node, ecom))
+ bgp_attr_set_ecommunity(
+ attr,
+ ecommunity_merge(bgp_attr_get_ecommunity(attr), ecom));
+}
+
+/* Update EVPN EAD (type-1) route -
+ * vpn - valid for EAD-EVI routes and NULL for EAD-ES routes
+ */
+static int bgp_evpn_type1_route_update(struct bgp *bgp, struct bgp_evpn_es *es,
+ struct bgpevpn *vpn,
+ struct bgp_evpn_es_frag *es_frag,
+ struct prefix_evpn *p)
+{
+ int ret = 0;
+ afi_t afi = AFI_L2VPN;
+ safi_t safi = SAFI_EVPN;
+ struct attr attr;
+ struct attr *attr_new = NULL;
+ struct bgp_dest *dest = NULL;
+ struct bgp_path_info *pi = NULL;
+ int route_changed = 0;
+ struct prefix_rd *global_rd;
+
+ memset(&attr, 0, sizeof(attr));
+
+ /* Build path-attribute for this route. */
+ bgp_attr_default_set(&attr, bgp, BGP_ORIGIN_IGP);
+ attr.nexthop = es->originator_ip;
+ attr.mp_nexthop_global_in = es->originator_ip;
+ attr.mp_nexthop_len = BGP_ATTR_NHLEN_IPV4;
+
+ if (vpn) {
+ /* EAD-EVI route update */
+ /* MPLS label */
+ vni2label(vpn->vni, &(attr.label));
+
+ /* Set up extended community */
+ bgp_evpn_type1_evi_route_extcomm_build(es, vpn, &attr);
+
+ /* First, create (or fetch) route node within the VNI. */
+ dest = bgp_node_get(vpn->route_table, (struct prefix *)p);
+
+ /* Create or update route entry. */
+ ret = bgp_evpn_mh_route_update(bgp, es, vpn, afi, safi, dest,
+ &attr, &pi, &route_changed);
+ if (ret != 0)
+ flog_err(
+ EC_BGP_ES_INVALID,
+ "%u Failed to update EAD-EVI route ESI: %s VNI %u VTEP %pI4",
+ bgp->vrf_id, es->esi_str, vpn->vni,
+ &es->originator_ip);
+ global_rd = &vpn->prd;
+ } else {
+ /* EAD-ES route update */
+ /* MPLS label is 0 for EAD-ES route */
+
+ /* Set up extended community */
+ bgp_evpn_type1_es_route_extcomm_build(es_frag, &attr);
+
+ /* First, create (or fetch) route node within the ES. */
+ /* NOTE: There is no RD here. */
+ /* XXX: fragment ID must be included as a part of the prefix. */
+ dest = bgp_node_get(es->route_table, (struct prefix *)p);
+
+ /* Create or update route entry. */
+ ret = bgp_evpn_mh_route_update(bgp, es, vpn, afi, safi, dest,
+ &attr, &pi, &route_changed);
+ if (ret != 0) {
+ flog_err(
+ EC_BGP_ES_INVALID,
+ "%u ERROR: Failed to updated EAD-ES route ESI: %s VTEP %pI4",
+ bgp->vrf_id, es->esi_str, &es->originator_ip);
+ }
+ global_rd = &es_frag->prd;
+ }
+
+
+ assert(pi);
+ attr_new = pi->attr;
+
+ /* Perform route selection;
+ * this is just to set the flags correctly as local route in
+ * the ES always wins.
+ */
+ evpn_route_select_install(bgp, vpn, dest);
+ bgp_dest_unlock_node(dest);
+
+ /* If this is a new route or some attribute has changed, export the
+ * route to the global table. The route will be advertised to peers
+ * from there. Note that this table is a 2-level tree (RD-level +
+ * Prefix-level) similar to L3VPN routes.
+ */
+ if (route_changed) {
+ struct bgp_path_info *global_pi;
+
+ dest = bgp_global_evpn_node_get(bgp->rib[afi][safi], afi, safi,
+ p, global_rd);
+ bgp_evpn_mh_route_update(bgp, es, vpn, afi, safi, dest,
+ attr_new, &global_pi, &route_changed);
+
+ /* Schedule for processing and unlock node. */
+ bgp_process(bgp, dest, afi, safi);
+ bgp_dest_unlock_node(dest);
+ }
+
+ /* Unintern temporary. */
+ aspath_unintern(&attr.aspath);
+ return 0;
+}
+
+/*
+ * This function is called when the export RT for a VNI changes.
+ * Update all type-1 local routes for this VNI from VNI/ES tables and the global
+ * table and advertise these routes to peers.
+ */
+
+static void bgp_evpn_ead_es_route_update(struct bgp *bgp,
+ struct bgp_evpn_es *es)
+{
+ struct listnode *node;
+ struct bgp_evpn_es_frag *es_frag;
+ struct prefix_evpn p;
+
+ build_evpn_type1_prefix(&p, BGP_EVPN_AD_ES_ETH_TAG, &es->esi,
+ es->originator_ip);
+ for (ALL_LIST_ELEMENTS_RO(es->es_frag_list, node, es_frag)) {
+ if (!listcount(es_frag->es_evi_frag_list))
+ continue;
+
+ p.prefix.ead_addr.frag_id = es_frag->rd_id;
+ if (bgp_evpn_type1_route_update(bgp, es, NULL, es_frag, &p))
+ flog_err(
+ EC_BGP_EVPN_ROUTE_CREATE,
+ "EAD-ES route creation failure for ESI %s frag %u",
+ es->esi_str, es_frag->rd_id);
+ }
+}
+
+static void bgp_evpn_ead_evi_route_update(struct bgp *bgp,
+ struct bgp_evpn_es *es,
+ struct bgpevpn *vpn,
+ struct prefix_evpn *p)
+{
+ if (bgp_evpn_type1_route_update(bgp, es, vpn, NULL, p))
+ flog_err(EC_BGP_EVPN_ROUTE_CREATE,
+ "EAD-EVI route creation failure for ESI %s VNI %u",
+ es->esi_str, vpn->vni);
+}
+
+void update_type1_routes_for_evi(struct bgp *bgp, struct bgpevpn *vpn)
+{
+ struct prefix_evpn p;
+ struct bgp_evpn_es *es;
+ struct bgp_evpn_es_evi *es_evi;
+
+
+ RB_FOREACH (es_evi, bgp_es_evi_rb_head, &vpn->es_evi_rb_tree) {
+ es = es_evi->es;
+
+ if (es_evi->vpn != vpn)
+ continue;
+
+ /* Update EAD-ES */
+ bgp_evpn_ead_es_route_update(bgp, es);
+
+ /* Update EAD-EVI */
+ if (CHECK_FLAG(es->flags, BGP_EVPNES_ADV_EVI)) {
+ build_evpn_type1_prefix(&p, BGP_EVPN_AD_EVI_ETH_TAG,
+ &es->esi, es->originator_ip);
+ bgp_evpn_ead_evi_route_update(bgp, es, vpn, &p);
+ }
+ }
+}
+
+/* Delete local Type-1 route */
+static void bgp_evpn_ead_es_route_delete(struct bgp *bgp,
+ struct bgp_evpn_es *es)
+{
+ struct listnode *node;
+ struct bgp_evpn_es_frag *es_frag;
+ struct prefix_evpn p;
+
+ build_evpn_type1_prefix(&p, BGP_EVPN_AD_ES_ETH_TAG, &es->esi,
+ es->originator_ip);
+ for (ALL_LIST_ELEMENTS_RO(es->es_frag_list, node, es_frag)) {
+ p.prefix.ead_addr.frag_id = es_frag->rd_id;
+ bgp_evpn_mh_route_delete(bgp, es, NULL, es_frag, &p);
+ }
+}
+
+static int bgp_evpn_ead_evi_route_delete(struct bgp *bgp,
+ struct bgp_evpn_es *es,
+ struct bgpevpn *vpn,
+ struct prefix_evpn *p)
+{
+ return bgp_evpn_mh_route_delete(bgp, es, vpn, NULL, p);
+}
+
+/* Generate EAD-EVI for all VNIs */
+static void bgp_evpn_local_type1_evi_route_add(struct bgp *bgp,
+ struct bgp_evpn_es *es)
+{
+ struct listnode *evi_node;
+ struct prefix_evpn p;
+ struct bgp_evpn_es_evi *es_evi;
+
+ /* EAD-per-EVI routes have been suppressed */
+ if (!bgp_mh_info->ead_evi_tx)
+ return;
+
+ if (CHECK_FLAG(es->flags, BGP_EVPNES_ADV_EVI))
+ /* EAD-EVI route add for this ES is already done */
+ return;
+
+ SET_FLAG(es->flags, BGP_EVPNES_ADV_EVI);
+ build_evpn_type1_prefix(&p, BGP_EVPN_AD_EVI_ETH_TAG,
+ &es->esi, es->originator_ip);
+
+ for (ALL_LIST_ELEMENTS_RO(es->es_evi_list, evi_node, es_evi)) {
+ if (!CHECK_FLAG(es_evi->flags, BGP_EVPNES_EVI_LOCAL))
+ continue;
+ bgp_evpn_ead_evi_route_update(bgp, es, es_evi->vpn, &p);
+ }
+}
+
+/*
+ * Withdraw EAD-EVI for all VNIs
+ */
+static void bgp_evpn_local_type1_evi_route_del(struct bgp *bgp,
+ struct bgp_evpn_es *es)
+{
+ struct listnode *evi_node;
+ struct prefix_evpn p;
+ struct bgp_evpn_es_evi *es_evi;
+
+ /* Delete and withdraw locally learnt EAD-EVI route */
+ if (!CHECK_FLAG(es->flags, BGP_EVPNES_ADV_EVI))
+ /* EAD-EVI route has not been advertised for this ES */
+ return;
+
+ UNSET_FLAG(es->flags, BGP_EVPNES_ADV_EVI);
+ build_evpn_type1_prefix(&p, BGP_EVPN_AD_EVI_ETH_TAG,
+ &es->esi, es->originator_ip);
+ for (ALL_LIST_ELEMENTS_RO(es->es_evi_list, evi_node, es_evi)) {
+ if (!CHECK_FLAG(es_evi->flags, BGP_EVPNES_EVI_LOCAL))
+ continue;
+ if (bgp_evpn_mh_route_delete(bgp, es, es_evi->vpn, NULL, &p))
+ flog_err(EC_BGP_EVPN_ROUTE_CREATE,
+ "%u: Type4 route creation failure for ESI %s",
+ bgp->vrf_id, es->esi_str);
+ }
+}
+
+/*
+ * Process received EVPN type-1 route (advertise or withdraw).
+ */
+int bgp_evpn_type1_route_process(struct peer *peer, afi_t afi, safi_t safi,
+ struct attr *attr, uint8_t *pfx, int psize,
+ uint32_t addpath_id)
+{
+ int ret;
+ struct prefix_rd prd;
+ esi_t esi;
+ uint32_t eth_tag;
+ mpls_label_t label;
+ struct in_addr vtep_ip;
+ struct prefix_evpn p;
+
+ if (psize != BGP_EVPN_TYPE1_PSIZE) {
+ flog_err(EC_BGP_EVPN_ROUTE_INVALID,
+ "%u:%s - Rx EVPN Type-1 NLRI with invalid length %d",
+ peer->bgp->vrf_id, peer->host, psize);
+ return -1;
+ }
+
+ /* Make prefix_rd */
+ prd.family = AF_UNSPEC;
+ prd.prefixlen = 64;
+ memcpy(&prd.val, pfx, RD_BYTES);
+ pfx += RD_BYTES;
+
+ /* get the ESI */
+ memcpy(&esi, pfx, ESI_BYTES);
+ pfx += ESI_BYTES;
+
+ /* Copy Ethernet Tag */
+ memcpy(&eth_tag, pfx, EVPN_ETH_TAG_BYTES);
+ eth_tag = ntohl(eth_tag);
+ pfx += EVPN_ETH_TAG_BYTES;
+
+ memcpy(&label, pfx, BGP_LABEL_BYTES);
+
+ /* EAD route prefix doesn't include the nexthop in the global
+ * table
+ */
+ vtep_ip.s_addr = INADDR_ANY;
+ build_evpn_type1_prefix(&p, eth_tag, &esi, vtep_ip);
+ /* Process the route. */
+ if (attr) {
+ ret = bgp_update(peer, (struct prefix *)&p, addpath_id, attr,
+ afi, safi, ZEBRA_ROUTE_BGP, BGP_ROUTE_NORMAL,
+ &prd, NULL, 0, 0, NULL);
+ } else {
+ ret = bgp_withdraw(peer, (struct prefix *)&p, addpath_id, attr,
+ afi, safi, ZEBRA_ROUTE_BGP, BGP_ROUTE_NORMAL,
+ &prd, NULL, 0, NULL);
+ }
+ return ret;
+}
+
+void bgp_evpn_mh_config_ead_export_rt(struct bgp *bgp,
+ struct ecommunity *ecomcfg, bool del)
+{
+ struct listnode *node, *nnode, *node_to_del;
+ struct ecommunity *ecom;
+ struct bgp_evpn_es *es;
+
+ if (del) {
+ if (ecomcfg == NULL) {
+ /* Reset to default and process all routes. */
+ for (ALL_LIST_ELEMENTS(bgp_mh_info->ead_es_export_rtl,
+ node, nnode, ecom)) {
+ ecommunity_free(&ecom);
+ list_delete_node(bgp_mh_info->ead_es_export_rtl,
+ node);
+ }
+ }
+
+ /* Delete a specific export RT */
+ else {
+ node_to_del = NULL;
+
+ for (ALL_LIST_ELEMENTS(bgp_mh_info->ead_es_export_rtl,
+ node, nnode, ecom)) {
+ if (ecommunity_match(ecom, ecomcfg)) {
+ ecommunity_free(&ecom);
+ node_to_del = node;
+ break;
+ }
+ }
+
+ assert(node_to_del);
+ list_delete_node(bgp_mh_info->ead_es_export_rtl,
+ node_to_del);
+ }
+ } else {
+ listnode_add_sort(bgp_mh_info->ead_es_export_rtl, ecomcfg);
+ }
+
+ if (BGP_DEBUG(evpn_mh, EVPN_MH_RT))
+ zlog_debug("local ES del/re-add EAD route on export RT change");
+ /*
+ * walk through all active ESs withdraw the old EAD and
+ * generate a new one
+ */
+ RB_FOREACH (es, bgp_es_rb_head, &bgp_mh_info->es_rb_tree) {
+ if (!bgp_evpn_is_es_local(es) ||
+ !bgp_evpn_local_es_is_active(es))
+ continue;
+
+ if (BGP_DEBUG(evpn_mh, EVPN_MH_RT))
+ zlog_debug(
+ "local ES %s del/re-add EAD route on export RT change",
+ es->esi_str);
+
+ /*
+ * withdraw EAD-ES. XXX - this should technically not be
+ * needed; can be removed after testing
+ */
+ bgp_evpn_ead_es_route_delete(bgp, es);
+
+ /* generate EAD-ES */
+ bgp_evpn_ead_es_route_update(bgp, es);
+ }
+}
+
+/*****************************************************************************/
+/* Ethernet Segment Management
+ * 1. Ethernet Segment is a collection of links attached to the same
+ * server (MHD) or switch (MHN)
+ * 2. An Ethernet Segment can span multiple PEs and is identified by the
+ * 10-byte ES-ID.
+ * 3. Local ESs are configured in zebra and sent to BGP
+ * 4. Remote ESs are created by BGP when one or more ES-EVIs reference it i.e.
+ * created on first reference and release on last de-reference
+ * 5. An ES can be both local and remote. Infact most local ESs are expected
+ * to have an ES peer.
+ */
+
+/* A list of remote VTEPs is maintained for each ES. This list includes -
+ * 1. VTEPs for which we have imported the ESR i.e. ES-peers
+ * 2. VTEPs that have an "active" ES-EVI VTEP i.e. EAD-per-ES and EAD-per-EVI
+ * have been imported into one or more VNIs
+ */
+static int bgp_evpn_es_vtep_cmp(void *p1, void *p2)
+{
+ const struct bgp_evpn_es_vtep *es_vtep1 = p1;
+ const struct bgp_evpn_es_vtep *es_vtep2 = p2;
+
+ return es_vtep1->vtep_ip.s_addr - es_vtep2->vtep_ip.s_addr;
+}
+
+static struct bgp_evpn_es_vtep *bgp_evpn_es_vtep_new(struct bgp_evpn_es *es,
+ struct in_addr vtep_ip)
+{
+ struct bgp_evpn_es_vtep *es_vtep;
+
+ es_vtep = XCALLOC(MTYPE_BGP_EVPN_ES_VTEP, sizeof(*es_vtep));
+
+ es_vtep->es = es;
+ es_vtep->vtep_ip.s_addr = vtep_ip.s_addr;
+ inet_ntop(AF_INET, &es_vtep->vtep_ip, es_vtep->vtep_str,
+ sizeof(es_vtep->vtep_str));
+ listnode_init(&es_vtep->es_listnode, es_vtep);
+ listnode_add_sort(es->es_vtep_list, &es_vtep->es_listnode);
+
+ return es_vtep;
+}
+
+static void bgp_evpn_es_vtep_free(struct bgp_evpn_es_vtep *es_vtep)
+{
+ struct bgp_evpn_es *es = es_vtep->es;
+
+ if (CHECK_FLAG(es_vtep->flags, BGP_EVPNES_VTEP_ESR) ||
+ es_vtep->evi_cnt)
+ /* as long as there is some reference we can't free it */
+ return;
+
+ list_delete_node(es->es_vtep_list, &es_vtep->es_listnode);
+ XFREE(MTYPE_BGP_EVPN_ES_VTEP, es_vtep);
+}
+
+/* check if VTEP is already part of the list */
+static struct bgp_evpn_es_vtep *bgp_evpn_es_vtep_find(struct bgp_evpn_es *es,
+ struct in_addr vtep_ip)
+{
+ struct listnode *node = NULL;
+ struct bgp_evpn_es_vtep *es_vtep;
+
+ for (ALL_LIST_ELEMENTS_RO(es->es_vtep_list, node, es_vtep)) {
+ if (es_vtep->vtep_ip.s_addr == vtep_ip.s_addr)
+ return es_vtep;
+ }
+ return NULL;
+}
+
+/* Send the remote ES to zebra for NHG programming */
+static int bgp_zebra_send_remote_es_vtep(struct bgp *bgp,
+ struct bgp_evpn_es_vtep *es_vtep, bool add)
+{
+ struct bgp_evpn_es *es = es_vtep->es;
+ struct stream *s;
+ uint32_t flags = 0;
+
+ /* Check socket. */
+ if (!zclient || zclient->sock < 0)
+ return 0;
+
+ /* Don't try to register if Zebra doesn't know of this instance. */
+ if (!IS_BGP_INST_KNOWN_TO_ZEBRA(bgp)) {
+ if (BGP_DEBUG(zebra, ZEBRA))
+ zlog_debug("No zebra instance, not installing remote es %s",
+ es->esi_str);
+ return 0;
+ }
+
+ if (es_vtep->flags & BGP_EVPNES_VTEP_ESR)
+ flags |= ZAPI_ES_VTEP_FLAG_ESR_RXED;
+
+ s = zclient->obuf;
+ stream_reset(s);
+
+ zclient_create_header(s,
+ add ? ZEBRA_REMOTE_ES_VTEP_ADD : ZEBRA_REMOTE_ES_VTEP_DEL,
+ bgp->vrf_id);
+ stream_put(s, &es->esi, sizeof(esi_t));
+ stream_put_ipv4(s, es_vtep->vtep_ip.s_addr);
+ if (add) {
+ stream_putl(s, flags);
+ stream_putc(s, es_vtep->df_alg);
+ stream_putw(s, es_vtep->df_pref);
+ }
+
+ stream_putw_at(s, 0, stream_get_endp(s));
+
+ if (BGP_DEBUG(evpn_mh, EVPN_MH_ES))
+ zlog_debug("Tx %s Remote ESI %s VTEP %pI4", add ? "ADD" : "DEL",
+ es->esi_str, &es_vtep->vtep_ip);
+
+ frrtrace(3, frr_bgp, evpn_mh_vtep_zsend, add, es, es_vtep);
+
+ return zclient_send_message(zclient);
+}
+
+static void bgp_evpn_es_vtep_re_eval_active(struct bgp *bgp,
+ struct bgp_evpn_es_vtep *es_vtep,
+ bool param_change)
+{
+ bool old_active;
+ bool new_active;
+
+ old_active = CHECK_FLAG(es_vtep->flags, BGP_EVPNES_VTEP_ACTIVE);
+ /* currently we need an active EVI reference to use the VTEP as
+ * a nexthop. this may change...
+ */
+ if (es_vtep->evi_cnt)
+ SET_FLAG(es_vtep->flags, BGP_EVPNES_VTEP_ACTIVE);
+ else
+ UNSET_FLAG(es_vtep->flags, BGP_EVPNES_VTEP_ACTIVE);
+
+ new_active = CHECK_FLAG(es_vtep->flags, BGP_EVPNES_VTEP_ACTIVE);
+
+ if ((old_active != new_active) || (new_active && param_change)) {
+
+ if (BGP_DEBUG(evpn_mh, EVPN_MH_ES))
+ zlog_debug("es %s vtep %pI4 %s df %u/%u",
+ es_vtep->es->esi_str, &es_vtep->vtep_ip,
+ new_active ? "active" : "inactive",
+ es_vtep->df_alg, es_vtep->df_pref);
+
+ /* send remote ES to zebra */
+ bgp_zebra_send_remote_es_vtep(bgp, es_vtep, new_active);
+
+ /* The NHG is updated first for efficient failover handling.
+ * Note the NHG can be de-activated while there are bgp
+ * routes referencing it. Zebra is capable of handling that
+ * elegantly by holding the NHG till all routes using it are
+ * removed.
+ */
+ bgp_evpn_l3nhg_update_on_vtep_chg(es_vtep->es);
+ /* queue up the es for background consistency checks */
+ bgp_evpn_es_cons_checks_pend_add(es_vtep->es);
+ }
+}
+
+static struct bgp_evpn_es_vtep *bgp_evpn_es_vtep_add(struct bgp *bgp,
+ struct bgp_evpn_es *es,
+ struct in_addr vtep_ip,
+ bool esr, uint8_t df_alg,
+ uint16_t df_pref)
+{
+ struct bgp_evpn_es_vtep *es_vtep;
+ bool param_change = false;
+
+ es_vtep = bgp_evpn_es_vtep_find(es, vtep_ip);
+
+ if (!es_vtep)
+ es_vtep = bgp_evpn_es_vtep_new(es, vtep_ip);
+
+ if (BGP_DEBUG(evpn_mh, EVPN_MH_ES))
+ zlog_debug("es %s vtep %pI4 add %s df %u/%u",
+ es_vtep->es->esi_str, &es_vtep->vtep_ip,
+ esr ? "esr" : "ead", df_alg, df_pref);
+
+ if (esr) {
+ SET_FLAG(es_vtep->flags, BGP_EVPNES_VTEP_ESR);
+ if ((es_vtep->df_pref != df_pref)
+ || (es_vtep->df_alg != df_alg)) {
+ param_change = true;
+ es_vtep->df_pref = df_pref;
+ es_vtep->df_alg = df_alg;
+ }
+ } else {
+ ++es_vtep->evi_cnt;
+ }
+
+ bgp_evpn_es_vtep_re_eval_active(bgp, es_vtep, param_change);
+
+ return es_vtep;
+}
+
+static void bgp_evpn_es_vtep_do_del(struct bgp *bgp,
+ struct bgp_evpn_es_vtep *es_vtep, bool esr)
+{
+ bool param_change = false;
+
+ if (BGP_DEBUG(evpn_mh, EVPN_MH_ES))
+ zlog_debug("es %s vtep %pI4 del %s", es_vtep->es->esi_str,
+ &es_vtep->vtep_ip, esr ? "esr" : "ead");
+ if (esr) {
+ UNSET_FLAG(es_vtep->flags, BGP_EVPNES_VTEP_ESR);
+ if (es_vtep->df_pref || es_vtep->df_alg) {
+ param_change = true;
+ es_vtep->df_pref = 0;
+ es_vtep->df_alg = 0;
+ }
+ } else {
+ if (es_vtep->evi_cnt)
+ --es_vtep->evi_cnt;
+ }
+
+ bgp_evpn_es_vtep_re_eval_active(bgp, es_vtep, param_change);
+ bgp_evpn_es_vtep_free(es_vtep);
+}
+
+static void bgp_evpn_es_vtep_del(struct bgp *bgp,
+ struct bgp_evpn_es *es, struct in_addr vtep_ip, bool esr)
+{
+ struct bgp_evpn_es_vtep *es_vtep;
+
+ es_vtep = bgp_evpn_es_vtep_find(es, vtep_ip);
+ if (es_vtep)
+ bgp_evpn_es_vtep_do_del(bgp, es_vtep, esr);
+}
+
+/********************** ES MAC-IP paths *************************************
+ * 1. Local MAC-IP routes in the VNI routing table are linked to the
+ * destination ES (macip_evi_path_list) for efficient updates on ES oper
+ * state changes.
+ * 2. Non-local MAC-IP routes in the global routing table are linked to
+ * the detination for efficient updates on -
+ * a. VTEP add/del - this results in a L3NHG update.
+ * b. ES-VRF add/del - this may result in the host route being migrated to
+ * L3NHG or vice versa (flat multipath list).
+ ****************************************************************************/
+static void bgp_evpn_path_es_info_free(struct bgp_path_es_info *es_info)
+{
+ bgp_evpn_path_es_unlink(es_info);
+ XFREE(MTYPE_BGP_EVPN_PATH_ES_INFO, es_info);
+}
+
+void bgp_evpn_path_mh_info_free(struct bgp_path_mh_info *mh_info)
+{
+ if (mh_info->es_info)
+ bgp_evpn_path_es_info_free(mh_info->es_info);
+ if (mh_info->nh_info)
+ bgp_evpn_path_nh_info_free(mh_info->nh_info);
+ XFREE(MTYPE_BGP_EVPN_PATH_MH_INFO, mh_info);
+}
+
+static struct bgp_path_es_info *
+bgp_evpn_path_es_info_new(struct bgp_path_info *pi, vni_t vni)
+{
+ struct bgp_path_info_extra *e;
+ struct bgp_path_mh_info *mh_info;
+ struct bgp_path_es_info *es_info;
+
+ e = bgp_path_info_extra_get(pi);
+
+ /* If mh_info doesn't exist allocate it */
+ mh_info = e->mh_info;
+ if (!mh_info)
+ e->mh_info = mh_info = XCALLOC(MTYPE_BGP_EVPN_PATH_MH_INFO,
+ sizeof(struct bgp_path_mh_info));
+
+ /* If es_info doesn't exist allocate it */
+ es_info = mh_info->es_info;
+ if (!es_info) {
+ mh_info->es_info = es_info =
+ XCALLOC(MTYPE_BGP_EVPN_PATH_ES_INFO,
+ sizeof(struct bgp_path_es_info));
+ es_info->vni = vni;
+ es_info->pi = pi;
+ }
+
+ return es_info;
+}
+
+static void bgp_evpn_path_es_unlink(struct bgp_path_es_info *es_info)
+{
+ struct bgp_evpn_es *es = es_info->es;
+ struct bgp_path_info *pi;
+
+ if (!es)
+ return;
+
+ pi = es_info->pi;
+ if (BGP_DEBUG(evpn_mh, EVPN_MH_RT))
+ zlog_debug("vni %u path %pFX unlinked from es %s", es_info->vni,
+ &pi->net->p, es->esi_str);
+
+ if (es_info->vni)
+ list_delete_node(es->macip_evi_path_list,
+ &es_info->es_listnode);
+ else
+ list_delete_node(es->macip_global_path_list,
+ &es_info->es_listnode);
+
+ es_info->es = NULL;
+
+ /* if there are no other references against the ES it
+ * needs to be freed
+ */
+ bgp_evpn_es_free(es, __func__);
+
+ /* Note we don't free the path es_info on unlink; it will be freed up
+ * along with the path.
+ */
+}
+
+void bgp_evpn_path_es_link(struct bgp_path_info *pi, vni_t vni, esi_t *esi)
+{
+ struct bgp_path_es_info *es_info;
+ struct bgp_evpn_es *es;
+ struct bgp *bgp_evpn;
+
+ es_info = (pi->extra && pi->extra->mh_info)
+ ? pi->extra->mh_info->es_info
+ : NULL;
+ /* if the esi is zero just unlink the path from the old es */
+ if (!esi || !memcmp(esi, zero_esi, sizeof(*esi))) {
+ if (es_info)
+ bgp_evpn_path_es_unlink(es_info);
+ return;
+ }
+
+ bgp_evpn = bgp_get_evpn();
+ if (!bgp_evpn)
+ return;
+
+ /* setup es_info against the path if it doesn't aleady exist */
+ if (!es_info)
+ es_info = bgp_evpn_path_es_info_new(pi, vni);
+
+ /* find-create ES */
+ es = bgp_evpn_es_find(esi);
+ if (!es)
+ es = bgp_evpn_es_new(bgp_evpn, esi);
+
+ /* dup check */
+ if (es_info->es == es)
+ return;
+
+ /* unlink old ES if any */
+ bgp_evpn_path_es_unlink(es_info);
+
+ if (BGP_DEBUG(evpn_mh, EVPN_MH_RT))
+ zlog_debug("vni %u path %pFX linked to es %s", vni, &pi->net->p,
+ es->esi_str);
+
+ /* link mac-ip path to the new destination ES */
+ es_info->es = es;
+ listnode_init(&es_info->es_listnode, es_info);
+ if (es_info->vni)
+ listnode_add(es->macip_evi_path_list, &es_info->es_listnode);
+ else
+ listnode_add(es->macip_global_path_list, &es_info->es_listnode);
+}
+
+static bool bgp_evpn_is_macip_path(struct bgp_path_info *pi)
+{
+ struct prefix_evpn *evp;
+
+ /* Only MAC-IP routes need to be linked (MAC-only routes can be
+ * skipped) as these lists are maintained for managing
+ * host routes in the tenant VRF
+ */
+ evp = (struct prefix_evpn *)&pi->net->p;
+ return is_evpn_prefix_ipaddr_v4(evp) || is_evpn_prefix_ipaddr_v6(evp);
+}
+
+/* When a remote ES is added to a VRF, routes using that as
+ * a destination need to be migrated to a L3NHG or viceversa.
+ * This is done indirectly by re-attempting an install of the
+ * route in the associated VRFs. As a part of the VRF install use
+ * of l3 NHG is evaluated and this results in the
+ * attr.es_flag ATTR_ES_L3_NHG_USE being set or cleared.
+ */
+static void
+bgp_evpn_es_path_update_on_es_vrf_chg(struct bgp_evpn_es_vrf *es_vrf,
+ const char *reason)
+{
+ struct listnode *node;
+ struct bgp_path_es_info *es_info;
+ struct bgp_path_info *pi;
+ struct bgp_evpn_es *es = es_vrf->es;
+
+ if (!bgp_mh_info->host_routes_use_l3nhg)
+ return;
+
+ if (BGP_DEBUG(evpn_mh, EVPN_MH_RT))
+ zlog_debug("update paths linked to es %s on es-vrf %s %s",
+ es->esi_str, es_vrf->bgp_vrf->name, reason);
+
+ for (ALL_LIST_ELEMENTS_RO(es->macip_global_path_list, node, es_info)) {
+ pi = es_info->pi;
+
+ if (!bgp_evpn_is_macip_path(pi))
+ continue;
+
+ if (BGP_DEBUG(evpn_mh, EVPN_MH_RT))
+ zlog_debug(
+ "update path %pFX linked to es %s on vrf chg",
+ &pi->net->p, es->esi_str);
+ bgp_evpn_route_entry_install_if_vrf_match(es_vrf->bgp_vrf, pi,
+ 1);
+ }
+}
+
+static void bgp_evpn_es_frag_free(struct bgp_evpn_es_frag *es_frag)
+{
+ struct bgp_evpn_es *es = es_frag->es;
+
+ if (es->es_base_frag == es_frag)
+ es->es_base_frag = NULL;
+
+ if (BGP_DEBUG(evpn_mh, EVPN_MH_ES))
+ zlog_debug("es %s frag %u free", es->esi_str, es_frag->rd_id);
+ list_delete_node(es->es_frag_list, &es_frag->es_listnode);
+
+ /* EVIs that are advertised using the info in this fragment */
+ list_delete(&es_frag->es_evi_frag_list);
+
+ bf_release_index(bm->rd_idspace, es_frag->rd_id);
+
+
+ XFREE(MTYPE_BGP_EVPN_ES_FRAG, es_frag);
+}
+
+static void bgp_evpn_es_frag_free_unused(struct bgp_evpn_es_frag *es_frag)
+{
+ if ((es_frag->es->es_base_frag == es_frag) ||
+ listcount(es_frag->es_evi_frag_list))
+ return;
+
+ bgp_evpn_es_frag_free(es_frag);
+}
+
+static void bgp_evpn_es_frag_free_all(struct bgp_evpn_es *es)
+{
+ struct listnode *node;
+ struct listnode *nnode;
+ struct bgp_evpn_es_frag *es_frag;
+
+ for (ALL_LIST_ELEMENTS(es->es_frag_list, node, nnode, es_frag))
+ bgp_evpn_es_frag_free(es_frag);
+}
+
+static struct bgp_evpn_es_frag *bgp_evpn_es_frag_new(struct bgp_evpn_es *es)
+{
+ struct bgp_evpn_es_frag *es_frag;
+ char buf[BGP_EVPN_PREFIX_RD_LEN];
+ struct bgp *bgp;
+
+ es_frag = XCALLOC(MTYPE_BGP_EVPN_ES_FRAG, sizeof(*es_frag));
+ bf_assign_index(bm->rd_idspace, es_frag->rd_id);
+ es_frag->prd.family = AF_UNSPEC;
+ es_frag->prd.prefixlen = 64;
+ bgp = bgp_get_evpn();
+ snprintfrr(buf, sizeof(buf), "%pI4:%hu", &bgp->router_id,
+ es_frag->rd_id);
+ (void)str2prefix_rd(buf, &es_frag->prd);
+
+ /* EVIs that are advertised using the info in this fragment */
+ es_frag->es_evi_frag_list = list_new();
+ listset_app_node_mem(es_frag->es_evi_frag_list);
+
+ /* Link the fragment to the parent ES */
+ es_frag->es = es;
+ listnode_init(&es_frag->es_listnode, es_frag);
+ listnode_add(es->es_frag_list, &es_frag->es_listnode);
+
+ if (BGP_DEBUG(evpn_mh, EVPN_MH_ES))
+ zlog_debug("es %s frag %u new", es->esi_str, es_frag->rd_id);
+ return es_frag;
+}
+
+static struct bgp_evpn_es_frag *
+bgp_evpn_es_find_frag_with_space(struct bgp_evpn_es *es)
+{
+ struct listnode *node;
+ struct bgp_evpn_es_frag *es_frag;
+
+ for (ALL_LIST_ELEMENTS_RO(es->es_frag_list, node, es_frag)) {
+ if (listcount(es_frag->es_evi_frag_list) <
+ bgp_mh_info->evi_per_es_frag)
+ return es_frag;
+ }
+
+ /* No frags where found with space; allocate a new one */
+ return bgp_evpn_es_frag_new(es);
+}
+
+/* Link the ES-EVI to one of the ES fragments */
+static void bgp_evpn_es_frag_evi_add(struct bgp_evpn_es_evi *es_evi)
+{
+ struct bgp_evpn_es_frag *es_frag;
+ struct bgp_evpn_es *es = es_evi->es;
+
+ if (es_evi->es_frag ||
+ !(CHECK_FLAG(es_evi->flags, BGP_EVPNES_EVI_LOCAL)))
+ return;
+
+ es_frag = bgp_evpn_es_find_frag_with_space(es);
+
+ es_evi->es_frag = es_frag;
+ listnode_init(&es_evi->es_frag_listnode, es_evi);
+ listnode_add(es_frag->es_evi_frag_list, &es_evi->es_frag_listnode);
+
+ if (BGP_DEBUG(evpn_mh, EVPN_MH_ES))
+ zlog_debug("es %s vni %d linked to frag %u", es->esi_str,
+ es_evi->vpn->vni, es_frag->rd_id);
+}
+
+/* UnLink the ES-EVI from the ES fragment */
+static void bgp_evpn_es_frag_evi_del(struct bgp_evpn_es_evi *es_evi,
+ bool send_ead_del_if_empty)
+{
+ struct bgp_evpn_es_frag *es_frag = es_evi->es_frag;
+ struct prefix_evpn p;
+ struct bgp_evpn_es *es;
+ struct bgp *bgp;
+
+ if (!es_frag)
+ return;
+
+ es = es_frag->es;
+ es_evi->es_frag = NULL;
+ if (BGP_DEBUG(evpn_mh, EVPN_MH_ES))
+ zlog_debug("es %s vni %d unlinked from frag %u", es->esi_str,
+ es_evi->vpn->vni, es_frag->rd_id);
+
+ list_delete_node(es_frag->es_evi_frag_list, &es_evi->es_frag_listnode);
+
+ /*
+ * if there are no other EVIs on the fragment deleted the EAD-ES for
+ * the fragment
+ */
+ if (send_ead_del_if_empty && !listcount(es_frag->es_evi_frag_list)) {
+ bgp = bgp_get_evpn();
+
+ if (BGP_DEBUG(evpn_mh, EVPN_MH_ES))
+ zlog_debug("es %s frag %u ead-es route delete",
+ es->esi_str, es_frag->rd_id);
+ build_evpn_type1_prefix(&p, BGP_EVPN_AD_ES_ETH_TAG, &es->esi,
+ es->originator_ip);
+ p.prefix.ead_addr.frag_id = es_frag->rd_id;
+ bgp_evpn_mh_route_delete(bgp, es, NULL, es_frag, &p);
+ }
+
+ /* We don't attempt to coalesce frags that may not be full. Instead we
+ * only free up the frag when it is completely empty.
+ */
+ bgp_evpn_es_frag_free_unused(es_frag);
+}
+
+/* Link the ES-EVIs to one of the ES fragments */
+static void bgp_evpn_es_frag_evi_update_all(struct bgp_evpn_es *es, bool add)
+{
+ struct listnode *node;
+ struct bgp_evpn_es_evi *es_evi;
+
+ for (ALL_LIST_ELEMENTS_RO(es->es_evi_list, node, es_evi)) {
+ if (add)
+ bgp_evpn_es_frag_evi_add(es_evi);
+ else
+ bgp_evpn_es_frag_evi_del(es_evi, false);
+ }
+}
+
+/* compare ES-IDs for the global ES RB tree */
+static int bgp_es_rb_cmp(const struct bgp_evpn_es *es1,
+ const struct bgp_evpn_es *es2)
+{
+ return memcmp(&es1->esi, &es2->esi, ESI_BYTES);
+}
+RB_GENERATE(bgp_es_rb_head, bgp_evpn_es, rb_node, bgp_es_rb_cmp);
+
+struct bgp_evpn_es *bgp_evpn_es_find(const esi_t *esi)
+{
+ struct bgp_evpn_es tmp;
+
+ memcpy(&tmp.esi, esi, sizeof(esi_t));
+ return RB_FIND(bgp_es_rb_head, &bgp_mh_info->es_rb_tree, &tmp);
+}
+
+static struct bgp_evpn_es *bgp_evpn_es_new(struct bgp *bgp, const esi_t *esi)
+{
+ struct bgp_evpn_es *es;
+
+ es = XCALLOC(MTYPE_BGP_EVPN_ES, sizeof(struct bgp_evpn_es));
+
+ /* set the ESI */
+ memcpy(&es->esi, esi, sizeof(esi_t));
+
+ /* Initialise the VTEP list */
+ es->es_vtep_list = list_new();
+ listset_app_node_mem(es->es_vtep_list);
+ es->es_vtep_list->cmp = bgp_evpn_es_vtep_cmp;
+
+ esi_to_str(&es->esi, es->esi_str, sizeof(es->esi_str));
+
+ /* Initialize the ES routing table */
+ es->route_table = bgp_table_init(bgp, AFI_L2VPN, SAFI_EVPN);
+
+ /* Add to rb_tree */
+ RB_INSERT(bgp_es_rb_head, &bgp_mh_info->es_rb_tree, es);
+
+ /* Initialise the ES-EVI list */
+ es->es_evi_list = list_new();
+ listset_app_node_mem(es->es_evi_list);
+
+ /* Initialise the ES-VRF list used for L3NHG management */
+ es->es_vrf_list = list_new();
+ listset_app_node_mem(es->es_vrf_list);
+
+ /* Initialise the route list used for efficient event handling */
+ es->macip_evi_path_list = list_new();
+ listset_app_node_mem(es->macip_evi_path_list);
+ es->macip_global_path_list = list_new();
+ listset_app_node_mem(es->macip_global_path_list);
+ es->es_frag_list = list_new();
+ listset_app_node_mem(es->es_frag_list);
+
+ QOBJ_REG(es, bgp_evpn_es);
+
+ return es;
+}
+
+/* Free a given ES -
+ * This just frees appropriate memory, caller should have taken other
+ * needed actions.
+ */
+static void bgp_evpn_es_free(struct bgp_evpn_es *es, const char *caller)
+{
+ if ((es->flags & (BGP_EVPNES_LOCAL | BGP_EVPNES_REMOTE))
+ || listcount(es->macip_evi_path_list)
+ || listcount(es->macip_global_path_list))
+ return;
+
+ if (BGP_DEBUG(evpn_mh, EVPN_MH_ES))
+ zlog_debug("%s: es %s free", caller, es->esi_str);
+
+ /* cleanup resources maintained against the ES */
+ list_delete(&es->es_evi_list);
+ list_delete(&es->es_vrf_list);
+ list_delete(&es->es_vtep_list);
+ list_delete(&es->macip_evi_path_list);
+ list_delete(&es->macip_global_path_list);
+ list_delete(&es->es_frag_list);
+ bgp_table_unlock(es->route_table);
+
+ /* remove the entry from various databases */
+ RB_REMOVE(bgp_es_rb_head, &bgp_mh_info->es_rb_tree, es);
+ bgp_evpn_es_cons_checks_pend_del(es);
+
+ QOBJ_UNREG(es);
+ XFREE(MTYPE_BGP_EVPN_ES, es);
+}
+
+static inline bool bgp_evpn_is_es_local_and_non_bypass(struct bgp_evpn_es *es)
+{
+ return (es->flags & BGP_EVPNES_LOCAL)
+ && !(es->flags & BGP_EVPNES_BYPASS);
+}
+
+/* init local info associated with the ES */
+static void bgp_evpn_es_local_info_set(struct bgp *bgp, struct bgp_evpn_es *es)
+{
+ bool old_is_local;
+ bool is_local;
+
+ if (CHECK_FLAG(es->flags, BGP_EVPNES_LOCAL))
+ return;
+
+ old_is_local = bgp_evpn_is_es_local_and_non_bypass(es);
+ SET_FLAG(es->flags, BGP_EVPNES_LOCAL);
+
+ listnode_init(&es->es_listnode, es);
+ listnode_add(bgp_mh_info->local_es_list, &es->es_listnode);
+
+ /* setup the first ES fragment; more fragments may be allocated based
+ * on the the number of EVI entries
+ */
+ es->es_base_frag = bgp_evpn_es_frag_new(es);
+ /* distribute ES-EVIs to one or more ES fragments */
+ bgp_evpn_es_frag_evi_update_all(es, true);
+
+ is_local = bgp_evpn_is_es_local_and_non_bypass(es);
+ if (old_is_local != is_local)
+ bgp_evpn_mac_update_on_es_local_chg(es, is_local);
+}
+
+/* clear any local info associated with the ES */
+static void bgp_evpn_es_local_info_clear(struct bgp_evpn_es *es, bool finish)
+{
+ bool old_is_local;
+ bool is_local;
+
+ if (!CHECK_FLAG(es->flags, BGP_EVPNES_LOCAL))
+ return;
+
+ /* clear the es frag references and free them up */
+ bgp_evpn_es_frag_evi_update_all(es, false);
+ es->es_base_frag = NULL;
+ bgp_evpn_es_frag_free_all(es);
+
+ old_is_local = bgp_evpn_is_es_local_and_non_bypass(es);
+ UNSET_FLAG(es->flags, BGP_EVPNES_LOCAL);
+
+ is_local = bgp_evpn_is_es_local_and_non_bypass(es);
+ if (!finish && (old_is_local != is_local))
+ bgp_evpn_mac_update_on_es_local_chg(es, is_local);
+
+ /* remove from the ES local list */
+ list_delete_node(bgp_mh_info->local_es_list, &es->es_listnode);
+
+ bgp_evpn_es_free(es, __func__);
+}
+
+/* eval remote info associated with the ES */
+static void bgp_evpn_es_remote_info_re_eval(struct bgp_evpn_es *es)
+{
+ if (es->remote_es_evi_cnt) {
+ SET_FLAG(es->flags, BGP_EVPNES_REMOTE);
+ } else {
+ if (CHECK_FLAG(es->flags, BGP_EVPNES_REMOTE)) {
+ UNSET_FLAG(es->flags, BGP_EVPNES_REMOTE);
+ bgp_evpn_es_free(es, __func__);
+ }
+ }
+}
+
+/* If ES is present and local it needs to be active/oper-up for
+ * including L3 EC
+ */
+bool bgp_evpn_es_add_l3_ecomm_ok(esi_t *esi)
+{
+ struct bgp_evpn_es *es;
+
+ if (!esi || !bgp_mh_info->suppress_l3_ecomm_on_inactive_es)
+ return true;
+
+ es = bgp_evpn_es_find(esi);
+
+ return (!es || !(es->flags & BGP_EVPNES_LOCAL)
+ || bgp_evpn_local_es_is_active(es));
+}
+
+static bool bgp_evpn_is_valid_local_path(struct bgp_path_info *pi)
+{
+ return (CHECK_FLAG(pi->flags, BGP_PATH_VALID)
+ && pi->type == ZEBRA_ROUTE_BGP
+ && pi->sub_type == BGP_ROUTE_STATIC);
+}
+
+/* Update all local MAC-IP routes in the VNI routing table associated
+ * with the ES. When the ES is down the routes are advertised without
+ * the L3 extcomm
+ */
+static void bgp_evpn_mac_update_on_es_oper_chg(struct bgp_evpn_es *es)
+{
+ struct listnode *node;
+ struct bgp_path_es_info *es_info;
+ struct bgp_path_info *pi;
+ struct bgp *bgp;
+ struct bgpevpn *vpn;
+
+ if (!bgp_mh_info->suppress_l3_ecomm_on_inactive_es)
+ return;
+
+ if (BGP_DEBUG(evpn_mh, EVPN_MH_ES))
+ zlog_debug("update paths linked to es %s on oper chg",
+ es->esi_str);
+
+ bgp = bgp_get_evpn();
+ for (ALL_LIST_ELEMENTS_RO(es->macip_evi_path_list, node, es_info)) {
+ pi = es_info->pi;
+
+ if (!bgp_evpn_is_valid_local_path(pi))
+ continue;
+
+ if (!bgp_evpn_is_macip_path(pi))
+ continue;
+
+ vpn = bgp_evpn_lookup_vni(bgp, es_info->vni);
+ if (!vpn)
+ continue;
+
+ if (BGP_DEBUG(evpn_mh, EVPN_MH_RT))
+ zlog_debug(
+ "update path %d %pFX linked to es %s on oper chg",
+ es_info->vni, &pi->net->p, es->esi_str);
+
+ bgp_evpn_update_type2_route_entry(bgp, vpn, pi->net, pi,
+ __func__);
+ }
+}
+
+static bool bgp_evpn_is_valid_bgp_path(struct bgp_path_info *pi)
+{
+ return (CHECK_FLAG(pi->flags, BGP_PATH_VALID)
+ && pi->type == ZEBRA_ROUTE_BGP
+ && pi->sub_type == BGP_ROUTE_NORMAL);
+}
+
+/* If an ES is no longer local (or becomes local) we need to re-install
+ * paths using that ES as destination. This is needed as the criteria
+ * for best path selection has changed.
+ */
+static void bgp_evpn_mac_update_on_es_local_chg(struct bgp_evpn_es *es,
+ bool is_local)
+{
+ struct listnode *node;
+ struct bgp_path_es_info *es_info;
+ struct bgp_path_info *pi;
+ bool tmp_local;
+ struct attr *attr_new;
+ struct attr attr_tmp;
+
+ if (BGP_DEBUG(evpn_mh, EVPN_MH_ES))
+ zlog_debug("update paths linked to es %s on chg to %s",
+ es->esi_str, is_local ? "local" : "non-local");
+
+ for (ALL_LIST_ELEMENTS_RO(es->macip_global_path_list, node, es_info)) {
+ pi = es_info->pi;
+
+ /* Consider "valid" remote routes */
+ if (!bgp_evpn_is_valid_bgp_path(pi))
+ continue;
+
+ if (!pi->attr)
+ continue;
+
+ tmp_local = !!(pi->attr->es_flags & ATTR_ES_IS_LOCAL);
+ if (tmp_local == is_local)
+ continue;
+
+ if (BGP_DEBUG(evpn_mh, EVPN_MH_RT))
+ zlog_debug(
+ "update path %pFX linked to es %s on chg to %s",
+ &pi->net->p, es->esi_str,
+ is_local ? "local" : "non-local");
+
+ attr_tmp = *pi->attr;
+ if (is_local)
+ attr_tmp.es_flags |= ATTR_ES_IS_LOCAL;
+ else
+ attr_tmp.es_flags &= ~ATTR_ES_IS_LOCAL;
+ attr_new = bgp_attr_intern(&attr_tmp);
+ bgp_attr_unintern(&pi->attr);
+ pi->attr = attr_new;
+ bgp_evpn_import_type2_route(pi, 1);
+ }
+}
+
+static void bgp_evpn_local_es_deactivate(struct bgp *bgp,
+ struct bgp_evpn_es *es)
+{
+ struct prefix_evpn p;
+ int ret;
+
+ /* withdraw ESR */
+ /* Delete and withdraw locally learnt ES route */
+ build_evpn_type4_prefix(&p, &es->esi, es->originator_ip);
+ ret = bgp_evpn_type4_route_delete(bgp, es, &p);
+ if (ret) {
+ flog_err(EC_BGP_EVPN_ROUTE_DELETE,
+ "%u failed to delete type-4 route for ESI %s",
+ bgp->vrf_id, es->esi_str);
+ }
+
+ /* withdraw EAD-EVI */
+ if (!bgp_mh_info->ead_evi_adv_for_down_links)
+ bgp_evpn_local_type1_evi_route_del(bgp, es);
+
+ /* withdraw EAD-ES */
+ bgp_evpn_ead_es_route_delete(bgp, es);
+
+ bgp_evpn_mac_update_on_es_oper_chg(es);
+}
+
+/* Process ES link oper-down by withdrawing ES-EAD and ESR */
+static void bgp_evpn_local_es_down(struct bgp *bgp, struct bgp_evpn_es *es)
+{
+ bool old_active;
+
+ if (!CHECK_FLAG(es->flags, BGP_EVPNES_OPER_UP))
+ return;
+
+ old_active = bgp_evpn_local_es_is_active(es);
+ UNSET_FLAG(es->flags, BGP_EVPNES_OPER_UP);
+
+ if (BGP_DEBUG(evpn_mh, EVPN_MH_ES))
+ zlog_debug("local es %s down", es->esi_str);
+
+ if (old_active)
+ bgp_evpn_local_es_deactivate(bgp, es);
+}
+
+static void bgp_evpn_local_es_activate(struct bgp *bgp, struct bgp_evpn_es *es,
+ bool regen_ead, bool regen_esr)
+{
+ struct prefix_evpn p;
+
+ if (regen_esr) {
+ if (BGP_DEBUG(evpn_mh, EVPN_MH_ES))
+ zlog_debug("local es %s generate ESR", es->esi_str);
+ /* generate ESR */
+ build_evpn_type4_prefix(&p, &es->esi, es->originator_ip);
+ if (bgp_evpn_type4_route_update(bgp, es, &p))
+ flog_err(EC_BGP_EVPN_ROUTE_CREATE,
+ "%u: Type4 route creation failure for ESI %s",
+ bgp->vrf_id, es->esi_str);
+ }
+
+ if (regen_ead) {
+ if (BGP_DEBUG(evpn_mh, EVPN_MH_ES))
+ zlog_debug("local es %s generate EAD", es->esi_str);
+ /* generate EAD-EVI */
+ bgp_evpn_local_type1_evi_route_add(bgp, es);
+
+ /* generate EAD-ES */
+ bgp_evpn_ead_es_route_update(bgp, es);
+ }
+
+ bgp_evpn_mac_update_on_es_oper_chg(es);
+}
+
+/* Process ES link oper-up by generating ES-EAD and ESR */
+static void bgp_evpn_local_es_up(struct bgp *bgp, struct bgp_evpn_es *es,
+ bool regen_esr)
+{
+ bool regen_ead = false;
+ bool active = false;
+
+ if (!CHECK_FLAG(es->flags, BGP_EVPNES_OPER_UP)) {
+ if (BGP_DEBUG(evpn_mh, EVPN_MH_ES))
+ zlog_debug("local es %s up", es->esi_str);
+
+ SET_FLAG(es->flags, BGP_EVPNES_OPER_UP);
+ regen_esr = true;
+ regen_ead = true;
+ }
+
+ active = bgp_evpn_local_es_is_active(es);
+ if (active && (regen_ead || regen_esr))
+ bgp_evpn_local_es_activate(bgp, es, regen_ead, regen_esr);
+}
+
+/* If an ethernet segment is in LACP bypass we cannot advertise
+ * reachability to it i.e. EAD-per-ES and ESR is not advertised in
+ * bypass state.
+ * PS: EAD-per-EVI will continue to be advertised
+ */
+static void bgp_evpn_local_es_bypass_update(struct bgp *bgp,
+ struct bgp_evpn_es *es, bool bypass)
+{
+ bool old_bypass = !!(es->flags & BGP_EVPNES_BYPASS);
+ bool old_active;
+ bool new_active;
+ bool old_is_local;
+ bool is_local;
+
+ if (bypass == old_bypass)
+ return;
+
+ old_active = bgp_evpn_local_es_is_active(es);
+ old_is_local = bgp_evpn_is_es_local_and_non_bypass(es);
+ if (bypass)
+ SET_FLAG(es->flags, BGP_EVPNES_BYPASS);
+ else
+ UNSET_FLAG(es->flags, BGP_EVPNES_BYPASS);
+
+ if (BGP_DEBUG(evpn_mh, EVPN_MH_ES))
+ zlog_debug("local es %s bypass %s", es->esi_str,
+ bypass ? "set" : "clear");
+
+ new_active = bgp_evpn_local_es_is_active(es);
+ if (old_active != new_active) {
+ if (new_active)
+ bgp_evpn_local_es_activate(bgp, es, true, true);
+ else
+ bgp_evpn_local_es_deactivate(bgp, es);
+ }
+
+ is_local = bgp_evpn_is_es_local_and_non_bypass(es);
+ if (old_is_local != is_local)
+ bgp_evpn_mac_update_on_es_local_chg(es, is_local);
+}
+
+static void bgp_evpn_local_es_do_del(struct bgp *bgp, struct bgp_evpn_es *es)
+{
+ struct bgp_evpn_es_evi *es_evi;
+ struct listnode *evi_node, *evi_next_node;
+
+ if (BGP_DEBUG(evpn_mh, EVPN_MH_ES))
+ zlog_debug("del local es %s", es->esi_str);
+
+ /* Delete all local EVPN ES routes from ESI table
+ * and schedule for processing (to withdraw from peers))
+ */
+ bgp_evpn_es_route_del_all(bgp, es);
+
+ /* release all local ES EVIs associated with the ES */
+ for (ALL_LIST_ELEMENTS(es->es_evi_list, evi_node,
+ evi_next_node, es_evi)) {
+ bgp_evpn_local_es_evi_do_del(es_evi);
+ }
+
+ /* Clear local info associated with the ES and free it up if there is
+ * no remote reference
+ */
+ bgp_evpn_es_local_info_clear(es, false);
+}
+
+bool bgp_evpn_is_esi_local_and_non_bypass(esi_t *esi)
+{
+ struct bgp_evpn_es *es = NULL;
+
+ /* Lookup ESI hash - should exist. */
+ es = bgp_evpn_es_find(esi);
+
+ return es && bgp_evpn_is_es_local_and_non_bypass(es);
+}
+
+int bgp_evpn_local_es_del(struct bgp *bgp, esi_t *esi)
+{
+ struct bgp_evpn_es *es = NULL;
+
+ /* Lookup ESI hash - should exist. */
+ es = bgp_evpn_es_find(esi);
+ if (!es) {
+ flog_warn(EC_BGP_EVPN_ESI, "%u: ES missing at local ES DEL",
+ bgp->vrf_id);
+ return -1;
+ }
+
+ bgp_evpn_local_es_do_del(bgp, es);
+ return 0;
+}
+
+/* Handle device to ES id association. Results in the creation of a local
+ * ES.
+ */
+int bgp_evpn_local_es_add(struct bgp *bgp, esi_t *esi,
+ struct in_addr originator_ip, bool oper_up,
+ uint16_t df_pref, bool bypass)
+{
+ struct bgp_evpn_es *es;
+ bool new_es = true;
+ bool regen_esr = false;
+
+ /* create the new es */
+ es = bgp_evpn_es_find(esi);
+ if (es) {
+ if (CHECK_FLAG(es->flags, BGP_EVPNES_LOCAL))
+ new_es = false;
+ } else
+ es = bgp_evpn_es_new(bgp, esi);
+
+ if (BGP_DEBUG(evpn_mh, EVPN_MH_ES))
+ zlog_debug("add local es %s orig-ip %pI4 df_pref %u %s",
+ es->esi_str, &originator_ip, df_pref,
+ bypass ? "bypass" : "");
+
+ es->originator_ip = originator_ip;
+ if (df_pref != es->df_pref) {
+ es->df_pref = df_pref;
+ regen_esr = true;
+ }
+ bgp_evpn_es_local_info_set(bgp, es);
+
+ /* import all remote Type-4 routes in the ES table */
+ if (new_es)
+ bgp_evpn_type4_remote_routes_import(bgp, es,
+ true /* install */);
+
+ /* create and advertise EAD-EVI routes for the ES -
+ * XXX - till an ES-EVI reference is created there is really nothing to
+ * advertise
+ */
+ if (bgp_mh_info->ead_evi_adv_for_down_links)
+ bgp_evpn_local_type1_evi_route_add(bgp, es);
+
+ bgp_evpn_local_es_bypass_update(bgp, es, bypass);
+
+ /* If the ES link is operationally up generate EAD-ES. EAD-EVI
+ * can be generated even if the link is inactive.
+ */
+ if (oper_up)
+ bgp_evpn_local_es_up(bgp, es, regen_esr);
+ else
+ bgp_evpn_local_es_down(bgp, es);
+
+ return 0;
+}
+
+static void bgp_evpn_es_json_frag_fill(json_object *json_frags,
+ struct bgp_evpn_es *es)
+{
+ json_object *json_frag;
+ struct listnode *node;
+ struct bgp_evpn_es_frag *es_frag;
+
+ for (ALL_LIST_ELEMENTS_RO(es->es_frag_list, node, es_frag)) {
+ json_frag = json_object_new_object();
+
+ json_object_string_addf(json_frag, "rd", "%pRD", &es_frag->prd);
+ json_object_int_add(json_frag, "eviCount",
+ listcount(es_frag->es_evi_frag_list));
+
+ json_object_array_add(json_frags, json_frag);
+ }
+}
+
+static void bgp_evpn_es_frag_show_detail(struct vty *vty,
+ struct bgp_evpn_es *es)
+{
+ struct listnode *node;
+ struct bgp_evpn_es_frag *es_frag;
+
+ for (ALL_LIST_ELEMENTS_RO(es->es_frag_list, node, es_frag)) {
+ vty_out(vty, " %pRD EVIs: %d\n", &es_frag->prd,
+ listcount(es_frag->es_evi_frag_list));
+ }
+}
+
+static char *bgp_evpn_es_vteps_str(char *vtep_str, struct bgp_evpn_es *es,
+ uint8_t vtep_str_size)
+{
+ char vtep_flag_str[BGP_EVPN_FLAG_STR_SZ];
+ struct listnode *node;
+ struct bgp_evpn_es_vtep *es_vtep;
+ bool first = true;
+ char ip_buf[INET6_ADDRSTRLEN];
+
+ vtep_str[0] = '\0';
+ for (ALL_LIST_ELEMENTS_RO(es->es_vtep_list, node, es_vtep)) {
+ vtep_flag_str[0] = '\0';
+
+ if (es_vtep->flags & BGP_EVPNES_VTEP_ESR)
+ strlcat(vtep_flag_str, "E", sizeof(vtep_flag_str));
+ if (es_vtep->flags & BGP_EVPNES_VTEP_ACTIVE)
+ strlcat(vtep_flag_str, "A", sizeof(vtep_flag_str));
+
+ if (!strlen(vtep_flag_str))
+ strlcat(vtep_flag_str, "-", sizeof(vtep_flag_str));
+ if (first)
+ first = false;
+ else
+ strlcat(vtep_str, ",", vtep_str_size);
+ strlcat(vtep_str,
+ inet_ntop(AF_INET, &es_vtep->vtep_ip, ip_buf,
+ sizeof(ip_buf)),
+ vtep_str_size);
+ strlcat(vtep_str, "(", vtep_str_size);
+ strlcat(vtep_str, vtep_flag_str, vtep_str_size);
+ strlcat(vtep_str, ")", vtep_str_size);
+ }
+
+ return vtep_str;
+}
+
+static void bgp_evpn_es_json_vtep_fill(json_object *json_vteps,
+ struct bgp_evpn_es_vtep *es_vtep)
+{
+ json_object *json_vtep_entry;
+ json_object *json_flags;
+
+ json_vtep_entry = json_object_new_object();
+
+ json_object_string_addf(json_vtep_entry, "vtep_ip", "%pI4",
+ &es_vtep->vtep_ip);
+ if (es_vtep->flags & (BGP_EVPNES_VTEP_ESR |
+ BGP_EVPNES_VTEP_ACTIVE)) {
+ json_flags = json_object_new_array();
+ if (es_vtep->flags & BGP_EVPNES_VTEP_ESR)
+ json_array_string_add(json_flags, "esr");
+ if (es_vtep->flags & BGP_EVPNES_VTEP_ACTIVE)
+ json_array_string_add(json_flags, "active");
+ json_object_object_add(json_vtep_entry, "flags", json_flags);
+ if (es_vtep->flags & BGP_EVPNES_VTEP_ESR) {
+ json_object_int_add(json_vtep_entry, "dfPreference",
+ es_vtep->df_pref);
+ json_object_int_add(json_vtep_entry, "dfAlgorithm",
+ es_vtep->df_pref);
+ }
+ }
+
+ json_object_array_add(json_vteps,
+ json_vtep_entry);
+}
+
+static void bgp_evpn_es_vteps_show_detail(struct vty *vty,
+ struct bgp_evpn_es *es)
+{
+ char vtep_flag_str[BGP_EVPN_FLAG_STR_SZ];
+ struct listnode *node;
+ struct bgp_evpn_es_vtep *es_vtep;
+ char alg_buf[EVPN_DF_ALG_STR_LEN];
+
+ for (ALL_LIST_ELEMENTS_RO(es->es_vtep_list, node, es_vtep)) {
+ vtep_flag_str[0] = '\0';
+ if (es_vtep->flags & BGP_EVPNES_VTEP_ESR)
+ strlcat(vtep_flag_str, "E", sizeof(vtep_flag_str));
+ if (es_vtep->flags & BGP_EVPNES_VTEP_ACTIVE)
+ strlcat(vtep_flag_str, "A", sizeof(vtep_flag_str));
+
+ if (!strlen(vtep_flag_str))
+ strlcat(vtep_flag_str, "-", sizeof(vtep_flag_str));
+
+ vty_out(vty, " %pI4 flags: %s", &es_vtep->vtep_ip,
+ vtep_flag_str);
+
+ if (es_vtep->flags & BGP_EVPNES_VTEP_ESR)
+ vty_out(vty, " df_alg: %s df_pref: %u\n",
+ evpn_es_df_alg2str(es_vtep->df_alg, alg_buf,
+ sizeof(alg_buf)),
+ es_vtep->df_pref);
+ else
+ vty_out(vty, "\n");
+ }
+}
+
+static void bgp_evpn_es_show_entry(struct vty *vty,
+ struct bgp_evpn_es *es, json_object *json)
+{
+ struct listnode *node;
+ struct bgp_evpn_es_vtep *es_vtep;
+
+ if (json) {
+ json_object *json_vteps;
+ json_object *json_types;
+
+ json_object_string_add(json, "esi", es->esi_str);
+ if (es->es_base_frag)
+ json_object_string_addf(json, "rd", "%pRD",
+ &es->es_base_frag->prd);
+
+ if (es->flags & (BGP_EVPNES_LOCAL | BGP_EVPNES_REMOTE)) {
+ json_types = json_object_new_array();
+ if (es->flags & BGP_EVPNES_LOCAL)
+ json_array_string_add(json_types, "local");
+ if (es->flags & BGP_EVPNES_REMOTE)
+ json_array_string_add(json_types, "remote");
+ json_object_object_add(json, "type", json_types);
+ }
+
+ if (listcount(es->es_vtep_list)) {
+ json_vteps = json_object_new_array();
+ for (ALL_LIST_ELEMENTS_RO(es->es_vtep_list,
+ node, es_vtep)) {
+ bgp_evpn_es_json_vtep_fill(json_vteps, es_vtep);
+ }
+ json_object_object_add(json, "vteps", json_vteps);
+ }
+ json_object_int_add(json, "vniCount",
+ listcount(es->es_evi_list));
+ } else {
+ char type_str[4];
+ char vtep_str[ES_VTEP_LIST_STR_SZ + BGP_EVPN_VTEPS_FLAG_STR_SZ];
+
+ type_str[0] = '\0';
+ if (es->flags & BGP_EVPNES_BYPASS)
+ strlcat(type_str, "B", sizeof(type_str));
+ if (es->flags & BGP_EVPNES_LOCAL)
+ strlcat(type_str, "L", sizeof(type_str));
+ if (es->flags & BGP_EVPNES_REMOTE)
+ strlcat(type_str, "R", sizeof(type_str));
+ if (es->inconsistencies)
+ strlcat(type_str, "I", sizeof(type_str));
+
+ bgp_evpn_es_vteps_str(vtep_str, es, sizeof(vtep_str));
+
+ vty_out(vty, "%-30s %-5s %-21pRD %-8d %s\n", es->esi_str,
+ type_str,
+ es->es_base_frag ? &es->es_base_frag->prd : NULL,
+ listcount(es->es_evi_list), vtep_str);
+ }
+}
+
+static void bgp_evpn_es_show_entry_detail(struct vty *vty,
+ struct bgp_evpn_es *es, json_object *json)
+{
+ if (json) {
+ json_object *json_flags;
+ json_object *json_incons;
+ json_object *json_vteps;
+ json_object *json_frags;
+ struct listnode *node;
+ struct bgp_evpn_es_vtep *es_vtep;
+
+ /* Add the "brief" info first */
+ bgp_evpn_es_show_entry(vty, es, json);
+ if (es->flags
+ & (BGP_EVPNES_OPER_UP | BGP_EVPNES_ADV_EVI
+ | BGP_EVPNES_BYPASS)) {
+ json_flags = json_object_new_array();
+ if (es->flags & BGP_EVPNES_OPER_UP)
+ json_array_string_add(json_flags, "up");
+ if (es->flags & BGP_EVPNES_ADV_EVI)
+ json_array_string_add(json_flags,
+ "advertiseEVI");
+ if (es->flags & BGP_EVPNES_BYPASS)
+ json_array_string_add(json_flags, "bypass");
+ json_object_object_add(json, "flags", json_flags);
+ }
+ json_object_string_addf(json, "originator_ip", "%pI4",
+ &es->originator_ip);
+ json_object_int_add(json, "remoteVniCount",
+ es->remote_es_evi_cnt);
+ json_object_int_add(json, "vrfCount",
+ listcount(es->es_vrf_list));
+ json_object_int_add(json, "macipPathCount",
+ listcount(es->macip_evi_path_list));
+ json_object_int_add(json, "macipGlobalPathCount",
+ listcount(es->macip_global_path_list));
+ json_object_int_add(json, "inconsistentVniVtepCount",
+ es->incons_evi_vtep_cnt);
+ if (listcount(es->es_vtep_list)) {
+ json_vteps = json_object_new_array();
+ for (ALL_LIST_ELEMENTS_RO(es->es_vtep_list, node,
+ es_vtep)) {
+ bgp_evpn_es_json_vtep_fill(json_vteps, es_vtep);
+ }
+ json_object_object_add(json, "vteps", json_vteps);
+ }
+ if (listcount(es->es_frag_list)) {
+ json_frags = json_object_new_array();
+ bgp_evpn_es_json_frag_fill(json_frags, es);
+ json_object_object_add(json, "fragments", json_frags);
+ }
+ if (es->inconsistencies) {
+ json_incons = json_object_new_array();
+ if (es->inconsistencies & BGP_EVPNES_INCONS_VTEP_LIST)
+ json_array_string_add(json_incons,
+ "vni-vtep-mismatch");
+ json_object_object_add(json, "inconsistencies",
+ json_incons);
+ }
+ } else {
+ char incons_str[BGP_EVPNES_INCONS_STR_SZ];
+ char type_str[4];
+
+ type_str[0] = '\0';
+ if (es->flags & BGP_EVPNES_LOCAL)
+ strlcat(type_str, "L", sizeof(type_str));
+ if (es->flags & BGP_EVPNES_REMOTE)
+ strlcat(type_str, "R", sizeof(type_str));
+
+ vty_out(vty, "ESI: %s\n", es->esi_str);
+ vty_out(vty, " Type: %s\n", type_str);
+ vty_out(vty, " RD: %pRD\n",
+ es->es_base_frag ? &es->es_base_frag->prd : NULL);
+ vty_out(vty, " Originator-IP: %pI4\n", &es->originator_ip);
+ if (es->flags & BGP_EVPNES_LOCAL)
+ vty_out(vty, " Local ES DF preference: %u\n",
+ es->df_pref);
+ if (es->flags & BGP_EVPNES_BYPASS)
+ vty_out(vty, " LACP bypass: on\n");
+ vty_out(vty, " VNI Count: %d\n", listcount(es->es_evi_list));
+ vty_out(vty, " Remote VNI Count: %d\n",
+ es->remote_es_evi_cnt);
+ vty_out(vty, " VRF Count: %d\n", listcount(es->es_vrf_list));
+ vty_out(vty, " MACIP EVI Path Count: %d\n",
+ listcount(es->macip_evi_path_list));
+ vty_out(vty, " MACIP Global Path Count: %d\n",
+ listcount(es->macip_global_path_list));
+ vty_out(vty, " Inconsistent VNI VTEP Count: %d\n",
+ es->incons_evi_vtep_cnt);
+ if (es->inconsistencies) {
+ incons_str[0] = '\0';
+ if (es->inconsistencies & BGP_EVPNES_INCONS_VTEP_LIST)
+ strlcat(incons_str, "vni-vtep-mismatch",
+ sizeof(incons_str));
+ } else {
+ strlcpy(incons_str, "-", sizeof(incons_str));
+ }
+ vty_out(vty, " Inconsistencies: %s\n",
+ incons_str);
+ if (listcount(es->es_frag_list)) {
+ vty_out(vty, " Fragments:\n");
+ bgp_evpn_es_frag_show_detail(vty, es);
+ }
+ if (listcount(es->es_vtep_list)) {
+ vty_out(vty, " VTEPs:\n");
+ bgp_evpn_es_vteps_show_detail(vty, es);
+ }
+ vty_out(vty, "\n");
+ }
+}
+
+/* Display all ESs */
+void bgp_evpn_es_show(struct vty *vty, bool uj, bool detail)
+{
+ struct bgp_evpn_es *es;
+ json_object *json_array = NULL;
+ json_object *json = NULL;
+
+ if (uj) {
+ /* create an array of ESs */
+ json_array = json_object_new_array();
+ } else {
+ if (!detail) {
+ vty_out(vty,
+ "ES Flags: B - bypass, L local, R remote, I inconsistent\n");
+ vty_out(vty,
+ "VTEP Flags: E ESR/Type-4, A active nexthop\n");
+ vty_out(vty,
+ "%-30s %-5s %-21s %-8s %s\n",
+ "ESI", "Flags", "RD", "#VNIs", "VTEPs");
+ }
+ }
+
+ RB_FOREACH(es, bgp_es_rb_head, &bgp_mh_info->es_rb_tree) {
+ if (uj)
+ /* create a separate json object for each ES */
+ json = json_object_new_object();
+ if (detail)
+ bgp_evpn_es_show_entry_detail(vty, es, json);
+ else
+ bgp_evpn_es_show_entry(vty, es, json);
+ /* add ES to the json array */
+ if (uj)
+ json_object_array_add(json_array, json);
+ }
+
+ /* print the array of json-ESs */
+ if (uj)
+ vty_json(vty, json_array);
+}
+
+/* Display specific ES */
+void bgp_evpn_es_show_esi(struct vty *vty, esi_t *esi, bool uj)
+{
+ struct bgp_evpn_es *es;
+ json_object *json = NULL;
+
+ if (uj)
+ json = json_object_new_object();
+
+ es = bgp_evpn_es_find(esi);
+ if (es) {
+ bgp_evpn_es_show_entry_detail(vty, es, json);
+ } else {
+ if (!uj)
+ vty_out(vty, "ESI not found\n");
+ }
+
+ if (uj)
+ vty_json(vty, json);
+}
+
+/*****************************************************************************/
+/* Ethernet Segment to VRF association -
+ * 1. Each ES-EVI entry is associated with a tenant VRF. This associaton
+ * triggers the creation of an ES-VRF entry.
+ * 2. The ES-VRF entry is maintained for the purpose of L3-NHG creation
+ * 3. Type-2/MAC-IP routes are imported into a tenant VRF and programmed as
+ * a /32 or host route entry in the dataplane. If the destination of
+ * the host route is a remote-ES the route is programmed with the
+ * corresponding (keyed in by {vrf,ES-id}) L3-NHG.
+ * 4. The reason for this indirection (route->L3-NHG, L3-NHG->list-of-VTEPs)
+ * is to avoid route updates to the dplane when a remote-ES link flaps i.e.
+ * instead of updating all the dependent routes the NHG's contents are updated.
+ * This reduces the amount of datplane updates (nhg updates vs. route updates)
+ * allowing for a faster failover.
+ *
+ * XXX - can the L3 SVI index change without change in vpn->bgp_vrf
+ * association? If yes we need to handle that by updating all the L3 NHGs
+ * in that VRF.
+ */
+/******************************** L3 NHG management *************************/
+static void bgp_evpn_l3nhg_zebra_add_v4_or_v6(struct bgp_evpn_es_vrf *es_vrf,
+ bool v4_nhg)
+{
+ uint32_t nhg_id = v4_nhg ? es_vrf->nhg_id : es_vrf->v6_nhg_id;
+ struct bgp_evpn_es *es = es_vrf->es;
+ struct listnode *node;
+ struct bgp_evpn_es_vtep *es_vtep;
+ struct nexthop nh;
+ struct zapi_nexthop *api_nh;
+ struct zapi_nhg api_nhg = {};
+
+ /* Skip installation of L3-NHG if host routes used */
+ if (!nhg_id)
+ return;
+
+ if (BGP_DEBUG(evpn_mh, EVPN_MH_ES))
+ zlog_debug("es %s vrf %u %s nhg %u to zebra", es->esi_str,
+ es_vrf->bgp_vrf->vrf_id,
+ v4_nhg ? "v4_nhg" : "v6_nhg", nhg_id);
+
+ frrtrace(4, frr_bgp, evpn_mh_nhg_zsend, true, v4_nhg, nhg_id, es_vrf);
+
+ /* only the gateway ip changes for each NH. rest of the params
+ * are constant
+ */
+ memset(&nh, 0, sizeof(nh));
+ nh.vrf_id = es_vrf->bgp_vrf->vrf_id;
+ nh.flags = NEXTHOP_FLAG_ONLINK;
+ nh.ifindex = es_vrf->bgp_vrf->l3vni_svi_ifindex;
+ nh.weight = 1;
+ nh.type =
+ v4_nhg ? NEXTHOP_TYPE_IPV4_IFINDEX : NEXTHOP_TYPE_IPV6_IFINDEX;
+
+ api_nhg.id = nhg_id;
+ for (ALL_LIST_ELEMENTS_RO(es->es_vtep_list, node, es_vtep)) {
+ if (!CHECK_FLAG(es_vtep->flags, BGP_EVPNES_VTEP_ACTIVE))
+ continue;
+
+ /* Don't overrun the zapi buffer. */
+ if (api_nhg.nexthop_num == MULTIPATH_NUM)
+ break;
+
+ /* overwrite the gw */
+ if (v4_nhg)
+ nh.gate.ipv4 = es_vtep->vtep_ip;
+ else
+ ipv4_to_ipv4_mapped_ipv6(&nh.gate.ipv6,
+ es_vtep->vtep_ip);
+
+ /* convert to zapi format */
+ api_nh = &api_nhg.nexthops[api_nhg.nexthop_num];
+ zapi_nexthop_from_nexthop(api_nh, &nh);
+
+ ++api_nhg.nexthop_num;
+ if (BGP_DEBUG(evpn_mh, EVPN_MH_ES))
+ zlog_debug("nhg %u vtep %pI4 l3-svi %d", api_nhg.id,
+ &es_vtep->vtep_ip,
+ es_vrf->bgp_vrf->l3vni_svi_ifindex);
+
+ frrtrace(3, frr_bgp, evpn_mh_nh_zsend, nhg_id, es_vtep, es_vrf);
+ }
+
+ if (!api_nhg.nexthop_num)
+ return;
+
+ zclient_nhg_send(zclient, ZEBRA_NHG_ADD, &api_nhg);
+}
+
+static bool bgp_evpn_l3nhg_zebra_ok(struct bgp_evpn_es_vrf *es_vrf)
+{
+ if (!bgp_mh_info->host_routes_use_l3nhg)
+ return false;
+
+ /* Check socket. */
+ if (!zclient || zclient->sock < 0)
+ return false;
+
+ return true;
+}
+
+static void bgp_evpn_l3nhg_zebra_add(struct bgp_evpn_es_vrf *es_vrf)
+{
+ if (!bgp_evpn_l3nhg_zebra_ok(es_vrf))
+ return;
+
+ bgp_evpn_l3nhg_zebra_add_v4_or_v6(es_vrf, true /*v4_nhg*/);
+ bgp_evpn_l3nhg_zebra_add_v4_or_v6(es_vrf, false /*v4_nhg*/);
+}
+
+static void bgp_evpn_l3nhg_zebra_del_v4_or_v6(struct bgp_evpn_es_vrf *es_vrf,
+ bool v4_nhg)
+{
+ struct zapi_nhg api_nhg = {};
+
+ api_nhg.id = v4_nhg ? es_vrf->nhg_id : es_vrf->v6_nhg_id;
+
+ /* Skip installation of L3-NHG if host routes used */
+ if (!api_nhg.id)
+ return;
+
+ if (BGP_DEBUG(evpn_mh, EVPN_MH_ES))
+ zlog_debug("es %s vrf %u %s nhg %u to zebra",
+ es_vrf->es->esi_str, es_vrf->bgp_vrf->vrf_id,
+ v4_nhg ? "v4_nhg" : "v6_nhg", api_nhg.id);
+
+
+ frrtrace(4, frr_bgp, evpn_mh_nhg_zsend, false, v4_nhg, api_nhg.id,
+ es_vrf);
+
+ zclient_nhg_send(zclient, ZEBRA_NHG_DEL, &api_nhg);
+}
+
+static void bgp_evpn_l3nhg_zebra_del(struct bgp_evpn_es_vrf *es_vrf)
+{
+ if (!bgp_evpn_l3nhg_zebra_ok(es_vrf))
+ return;
+
+ bgp_evpn_l3nhg_zebra_del_v4_or_v6(es_vrf, true /*v4_nhg*/);
+ bgp_evpn_l3nhg_zebra_del_v4_or_v6(es_vrf, false /*v4_nhg*/);
+}
+
+static void bgp_evpn_l3nhg_deactivate(struct bgp_evpn_es_vrf *es_vrf)
+{
+ if (!(es_vrf->flags & BGP_EVPNES_VRF_NHG_ACTIVE))
+ return;
+
+ if (BGP_DEBUG(evpn_mh, EVPN_MH_ES))
+ zlog_debug("es %s vrf %u nhg %u de-activate",
+ es_vrf->es->esi_str, es_vrf->bgp_vrf->vrf_id,
+ es_vrf->nhg_id);
+ bgp_evpn_l3nhg_zebra_del(es_vrf);
+ es_vrf->flags &= ~BGP_EVPNES_VRF_NHG_ACTIVE;
+ /* MAC-IPs can now be installed via the L3NHG */
+ bgp_evpn_es_path_update_on_es_vrf_chg(es_vrf, "l3nhg-deactivate");
+}
+
+static void bgp_evpn_l3nhg_activate(struct bgp_evpn_es_vrf *es_vrf, bool update)
+{
+ if (!bgp_evpn_es_get_active_vtep_cnt(es_vrf->es)) {
+ bgp_evpn_l3nhg_deactivate(es_vrf);
+ return;
+ }
+
+ if (es_vrf->flags & BGP_EVPNES_VRF_NHG_ACTIVE) {
+ if (!update)
+ return;
+ } else {
+ if (BGP_DEBUG(evpn_mh, EVPN_MH_ES))
+ zlog_debug("es %s vrf %u nhg %u activate",
+ es_vrf->es->esi_str, es_vrf->bgp_vrf->vrf_id,
+ es_vrf->nhg_id);
+ es_vrf->flags |= BGP_EVPNES_VRF_NHG_ACTIVE;
+ /* MAC-IPs can now be installed via the L3NHG */
+ bgp_evpn_es_path_update_on_es_vrf_chg(es_vrf, "l3nhg_activate");
+ }
+
+ bgp_evpn_l3nhg_zebra_add(es_vrf);
+}
+
+/* when a VTEP is activated or de-activated against an ES associated
+ * VRFs' NHG needs to be updated
+ */
+static void bgp_evpn_l3nhg_update_on_vtep_chg(struct bgp_evpn_es *es)
+{
+ struct bgp_evpn_es_vrf *es_vrf;
+ struct listnode *es_vrf_node;
+
+ if (BGP_DEBUG(evpn_mh, EVPN_MH_ES))
+ zlog_debug("es %s nhg update on vtep chg", es->esi_str);
+
+ for (ALL_LIST_ELEMENTS_RO(es->es_vrf_list, es_vrf_node, es_vrf))
+ bgp_evpn_l3nhg_activate(es_vrf, true /* update */);
+}
+
+/* compare ES-IDs for the ES-VRF RB tree maintained per-VRF */
+static int bgp_es_vrf_rb_cmp(const struct bgp_evpn_es_vrf *es_vrf1,
+ const struct bgp_evpn_es_vrf *es_vrf2)
+{
+ return memcmp(&es_vrf1->es->esi, &es_vrf2->es->esi, ESI_BYTES);
+}
+RB_GENERATE(bgp_es_vrf_rb_head, bgp_evpn_es_vrf, rb_node, bgp_es_vrf_rb_cmp);
+
+/* Initialize the ES tables maintained per-tenant vrf */
+void bgp_evpn_vrf_es_init(struct bgp *bgp_vrf)
+{
+ /* Initialize the ES-VRF RB tree */
+ RB_INIT(bgp_es_vrf_rb_head, &bgp_vrf->es_vrf_rb_tree);
+}
+
+/* find the ES-VRF in the per-VRF RB tree */
+static struct bgp_evpn_es_vrf *bgp_evpn_es_vrf_find(struct bgp_evpn_es *es,
+ struct bgp *bgp_vrf)
+{
+ struct bgp_evpn_es_vrf es_vrf;
+
+ es_vrf.es = es;
+
+ return RB_FIND(bgp_es_vrf_rb_head, &bgp_vrf->es_vrf_rb_tree, &es_vrf);
+}
+
+/* allocate a new ES-VRF and setup L3NHG for it */
+static struct bgp_evpn_es_vrf *bgp_evpn_es_vrf_create(struct bgp_evpn_es *es,
+ struct bgp *bgp_vrf)
+{
+ struct bgp_evpn_es_vrf *es_vrf;
+
+ es_vrf = XCALLOC(MTYPE_BGP_EVPN_ES_VRF, sizeof(*es_vrf));
+
+ es_vrf->es = es;
+ es_vrf->bgp_vrf = bgp_vrf;
+
+ /* insert into the VRF-ESI rb tree */
+ RB_INSERT(bgp_es_vrf_rb_head, &bgp_vrf->es_vrf_rb_tree, es_vrf);
+
+ /* add to the ES's VRF list */
+ listnode_init(&es_vrf->es_listnode, es_vrf);
+ listnode_add(es->es_vrf_list, &es_vrf->es_listnode);
+
+ /* setup the L3 NHG id for the ES */
+ es_vrf->nhg_id = bgp_l3nhg_id_alloc();
+ es_vrf->v6_nhg_id = bgp_l3nhg_id_alloc();
+
+ if (BGP_DEBUG(evpn_mh, EVPN_MH_ES))
+ zlog_debug("es %s vrf %u nhg %u v6_nhg %d create", es->esi_str,
+ bgp_vrf->vrf_id, es_vrf->nhg_id, es_vrf->v6_nhg_id);
+ bgp_evpn_l3nhg_activate(es_vrf, false /* update */);
+
+ /* update paths in the VRF that may already be associated with
+ * this destination ES
+ */
+ bgp_evpn_es_path_update_on_es_vrf_chg(es_vrf, "es-vrf-create");
+
+ return es_vrf;
+}
+
+/* remove the L3-NHG associated with the ES-VRF and free it */
+static void bgp_evpn_es_vrf_delete(struct bgp_evpn_es_vrf *es_vrf)
+{
+ struct bgp_evpn_es *es = es_vrf->es;
+ struct bgp *bgp_vrf = es_vrf->bgp_vrf;
+
+ if (BGP_DEBUG(evpn_mh, EVPN_MH_ES))
+ zlog_debug("es %s vrf %u nhg %u delete", es->esi_str,
+ bgp_vrf->vrf_id, es_vrf->nhg_id);
+
+ /* Remove the NHG resources */
+ bgp_evpn_l3nhg_deactivate(es_vrf);
+ if (es_vrf->nhg_id)
+ bgp_l3nhg_id_free(es_vrf->nhg_id);
+ es_vrf->nhg_id = 0;
+ if (es_vrf->v6_nhg_id)
+ bgp_l3nhg_id_free(es_vrf->v6_nhg_id);
+ es_vrf->v6_nhg_id = 0;
+
+ /* remove from the ES's VRF list */
+ list_delete_node(es->es_vrf_list, &es_vrf->es_listnode);
+
+ /* remove from the VRF-ESI rb tree */
+ RB_REMOVE(bgp_es_vrf_rb_head, &bgp_vrf->es_vrf_rb_tree, es_vrf);
+
+ /* update paths in the VRF that may already be associated with
+ * this destination ES
+ */
+ bgp_evpn_es_path_update_on_es_vrf_chg(es_vrf, "es-vrf-delete");
+
+ XFREE(MTYPE_BGP_EVPN_ES_VRF, es_vrf);
+}
+
+/* deref and delete if there are no references */
+void bgp_evpn_es_vrf_deref(struct bgp_evpn_es_evi *es_evi)
+{
+ struct bgp_evpn_es_vrf *es_vrf = es_evi->es_vrf;
+
+ if (!es_vrf)
+ return;
+
+ if (BGP_DEBUG(evpn_mh, EVPN_MH_ES))
+ zlog_debug("es-evi %s vni %u vrf %u de-ref",
+ es_evi->es->esi_str, es_evi->vpn->vni,
+ es_vrf->bgp_vrf->vrf_id);
+
+ es_evi->es_vrf = NULL;
+ if (es_vrf->ref_cnt)
+ --es_vrf->ref_cnt;
+
+ if (!es_vrf->ref_cnt)
+ bgp_evpn_es_vrf_delete(es_vrf);
+}
+
+/* find or create and reference */
+void bgp_evpn_es_vrf_ref(struct bgp_evpn_es_evi *es_evi, struct bgp *bgp_vrf)
+{
+ struct bgp_evpn_es *es = es_evi->es;
+ struct bgp_evpn_es_vrf *es_vrf = es_evi->es_vrf;
+ struct bgp *old_bgp_vrf = NULL;
+
+ if (es_vrf)
+ old_bgp_vrf = es_vrf->bgp_vrf;
+
+ if (old_bgp_vrf == bgp_vrf)
+ return;
+
+ /* deref the old ES-VRF */
+ bgp_evpn_es_vrf_deref(es_evi);
+
+ if (!bgp_vrf)
+ return;
+
+ if (BGP_DEBUG(evpn_mh, EVPN_MH_ES))
+ zlog_debug("es-evi %s vni %u vrf %u ref", es_evi->es->esi_str,
+ es_evi->vpn->vni, bgp_vrf->vrf_id);
+
+ /* find-create the new ES-VRF */
+ es_vrf = bgp_evpn_es_vrf_find(es, bgp_vrf);
+ if (!es_vrf)
+ es_vrf = bgp_evpn_es_vrf_create(es, bgp_vrf);
+
+ es_evi->es_vrf = es_vrf;
+ ++es_vrf->ref_cnt;
+}
+
+/* When the L2-VNI is associated with a L3-VNI/VRF update all the
+ * associated ES-EVI entries
+ */
+void bgp_evpn_es_evi_vrf_deref(struct bgpevpn *vpn)
+{
+ struct bgp_evpn_es_evi *es_evi;
+
+ if (BGP_DEBUG(evpn_mh, EVPN_MH_ES))
+ zlog_debug("es-vrf de-ref for vni %u", vpn->vni);
+
+ RB_FOREACH (es_evi, bgp_es_evi_rb_head, &vpn->es_evi_rb_tree)
+ bgp_evpn_es_vrf_deref(es_evi);
+}
+void bgp_evpn_es_evi_vrf_ref(struct bgpevpn *vpn)
+{
+ struct bgp_evpn_es_evi *es_evi;
+
+ if (BGP_DEBUG(evpn_mh, EVPN_MH_ES))
+ zlog_debug("es-vrf ref for vni %u", vpn->vni);
+
+ RB_FOREACH (es_evi, bgp_es_evi_rb_head, &vpn->es_evi_rb_tree)
+ bgp_evpn_es_vrf_ref(es_evi, vpn->bgp_vrf);
+}
+
+/* 1. If ES-VRF is not present install the host route with the exploded/flat
+ * multi-path list.
+ * 2. If ES-VRF is present -
+ * - if L3NHG has not been activated for the ES-VRF (this could be because
+ * all the PEs attached to the VRF are down) do not install the route
+ * in zebra.
+ * - if L3NHG has been activated install the route via that L3NHG
+ */
+void bgp_evpn_es_vrf_use_nhg(struct bgp *bgp_vrf, esi_t *esi, bool *use_l3nhg,
+ bool *is_l3nhg_active,
+ struct bgp_evpn_es_vrf **es_vrf_p)
+{
+ struct bgp_evpn_es *es;
+ struct bgp_evpn_es_vrf *es_vrf;
+
+ if (!bgp_mh_info->host_routes_use_l3nhg)
+ return;
+
+ es = bgp_evpn_es_find(esi);
+ if (!es)
+ return;
+
+ es_vrf = bgp_evpn_es_vrf_find(es, bgp_vrf);
+ if (!es_vrf)
+ return;
+
+ *use_l3nhg = true;
+ if (es_vrf->flags & BGP_EVPNES_VRF_NHG_ACTIVE)
+ *is_l3nhg_active = true;
+ if (es_vrf_p)
+ *es_vrf_p = es_vrf;
+}
+
+/* returns false if legacy-exploded mp needs to be used for route install */
+bool bgp_evpn_path_es_use_nhg(struct bgp *bgp_vrf, struct bgp_path_info *pi,
+ uint32_t *nhg_p)
+{
+ esi_t *esi;
+ struct bgp_evpn_es_vrf *es_vrf = NULL;
+ struct bgp_path_info *parent_pi;
+ struct bgp_node *rn;
+ struct prefix_evpn *evp;
+ struct bgp_path_info *mpinfo;
+ bool use_l3nhg = false;
+ bool is_l3nhg_active = false;
+
+ *nhg_p = 0;
+
+ /* we don't support NHG for routes leaked from another VRF yet */
+ if (pi->extra && pi->extra->bgp_orig)
+ return false;
+
+ parent_pi = get_route_parent_evpn(pi);
+ if (!parent_pi)
+ return false;
+
+ rn = parent_pi->net;
+ if (!rn)
+ return false;
+
+ evp = (struct prefix_evpn *)&rn->p;
+ if (evp->prefix.route_type != BGP_EVPN_MAC_IP_ROUTE)
+ return false;
+
+ /* non-es path, use legacy-exploded multipath */
+ esi = bgp_evpn_attr_get_esi(parent_pi->attr);
+ if (!memcmp(esi, zero_esi, sizeof(*esi)))
+ return false;
+
+ bgp_evpn_es_vrf_use_nhg(bgp_vrf, esi, &use_l3nhg, &is_l3nhg_active,
+ &es_vrf);
+
+ /* L3NHG support is disabled, use legacy-exploded multipath */
+ if (!use_l3nhg)
+ return false;
+
+ /* if the NHG has not been installed we cannot install the route yet,
+ * return a 0-NHG to indicate that
+ */
+ if (!is_l3nhg_active)
+ return true;
+
+ /* this needs to be set the v6NHG if v6route */
+ if (is_evpn_prefix_ipaddr_v6(evp))
+ *nhg_p = es_vrf->v6_nhg_id;
+ else
+ *nhg_p = es_vrf->nhg_id;
+
+ for (mpinfo = bgp_path_info_mpath_next(pi); mpinfo;
+ mpinfo = bgp_path_info_mpath_next(mpinfo)) {
+ /* if any of the paths have a different ESI we can't use
+ * the NHG associated with the ES. fallback to legacy-exploded
+ * multipath
+ */
+ if (memcmp(esi, bgp_evpn_attr_get_esi(mpinfo->attr),
+ sizeof(*esi)))
+ return false;
+ }
+
+ return true;
+}
+
+static void bgp_evpn_es_vrf_show_entry(struct vty *vty,
+ struct bgp_evpn_es_vrf *es_vrf,
+ json_object *json)
+{
+ struct bgp_evpn_es *es = es_vrf->es;
+ struct bgp *bgp_vrf = es_vrf->bgp_vrf;
+
+ if (json) {
+ json_object *json_types;
+
+ json_object_string_add(json, "esi", es->esi_str);
+ json_object_string_add(json, "vrf", bgp_vrf->name);
+
+ if (es_vrf->flags & (BGP_EVPNES_VRF_NHG_ACTIVE)) {
+ json_types = json_object_new_array();
+ if (es_vrf->flags & BGP_EVPNES_VRF_NHG_ACTIVE)
+ json_array_string_add(json_types, "active");
+ json_object_object_add(json, "flags", json_types);
+ }
+
+ json_object_int_add(json, "ipv4NHG", es_vrf->nhg_id);
+ json_object_int_add(json, "ipv6NHG", es_vrf->v6_nhg_id);
+ json_object_int_add(json, "refCount", es_vrf->ref_cnt);
+ } else {
+ char flags_str[4];
+
+ flags_str[0] = '\0';
+ if (es_vrf->flags & BGP_EVPNES_VRF_NHG_ACTIVE)
+ strlcat(flags_str, "A", sizeof(flags_str));
+
+ vty_out(vty, "%-30s %-15s %-5s %-8u %-8u %u\n", es->esi_str,
+ bgp_vrf->name, flags_str, es_vrf->nhg_id,
+ es_vrf->v6_nhg_id, es_vrf->ref_cnt);
+ }
+}
+
+static void bgp_evpn_es_vrf_show_es(struct vty *vty, json_object *json_array,
+ struct bgp_evpn_es *es)
+{
+ json_object *json = NULL;
+ struct listnode *es_vrf_node;
+ struct bgp_evpn_es_vrf *es_vrf;
+
+ for (ALL_LIST_ELEMENTS_RO(es->es_vrf_list, es_vrf_node, es_vrf)) {
+ /* create a separate json object for each ES-VRF */
+ if (json_array)
+ json = json_object_new_object();
+ bgp_evpn_es_vrf_show_entry(vty, es_vrf, json);
+ /* add ES-VRF to the json array */
+ if (json_array)
+ json_object_array_add(json_array, json);
+ }
+}
+
+/* Display all ES VRFs */
+void bgp_evpn_es_vrf_show(struct vty *vty, bool uj, struct bgp_evpn_es *es)
+{
+ json_object *json_array = NULL;
+
+ if (uj) {
+ /* create an array of ESs */
+ json_array = json_object_new_array();
+ } else {
+ vty_out(vty, "ES-VRF Flags: A Active\n");
+ vty_out(vty, "%-30s %-15s %-5s %-8s %-8s %s\n", "ESI", "VRF",
+ "Flags", "IPv4-NHG", "IPv6-NHG", "Ref");
+ }
+
+ if (es) {
+ bgp_evpn_es_vrf_show_es(vty, json_array, es);
+ } else {
+ RB_FOREACH (es, bgp_es_rb_head, &bgp_mh_info->es_rb_tree)
+ bgp_evpn_es_vrf_show_es(vty, json_array, es);
+ }
+
+ /* print the array of json-ESs */
+ if (uj)
+ vty_json(vty, json_array);
+}
+
+/* Display specific ES VRF */
+void bgp_evpn_es_vrf_show_esi(struct vty *vty, esi_t *esi, bool uj)
+{
+ struct bgp_evpn_es *es;
+
+ es = bgp_evpn_es_find(esi);
+ if (es) {
+ bgp_evpn_es_vrf_show(vty, uj, es);
+ } else {
+ if (!uj)
+ vty_out(vty, "ESI not found\n");
+ }
+}
+
+/*****************************************************************************/
+/* Ethernet Segment to EVI association -
+ * 1. The ES-EVI entry is maintained as a RB tree per L2-VNI
+ * (bgpevpn->es_evi_rb_tree).
+ * 2. Each local ES-EVI entry is rxed from zebra and then used by BGP to
+ * advertises an EAD-EVI (Type-1 EVPN) route
+ * 3. The remote ES-EVI is created when a bgp_evpn_es_evi_vtep references
+ * it.
+ */
+
+/* A list of remote VTEPs is maintained for each ES-EVI. This list includes -
+ * 1. VTEPs for which we have imported the EAD-per-ES Type1 route
+ * 2. VTEPs for which we have imported the EAD-per-EVI Type1 route
+ * VTEPs for which both routes have been rxed are activated. Activation
+ * creates a NHG in the parent ES.
+ */
+static int bgp_evpn_es_evi_vtep_cmp(void *p1, void *p2)
+{
+ const struct bgp_evpn_es_evi_vtep *evi_vtep1 = p1;
+ const struct bgp_evpn_es_evi_vtep *evi_vtep2 = p2;
+
+ return evi_vtep1->vtep_ip.s_addr - evi_vtep2->vtep_ip.s_addr;
+}
+
+static struct bgp_evpn_es_evi_vtep *bgp_evpn_es_evi_vtep_new(
+ struct bgp_evpn_es_evi *es_evi, struct in_addr vtep_ip)
+{
+ struct bgp_evpn_es_evi_vtep *evi_vtep;
+
+ evi_vtep = XCALLOC(MTYPE_BGP_EVPN_ES_EVI_VTEP, sizeof(*evi_vtep));
+
+ evi_vtep->es_evi = es_evi;
+ evi_vtep->vtep_ip.s_addr = vtep_ip.s_addr;
+ listnode_init(&evi_vtep->es_evi_listnode, evi_vtep);
+ listnode_add_sort(es_evi->es_evi_vtep_list, &evi_vtep->es_evi_listnode);
+
+ return evi_vtep;
+}
+
+static void bgp_evpn_es_evi_vtep_free(struct bgp_evpn_es_evi_vtep *evi_vtep)
+{
+ struct bgp_evpn_es_evi *es_evi = evi_vtep->es_evi;
+
+ if (evi_vtep->flags & (BGP_EVPN_EVI_VTEP_EAD))
+ /* as long as there is some reference we can't free it */
+ return;
+
+ list_delete_node(es_evi->es_evi_vtep_list, &evi_vtep->es_evi_listnode);
+ XFREE(MTYPE_BGP_EVPN_ES_EVI_VTEP, evi_vtep);
+}
+
+/* check if VTEP is already part of the list */
+static struct bgp_evpn_es_evi_vtep *bgp_evpn_es_evi_vtep_find(
+ struct bgp_evpn_es_evi *es_evi, struct in_addr vtep_ip)
+{
+ struct listnode *node = NULL;
+ struct bgp_evpn_es_evi_vtep *evi_vtep;
+
+ for (ALL_LIST_ELEMENTS_RO(es_evi->es_evi_vtep_list, node, evi_vtep)) {
+ if (evi_vtep->vtep_ip.s_addr == vtep_ip.s_addr)
+ return evi_vtep;
+ }
+ return NULL;
+}
+
+/* A VTEP can be added as "active" attach to an ES if EAD-per-ES and
+ * EAD-per-EVI routes are rxed from it.
+ */
+static void bgp_evpn_es_evi_vtep_re_eval_active(struct bgp *bgp,
+ struct bgp_evpn_es_evi_vtep *evi_vtep)
+{
+ bool old_active;
+ bool new_active;
+ uint32_t ead_activity_flags;
+
+ old_active = CHECK_FLAG(evi_vtep->flags, BGP_EVPN_EVI_VTEP_ACTIVE);
+
+ if (bgp_mh_info->ead_evi_rx)
+ /* Both EAD-per-ES and EAD-per-EVI routes must be rxed from a PE
+ * before it can be activated.
+ */
+ ead_activity_flags = BGP_EVPN_EVI_VTEP_EAD;
+ else
+ /* EAD-per-ES is sufficent to activate the PE */
+ ead_activity_flags = BGP_EVPN_EVI_VTEP_EAD_PER_ES;
+
+ if ((evi_vtep->flags & ead_activity_flags) == ead_activity_flags)
+ SET_FLAG(evi_vtep->flags, BGP_EVPN_EVI_VTEP_ACTIVE);
+ else
+ UNSET_FLAG(evi_vtep->flags, BGP_EVPN_EVI_VTEP_ACTIVE);
+
+ new_active = CHECK_FLAG(evi_vtep->flags, BGP_EVPN_EVI_VTEP_ACTIVE);
+
+ if (old_active == new_active)
+ return;
+
+ if (BGP_DEBUG(evpn_mh, EVPN_MH_ES))
+ zlog_debug("es %s evi %u vtep %pI4 %s",
+ evi_vtep->es_evi->es->esi_str,
+ evi_vtep->es_evi->vpn->vni, &evi_vtep->vtep_ip,
+ new_active ? "active" : "inactive");
+
+ /* add VTEP to parent es */
+ if (new_active)
+ evi_vtep->es_vtep = bgp_evpn_es_vtep_add(
+ bgp, evi_vtep->es_evi->es, evi_vtep->vtep_ip,
+ false /*esr*/, 0, 0);
+ else {
+ if (evi_vtep->es_vtep) {
+ bgp_evpn_es_vtep_do_del(bgp, evi_vtep->es_vtep,
+ false /*esr*/);
+ evi_vtep->es_vtep = NULL;
+ }
+ }
+ /* queue up the parent es for background consistency checks */
+ bgp_evpn_es_cons_checks_pend_add(evi_vtep->es_evi->es);
+}
+
+static void bgp_evpn_es_evi_vtep_add(struct bgp *bgp,
+ struct bgp_evpn_es_evi *es_evi, struct in_addr vtep_ip,
+ bool ead_es)
+{
+ struct bgp_evpn_es_evi_vtep *evi_vtep;
+
+ evi_vtep = bgp_evpn_es_evi_vtep_find(es_evi, vtep_ip);
+
+ if (!evi_vtep)
+ evi_vtep = bgp_evpn_es_evi_vtep_new(es_evi, vtep_ip);
+
+ if (BGP_DEBUG(evpn_mh, EVPN_MH_ES))
+ zlog_debug("add es %s evi %u vtep %pI4 %s",
+ evi_vtep->es_evi->es->esi_str,
+ evi_vtep->es_evi->vpn->vni, &evi_vtep->vtep_ip,
+ ead_es ? "ead_es" : "ead_evi");
+
+ if (ead_es)
+ SET_FLAG(evi_vtep->flags, BGP_EVPN_EVI_VTEP_EAD_PER_ES);
+ else
+ SET_FLAG(evi_vtep->flags, BGP_EVPN_EVI_VTEP_EAD_PER_EVI);
+
+ bgp_evpn_es_evi_vtep_re_eval_active(bgp, evi_vtep);
+}
+
+static void bgp_evpn_es_evi_vtep_del(struct bgp *bgp,
+ struct bgp_evpn_es_evi *es_evi, struct in_addr vtep_ip,
+ bool ead_es)
+{
+ struct bgp_evpn_es_evi_vtep *evi_vtep;
+
+ evi_vtep = bgp_evpn_es_evi_vtep_find(es_evi, vtep_ip);
+ if (!evi_vtep)
+ return;
+
+ if (BGP_DEBUG(evpn_mh, EVPN_MH_ES))
+ zlog_debug("del es %s evi %u vtep %pI4 %s",
+ evi_vtep->es_evi->es->esi_str,
+ evi_vtep->es_evi->vpn->vni, &evi_vtep->vtep_ip,
+ ead_es ? "ead_es" : "ead_evi");
+
+ if (ead_es)
+ UNSET_FLAG(evi_vtep->flags, BGP_EVPN_EVI_VTEP_EAD_PER_ES);
+ else
+ UNSET_FLAG(evi_vtep->flags, BGP_EVPN_EVI_VTEP_EAD_PER_EVI);
+
+ bgp_evpn_es_evi_vtep_re_eval_active(bgp, evi_vtep);
+ bgp_evpn_es_evi_vtep_free(evi_vtep);
+}
+
+/* compare ES-IDs for the ES-EVI RB tree maintained per-VNI */
+static int bgp_es_evi_rb_cmp(const struct bgp_evpn_es_evi *es_evi1,
+ const struct bgp_evpn_es_evi *es_evi2)
+{
+ return memcmp(&es_evi1->es->esi, &es_evi2->es->esi, ESI_BYTES);
+}
+RB_GENERATE(bgp_es_evi_rb_head, bgp_evpn_es_evi, rb_node, bgp_es_evi_rb_cmp);
+
+/* find the ES-EVI in the per-L2-VNI RB tree */
+static struct bgp_evpn_es_evi *bgp_evpn_es_evi_find(struct bgp_evpn_es *es,
+ struct bgpevpn *vpn)
+{
+ struct bgp_evpn_es_evi es_evi;
+
+ es_evi.es = es;
+
+ return RB_FIND(bgp_es_evi_rb_head, &vpn->es_evi_rb_tree, &es_evi);
+}
+
+/* allocate a new ES-EVI and insert it into the per-L2-VNI and per-ES
+ * tables.
+ */
+static struct bgp_evpn_es_evi *bgp_evpn_es_evi_new(struct bgp_evpn_es *es,
+ struct bgpevpn *vpn)
+{
+ struct bgp_evpn_es_evi *es_evi;
+
+ es_evi = XCALLOC(MTYPE_BGP_EVPN_ES_EVI, sizeof(*es_evi));
+
+ es_evi->es = es;
+ es_evi->vpn = vpn;
+
+ /* Initialise the VTEP list */
+ es_evi->es_evi_vtep_list = list_new();
+ listset_app_node_mem(es_evi->es_evi_vtep_list);
+ es_evi->es_evi_vtep_list->cmp = bgp_evpn_es_evi_vtep_cmp;
+
+ /* insert into the VNI-ESI rb tree */
+ RB_INSERT(bgp_es_evi_rb_head, &vpn->es_evi_rb_tree, es_evi);
+
+ /* add to the ES's VNI list */
+ listnode_init(&es_evi->es_listnode, es_evi);
+ listnode_add(es->es_evi_list, &es_evi->es_listnode);
+
+ bgp_evpn_es_vrf_ref(es_evi, vpn->bgp_vrf);
+
+ return es_evi;
+}
+
+/* remove the ES-EVI from the per-L2-VNI and per-ES tables and free
+ * up the memory.
+ */
+static struct bgp_evpn_es_evi *
+bgp_evpn_es_evi_free(struct bgp_evpn_es_evi *es_evi)
+{
+ struct bgp_evpn_es *es = es_evi->es;
+ struct bgpevpn *vpn = es_evi->vpn;
+
+ /* cannot free the element as long as there is a local or remote
+ * reference
+ */
+ if (es_evi->flags & (BGP_EVPNES_EVI_LOCAL | BGP_EVPNES_EVI_REMOTE))
+ return es_evi;
+ bgp_evpn_es_frag_evi_del(es_evi, false);
+ bgp_evpn_es_vrf_deref(es_evi);
+
+ /* remove from the ES's VNI list */
+ list_delete_node(es->es_evi_list, &es_evi->es_listnode);
+
+ /* remove from the VNI-ESI rb tree */
+ RB_REMOVE(bgp_es_evi_rb_head, &vpn->es_evi_rb_tree, es_evi);
+
+ /* free the VTEP list */
+ list_delete(&es_evi->es_evi_vtep_list);
+
+ /* remove from the VNI-ESI rb tree */
+ XFREE(MTYPE_BGP_EVPN_ES_EVI, es_evi);
+
+ return NULL;
+}
+
+/* init local info associated with the ES-EVI */
+static void bgp_evpn_es_evi_local_info_set(struct bgp_evpn_es_evi *es_evi)
+{
+ struct bgpevpn *vpn = es_evi->vpn;
+
+ if (CHECK_FLAG(es_evi->flags, BGP_EVPNES_EVI_LOCAL))
+ return;
+
+ SET_FLAG(es_evi->flags, BGP_EVPNES_EVI_LOCAL);
+ listnode_init(&es_evi->l2vni_listnode, es_evi);
+ listnode_add(vpn->local_es_evi_list, &es_evi->l2vni_listnode);
+ bgp_evpn_es_frag_evi_add(es_evi);
+}
+
+/* clear any local info associated with the ES-EVI */
+static struct bgp_evpn_es_evi *
+bgp_evpn_es_evi_local_info_clear(struct bgp_evpn_es_evi *es_evi)
+{
+ struct bgpevpn *vpn = es_evi->vpn;
+
+ UNSET_FLAG(es_evi->flags, BGP_EVPNES_EVI_LOCAL);
+ list_delete_node(vpn->local_es_evi_list, &es_evi->l2vni_listnode);
+
+ return bgp_evpn_es_evi_free(es_evi);
+}
+
+/* eval remote info associated with the ES */
+static void bgp_evpn_es_evi_remote_info_re_eval(struct bgp_evpn_es_evi *es_evi)
+{
+ struct bgp_evpn_es *es = es_evi->es;
+
+ /* if there are remote VTEPs the ES-EVI is classified as "remote" */
+ if (listcount(es_evi->es_evi_vtep_list)) {
+ if (!CHECK_FLAG(es_evi->flags, BGP_EVPNES_EVI_REMOTE)) {
+ SET_FLAG(es_evi->flags, BGP_EVPNES_EVI_REMOTE);
+ ++es->remote_es_evi_cnt;
+ /* set remote on the parent es */
+ bgp_evpn_es_remote_info_re_eval(es);
+ }
+ } else {
+ if (CHECK_FLAG(es_evi->flags, BGP_EVPNES_EVI_REMOTE)) {
+ UNSET_FLAG(es_evi->flags, BGP_EVPNES_EVI_REMOTE);
+ if (es->remote_es_evi_cnt)
+ --es->remote_es_evi_cnt;
+ bgp_evpn_es_evi_free(es_evi);
+ /* check if "remote" can be cleared from the
+ * parent es.
+ */
+ bgp_evpn_es_remote_info_re_eval(es);
+ }
+ }
+}
+
+static struct bgp_evpn_es_evi *
+bgp_evpn_local_es_evi_do_del(struct bgp_evpn_es_evi *es_evi)
+{
+ struct prefix_evpn p;
+ struct bgp_evpn_es *es = es_evi->es;
+ struct bgp *bgp;
+
+ if (!CHECK_FLAG(es_evi->flags, BGP_EVPNES_EVI_LOCAL))
+ return es_evi;
+
+ if (BGP_DEBUG(evpn_mh, EVPN_MH_ES))
+ zlog_debug("del local es %s evi %u",
+ es_evi->es->esi_str,
+ es_evi->vpn->vni);
+
+ bgp = bgp_get_evpn();
+
+ /* remove the es_evi from the es_frag before sending the update */
+ bgp_evpn_es_frag_evi_del(es_evi, true);
+ if (bgp) {
+ /* update EAD-ES with new list of VNIs */
+ if (bgp_evpn_local_es_is_active(es))
+ bgp_evpn_ead_es_route_update(bgp, es);
+
+ /* withdraw and delete EAD-EVI */
+ if (CHECK_FLAG(es->flags, BGP_EVPNES_ADV_EVI)) {
+ build_evpn_type1_prefix(&p, BGP_EVPN_AD_EVI_ETH_TAG,
+ &es->esi, es->originator_ip);
+ if (bgp_evpn_ead_evi_route_delete(bgp, es, es_evi->vpn,
+ &p))
+ flog_err(EC_BGP_EVPN_ROUTE_DELETE,
+ "%u: EAD-EVI route deletion failure for ESI %s VNI %u",
+ bgp->vrf_id, es->esi_str,
+ es_evi->vpn->vni);
+ }
+ }
+
+ return bgp_evpn_es_evi_local_info_clear(es_evi);
+}
+
+int bgp_evpn_local_es_evi_del(struct bgp *bgp, esi_t *esi, vni_t vni)
+{
+ struct bgpevpn *vpn;
+ struct bgp_evpn_es *es;
+ struct bgp_evpn_es_evi *es_evi;
+ char buf[ESI_STR_LEN];
+
+ es = bgp_evpn_es_find(esi);
+ if (!es) {
+ flog_err(
+ EC_BGP_ES_CREATE,
+ "%u: Failed to deref VNI %d from ESI %s; ES not present",
+ bgp->vrf_id, vni,
+ esi_to_str(esi, buf, sizeof(buf)));
+ return -1;
+ }
+
+ vpn = bgp_evpn_lookup_vni(bgp, vni);
+ if (!vpn) {
+ flog_err(
+ EC_BGP_ES_CREATE,
+ "%u: Failed to deref VNI %d from ESI %s; VNI not present",
+ bgp->vrf_id, vni, es->esi_str);
+ return -1;
+ }
+
+ es_evi = bgp_evpn_es_evi_find(es, vpn);
+ if (!es_evi) {
+ flog_err(
+ EC_BGP_ES_CREATE,
+ "%u: Failed to deref VNI %d from ESI %s; ES-VNI not present",
+ bgp->vrf_id, vni, es->esi_str);
+ return -1;
+ }
+
+ bgp_evpn_local_es_evi_do_del(es_evi);
+ return 0;
+}
+
+/* Create ES-EVI and advertise the corresponding EAD routes */
+int bgp_evpn_local_es_evi_add(struct bgp *bgp, esi_t *esi, vni_t vni)
+{
+ struct bgpevpn *vpn;
+ struct prefix_evpn p;
+ struct bgp_evpn_es *es;
+ struct bgp_evpn_es_evi *es_evi;
+ char buf[ESI_STR_LEN];
+
+ es = bgp_evpn_es_find(esi);
+ if (!es) {
+ flog_err(
+ EC_BGP_ES_CREATE,
+ "%u: Failed to associate VNI %d with ESI %s; ES not present",
+ bgp->vrf_id, vni,
+ esi_to_str(esi, buf, sizeof(buf)));
+ return -1;
+ }
+
+ vpn = bgp_evpn_lookup_vni(bgp, vni);
+ if (!vpn) {
+ flog_err(
+ EC_BGP_ES_CREATE,
+ "%u: Failed to associate VNI %d with ESI %s; VNI not present",
+ bgp->vrf_id, vni, es->esi_str);
+ return -1;
+ }
+
+ if (BGP_DEBUG(evpn_mh, EVPN_MH_ES))
+ zlog_debug("add local es %s evi %u",
+ es->esi_str, vni);
+
+ es_evi = bgp_evpn_es_evi_find(es, vpn);
+
+ if (es_evi) {
+ if (CHECK_FLAG(es_evi->flags, BGP_EVPNES_EVI_LOCAL))
+ /* dup */
+ return 0;
+ } else
+ es_evi = bgp_evpn_es_evi_new(es, vpn);
+
+ bgp_evpn_es_evi_local_info_set(es_evi);
+
+ /* generate an EAD-EVI for this new VNI */
+ if (CHECK_FLAG(es->flags, BGP_EVPNES_ADV_EVI)) {
+ build_evpn_type1_prefix(&p, BGP_EVPN_AD_EVI_ETH_TAG, &es->esi,
+ es->originator_ip);
+ bgp_evpn_ead_evi_route_update(bgp, es, vpn, &p);
+ }
+
+ /* update EAD-ES */
+ if (bgp_evpn_local_es_is_active(es))
+ bgp_evpn_ead_es_route_update(bgp, es);
+
+ return 0;
+}
+
+/* Add remote ES-EVI entry. This is actually the remote VTEP add and the
+ * ES-EVI is implicity created on first VTEP's reference.
+ */
+int bgp_evpn_remote_es_evi_add(struct bgp *bgp, struct bgpevpn *vpn,
+ const struct prefix_evpn *p)
+{
+ char buf[ESI_STR_LEN];
+ struct bgp_evpn_es *es;
+ struct bgp_evpn_es_evi *es_evi;
+ bool ead_es;
+ const esi_t *esi = &p->prefix.ead_addr.esi;
+
+ if (!vpn)
+ /* local EAD-ES need not be sent back to zebra */
+ return 0;
+
+ if (BGP_DEBUG(evpn_mh, EVPN_MH_ES))
+ zlog_debug("add remote %s es %s evi %u vtep %pI4",
+ p->prefix.ead_addr.eth_tag ? "ead-es" : "ead-evi",
+ esi_to_str(esi, buf, sizeof(buf)), vpn->vni,
+ &p->prefix.ead_addr.ip.ipaddr_v4);
+
+ es = bgp_evpn_es_find(esi);
+ if (!es)
+ es = bgp_evpn_es_new(bgp, esi);
+
+ es_evi = bgp_evpn_es_evi_find(es, vpn);
+ if (!es_evi)
+ es_evi = bgp_evpn_es_evi_new(es, vpn);
+
+ ead_es = !!p->prefix.ead_addr.eth_tag;
+ bgp_evpn_es_evi_vtep_add(bgp, es_evi, p->prefix.ead_addr.ip.ipaddr_v4,
+ ead_es);
+
+ bgp_evpn_es_evi_remote_info_re_eval(es_evi);
+ return 0;
+}
+
+/* A remote VTEP has withdrawn. The es-evi-vtep will be deleted and the
+ * parent es-evi freed up implicitly in last VTEP's deref.
+ */
+int bgp_evpn_remote_es_evi_del(struct bgp *bgp, struct bgpevpn *vpn,
+ const struct prefix_evpn *p)
+{
+ char buf[ESI_STR_LEN];
+ struct bgp_evpn_es *es;
+ struct bgp_evpn_es_evi *es_evi;
+ bool ead_es;
+
+ if (!vpn)
+ /* local EAD-ES need not be sent back to zebra */
+ return 0;
+
+ if (BGP_DEBUG(evpn_mh, EVPN_MH_ES))
+ zlog_debug(
+ "del remote %s es %s evi %u vtep %pI4",
+ p->prefix.ead_addr.eth_tag ? "ead-es" : "ead-evi",
+ esi_to_str(&p->prefix.ead_addr.esi, buf, sizeof(buf)),
+ vpn->vni, &p->prefix.ead_addr.ip.ipaddr_v4);
+
+ es = bgp_evpn_es_find(&p->prefix.ead_addr.esi);
+ if (!es) {
+ if (BGP_DEBUG(evpn_mh, EVPN_MH_ES))
+ zlog_debug(
+ "del remote %s es %s evi %u vtep %pI4, NO es",
+ p->prefix.ead_addr.eth_tag ? "ead-es"
+ : "ead-evi",
+ esi_to_str(&p->prefix.ead_addr.esi, buf,
+ sizeof(buf)),
+ vpn->vni, &p->prefix.ead_addr.ip.ipaddr_v4);
+ return 0;
+ }
+ es_evi = bgp_evpn_es_evi_find(es, vpn);
+ if (!es_evi) {
+ if (BGP_DEBUG(evpn_mh, EVPN_MH_ES))
+ zlog_debug(
+ "del remote %s es %s evi %u vtep %pI4, NO es-evi",
+ p->prefix.ead_addr.eth_tag ? "ead-es"
+ : "ead-evi",
+ esi_to_str(&p->prefix.ead_addr.esi, buf,
+ sizeof(buf)),
+ vpn->vni,
+ &p->prefix.ead_addr.ip.ipaddr_v4);
+ return 0;
+ }
+
+ ead_es = !!p->prefix.ead_addr.eth_tag;
+ bgp_evpn_es_evi_vtep_del(bgp, es_evi, p->prefix.ead_addr.ip.ipaddr_v4,
+ ead_es);
+ bgp_evpn_es_evi_remote_info_re_eval(es_evi);
+ return 0;
+}
+
+/* If a VNI is being deleted we need to force del all remote VTEPs */
+static void bgp_evpn_remote_es_evi_flush(struct bgp_evpn_es_evi *es_evi)
+{
+ struct listnode *node = NULL;
+ struct listnode *nnode = NULL;
+ struct bgp_evpn_es_evi_vtep *evi_vtep;
+ struct bgp *bgp;
+
+ bgp = bgp_get_evpn();
+ if (!bgp)
+ return;
+
+ /* delete all VTEPs */
+ for (ALL_LIST_ELEMENTS(es_evi->es_evi_vtep_list, node, nnode,
+ evi_vtep)) {
+ evi_vtep->flags &= ~(BGP_EVPN_EVI_VTEP_EAD_PER_ES
+ | BGP_EVPN_EVI_VTEP_EAD_PER_EVI);
+ bgp_evpn_es_evi_vtep_re_eval_active(bgp, evi_vtep);
+ bgp_evpn_es_evi_vtep_free(evi_vtep);
+ }
+ /* delete the EVI */
+ bgp_evpn_es_evi_remote_info_re_eval(es_evi);
+}
+
+/* Initialize the ES tables maintained per-L2_VNI */
+void bgp_evpn_vni_es_init(struct bgpevpn *vpn)
+{
+ /* Initialize the ES-EVI RB tree */
+ RB_INIT(bgp_es_evi_rb_head, &vpn->es_evi_rb_tree);
+
+ /* Initialize the local list maintained for quick walks by type */
+ vpn->local_es_evi_list = list_new();
+ listset_app_node_mem(vpn->local_es_evi_list);
+}
+
+/* Cleanup the ES info maintained per-L2_VNI */
+void bgp_evpn_vni_es_cleanup(struct bgpevpn *vpn)
+{
+ struct bgp_evpn_es_evi *es_evi;
+ struct bgp_evpn_es_evi *es_evi_next;
+
+ RB_FOREACH_SAFE(es_evi, bgp_es_evi_rb_head,
+ &vpn->es_evi_rb_tree, es_evi_next) {
+ es_evi = bgp_evpn_local_es_evi_do_del(es_evi);
+ if (es_evi)
+ bgp_evpn_remote_es_evi_flush(es_evi);
+ }
+
+ list_delete(&vpn->local_es_evi_list);
+}
+
+static char *bgp_evpn_es_evi_vteps_str(char *vtep_str,
+ struct bgp_evpn_es_evi *es_evi,
+ uint8_t vtep_str_size)
+{
+ char vtep_flag_str[BGP_EVPN_FLAG_STR_SZ];
+ struct listnode *node;
+ struct bgp_evpn_es_evi_vtep *evi_vtep;
+ bool first = true;
+ char ip_buf[INET6_ADDRSTRLEN];
+
+ vtep_str[0] = '\0';
+ for (ALL_LIST_ELEMENTS_RO(es_evi->es_evi_vtep_list, node, evi_vtep)) {
+ vtep_flag_str[0] = '\0';
+ if (evi_vtep->flags & BGP_EVPN_EVI_VTEP_EAD_PER_ES)
+ strlcat(vtep_flag_str, "E", sizeof(vtep_flag_str));
+ if (evi_vtep->flags & BGP_EVPN_EVI_VTEP_EAD_PER_EVI)
+ strlcat(vtep_flag_str, "V", sizeof(vtep_flag_str));
+
+ if (!strnlen(vtep_flag_str, sizeof(vtep_flag_str)))
+ strlcpy(vtep_flag_str, "-", sizeof(vtep_flag_str));
+ if (first)
+ first = false;
+ else
+ strlcat(vtep_str, ",", vtep_str_size);
+ strlcat(vtep_str,
+ inet_ntop(AF_INET, &evi_vtep->vtep_ip, ip_buf,
+ sizeof(ip_buf)),
+ vtep_str_size);
+ strlcat(vtep_str, "(", vtep_str_size);
+ strlcat(vtep_str, vtep_flag_str, vtep_str_size);
+ strlcat(vtep_str, ")", vtep_str_size);
+ }
+
+ return vtep_str;
+}
+
+static void bgp_evpn_es_evi_json_vtep_fill(json_object *json_vteps,
+ struct bgp_evpn_es_evi_vtep *evi_vtep)
+{
+ json_object *json_vtep_entry;
+ json_object *json_flags;
+
+ json_vtep_entry = json_object_new_object();
+
+ json_object_string_addf(json_vtep_entry, "vtep_ip", "%pI4",
+ &evi_vtep->vtep_ip);
+ if (evi_vtep->flags & (BGP_EVPN_EVI_VTEP_EAD_PER_ES |
+ BGP_EVPN_EVI_VTEP_EAD_PER_EVI)) {
+ json_flags = json_object_new_array();
+ if (evi_vtep->flags & BGP_EVPN_EVI_VTEP_EAD_PER_ES)
+ json_array_string_add(json_flags, "ead-per-es");
+ if (evi_vtep->flags & BGP_EVPN_EVI_VTEP_EAD_PER_EVI)
+ json_array_string_add(json_flags, "ead-per-evi");
+ json_object_object_add(json_vtep_entry,
+ "flags", json_flags);
+ }
+
+ json_object_array_add(json_vteps,
+ json_vtep_entry);
+}
+
+static void bgp_evpn_es_evi_show_entry(struct vty *vty,
+ struct bgp_evpn_es_evi *es_evi, json_object *json)
+{
+ struct listnode *node;
+ struct bgp_evpn_es_evi_vtep *evi_vtep;
+
+ if (json) {
+ json_object *json_vteps;
+ json_object *json_types;
+
+ json_object_string_add(json, "esi", es_evi->es->esi_str);
+ json_object_int_add(json, "vni", es_evi->vpn->vni);
+
+ if (es_evi->flags & (BGP_EVPNES_EVI_LOCAL |
+ BGP_EVPNES_EVI_REMOTE)) {
+ json_types = json_object_new_array();
+ if (es_evi->flags & BGP_EVPNES_EVI_LOCAL)
+ json_array_string_add(json_types, "local");
+ if (es_evi->flags & BGP_EVPNES_EVI_REMOTE)
+ json_array_string_add(json_types, "remote");
+ json_object_object_add(json, "type", json_types);
+ }
+
+ if (listcount(es_evi->es_evi_vtep_list)) {
+ json_vteps = json_object_new_array();
+ for (ALL_LIST_ELEMENTS_RO(es_evi->es_evi_vtep_list,
+ node, evi_vtep)) {
+ bgp_evpn_es_evi_json_vtep_fill(json_vteps,
+ evi_vtep);
+ }
+ json_object_object_add(json, "vteps", json_vteps);
+ }
+ } else {
+ char type_str[4];
+ char vtep_str[ES_VTEP_LIST_STR_SZ + BGP_EVPN_VTEPS_FLAG_STR_SZ];
+
+ type_str[0] = '\0';
+ if (es_evi->flags & BGP_EVPNES_EVI_LOCAL)
+ strlcat(type_str, "L", sizeof(type_str));
+ if (es_evi->flags & BGP_EVPNES_EVI_REMOTE)
+ strlcat(type_str, "R", sizeof(type_str));
+ if (es_evi->flags & BGP_EVPNES_EVI_INCONS_VTEP_LIST)
+ strlcat(type_str, "I", sizeof(type_str));
+
+ bgp_evpn_es_evi_vteps_str(vtep_str, es_evi, sizeof(vtep_str));
+
+ vty_out(vty, "%-8d %-30s %-5s %s\n",
+ es_evi->vpn->vni, es_evi->es->esi_str,
+ type_str, vtep_str);
+ }
+}
+
+static void bgp_evpn_es_evi_show_entry_detail(struct vty *vty,
+ struct bgp_evpn_es_evi *es_evi, json_object *json)
+{
+ if (json) {
+ json_object *json_flags;
+
+ /* Add the "brief" info first */
+ bgp_evpn_es_evi_show_entry(vty, es_evi, json);
+ if (es_evi->es_frag)
+ json_object_string_addf(json, "esFragmentRd", "%pRD",
+ &es_evi->es_frag->prd);
+ if (es_evi->flags & BGP_EVPNES_EVI_INCONS_VTEP_LIST) {
+ json_flags = json_object_new_array();
+ json_array_string_add(json_flags, "es-vtep-mismatch");
+ json_object_object_add(json, "flags", json_flags);
+ }
+ } else {
+ char vtep_str[ES_VTEP_LIST_STR_SZ + BGP_EVPN_VTEPS_FLAG_STR_SZ];
+ char type_str[4];
+
+ type_str[0] = '\0';
+ if (es_evi->flags & BGP_EVPNES_EVI_LOCAL)
+ strlcat(type_str, "L", sizeof(type_str));
+ if (es_evi->flags & BGP_EVPNES_EVI_REMOTE)
+ strlcat(type_str, "R", sizeof(type_str));
+
+ bgp_evpn_es_evi_vteps_str(vtep_str, es_evi, sizeof(vtep_str));
+ if (!strlen(vtep_str))
+ strlcpy(vtep_str, "-", sizeof(type_str));
+
+ vty_out(vty, "VNI: %d ESI: %s\n",
+ es_evi->vpn->vni, es_evi->es->esi_str);
+ vty_out(vty, " Type: %s\n", type_str);
+ if (es_evi->es_frag)
+ vty_out(vty, " ES fragment RD: %pRD\n",
+ &es_evi->es_frag->prd);
+ vty_out(vty, " Inconsistencies: %s\n",
+ (es_evi->flags & BGP_EVPNES_EVI_INCONS_VTEP_LIST) ?
+ "es-vtep-mismatch":"-");
+ vty_out(vty, " VTEPs: %s\n", vtep_str);
+ vty_out(vty, "\n");
+ }
+}
+
+static void bgp_evpn_es_evi_show_one_vni(struct bgpevpn *vpn, struct vty *vty,
+ json_object *json_array, bool detail)
+{
+ struct bgp_evpn_es_evi *es_evi;
+ json_object *json = NULL;
+
+ RB_FOREACH(es_evi, bgp_es_evi_rb_head, &vpn->es_evi_rb_tree) {
+ if (json_array)
+ /* create a separate json object for each ES */
+ json = json_object_new_object();
+ if (detail)
+ bgp_evpn_es_evi_show_entry_detail(vty, es_evi, json);
+ else
+ bgp_evpn_es_evi_show_entry(vty, es_evi, json);
+ /* add ES to the json array */
+ if (json_array)
+ json_object_array_add(json_array, json);
+ }
+}
+
+struct es_evi_show_ctx {
+ struct vty *vty;
+ json_object *json;
+ int detail;
+};
+
+static void bgp_evpn_es_evi_show_one_vni_hash_cb(struct hash_bucket *bucket,
+ void *ctxt)
+{
+ struct bgpevpn *vpn = (struct bgpevpn *)bucket->data;
+ struct es_evi_show_ctx *wctx = (struct es_evi_show_ctx *)ctxt;
+
+ bgp_evpn_es_evi_show_one_vni(vpn, wctx->vty, wctx->json, wctx->detail);
+}
+
+/* Display all ES EVIs */
+void bgp_evpn_es_evi_show(struct vty *vty, bool uj, bool detail)
+{
+ json_object *json_array = NULL;
+ struct es_evi_show_ctx wctx;
+ struct bgp *bgp;
+
+ if (uj) {
+ /* create an array of ES-EVIs */
+ json_array = json_object_new_array();
+ }
+
+ wctx.vty = vty;
+ wctx.json = json_array;
+ wctx.detail = detail;
+
+ bgp = bgp_get_evpn();
+
+ if (!json_array && !detail) {
+ vty_out(vty, "Flags: L local, R remote, I inconsistent\n");
+ vty_out(vty, "VTEP-Flags: E EAD-per-ES, V EAD-per-EVI\n");
+ vty_out(vty, "%-8s %-30s %-5s %s\n",
+ "VNI", "ESI", "Flags", "VTEPs");
+ }
+
+ if (bgp)
+ hash_iterate(bgp->vnihash,
+ (void (*)(struct hash_bucket *,
+ void *))bgp_evpn_es_evi_show_one_vni_hash_cb,
+ &wctx);
+ if (uj)
+ vty_json(vty, json_array);
+}
+
+/* Display specific ES EVI */
+void bgp_evpn_es_evi_show_vni(struct vty *vty, vni_t vni,
+ bool uj, bool detail)
+{
+ struct bgpevpn *vpn = NULL;
+ json_object *json_array = NULL;
+ struct bgp *bgp;
+
+ if (uj) {
+ /* create an array of ES-EVIs */
+ json_array = json_object_new_array();
+ }
+
+ bgp = bgp_get_evpn();
+ if (bgp)
+ vpn = bgp_evpn_lookup_vni(bgp, vni);
+
+ if (vpn) {
+ if (!json_array && !detail) {
+ vty_out(vty, "Flags: L local, R remote, I inconsistent\n");
+ vty_out(vty, "VTEP-Flags: E EAD-per-ES, V EAD-per-EVI\n");
+ vty_out(vty, "%-8s %-30s %-5s %s\n",
+ "VNI", "ESI", "Flags", "VTEPs");
+ }
+
+ bgp_evpn_es_evi_show_one_vni(vpn, vty, json_array, detail);
+ } else {
+ if (!uj)
+ vty_out(vty, "VNI not found\n");
+ }
+
+ if (uj)
+ vty_json(vty, json_array);
+}
+
+/*****************************************************************************
+ * Ethernet Segment Consistency checks
+ * Consistency checking is done to detect misconfig or mis-cabling. When
+ * an inconsistency is detected it is simply logged (and displayed via
+ * show commands) at this point. A more drastic action can be executed (based
+ * on user config) in the future.
+ */
+static void bgp_evpn_es_cons_checks_timer_start(void)
+{
+ if (!bgp_mh_info->consistency_checking || bgp_mh_info->t_cons_check)
+ return;
+
+ if (BGP_DEBUG(evpn_mh, EVPN_MH_ES))
+ zlog_debug("periodic consistency checking started");
+
+ thread_add_timer(bm->master, bgp_evpn_run_consistency_checks, NULL,
+ BGP_EVPN_CONS_CHECK_INTERVAL,
+ &bgp_mh_info->t_cons_check);
+}
+
+/* queue up the es for background consistency checks */
+static void bgp_evpn_es_cons_checks_pend_add(struct bgp_evpn_es *es)
+{
+ if (!bgp_mh_info->consistency_checking)
+ /* consistency checking is not enabled */
+ return;
+
+ if (CHECK_FLAG(es->flags, BGP_EVPNES_CONS_CHECK_PEND))
+ /* already queued for consistency checking */
+ return;
+
+ /* start the periodic timer for consistency checks if it is not
+ * already running */
+ bgp_evpn_es_cons_checks_timer_start();
+
+ SET_FLAG(es->flags, BGP_EVPNES_CONS_CHECK_PEND);
+ listnode_init(&es->pend_es_listnode, es);
+ listnode_add_after(bgp_mh_info->pend_es_list,
+ listtail_unchecked(bgp_mh_info->pend_es_list),
+ &es->pend_es_listnode);
+}
+
+/* pull the ES from the consistency check list */
+static void bgp_evpn_es_cons_checks_pend_del(struct bgp_evpn_es *es)
+{
+ if (!CHECK_FLAG(es->flags, BGP_EVPNES_CONS_CHECK_PEND))
+ return;
+
+ UNSET_FLAG(es->flags, BGP_EVPNES_CONS_CHECK_PEND);
+ list_delete_node(bgp_mh_info->pend_es_list,
+ &es->pend_es_listnode);
+}
+
+/* Number of active VTEPs associated with the ES-per-EVI */
+static uint32_t bgp_evpn_es_evi_get_active_vtep_cnt(
+ struct bgp_evpn_es_evi *es_evi)
+{
+ struct bgp_evpn_es_evi_vtep *evi_vtep;
+ struct listnode *node;
+ uint32_t vtep_cnt = 0;
+
+ for (ALL_LIST_ELEMENTS_RO(es_evi->es_evi_vtep_list, node, evi_vtep)) {
+ if (CHECK_FLAG(evi_vtep->flags, BGP_EVPN_EVI_VTEP_ACTIVE))
+ ++vtep_cnt;
+ }
+
+ return vtep_cnt;
+}
+
+/* Number of active VTEPs associated with the ES */
+static uint32_t bgp_evpn_es_get_active_vtep_cnt(struct bgp_evpn_es *es)
+{
+ struct listnode *node;
+ uint32_t vtep_cnt = 0;
+ struct bgp_evpn_es_vtep *es_vtep;
+
+ for (ALL_LIST_ELEMENTS_RO(es->es_vtep_list, node, es_vtep)) {
+ if (CHECK_FLAG(es_vtep->flags, BGP_EVPNES_VTEP_ACTIVE))
+ ++vtep_cnt;
+ }
+
+ return vtep_cnt;
+}
+
+static struct bgp_evpn_es_vtep *bgp_evpn_es_get_next_active_vtep(
+ struct bgp_evpn_es *es, struct bgp_evpn_es_vtep *es_vtep)
+{
+ struct listnode *node;
+ struct bgp_evpn_es_vtep *next_es_vtep;
+
+ if (es_vtep)
+ node = listnextnode_unchecked(&es_vtep->es_listnode);
+ else
+ node = listhead(es->es_vtep_list);
+
+ for (; node; node = listnextnode_unchecked(node)) {
+ next_es_vtep = listgetdata(node);
+ if (CHECK_FLAG(next_es_vtep->flags, BGP_EVPNES_VTEP_ACTIVE))
+ return next_es_vtep;
+ }
+
+ return NULL;
+}
+
+static struct bgp_evpn_es_evi_vtep *bgp_evpn_es_evi_get_next_active_vtep(
+ struct bgp_evpn_es_evi *es_evi,
+ struct bgp_evpn_es_evi_vtep *evi_vtep)
+{
+ struct listnode *node;
+ struct bgp_evpn_es_evi_vtep *next_evi_vtep;
+
+ if (evi_vtep)
+ node = listnextnode_unchecked(&evi_vtep->es_evi_listnode);
+ else
+ node = listhead(es_evi->es_evi_vtep_list);
+
+ for (; node; node = listnextnode_unchecked(node)) {
+ next_evi_vtep = listgetdata(node);
+ if (CHECK_FLAG(next_evi_vtep->flags, BGP_EVPN_EVI_VTEP_ACTIVE))
+ return next_evi_vtep;
+ }
+
+ return NULL;
+}
+
+static void bgp_evpn_es_evi_set_inconsistent(struct bgp_evpn_es_evi *es_evi)
+{
+ if (!CHECK_FLAG(es_evi->flags, BGP_EVPNES_EVI_INCONS_VTEP_LIST)) {
+ if (BGP_DEBUG(evpn_mh, EVPN_MH_ES))
+ zlog_debug("inconsistency detected - es %s evi %u vtep list mismatch",
+ es_evi->es->esi_str,
+ es_evi->vpn->vni);
+ SET_FLAG(es_evi->flags, BGP_EVPNES_EVI_INCONS_VTEP_LIST);
+
+ /* update parent ES with the incosistency setting */
+ if (!es_evi->es->incons_evi_vtep_cnt &&
+ BGP_DEBUG(evpn_mh, EVPN_MH_ES))
+ zlog_debug("inconsistency detected - es %s vtep list mismatch",
+ es_evi->es->esi_str);
+ ++es_evi->es->incons_evi_vtep_cnt;
+ SET_FLAG(es_evi->es->inconsistencies,
+ BGP_EVPNES_INCONS_VTEP_LIST);
+ }
+}
+
+static uint32_t bgp_evpn_es_run_consistency_checks(struct bgp_evpn_es *es)
+{
+ int proc_cnt = 0;
+ int es_active_vtep_cnt;
+ int evi_active_vtep_cnt;
+ struct bgp_evpn_es_evi *es_evi;
+ struct listnode *evi_node;
+ struct bgp_evpn_es_vtep *es_vtep;
+ struct bgp_evpn_es_evi_vtep *evi_vtep;
+
+ /* reset the inconsistencies and re-evaluate */
+ es->incons_evi_vtep_cnt = 0;
+ es->inconsistencies = 0;
+
+ es_active_vtep_cnt = bgp_evpn_es_get_active_vtep_cnt(es);
+ for (ALL_LIST_ELEMENTS_RO(es->es_evi_list,
+ evi_node, es_evi)) {
+ ++proc_cnt;
+
+ /* reset the inconsistencies on the EVI and re-evaluate*/
+ UNSET_FLAG(es_evi->flags, BGP_EVPNES_EVI_INCONS_VTEP_LIST);
+
+ evi_active_vtep_cnt =
+ bgp_evpn_es_evi_get_active_vtep_cnt(es_evi);
+ if (es_active_vtep_cnt != evi_active_vtep_cnt) {
+ bgp_evpn_es_evi_set_inconsistent(es_evi);
+ continue;
+ }
+
+ if (!es_active_vtep_cnt)
+ continue;
+
+ es_vtep = NULL;
+ evi_vtep = NULL;
+ while ((es_vtep = bgp_evpn_es_get_next_active_vtep(
+ es, es_vtep))) {
+ evi_vtep = bgp_evpn_es_evi_get_next_active_vtep(es_evi,
+ evi_vtep);
+ if (!evi_vtep) {
+ bgp_evpn_es_evi_set_inconsistent(es_evi);
+ break;
+ }
+ if (es_vtep->vtep_ip.s_addr !=
+ evi_vtep->vtep_ip.s_addr) {
+ /* inconsistency detected; set it and move
+ * to the next evi
+ */
+ bgp_evpn_es_evi_set_inconsistent(es_evi);
+ break;
+ }
+ }
+ }
+
+ return proc_cnt;
+}
+
+static void bgp_evpn_run_consistency_checks(struct thread *t)
+{
+ int proc_cnt = 0;
+ struct listnode *node;
+ struct listnode *nextnode;
+ struct bgp_evpn_es *es;
+
+ for (ALL_LIST_ELEMENTS(bgp_mh_info->pend_es_list,
+ node, nextnode, es)) {
+ ++proc_cnt;
+ /* run consistency checks on the ES and remove it from the
+ * pending list
+ */
+ proc_cnt += bgp_evpn_es_run_consistency_checks(es);
+ bgp_evpn_es_cons_checks_pend_del(es);
+ if (proc_cnt > 500)
+ break;
+ }
+
+ /* restart the timer */
+ thread_add_timer(bm->master, bgp_evpn_run_consistency_checks, NULL,
+ BGP_EVPN_CONS_CHECK_INTERVAL,
+ &bgp_mh_info->t_cons_check);
+}
+
+/*****************************************************************************
+ * EVPN-Nexthop and RMAC management: nexthops associated with Type-2 routes
+ * that have an ES as destination are consolidated by BGP into a per-VRF
+ * nh->rmac mapping which is sent to zebra. Zebra installs the nexthop
+ * as a remote neigh/fdb entry with a dummy (type-1) prefix referencing it.
+ *
+ * This handling is needed because Type-2 routes with ES as dest use NHG
+ * that is setup using EAD routes (i.e. such NHGs do not include the
+ * RMAC info).
+ ****************************************************************************/
+static void bgp_evpn_nh_zebra_update_send(struct bgp_evpn_nh *nh, bool add)
+{
+ struct stream *s;
+ struct bgp *bgp_vrf = nh->bgp_vrf;
+
+ /* Check socket. */
+ if (!zclient || zclient->sock < 0)
+ return;
+
+ /* Don't try to register if Zebra doesn't know of this instance. */
+ if (!IS_BGP_INST_KNOWN_TO_ZEBRA(bgp_vrf)) {
+ if (BGP_DEBUG(zebra, ZEBRA))
+ zlog_debug("No zebra instance, not %s remote nh %s",
+ add ? "adding" : "deleting", nh->nh_str);
+ return;
+ }
+
+ s = zclient->obuf;
+ stream_reset(s);
+
+ zclient_create_header(
+ s, add ? ZEBRA_EVPN_REMOTE_NH_ADD : ZEBRA_EVPN_REMOTE_NH_DEL,
+ bgp_vrf->vrf_id);
+ stream_putl(s, bgp_vrf->vrf_id);
+ stream_put(s, &nh->ip, sizeof(nh->ip));
+ if (add)
+ stream_put(s, &nh->rmac, sizeof(nh->rmac));
+
+ stream_putw_at(s, 0, stream_get_endp(s));
+
+ if (BGP_DEBUG(evpn_mh, EVPN_MH_ES)) {
+ if (add)
+ zlog_debug("evpn vrf %s nh %s rmac %pEA add to zebra",
+ nh->bgp_vrf->name, nh->nh_str, &nh->rmac);
+ else if (BGP_DEBUG(evpn_mh, EVPN_MH_ES))
+ zlog_debug("evpn vrf %s nh %s del to zebra",
+ nh->bgp_vrf->name, nh->nh_str);
+ }
+
+ frrtrace(2, frr_bgp, evpn_mh_nh_rmac_zsend, add, nh);
+
+ zclient_send_message(zclient);
+}
+
+static void bgp_evpn_nh_zebra_update(struct bgp_evpn_nh *nh, bool add)
+{
+ if (add && !is_zero_mac(&nh->rmac)) {
+ nh->flags |= BGP_EVPN_NH_READY_FOR_ZEBRA;
+ bgp_evpn_nh_zebra_update_send(nh, true);
+ } else {
+ if (!(nh->flags & BGP_EVPN_NH_READY_FOR_ZEBRA))
+ return;
+ nh->flags &= ~BGP_EVPN_NH_READY_FOR_ZEBRA;
+ bgp_evpn_nh_zebra_update_send(nh, false);
+ }
+}
+
+static void *bgp_evpn_nh_alloc(void *p)
+{
+ struct bgp_evpn_nh *tmp_n = p;
+ struct bgp_evpn_nh *n;
+
+ n = XCALLOC(MTYPE_BGP_EVPN_NH, sizeof(struct bgp_evpn_nh));
+ *n = *tmp_n;
+
+ return ((void *)n);
+}
+
+static struct bgp_evpn_nh *bgp_evpn_nh_find(struct bgp *bgp_vrf,
+ struct ipaddr *ip)
+{
+ struct bgp_evpn_nh tmp;
+ struct bgp_evpn_nh *n;
+
+ memset(&tmp, 0, sizeof(tmp));
+ memcpy(&tmp.ip, ip, sizeof(struct ipaddr));
+ n = hash_lookup(bgp_vrf->evpn_nh_table, &tmp);
+
+ return n;
+}
+
+/* Add nexthop entry - implicitly created on first path reference */
+static struct bgp_evpn_nh *bgp_evpn_nh_add(struct bgp *bgp_vrf,
+ struct ipaddr *ip,
+ struct bgp_path_info *pi)
+{
+ struct bgp_evpn_nh tmp_n;
+ struct bgp_evpn_nh *n = NULL;
+
+ memset(&tmp_n, 0, sizeof(tmp_n));
+ memcpy(&tmp_n.ip, ip, sizeof(struct ipaddr));
+ n = hash_get(bgp_vrf->evpn_nh_table, &tmp_n, bgp_evpn_nh_alloc);
+ ipaddr2str(ip, n->nh_str, sizeof(n->nh_str));
+ n->bgp_vrf = bgp_vrf;
+
+ n->pi_list = list_new();
+ listset_app_node_mem(n->pi_list);
+
+ /* Setup ref_pi when the nh is created */
+ if (CHECK_FLAG(pi->flags, BGP_PATH_VALID) && pi->attr) {
+ n->ref_pi = pi;
+ memcpy(&n->rmac, &pi->attr->rmac, ETH_ALEN);
+ }
+
+ if (BGP_DEBUG(evpn_mh, EVPN_MH_ES))
+ zlog_debug("evpn vrf %s nh %s rmac %pEA add", n->bgp_vrf->name,
+ n->nh_str, &n->rmac);
+ bgp_evpn_nh_zebra_update(n, true);
+ return n;
+}
+
+/* Delete nexthop entry if there are no paths referencing it */
+static void bgp_evpn_nh_del(struct bgp_evpn_nh *n)
+{
+ struct bgp_evpn_nh *tmp_n;
+ struct bgp *bgp_vrf = n->bgp_vrf;
+
+ if (listcount(n->pi_list))
+ return;
+
+ if (BGP_DEBUG(evpn_mh, EVPN_MH_ES))
+ zlog_debug("evpn vrf %s nh %s del to zebra", bgp_vrf->name,
+ n->nh_str);
+
+ bgp_evpn_nh_zebra_update(n, false);
+ list_delete(&n->pi_list);
+ tmp_n = hash_release(bgp_vrf->evpn_nh_table, n);
+ XFREE(MTYPE_BGP_EVPN_NH, tmp_n);
+}
+
+static void hash_evpn_nh_free(struct bgp_evpn_nh *ben)
+{
+ XFREE(MTYPE_BGP_EVPN_NH, ben);
+}
+
+static unsigned int bgp_evpn_nh_hash_keymake(const void *p)
+{
+ const struct bgp_evpn_nh *n = p;
+ const struct ipaddr *ip = &n->ip;
+
+ if (IS_IPADDR_V4(ip))
+ return jhash_1word(ip->ipaddr_v4.s_addr, 0);
+
+ return jhash2(ip->ipaddr_v6.s6_addr32,
+ array_size(ip->ipaddr_v6.s6_addr32), 0);
+}
+
+static bool bgp_evpn_nh_cmp(const void *p1, const void *p2)
+{
+ const struct bgp_evpn_nh *n1 = p1;
+ const struct bgp_evpn_nh *n2 = p2;
+
+ if (n1 == NULL && n2 == NULL)
+ return true;
+
+ if (n1 == NULL || n2 == NULL)
+ return false;
+
+ return (ipaddr_cmp(&n1->ip, &n2->ip) == 0);
+}
+
+void bgp_evpn_nh_init(struct bgp *bgp_vrf)
+{
+ if (BGP_DEBUG(evpn_mh, EVPN_MH_ES))
+ zlog_debug("evpn vrf %s nh init", bgp_vrf->name);
+ bgp_vrf->evpn_nh_table = hash_create(
+ bgp_evpn_nh_hash_keymake, bgp_evpn_nh_cmp, "BGP EVPN NH table");
+}
+
+static void bgp_evpn_nh_flush_entry(struct bgp_evpn_nh *nh)
+{
+ struct listnode *node;
+ struct listnode *nnode;
+ struct bgp_path_evpn_nh_info *nh_info;
+
+ if (BGP_DEBUG(evpn_mh, EVPN_MH_ES))
+ zlog_debug("evpn vrf %s nh %s flush", nh->bgp_vrf->name,
+ nh->nh_str);
+
+ /* force flush paths */
+ for (ALL_LIST_ELEMENTS(nh->pi_list, node, nnode, nh_info))
+ bgp_evpn_path_nh_del(nh->bgp_vrf, nh_info->pi);
+}
+
+static void bgp_evpn_nh_flush_cb(struct hash_bucket *bucket, void *ctxt)
+{
+ struct bgp_evpn_nh *nh = (struct bgp_evpn_nh *)bucket->data;
+
+ bgp_evpn_nh_flush_entry(nh);
+}
+
+void bgp_evpn_nh_finish(struct bgp *bgp_vrf)
+{
+ if (BGP_DEBUG(evpn_mh, EVPN_MH_ES))
+ zlog_debug("evpn vrf %s nh finish", bgp_vrf->name);
+ hash_iterate(
+ bgp_vrf->evpn_nh_table,
+ (void (*)(struct hash_bucket *, void *))bgp_evpn_nh_flush_cb,
+ NULL);
+ hash_clean(bgp_vrf->evpn_nh_table, (void (*)(void *))hash_evpn_nh_free);
+ hash_free(bgp_vrf->evpn_nh_table);
+ bgp_vrf->evpn_nh_table = NULL;
+}
+
+static void bgp_evpn_nh_update_ref_pi(struct bgp_evpn_nh *nh)
+{
+ struct listnode *node;
+ struct bgp_path_info *pi;
+ struct bgp_path_evpn_nh_info *nh_info;
+
+ if (nh->ref_pi)
+ return;
+
+ for (ALL_LIST_ELEMENTS_RO(nh->pi_list, node, nh_info)) {
+ pi = nh_info->pi;
+ if (!CHECK_FLAG(pi->flags, BGP_PATH_VALID) || !pi->attr)
+ continue;
+
+ if (BGP_DEBUG(evpn_mh, EVPN_MH_ES))
+ zlog_debug("evpn vrf %s nh %s ref_pi update",
+ nh->bgp_vrf->name, nh->nh_str);
+ nh->ref_pi = pi;
+ /* If we have a new pi copy rmac from it and update
+ * zebra if the new rmac is different
+ */
+ if (memcmp(&nh->rmac, &nh->ref_pi->attr->rmac, ETH_ALEN)) {
+ memcpy(&nh->rmac, &nh->ref_pi->attr->rmac, ETH_ALEN);
+ bgp_evpn_nh_zebra_update(nh, true);
+ }
+ break;
+ }
+}
+
+static void bgp_evpn_nh_clear_ref_pi(struct bgp_evpn_nh *nh,
+ struct bgp_path_info *pi)
+{
+ if (nh->ref_pi != pi)
+ return;
+
+ if (BGP_DEBUG(evpn_mh, EVPN_MH_ES))
+ zlog_debug("evpn vrf %s nh %s ref_pi clear", nh->bgp_vrf->name,
+ nh->nh_str);
+ nh->ref_pi = NULL;
+ /* try to find another ref_pi */
+ bgp_evpn_nh_update_ref_pi(nh);
+ /* couldn't find one - clear the old rmac and notify zebra */
+ if (!nh->ref_pi) {
+ memset(&nh->rmac, 0, ETH_ALEN);
+ bgp_evpn_nh_zebra_update(nh, true);
+ }
+}
+
+static void bgp_evpn_path_nh_info_free(struct bgp_path_evpn_nh_info *nh_info)
+{
+ bgp_evpn_path_nh_unlink(nh_info);
+ XFREE(MTYPE_BGP_EVPN_PATH_NH_INFO, nh_info);
+}
+
+static struct bgp_path_evpn_nh_info *
+bgp_evpn_path_nh_info_new(struct bgp_path_info *pi)
+{
+ struct bgp_path_info_extra *e;
+ struct bgp_path_mh_info *mh_info;
+ struct bgp_path_evpn_nh_info *nh_info;
+
+ e = bgp_path_info_extra_get(pi);
+
+ /* If mh_info doesn't exist allocate it */
+ mh_info = e->mh_info;
+ if (!mh_info)
+ e->mh_info = mh_info = XCALLOC(MTYPE_BGP_EVPN_PATH_MH_INFO,
+ sizeof(struct bgp_path_mh_info));
+
+ /* If nh_info doesn't exist allocate it */
+ nh_info = mh_info->nh_info;
+ if (!nh_info) {
+ mh_info->nh_info = nh_info =
+ XCALLOC(MTYPE_BGP_EVPN_PATH_NH_INFO,
+ sizeof(struct bgp_path_evpn_nh_info));
+ nh_info->pi = pi;
+ }
+
+ return nh_info;
+}
+
+static void bgp_evpn_path_nh_unlink(struct bgp_path_evpn_nh_info *nh_info)
+{
+ struct bgp_evpn_nh *nh = nh_info->nh;
+ struct bgp_path_info *pi;
+ char prefix_buf[PREFIX_STRLEN];
+
+ if (!nh)
+ return;
+
+ pi = nh_info->pi;
+ if (BGP_DEBUG(evpn_mh, EVPN_MH_RT))
+ zlog_debug("path %s unlinked from nh %s %s",
+ pi->net ? prefix2str(&pi->net->p, prefix_buf,
+ sizeof(prefix_buf))
+ : "",
+ nh->bgp_vrf->name, nh->nh_str);
+
+ list_delete_node(nh->pi_list, &nh_info->nh_listnode);
+
+ nh_info->nh = NULL;
+
+ /* check if the ref_pi need to be updated */
+ bgp_evpn_nh_clear_ref_pi(nh, pi);
+
+ /* if there are no other references against the nh it
+ * needs to be freed
+ */
+ bgp_evpn_nh_del(nh);
+
+ /* Note we don't free the path nh_info on unlink; it will be freed up
+ * along with the path.
+ */
+}
+
+static void bgp_evpn_path_nh_link(struct bgp *bgp_vrf, struct bgp_path_info *pi)
+{
+ struct bgp_path_evpn_nh_info *nh_info;
+ struct bgp_evpn_nh *nh;
+ struct ipaddr ip;
+
+ /* EVPN nexthop setup in bgp has been turned off */
+ if (!bgp_mh_info->bgp_evpn_nh_setup)
+ return;
+
+ if (!bgp_vrf->evpn_nh_table) {
+ if (BGP_DEBUG(evpn_mh, EVPN_MH_RT))
+ zlog_debug("path %pFX linked to vrf %s failed",
+ &pi->net->p, bgp_vrf->name);
+ return;
+ }
+
+ nh_info = (pi->extra && pi->extra->mh_info)
+ ? pi->extra->mh_info->nh_info
+ : NULL;
+
+ /* if NHG is not being used for this path we don't need to manage the
+ * nexthops in bgp (they are managed by zebra instead)
+ */
+ if (!(pi->attr->es_flags & ATTR_ES_L3_NHG_USE)) {
+ if (nh_info)
+ bgp_evpn_path_nh_unlink(nh_info);
+ return;
+ }
+
+ /* setup nh_info against the path if it doesn't aleady exist */
+ if (!nh_info)
+ nh_info = bgp_evpn_path_nh_info_new(pi);
+
+ /* find-create nh */
+ memset(&ip, 0, sizeof(ip));
+ if (pi->net->p.family == AF_INET6) {
+ SET_IPADDR_V6(&ip);
+ memcpy(&ip.ipaddr_v6, &pi->attr->mp_nexthop_global,
+ sizeof(ip.ipaddr_v6));
+ } else {
+ SET_IPADDR_V4(&ip);
+ memcpy(&ip.ipaddr_v4, &pi->attr->nexthop, sizeof(ip.ipaddr_v4));
+ }
+
+ nh = bgp_evpn_nh_find(bgp_vrf, &ip);
+ if (!nh)
+ nh = bgp_evpn_nh_add(bgp_vrf, &ip, pi);
+
+ /* dup check */
+ if (nh_info->nh == nh) {
+ /* Check if any of the paths are now valid */
+ bgp_evpn_nh_update_ref_pi(nh);
+ return;
+ }
+
+ /* unlink old nh if any */
+ bgp_evpn_path_nh_unlink(nh_info);
+
+ if (BGP_DEBUG(evpn_mh, EVPN_MH_RT))
+ zlog_debug("path %pFX linked to nh %s %s", &pi->net->p,
+ nh->bgp_vrf->name, nh->nh_str);
+
+ /* link mac-ip path to the new nh */
+ nh_info->nh = nh;
+ listnode_init(&nh_info->nh_listnode, nh_info);
+ listnode_add(nh->pi_list, &nh_info->nh_listnode);
+ /* If a new valid path got linked to the nh see if can get the rmac
+ * from it
+ */
+ bgp_evpn_nh_update_ref_pi(nh);
+ if (BGP_DEBUG(evpn_mh, EVPN_MH_ES)) {
+ if (!nh->ref_pi)
+ zlog_debug(
+ "path %pFX linked to nh %s %s with no valid pi",
+ &pi->net->p, nh->bgp_vrf->name, nh->nh_str);
+ }
+}
+
+void bgp_evpn_path_nh_del(struct bgp *bgp_vrf, struct bgp_path_info *pi)
+{
+ struct bgp_path_evpn_nh_info *nh_info;
+
+ nh_info = (pi->extra && pi->extra->mh_info)
+ ? pi->extra->mh_info->nh_info
+ : NULL;
+
+ if (!nh_info)
+ return;
+
+ bgp_evpn_path_nh_unlink(nh_info);
+}
+
+void bgp_evpn_path_nh_add(struct bgp *bgp_vrf, struct bgp_path_info *pi)
+{
+ bgp_evpn_path_nh_link(bgp_vrf, pi);
+}
+
+static void bgp_evpn_nh_show_entry(struct bgp_evpn_nh *nh, struct vty *vty,
+ json_object *json_array)
+{
+ json_object *json = NULL;
+ char mac_buf[ETHER_ADDR_STRLEN];
+ char prefix_buf[PREFIX_STRLEN];
+
+ if (json_array)
+ /* create a separate json object for each ES */
+ json = json_object_new_object();
+
+ prefix_mac2str(&nh->rmac, mac_buf, sizeof(mac_buf));
+ if (nh->ref_pi && nh->ref_pi->net)
+ prefix2str(&nh->ref_pi->net->p, prefix_buf, sizeof(prefix_buf));
+ else
+ prefix_buf[0] = '\0';
+ if (json) {
+ json_object_string_add(json, "vrf", nh->bgp_vrf->name);
+ json_object_string_add(json, "ip", nh->nh_str);
+ json_object_string_add(json, "rmac", mac_buf);
+ json_object_string_add(json, "basePath", prefix_buf);
+ json_object_int_add(json, "pathCount", listcount(nh->pi_list));
+ } else {
+ vty_out(vty, "%-15s %-15s %-17s %-10d %s\n", nh->bgp_vrf->name,
+ nh->nh_str, mac_buf, listcount(nh->pi_list),
+ prefix_buf);
+ }
+
+ /* add ES to the json array */
+ if (json_array)
+ json_object_array_add(json_array, json);
+}
+
+struct nh_show_ctx {
+ struct vty *vty;
+ json_object *json;
+};
+
+static void bgp_evpn_nh_show_hash_cb(struct hash_bucket *bucket, void *ctxt)
+{
+ struct bgp_evpn_nh *nh = (struct bgp_evpn_nh *)bucket->data;
+ struct nh_show_ctx *wctx = (struct nh_show_ctx *)ctxt;
+
+ bgp_evpn_nh_show_entry(nh, wctx->vty, wctx->json);
+}
+
+/* Display all evpn nexthops */
+void bgp_evpn_nh_show(struct vty *vty, bool uj)
+{
+ json_object *json_array = NULL;
+ struct bgp *bgp_vrf;
+ struct listnode *node;
+ struct nh_show_ctx wctx;
+
+ if (uj) {
+ /* create an array of nexthops */
+ json_array = json_object_new_array();
+ } else {
+ vty_out(vty, "%-15s %-15s %-17s %-10s %s\n", "VRF", "IP",
+ "RMAC", "#Paths", "Base Path");
+ }
+
+ wctx.vty = vty;
+ wctx.json = json_array;
+
+ /* walk through all vrfs */
+ for (ALL_LIST_ELEMENTS_RO(bm->bgp, node, bgp_vrf)) {
+ hash_iterate(bgp_vrf->evpn_nh_table,
+ (void (*)(struct hash_bucket *,
+ void *))bgp_evpn_nh_show_hash_cb,
+ &wctx);
+ }
+
+ /* print the array of json-ESs */
+ if (uj)
+ vty_json(vty, json_array);
+}
+
+/*****************************************************************************/
+void bgp_evpn_mh_init(void)
+{
+ bm->mh_info = XCALLOC(MTYPE_BGP_EVPN_MH_INFO, sizeof(*bm->mh_info));
+
+ /* setup ES tables */
+ RB_INIT(bgp_es_rb_head, &bgp_mh_info->es_rb_tree);
+ /* local ES list */
+ bgp_mh_info->local_es_list = list_new();
+ listset_app_node_mem(bgp_mh_info->local_es_list);
+ /* list of ESs with pending processing */
+ bgp_mh_info->pend_es_list = list_new();
+ listset_app_node_mem(bgp_mh_info->pend_es_list);
+
+ bgp_mh_info->ead_evi_rx = BGP_EVPN_MH_EAD_EVI_RX_DEF;
+ bgp_mh_info->ead_evi_tx = BGP_EVPN_MH_EAD_EVI_TX_DEF;
+ bgp_mh_info->ead_es_export_rtl = list_new();
+ bgp_mh_info->ead_es_export_rtl->cmp =
+ (int (*)(void *, void *))bgp_evpn_route_target_cmp;
+ bgp_mh_info->ead_es_export_rtl->del = bgp_evpn_xxport_delete_ecomm;
+
+ /* config knobs - XXX add cli to control it */
+ bgp_mh_info->ead_evi_adv_for_down_links = true;
+ bgp_mh_info->consistency_checking = true;
+ bgp_mh_info->host_routes_use_l3nhg = BGP_EVPN_MH_USE_ES_L3NHG_DEF;
+ bgp_mh_info->suppress_l3_ecomm_on_inactive_es = true;
+ bgp_mh_info->bgp_evpn_nh_setup = true;
+ bgp_mh_info->evi_per_es_frag = BGP_EVPN_MAX_EVI_PER_ES_FRAG;
+
+ memset(&zero_esi_buf, 0, sizeof(esi_t));
+}
+
+void bgp_evpn_mh_finish(void)
+{
+ struct bgp_evpn_es *es;
+ struct bgp_evpn_es *es_next;
+
+ if (BGP_DEBUG(evpn_mh, EVPN_MH_RT))
+ zlog_debug("evpn mh finish");
+
+ RB_FOREACH_SAFE (es, bgp_es_rb_head, &bgp_mh_info->es_rb_tree,
+ es_next) {
+ bgp_evpn_es_local_info_clear(es, true);
+ }
+ if (bgp_mh_info->t_cons_check)
+ THREAD_OFF(bgp_mh_info->t_cons_check);
+ list_delete(&bgp_mh_info->local_es_list);
+ list_delete(&bgp_mh_info->pend_es_list);
+ list_delete(&bgp_mh_info->ead_es_export_rtl);
+
+ XFREE(MTYPE_BGP_EVPN_MH_INFO, bgp_mh_info);
+}
+
+/* This function is called when disable-ead-evi-rx knob flaps */
+void bgp_evpn_switch_ead_evi_rx(void)
+{
+ struct bgp *bgp;
+ struct bgp_evpn_es *es;
+ struct bgp_evpn_es_evi *es_evi;
+ struct listnode *evi_node = NULL;
+ struct listnode *evi_next = NULL;
+ struct bgp_evpn_es_evi_vtep *vtep;
+ struct listnode *vtep_node = NULL;
+ struct listnode *vtep_next = NULL;
+
+ bgp = bgp_get_evpn();
+ if (!bgp)
+ return;
+
+ /*
+ * Process all the remote es_evi_vteps and reevaluate if the es_evi_vtep
+ * is active.
+ */
+ RB_FOREACH(es, bgp_es_rb_head, &bgp_mh_info->es_rb_tree) {
+ if (!CHECK_FLAG(es->flags, BGP_EVPNES_REMOTE))
+ continue;
+
+ for (ALL_LIST_ELEMENTS(es->es_evi_list, evi_node, evi_next,
+ es_evi)) {
+ if (!CHECK_FLAG(es_evi->flags, BGP_EVPNES_EVI_REMOTE))
+ continue;
+
+ for (ALL_LIST_ELEMENTS(es_evi->es_evi_vtep_list,
+ vtep_node, vtep_next, vtep))
+ bgp_evpn_es_evi_vtep_re_eval_active(bgp, vtep);
+ }
+ }
+}