summaryrefslogtreecommitdiffstats
path: root/bgpd/rfapi/rfapi_import.c
diff options
context:
space:
mode:
authorDaniel Baumann <daniel.baumann@progress-linux.org>2024-04-09 13:16:35 +0000
committerDaniel Baumann <daniel.baumann@progress-linux.org>2024-04-09 13:16:35 +0000
commite2bbf175a2184bd76f6c54ccf8456babeb1a46fc (patch)
treef0b76550d6e6f500ada964a3a4ee933a45e5a6f1 /bgpd/rfapi/rfapi_import.c
parentInitial commit. (diff)
downloadfrr-e2bbf175a2184bd76f6c54ccf8456babeb1a46fc.tar.xz
frr-e2bbf175a2184bd76f6c54ccf8456babeb1a46fc.zip
Adding upstream version 9.1.upstream/9.1
Signed-off-by: Daniel Baumann <daniel.baumann@progress-linux.org>
Diffstat (limited to 'bgpd/rfapi/rfapi_import.c')
-rw-r--r--bgpd/rfapi/rfapi_import.c4818
1 files changed, 4818 insertions, 0 deletions
diff --git a/bgpd/rfapi/rfapi_import.c b/bgpd/rfapi/rfapi_import.c
new file mode 100644
index 0000000..a93e186
--- /dev/null
+++ b/bgpd/rfapi/rfapi_import.c
@@ -0,0 +1,4818 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
+/*
+ * Copyright 2009-2016, LabN Consulting, L.L.C.
+ */
+
+/*
+ * File: rfapi_import.c
+ * Purpose: Handle import of routes from BGP to RFAPI
+ */
+
+#include "lib/zebra.h"
+#include "lib/prefix.h"
+#include "lib/agg_table.h"
+#include "lib/vty.h"
+#include "lib/memory.h"
+#include "lib/log.h"
+#include "lib/skiplist.h"
+#include "frrevent.h"
+#include "lib/stream.h"
+#include "lib/lib_errors.h"
+
+#include "bgpd/bgpd.h"
+#include "bgpd/bgp_ecommunity.h"
+#include "bgpd/bgp_attr.h"
+#include "bgpd/bgp_route.h"
+#include "bgpd/bgp_mplsvpn.h" /* prefix_rd2str() */
+#include "bgpd/bgp_vnc_types.h"
+#include "bgpd/bgp_rd.h"
+
+#include "bgpd/rfapi/rfapi.h"
+#include "bgpd/rfapi/bgp_rfapi_cfg.h"
+#include "bgpd/rfapi/rfapi_backend.h"
+#include "bgpd/rfapi/rfapi_import.h"
+#include "bgpd/rfapi/rfapi_private.h"
+#include "bgpd/rfapi/rfapi_monitor.h"
+#include "bgpd/rfapi/rfapi_nve_addr.h"
+#include "bgpd/rfapi/rfapi_vty.h"
+#include "bgpd/rfapi/vnc_export_bgp.h"
+#include "bgpd/rfapi/vnc_export_bgp_p.h"
+#include "bgpd/rfapi/vnc_zebra.h"
+#include "bgpd/rfapi/vnc_import_bgp.h"
+#include "bgpd/rfapi/vnc_import_bgp_p.h"
+#include "bgpd/rfapi/rfapi_rib.h"
+#include "bgpd/rfapi/rfapi_encap_tlv.h"
+#include "bgpd/rfapi/vnc_debug.h"
+
+#ifdef HAVE_GLIBC_BACKTRACE
+/* for backtrace and friends */
+#include <execinfo.h>
+#endif /* HAVE_GLIBC_BACKTRACE */
+
+#undef DEBUG_MONITOR_MOVE_SHORTER /* debug switches: code under "#ifdef DEBUG_..." in this file stays compiled out while these remain undefined */
+#undef DEBUG_RETURNED_NHL
+#undef DEBUG_ROUTE_COUNTERS /* gates the VNC_ITRCCK route-count check */
+#undef DEBUG_ENCAP_MONITOR
+#undef DEBUG_L2_EXTRA
+#undef DEBUG_IT_NODES /* gates rfapiDumpNode() */
+#undef DEBUG_BI_SEARCH
+
+/*
+ * Allocated for each withdraw timer instance; freed when the timer
+ * expires or is canceled
+ *
+ * An instance travels as the argument of a bpi's vnc.import.timer event:
+ * rfapiBgpInfoChainFree() fetches it with EVENT_ARG() and releases it
+ * with MTYPE_RFAPI_WITHDRAW before turning the timer off.
+ */
+struct rfapi_withdraw {
+ struct rfapi_import_table *import_table; /* presumably the table that owns node -- confirm at the allocation site */
+ struct agg_node *node; /* presumably the import-table node the timer applies to */
+ struct bgp_path_info *info; /* presumably the path awaiting removal */
+ safi_t safi; /* used only for bulk operations */
+ /*
+ * For import table node reference count checking (i.e., debugging).
+ * Normally when a timer expires, lockoffset should be 0. However, if
+ * the timer expiration function is called directly (e.g.,
+ * rfapiExpireVpnNow), the node could be locked by a preceding
+ * agg_route_top() or agg_route_next() in a loop, so we need to pass
+ * this value in.
+ */
+ int lockoffset;
+};
+
+/*
+ * DEBUG FUNCTION
+ * It's evil and fiendish. It's compiler-dependent.
+ * ? Might need LDFLAGS -rdynamic to produce all function names
+ *
+ * Logs the current call stack, one frame per debug line. Does nothing
+ * when HAVE_GLIBC_BACKTRACE is not defined, when no frames could be
+ * captured, or (after logging the failure) when the frames could not
+ * be symbolized.
+ */
+void rfapiDebugBacktrace(void)
+{
+#ifdef HAVE_GLIBC_BACKTRACE
+#define RFAPI_DEBUG_BACKTRACE_NENTRIES 200
+	void *buf[RFAPI_DEBUG_BACKTRACE_NENTRIES];
+	char **syms;
+	size_t i;
+	int nframes;
+
+	/* backtrace() reports the number of frames stored as an int */
+	nframes = backtrace(buf, RFAPI_DEBUG_BACKTRACE_NENTRIES);
+	if (nframes <= 0)
+		return;
+
+	/* backtrace_symbols() allocates its result and returns NULL on error */
+	syms = backtrace_symbols(buf, nframes);
+	if (!syms) {
+		vnc_zlog_debug_verbose("%s: backtrace_symbols() failed",
+				       __func__);
+		return;
+	}
+
+	for (i = 0; i < (size_t)nframes; ++i)
+		vnc_zlog_debug_verbose("backtrace[%2zu]: %s", i, syms[i]);
+
+	/* only the array itself is freed; the strings live inside it */
+	free(syms);
+#endif
+}
+
+/*
+ * DEBUG FUNCTION
+ * Recount the routes stored in the VPN tables of every import table
+ * (all AFIs) and compare the totals with the counters kept in the
+ * import table itself. Any disagreement is logged and then aborts
+ * through assert().
+ */
+void rfapiCheckRouteCount(void)
+{
+	struct bgp *bgp_default = bgp_get_default();
+	struct rfapi *rfapi_inst;
+	struct rfapi_import_table *table;
+
+	assert(bgp_default);
+
+	rfapi_inst = bgp_default->rfapi;
+	assert(rfapi_inst);
+
+	for (table = rfapi_inst->imports; table; table = table->next) {
+		afi_t afi;
+
+		for (afi = AFI_IP; afi < AFI_MAX; ++afi) {
+			struct agg_node *node;
+			int holddown_seen = 0;
+			int imported_seen = 0;
+			int remote_seen = 0;
+
+			node = agg_route_top(table->imported_vpn[afi]);
+			for (; node; node = agg_route_next(node)) {
+				struct bgp_path_info *path;
+				struct bgp_path_info *following;
+
+				for (path = node->info; path;
+				     path = following) {
+					following = path->next;
+
+					/* flagged removed: holddown tally */
+					if (CHECK_FLAG(path->flags,
+						       BGP_PATH_REMOVED)) {
+						++holddown_seen;
+						continue;
+					}
+
+					/* RFAPI_LOCAL_BI paths: not tallied */
+					if (RFAPI_LOCAL_BI(path))
+						continue;
+
+					if (RFAPI_DIRECT_IMPORT_BI(path))
+						++imported_seen;
+					else
+						++remote_seen;
+				}
+			}
+
+			if (table->holddown_count[afi] != holddown_seen) {
+				vnc_zlog_debug_verbose(
+					"%s: it->holddown_count %d != holddown_count %d",
+					__func__, table->holddown_count[afi],
+					holddown_seen);
+				assert(0);
+			}
+			if (table->remote_count[afi] != remote_seen) {
+				vnc_zlog_debug_verbose(
+					"%s: it->remote_count %d != remote_count %d",
+					__func__, table->remote_count[afi],
+					remote_seen);
+				assert(0);
+			}
+			if (table->imported_count[afi] != imported_seen) {
+				vnc_zlog_debug_verbose(
+					"%s: it->imported_count %d != imported_count %d",
+					__func__, table->imported_count[afi],
+					imported_seen);
+				assert(0);
+			}
+		}
+	}
+}
+
+#ifdef DEBUG_ROUTE_COUNTERS /* VNC_ITRCCK: call rfapiCheckRouteCount() only when DEBUG_ROUTE_COUNTERS is defined */
+#define VNC_ITRCCK do {rfapiCheckRouteCount();} while (0)
+#else /* otherwise VNC_ITRCCK expands to nothing */
+#define VNC_ITRCCK
+#endif
+
+/*
+ * Validate reference count for a node in an import table
+ *
+ * The expected lock count is the number of bgp_path_info entries on the
+ * node, plus one for the rfapi_it_extra (when rn->aggregate is set),
+ * plus one per monitor, plus (for SAFI_MPLS_VPN) one for the exterior
+ * monitor skiplist and one per entry in it, plus lockoffset.
+ *
+ * Normally lockoffset is 0 for nodes in quiescent state. However,
+ * agg_unlock_node will delete the node if it is called when
+ * node->lock == 1, and we have to validate the refcount before
+ * the node is deleted. In this case, we specify lockoffset 1.
+ *
+ * A mismatch is logged and then aborts through assert().
+ */
+void rfapiCheckRefcount(struct agg_node *rn, safi_t safi, int lockoffset)
+{
+	unsigned int count_bpi = 0;
+	unsigned int count_monitor = 0;
+	struct bgp_path_info *bpi = rn->info;
+	struct rfapi_monitor_encap *hme;
+	struct rfapi_monitor_vpn *hmv;
+	void *cursor;
+	int rc;
+
+	while (bpi) {
+		++count_bpi;
+		bpi = bpi->next;
+	}
+
+	if (rn->aggregate) {
+		++count_monitor; /* rfapi_it_extra */
+
+		switch (safi) {
+		case SAFI_ENCAP:
+			hme = RFAPI_MONITOR_ENCAP(rn);
+			while (hme) {
+				++count_monitor;
+				hme = hme->next;
+			}
+			break;
+
+		case SAFI_MPLS_VPN:
+			hmv = RFAPI_MONITOR_VPN(rn);
+			while (hmv) {
+				++count_monitor;
+				hmv = hmv->next;
+			}
+
+			if (RFAPI_MONITOR_EXTERIOR(rn)->source) {
+				++count_monitor; /* sl */
+				cursor = NULL;
+				rc = skiplist_next(
+					RFAPI_MONITOR_EXTERIOR(rn)->source,
+					NULL, NULL, &cursor);
+				while (!rc) {
+					++count_monitor; /* sl entry */
+					rc = skiplist_next(
+						RFAPI_MONITOR_EXTERIOR(rn)
+							->source,
+						NULL, NULL, &cursor);
+				}
+			}
+			break;
+
+		/* every other SAFI is listed so none is silently accepted */
+		case SAFI_UNSPEC:
+		case SAFI_UNICAST:
+		case SAFI_MULTICAST:
+		case SAFI_EVPN:
+		case SAFI_LABELED_UNICAST:
+		case SAFI_FLOWSPEC:
+		case SAFI_MAX:
+			assert(!"Passed in safi should be impossible");
+		}
+	}
+
+	if (count_bpi + count_monitor + lockoffset
+	    == agg_node_get_lock_count(rn))
+		return;
+
+	vnc_zlog_debug_verbose(
+		"%s: count_bpi=%d, count_monitor=%d, lockoffset=%d, rn->lock=%d",
+		__func__, count_bpi, count_monitor, lockoffset,
+		agg_node_get_lock_count(rn));
+	assert(0);
+}
+
+/*
+ * Work-queue handler: carry out an rfapi_close() that was queued
+ * while a callback was in progress.
+ *
+ * q	work queue; its spec.data holds the struct rfapi instance
+ * data	the rfapi descriptor (handle) to close
+ *
+ * Asserts that the instance is no longer flagged RFAPI_INCALLBACK and
+ * always reports WQ_SUCCESS.
+ */
+static wq_item_status rfapi_deferred_close_workfunc(struct work_queue *q,
+						    void *data)
+{
+	struct rfapi *rfapi_inst = q->spec.data;
+	struct rfapi_descriptor *handle = data;
+
+	assert(!(rfapi_inst->flags & RFAPI_INCALLBACK));
+
+	rfapi_close(handle);
+	vnc_zlog_debug_verbose("%s: completed deferred close on handle %p",
+			       __func__, handle);
+
+	return WQ_SUCCESS;
+}
+
+/*
+ * Extract layer 2 option from Encap TLVS in BGP attrs
+ *
+ * Scans the VNC sub-TLVs of attr for the first RFP option whose option
+ * type is RFAPI_VN_OPTION_TYPE_L2ADDR.
+ *
+ * Value layout used below (octet offsets within the sub-TLV value):
+ *	0	option type
+ *	1	option length (14 for a decodable L2 address option)
+ *	2-7	MAC address
+ *	8-10	label (upper 20 bits of the three octets)
+ *	12	local NVE id
+ *	13-15	logical network id
+ *
+ * Returns 0 when an L2 address option was found; *l2o is filled in only
+ * if the option length is 14 and the sub-TLV really carries the 16
+ * octets that are decoded. Returns ENOENT when attr is NULL or holds no
+ * such option.
+ */
+int rfapiGetL2o(struct attr *attr, struct rfapi_l2address_option *l2o)
+{
+	struct bgp_attr_encap_subtlv *pEncap;
+
+	if (!attr)
+		return ENOENT;
+
+	for (pEncap = bgp_attr_get_vnc_subtlvs(attr); pEncap;
+	     pEncap = pEncap->next) {
+
+		if (pEncap->type != BGP_VNC_SUBTLV_TYPE_RFPOPTION)
+			continue;
+
+		/*
+		 * The option type and option length octets must be present
+		 * before they can be inspected; a shorter sub-TLV cannot
+		 * hold an L2 address option.
+		 */
+		if (pEncap->length < 2)
+			continue;
+
+		if (pEncap->value[0] != RFAPI_VN_OPTION_TYPE_L2ADDR)
+			continue;
+
+		/*
+		 * Decode only when the sub-TLV is long enough for every
+		 * octet read below (offsets 2 through 15).
+		 */
+		if (pEncap->value[1] == 14 && pEncap->length >= 16) {
+			memcpy(l2o->macaddr.octet, pEncap->value + 2,
+			       ETH_ALEN);
+
+			l2o->label = ((pEncap->value[10] >> 4) & 0x0f)
+				     + ((pEncap->value[9] << 4) & 0xff0)
+				     + ((pEncap->value[8] << 12) & 0xff000);
+
+			l2o->local_nve_id = pEncap->value[12];
+
+			l2o->logical_net_id =
+				(pEncap->value[15] & 0xff)
+				+ ((pEncap->value[14] << 8) & 0xff00)
+				+ ((pEncap->value[13] << 16) & 0xff0000);
+		}
+
+		return 0;
+	}
+
+	return ENOENT;
+}
+
+/*
+ * Fetch the lifetime carried in the VNC sub-TLVs of a route's
+ * attribute.
+ *
+ * *lifetime is always written: RFAPI_INFINITE_LIFETIME by default, or
+ * the host-order value of the first lifetime sub-TLV that is exactly
+ * four octets long.
+ *
+ * Returns 0 when such a sub-TLV was found, ENOENT otherwise (including
+ * when attr is NULL).
+ */
+int rfapiGetVncLifetime(struct attr *attr, uint32_t *lifetime)
+{
+	struct bgp_attr_encap_subtlv *tlv;
+
+	*lifetime = RFAPI_INFINITE_LIFETIME; /* default to infinite */
+
+	if (!attr)
+		return ENOENT;
+
+	for (tlv = bgp_attr_get_vnc_subtlvs(attr); tlv; tlv = tlv->next) {
+		uint32_t wire;
+
+		if (tlv->type != BGP_VNC_SUBTLV_TYPE_LIFETIME)
+			continue;
+		if (tlv->length != 4)
+			continue;
+
+		/* value is in network byte order */
+		memcpy(&wire, tlv->value, 4);
+		*lifetime = ntohl(wire);
+		return 0;
+	}
+
+	return ENOENT;
+}
+
+/*
+ * Look for UN address in Encap attribute
+ *
+ * When the tunnel type is MPLS (also the default when the attribute
+ * names none), the UN address is taken from the next hop; otherwise it
+ * is taken from the first remote-endpoint sub-TLV whose length is 8
+ * (IPv4) or 20 (IPv6).
+ *
+ * p may be NULL when the caller only wants to know whether an address
+ * is present.
+ *
+ * Returns 0 if a UN address was found, ENOENT if not.
+ */
+int rfapiGetVncTunnelUnAddr(struct attr *attr, struct prefix *p)
+{
+	struct bgp_attr_encap_subtlv *tlv;
+	bgp_encap_types tun_type = BGP_ENCAP_TYPE_MPLS;/*Default tunnel type*/
+
+	bgp_attr_extcom_tunnel_type(attr, &tun_type);
+
+	if (tun_type == BGP_ENCAP_TYPE_MPLS) {
+		if (!p)
+			return 0;
+
+		/* MPLS carries UN address in next hop */
+		rfapiNexthop2Prefix(attr, p);
+		return (p->family != AF_UNSPEC) ? 0 : ENOENT;
+	}
+
+	if (!attr)
+		return ENOENT;
+
+	for (tlv = attr->encap_subtlvs; tlv; tlv = tlv->next) {
+		/* only the remote endpoint sub-TLV carries the UN address */
+		if (tlv->type != BGP_ENCAP_SUBTLV_TYPE_REMOTE_ENDPOINT)
+			continue;
+
+		if (tlv->length == 8) {
+			if (p) {
+				p->family = AF_INET;
+				p->prefixlen = IPV4_MAX_BITLEN;
+				memcpy(p->u.val, tlv->value, 4);
+			}
+			return 0;
+		}
+
+		if (tlv->length == 20) {
+			if (p) {
+				p->family = AF_INET6;
+				p->prefixlen = IPV6_MAX_BITLEN;
+				memcpy(p->u.val, tlv->value, 16);
+			}
+			return 0;
+		}
+
+		/* any other length: keep looking */
+	}
+
+	return ENOENT;
+}
+
+/*
+ * Get UN address wherever it might be
+ *
+ * Tries the route's own attribute first (rfapiGetVncTunnelUnAddr());
+ * failing that, falls back to the UN address cached in bpi->extra.
+ *
+ * p may be NULL; when non-NULL it receives the address as a host
+ * prefix. If bpi->extra exists but its cached family is neither
+ * AF_INET nor AF_INET6, p->family is set to AF_UNSPEC.
+ *
+ * Returns 0 if an address was found, ENOENT if not.
+ */
+int rfapiGetUnAddrOfVpnBi(struct bgp_path_info *bpi, struct prefix *p)
+{
+	int cached_family;
+
+	/* If it's in this route's VNC attribute, we're done */
+	if (rfapiGetVncTunnelUnAddr(bpi->attr, p) == 0)
+		return 0;
+
+	/*
+	 * Otherwise, see if it's cached from a corresponding ENCAP SAFI
+	 * advertisement
+	 */
+	if (!bpi->extra)
+		return ENOENT;
+
+	cached_family = bpi->extra->vnc.import.un_family;
+
+	if (cached_family == AF_INET) {
+		if (p) {
+			p->family = AF_INET;
+			p->prefixlen = IPV4_MAX_BITLEN;
+			p->u.prefix4 = bpi->extra->vnc.import.un.addr4;
+		}
+		return 0;
+	}
+
+	if (cached_family == AF_INET6) {
+		if (p) {
+			p->family = AF_INET6;
+			p->prefixlen = IPV6_MAX_BITLEN;
+			p->u.prefix6 = bpi->extra->vnc.import.un.addr6;
+		}
+		return 0;
+	}
+
+	/* nothing usable is cached */
+	if (p)
+		p->family = AF_UNSPEC;
+#ifdef DEBUG_ENCAP_MONITOR
+	vnc_zlog_debug_verbose(
+		"%s: bpi->extra->vnc.import.un_family is 0, no UN addr",
+		__func__);
+#endif
+
+	return ENOENT;
+}
+
+
+/*
+ * Build a bgp_path_info for an import table from the pieces supplied.
+ *
+ * attr		attribute to attach (interned here)
+ * peer		peer the path is attributed to; a lock is taken on it
+ * rfd		not referenced in this function
+ * prd		optional RD; when given it is stored together with the
+ *		creation time
+ * type,
+ * sub_type	passed through to info_make()
+ * label	optional label, encoded into the first label slot
+ *
+ * Returns the new path; its "extra" block is always allocated.
+ */
+static struct bgp_path_info *rfapiBgpInfoCreate(struct attr *attr,
+						struct peer *peer, void *rfd,
+						struct prefix_rd *prd,
+						uint8_t type, uint8_t sub_type,
+						uint32_t *label)
+{
+	struct bgp_path_info *bpi;
+
+	bpi = info_make(type, sub_type, 0, peer, attr, NULL);
+	bpi->attr = bgp_attr_intern(attr);
+
+	bgp_path_info_extra_get(bpi);
+
+	if (prd) {
+		bpi->extra->vnc.import.rd = *prd;
+		bpi->extra->vnc.import.create_time = monotime(NULL);
+	}
+
+	if (label)
+		encode_label(*label, &bpi->extra->label[0]);
+
+	peer_lock(peer);
+
+	return bpi;
+}
+
+/*
+ * Release a bgp_path_info that lived in an import table (parts are not
+ * allocated exactly the way they are in the main RIBs).
+ *
+ * Drops the peer lock (if a peer is set), uninterns the attribute,
+ * frees the "extra" block when present and finally the path itself.
+ * A NULL argument is ignored.
+ */
+static void rfapiBgpInfoFree(struct bgp_path_info *goner)
+{
+	struct peer *owner;
+
+	if (!goner)
+		return;
+
+	owner = goner->peer;
+	if (owner) {
+		vnc_zlog_debug_verbose("%s: calling peer_unlock(%p), #%d",
+				       __func__, owner, owner->lock);
+		peer_unlock(owner);
+	}
+
+	bgp_attr_unintern(&goner->attr);
+
+	if (goner->extra)
+		bgp_path_info_extra_free(&goner->extra);
+
+	XFREE(MTYPE_BGP_ROUTE, goner);
+}
+
+/*
+ * Look up the L2 (MAC) import table for logical network id lni
+ * without creating anything.
+ *
+ * Returns the table, or NULL when bgp has no rfapi instance, no MAC
+ * import list exists yet, or the LNI is not in the list.
+ */
+struct rfapi_import_table *rfapiMacImportTableGetNoAlloc(struct bgp *bgp,
+							 uint32_t lni)
+{
+	struct rfapi *rfapi_inst = bgp->rfapi;
+	struct rfapi_import_table *found = NULL;
+	uintptr_t key = lni; /* the LNI itself serves as the skiplist key */
+
+	if (!rfapi_inst || !rfapi_inst->import_mac)
+		return NULL;
+
+	/* skiplist_search() returns non-zero when the key is absent */
+	if (skiplist_search(rfapi_inst->import_mac, (void *)key,
+			    (void **)&found)
+	    != 0)
+		return NULL;
+
+	return found;
+}
+
+/*
+ * Return the L2 (MAC) import table for logical network id lni,
+ * creating the MAC import list and/or the table on first use.
+ *
+ * A newly created table gets a single-entry RT import list derived
+ * from the LNI (octets 0,1 = 0,2; low three octets = LNI), empty VPN
+ * and ENCAP tables for every AFI, and is inserted into the list keyed
+ * by the LNI.
+ *
+ * bgp->rfapi must be set (asserted). Never returns NULL.
+ */
+struct rfapi_import_table *rfapiMacImportTableGet(struct bgp *bgp, uint32_t lni)
+{
+	struct rfapi *rfapi_inst = bgp->rfapi;
+	struct rfapi_import_table *it = NULL;
+	struct ecommunity *rt_list;
+	struct ecommunity_val rt;
+	uintptr_t key = lni;
+	afi_t afi;
+
+	assert(rfapi_inst);
+
+	if (!rfapi_inst->import_mac) {
+		/* default cmp is good enough for LNI */
+		rfapi_inst->import_mac = skiplist_new(0, NULL, NULL);
+	}
+
+	/* already known: hand back the existing table */
+	if (!skiplist_search(rfapi_inst->import_mac, (void *)key,
+			     (void **)&it)) {
+		assert(it);
+		return it;
+	}
+
+	it = XCALLOC(MTYPE_RFAPI_IMPORTTABLE,
+		     sizeof(struct rfapi_import_table));
+
+	/* set RT list of new import table based on LNI */
+	memset(&rt, 0, sizeof(rt));
+	rt.val[0] = 0; /* VNC L2VPN */
+	rt.val[1] = 2; /* VNC L2VPN */
+	rt.val[5] = (lni >> 16) & 0xff;
+	rt.val[6] = (lni >> 8) & 0xff;
+	rt.val[7] = (lni >> 0) & 0xff;
+
+	rt_list = ecommunity_new();
+	ecommunity_add_val(rt_list, &rt, false, false);
+	it->rt_import_list = rt_list;
+
+	for (afi = AFI_IP; afi < AFI_MAX; ++afi) {
+		it->imported_vpn[afi] = agg_table_init();
+		it->imported_encap[afi] = agg_table_init();
+	}
+
+	it->l2_logical_net_id = lni;
+
+	skiplist_insert(rfapi_inst->import_mac, (void *)key, it);
+
+	assert(it);
+	return it;
+}
+
+/*
+ * Implement MONITOR_MOVE_SHORTER(original_node) from
+ * RFAPI-Import-Event-Handling.txt
+ *
+ * original_vpn_node: node whose monitors may have to move
+ * lockoffset: added to the expected lock count in the refcount
+ * checks made on original_vpn_node
+ *
+ * Returns pointer to the list of moved monitors
+ *
+ * Returns NULL without moving anything when some bpi at
+ * original_vpn_node still yields a UN address. The returned list is
+ * also NULL when original_vpn_node had no monitors to move.
+ *
+ * The destination ("par") is the nearest ancestor having a bpi with a
+ * UN address; failing that, the table's top node if its prefix length
+ * is 0; failing that, a zero-length-prefix node obtained with
+ * agg_node_get().
+ */
+static struct rfapi_monitor_vpn *
+rfapiMonitorMoveShorter(struct agg_node *original_vpn_node, int lockoffset)
+{
+ struct bgp_path_info *bpi;
+ struct agg_node *par;
+ struct rfapi_monitor_vpn *m;
+ struct rfapi_monitor_vpn *mlast;
+ struct rfapi_monitor_vpn *moved;
+ int movecount = 0; /* number of monitors transferred to par */
+ int parent_already_refcounted = 0; /* bumped when par came from agg_route_table_top() or agg_node_get() */
+
+ RFAPI_CHECK_REFCOUNT(original_vpn_node, SAFI_MPLS_VPN, lockoffset);
+
+#ifdef DEBUG_MONITOR_MOVE_SHORTER
+ {
+ vnc_zlog_debug_verbose("%s: called with node pfx=%pFX",
+ __func__, &original_vpn_node->p);
+ }
+#endif
+
+ /*
+ * 1. If there is at least one bpi (either regular route or
+ * route marked as withdrawn, with a pending timer) at
+ * original_node with a valid UN address, we're done. Return.
+ */
+ for (bpi = original_vpn_node->info; bpi; bpi = bpi->next) {
+ struct prefix pfx;
+
+ if (!rfapiGetUnAddrOfVpnBi(bpi, &pfx)) {
+#ifdef DEBUG_MONITOR_MOVE_SHORTER
+ vnc_zlog_debug_verbose(
+ "%s: have valid UN at original node, no change",
+ __func__);
+#endif
+ return NULL;
+ }
+ }
+
+ /*
+ * 2. Travel up the tree (toward less-specific prefixes) from
+ * original_node to find the first node that has at least
+ * one route (even if it is only a withdrawn route) with a
+ * valid UN address. Call this node "Node P."
+ */
+ for (par = agg_node_parent(original_vpn_node); par;
+ par = agg_node_parent(par)) {
+ for (bpi = par->info; bpi; bpi = bpi->next) {
+ struct prefix pfx;
+ if (!rfapiGetUnAddrOfVpnBi(bpi, &pfx)) {
+ break;
+ }
+ }
+ if (bpi) /* inner loop stopped early: this ancestor qualifies */
+ break;
+ }
+
+ if (par) {
+ RFAPI_CHECK_REFCOUNT(par, SAFI_MPLS_VPN, 0);
+ }
+
+ /*
+ * If no less-specific routes, try to use the 0/0 node
+ */
+ if (!par) {
+ const struct prefix *p;
+ /* this isn't necessarily 0/0 */
+ par = agg_route_table_top(original_vpn_node);
+
+ if (par)
+ p = agg_node_get_prefix(par);
+ /*
+ * If we got the top node but it wasn't 0/0,
+ * ignore it
+ *
+ * (p is only read when par is non-NULL, i.e. after it
+ * was assigned just above)
+ */
+ if (par && p->prefixlen) {
+ agg_unlock_node(par); /* maybe free */
+ par = NULL;
+ }
+
+ if (par) {
+ ++parent_already_refcounted;
+ }
+ }
+
+ /*
+ * Create 0/0 node if it isn't there
+ */
+ if (!par) {
+ struct prefix pfx_default;
+ const struct prefix *p = agg_node_get_prefix(original_vpn_node);
+
+ memset(&pfx_default, 0, sizeof(pfx_default));
+ pfx_default.family = p->family;
+
+ /* creates default node if none exists */
+ par = agg_node_get(agg_get_table(original_vpn_node),
+ &pfx_default);
+ ++parent_already_refcounted;
+ }
+
+ /*
+ * 3. Move each of the monitors found at original_node to Node P.
+ * These are "Moved Monitors."
+ *
+ */
+
+ /*
+ * Attach at end so that the list pointer we return points
+ * only to the moved routes
+ */
+ for (m = RFAPI_MONITOR_VPN(par), mlast = NULL; m;
+ mlast = m, m = m->next)
+ ; /* empty body: just walk to the tail of par's monitor list */
+
+ if (mlast) {
+ moved = mlast->next = RFAPI_MONITOR_VPN(original_vpn_node);
+ } else {
+ moved = RFAPI_MONITOR_VPN_W_ALLOC(par) =
+ RFAPI_MONITOR_VPN(original_vpn_node);
+ }
+ if (RFAPI_MONITOR_VPN(
+ original_vpn_node)) /* check agg, so not allocated */
+ RFAPI_MONITOR_VPN_W_ALLOC(original_vpn_node) = NULL;
+
+ /*
+ * update the node pointers on the monitors
+ */
+ for (m = moved; m; m = m->next) {
+ ++movecount;
+ m->node = par;
+ }
+
+ RFAPI_CHECK_REFCOUNT(par, SAFI_MPLS_VPN,
+ parent_already_refcounted - movecount);
+ while (movecount > parent_already_refcounted) { /* add locks on par until they equal movecount */
+ agg_lock_node(par);
+ ++parent_already_refcounted;
+ }
+ while (movecount < parent_already_refcounted) {
+ /* unlikely, but code defensively */
+ agg_unlock_node(par);
+ --parent_already_refcounted;
+ }
+ RFAPI_CHECK_REFCOUNT(original_vpn_node, SAFI_MPLS_VPN,
+ movecount + lockoffset);
+ while (movecount--) { /* one unlock of the original node per moved monitor */
+ agg_unlock_node(original_vpn_node);
+ }
+
+#ifdef DEBUG_MONITOR_MOVE_SHORTER
+ {
+ vnc_zlog_debug_verbose("%s: moved to node pfx=%pFX", __func__,
+ &par->p);
+ }
+#endif
+
+
+ return moved;
+}
+
+/*
+ * Implement MONITOR_MOVE_LONGER(new_node) from
+ * RFAPI-Import-Event-Handling.txt
+ *
+ * new_vpn_node: node that may now be the longest match for monitors
+ * currently attached to one of its ancestors
+ *
+ * Nothing is moved unless new_vpn_node has at least one bpi that
+ * yields a UN address. Only the nearest ancestor that has monitors is
+ * examined; each of its monitors whose prefix is matched by the new
+ * node's prefix is unlinked from the ancestor and pushed onto the
+ * front of new_vpn_node's monitor list.
+ */
+static void rfapiMonitorMoveLonger(struct agg_node *new_vpn_node)
+{
+ struct rfapi_monitor_vpn *monitor;
+ struct rfapi_monitor_vpn *mlast;
+ struct bgp_path_info *bpi;
+ struct agg_node *par;
+ const struct prefix *new_vpn_node_p = agg_node_get_prefix(new_vpn_node);
+
+ RFAPI_CHECK_REFCOUNT(new_vpn_node, SAFI_MPLS_VPN, 0);
+
+ /*
+ * Make sure we have at least one valid route at the new node
+ */
+ for (bpi = new_vpn_node->info; bpi; bpi = bpi->next) {
+ struct prefix pfx;
+ if (!rfapiGetUnAddrOfVpnBi(bpi, &pfx))
+ break;
+ }
+
+ if (!bpi) {
+ vnc_zlog_debug_verbose(
+ "%s: no valid routes at node %p, so not attempting moves",
+ __func__, new_vpn_node);
+ return;
+ }
+
+ /*
+ * Find first parent node that has monitors
+ */
+ for (par = agg_node_parent(new_vpn_node); par;
+ par = agg_node_parent(par)) {
+ if (RFAPI_MONITOR_VPN(par))
+ break;
+ }
+
+ if (!par) {
+ vnc_zlog_debug_verbose(
+ "%s: no parent nodes with monitors, done", __func__);
+ return;
+ }
+
+ /*
+ * Check each of these monitors to see if their longest-match
+ * is now the updated node. Move any such monitors to the more-
+ * specific updated node
+ *
+ * mlast trails the walk: it is the last monitor left in place
+ * on par, or NULL while none has been kept yet.
+ */
+ for (mlast = NULL, monitor = RFAPI_MONITOR_VPN(par); monitor;) {
+ /*
+ * If new longest match for monitor prefix is the new
+ * route's prefix, move monitor to new route's prefix
+ */
+ if (prefix_match(new_vpn_node_p, &monitor->p)) {
+ /* detach */
+ if (mlast) {
+ mlast->next = monitor->next;
+ } else {
+ RFAPI_MONITOR_VPN_W_ALLOC(par) = monitor->next;
+ }
+
+
+ /* attach */
+ monitor->next = RFAPI_MONITOR_VPN(new_vpn_node);
+ RFAPI_MONITOR_VPN_W_ALLOC(new_vpn_node) = monitor;
+ monitor->node = new_vpn_node;
+
+ agg_lock_node(new_vpn_node); /* incr refcount */
+
+ monitor = mlast ? mlast->next : RFAPI_MONITOR_VPN(par); /* resume with the monitor that followed the one just moved */
+
+ RFAPI_CHECK_REFCOUNT(par, SAFI_MPLS_VPN, 1);
+ /* decr refcount after we're done with par as this might
+ * free it */
+ agg_unlock_node(par);
+
+ continue;
+ }
+ mlast = monitor;
+ monitor = monitor->next;
+ }
+
+ RFAPI_CHECK_REFCOUNT(new_vpn_node, SAFI_MPLS_VPN, 0);
+}
+
+
+/*
+ * Free every bgp_path_info on a chain, starting at bpi.
+ *
+ * A path flagged BGP_PATH_REMOVED that still has a withdraw timer gets
+ * the timer's rfapi_withdraw argument freed and the timer turned off
+ * before the path itself is released.
+ */
+static void rfapiBgpInfoChainFree(struct bgp_path_info *bpi)
+{
+	struct bgp_path_info *following;
+
+	for (; bpi; bpi = following) {
+		/*
+		 * If there is a timer waiting to delete this bpi, cancel
+		 * the timer and delete immediately
+		 */
+		if (CHECK_FLAG(bpi->flags, BGP_PATH_REMOVED)
+		    && bpi->extra->vnc.import.timer) {
+			struct rfapi_withdraw *pending =
+				EVENT_ARG(bpi->extra->vnc.import.timer);
+
+			XFREE(MTYPE_RFAPI_WITHDRAW, pending);
+			EVENT_OFF(bpi->extra->vnc.import.timer);
+		}
+
+		/* remember the successor, then detach and free this one */
+		following = bpi->next;
+		bpi->next = NULL;
+		rfapiBgpInfoFree(bpi);
+	}
+}
+
+/*
+ * Release everything an import table owns: its RT import list, the
+ * paths and monitor data of every node in the per-AFI VPN and ENCAP
+ * tables, the tables themselves, and the exterior-monitor orphan list.
+ * The import table structure itself is left for the caller to free.
+ */
+static void rfapiImportTableFlush(struct rfapi_import_table *it)
+{
+	afi_t afi;
+
+	/*
+	 * Free ecommunity
+	 */
+	ecommunity_free(&it->rt_import_list);
+	it->rt_import_list = NULL;
+
+	for (afi = AFI_IP; afi < AFI_MAX; ++afi) {
+
+		struct agg_node *rn;
+		struct agg_table *at;
+
+		at = it->imported_vpn[afi];
+		if (at) {
+			for (rn = agg_route_top(at); rn;
+			     rn = agg_route_next(rn)) {
+				/*
+				 * Each route_node has:
+				 * aggregate: points to rfapi_it_extra with
+				 * monitor chain(s)
+				 * info: points to chain of bgp_path_info
+				 */
+				/* free bgp_path_info and its children */
+				rfapiBgpInfoChainFree(rn->info);
+				rn->info = NULL;
+
+				rfapiMonitorExtraFlush(SAFI_MPLS_VPN, rn);
+			}
+			agg_table_finish(at);
+		}
+
+		/*
+		 * Load the ENCAP table pointer BEFORE testing it. Testing
+		 * the leftover VPN pointer instead would skip the ENCAP
+		 * table whenever no VPN table exists, and would walk a
+		 * NULL ENCAP table whenever one does.
+		 */
+		at = it->imported_encap[afi];
+		if (at) {
+			for (rn = agg_route_top(at); rn;
+			     rn = agg_route_next(rn)) {
+				/* free bgp_path_info and its children */
+				rfapiBgpInfoChainFree(rn->info);
+				rn->info = NULL;
+
+				rfapiMonitorExtraFlush(SAFI_ENCAP, rn);
+			}
+			agg_table_finish(at);
+		}
+	}
+	if (it->monitor_exterior_orphans) {
+		skiplist_free(it->monitor_exterior_orphans);
+	}
+}
+
+/*
+ * Drop one reference on import table it_target.
+ *
+ * The table must be on bgp->rfapi's import list and hold at least one
+ * reference (both asserted). When the last reference goes away the
+ * table is unlinked from the list, flushed and freed.
+ */
+void rfapiImportTableRefDelByIt(struct bgp *bgp,
+				struct rfapi_import_table *it_target)
+{
+	struct rfapi *rfapi_inst;
+	struct rfapi_import_table **link;
+	struct rfapi_import_table *it;
+
+	assert(it_target);
+
+	rfapi_inst = bgp->rfapi;
+	assert(rfapi_inst);
+
+	/* find the list link that points at the target */
+	link = &rfapi_inst->imports;
+	while (*link && *link != it_target)
+		link = &(*link)->next;
+
+	it = *link;
+	assert(it);
+	assert(it->refcount);
+
+	it->refcount -= 1;
+	if (it->refcount)
+		return;
+
+	/* last reference gone: unlink, then tear down */
+	*link = it->next;
+	rfapiImportTableFlush(it);
+	XFREE(MTYPE_RFAPI_IMPORTTABLE, it);
+}
+
+#ifdef RFAPI_REQUIRE_ENCAP_BEEC
+/*
+ * Look for magic BGP Encapsulation Extended Community value
+ * Format in RFC 5512 Sect. 4.5
+ *
+ * Returns 1 when ecom contains an opaque extended community of the
+ * encapsulation sub-type whose last two octets equal the 16-bit tunnel
+ * type "type" (high octet first), 0 otherwise (including ecom == NULL).
+ */
+static int rfapiEcommunitiesMatchBeec(struct ecommunity *ecom,
+				      bgp_encap_types type)
+{
+	uint32_t i;
+
+	if (!ecom)
+		return 0;
+
+	for (i = 0; i < ecom->size; ++i) {
+		const uint8_t *ep = ecom->val + (i * ECOMMUNITY_SIZE);
+
+		/*
+		 * The tunnel type is split with bitwise AND. A logical
+		 * AND would reduce the high octet to 0 or 1 before the
+		 * shift, so it could never match a non-zero high octet.
+		 */
+		if (ep[0] == ECOMMUNITY_ENCODE_OPAQUE
+		    && ep[1] == ECOMMUNITY_OPAQUE_SUBTYPE_ENCAP
+		    && ep[6] == ((type & 0xff00) >> 8)
+		    && ep[7] == (type & 0xff)) {
+
+			return 1;
+		}
+	}
+	return 0;
+}
+#endif
+
+/*
+ * Report whether two extended community lists share at least one
+ * value.
+ *
+ * Returns 1 on the first byte-identical pair of values, 0 when there
+ * is none or when either list is NULL. Both lists are written to the
+ * debug log before the comparison.
+ */
+int rfapiEcommunitiesIntersect(struct ecommunity *e1, struct ecommunity *e2)
+{
+	uint32_t idx1;
+	uint32_t idx2;
+	char *str1;
+	char *str2;
+
+	if (!e1 || !e2)
+		return 0;
+
+	str1 = ecommunity_ecom2str(e1, ECOMMUNITY_FORMAT_DISPLAY, 0);
+	str2 = ecommunity_ecom2str(e2, ECOMMUNITY_FORMAT_DISPLAY, 0);
+	vnc_zlog_debug_verbose("%s: e1[%s], e2[%s]", __func__, str1, str2);
+	XFREE(MTYPE_ECOMMUNITY_STR, str1);
+	XFREE(MTYPE_ECOMMUNITY_STR, str2);
+
+	for (idx1 = 0; idx1 < e1->size; ++idx1) {
+		for (idx2 = 0; idx2 < e2->size; ++idx2) {
+			if (memcmp(e1->val + (idx1 * ECOMMUNITY_SIZE),
+				   e2->val + (idx2 * ECOMMUNITY_SIZE),
+				   ECOMMUNITY_SIZE)
+			    == 0)
+				return 1;
+		}
+	}
+
+	return 0;
+}
+
+/*
+ * Pull the logical network id out of an extended community list.
+ *
+ * The first value whose leading two octets are 0x00, 0x02 supplies the
+ * LNI from its last three octets (most significant first).
+ *
+ * Returns 0 and sets *lni on success; returns ENOENT and leaves *lni
+ * untouched when ecom is NULL or holds no such value.
+ */
+int rfapiEcommunityGetLNI(struct ecommunity *ecom, uint32_t *lni)
+{
+	uint32_t idx;
+
+	if (!ecom)
+		return ENOENT;
+
+	for (idx = 0; idx < ecom->size; ++idx) {
+		uint8_t *val = ecom->val + (idx * ECOMMUNITY_SIZE);
+
+		if (val[0] != 0x00 || val[1] != 0x02)
+			continue;
+
+		*lni = (val[5] << 16) | (val[6] << 8) | val[7];
+		return 0;
+	}
+
+	return ENOENT;
+}
+
+/*
+ * Extract the Ethernet tag (VID) from the route targets in an extended
+ * community list.
+ *
+ * A route target qualifies when its AS (2-octet or 4-octet encoding)
+ * equals the AS of the default BGP instance; the tag is then taken
+ * from the two octets that follow the AS field (after two skipped
+ * octets in the 2-octet-AS encoding).
+ *
+ * *tag_id is always written and defaults to 0 (untagged).
+ *
+ * Returns 0 when a tag was found. Returns ENOENT when ecom is NULL,
+ * when there is no default BGP instance to compare the AS against, or
+ * when no route target matches.
+ */
+int rfapiEcommunityGetEthernetTag(struct ecommunity *ecom, uint16_t *tag_id)
+{
+	struct bgp *bgp;
+	uint32_t i;
+
+	*tag_id = 0; /* default to untagged */
+
+	if (!ecom)
+		return ENOENT;
+
+	/* without a default instance there is no AS to match against */
+	bgp = bgp_get_default();
+	if (!bgp)
+		return ENOENT;
+
+	for (i = 0; i < ecom->size; ++i) {
+		as_t as = 0;
+		int encode = 0;
+		const uint8_t *p = ecom->val + (i * ECOMMUNITY_SIZE);
+
+		/* High-order octet of type. */
+		encode = *p++;
+
+		if (*p++ != ECOMMUNITY_ROUTE_TARGET)
+			continue;
+
+		if (encode == ECOMMUNITY_ENCODE_AS4) {
+			p = ptr_get_be32(p, &as);
+		} else if (encode == ECOMMUNITY_ENCODE_AS) {
+			as = (*p++ << 8);
+			as |= (*p++);
+			p += 2; /* skip next two, tag/vid
+				   always in lowest bytes */
+		}
+
+		if (as == bgp->as) {
+			*tag_id = *p++ << 8;
+			*tag_id |= (*p++);
+			return 0;
+		}
+	}
+
+	return ENOENT;
+}
+
+/*
+ * Compare the MP next hop of a VPN path with an rfapi address.
+ *
+ * Returns 1 when both have the same address family (IPv4 or IPv6) and
+ * the same address; 0 otherwise, including when either argument is
+ * NULL or the next hop family is neither AF_INET nor AF_INET6.
+ */
+static int rfapiVpnBiNhEqualsPt(struct bgp_path_info *bpi,
+				struct rfapi_ip_addr *hpt)
+{
+	uint8_t nh_family;
+
+	if (!hpt || !bpi)
+		return 0;
+
+	nh_family = BGP_MP_NEXTHOP_FAMILY(bpi->attr->mp_nexthop_len);
+	if (hpt->addr_family != nh_family)
+		return 0;
+
+	if (nh_family == AF_INET) {
+		if (bpi->attr->mp_nexthop_global_in.s_addr
+		    == hpt->addr.v4.s_addr)
+			return 1;
+		return 0;
+	}
+
+	if (nh_family == AF_INET6) {
+		if (!IPV6_ADDR_CMP(&bpi->attr->mp_nexthop_global,
+				   &hpt->addr.v6))
+			return 1;
+		return 0;
+	}
+
+	return 0;
+}
+
+
+/*
+ * Compare 2 VPN BIs. Return true if they have the same VN and UN addresses
+ *
+ * VN addresses come from the MP next hop of each path and must be of
+ * the same family (IPv4 or IPv6). UN addresses come from
+ * rfapiGetVncTunnelUnAddr() or, when that fails and the path has an
+ * "extra" block, from the UN address cached there. A path without any
+ * UN address never matches.
+ */
+static int rfapiVpnBiSamePtUn(struct bgp_path_info *bpi1,
+			      struct bgp_path_info *bpi2)
+{
+	struct bgp_path_info *side[2];
+	struct prefix un[2];
+	int vn_family;
+	int k;
+
+	if (!bpi1 || !bpi2)
+		return 0;
+
+	/*
+	 * VN address comparisons
+	 */
+	vn_family = BGP_MP_NEXTHOP_FAMILY(bpi1->attr->mp_nexthop_len);
+	if (vn_family != BGP_MP_NEXTHOP_FAMILY(bpi2->attr->mp_nexthop_len))
+		return 0;
+
+	if (vn_family == AF_INET) {
+		if (bpi1->attr->mp_nexthop_global_in.s_addr
+		    != bpi2->attr->mp_nexthop_global_in.s_addr)
+			return 0;
+	} else if (vn_family == AF_INET6) {
+		if (IPV6_ADDR_CMP(&bpi1->attr->mp_nexthop_global,
+				  &bpi2->attr->mp_nexthop_global))
+			return 0;
+	} else {
+		return 0;
+	}
+
+	/*
+	 * UN address comparisons: gather one UN prefix per path first
+	 */
+	side[0] = bpi1;
+	side[1] = bpi2;
+	memset(un, 0, sizeof(un));
+
+	for (k = 0; k < 2; ++k) {
+		struct bgp_path_info *bpi = side[k];
+
+		/* found in the attribute: nothing more to do */
+		if (!rfapiGetVncTunnelUnAddr(bpi->attr, &un[k]))
+			continue;
+
+		/* otherwise fall back to the cached copy, if any */
+		if (!bpi->extra)
+			continue;
+
+		un[k].family = bpi->extra->vnc.import.un_family;
+		switch (bpi->extra->vnc.import.un_family) {
+		case AF_INET:
+			un[k].u.prefix4 = bpi->extra->vnc.import.un.addr4;
+			break;
+		case AF_INET6:
+			un[k].u.prefix6 = bpi->extra->vnc.import.un.addr6;
+			break;
+		default:
+			un[k].family = AF_UNSPEC;
+			break;
+		}
+	}
+
+	if (un[0].family == AF_UNSPEC || un[1].family == AF_UNSPEC)
+		return 0;
+
+	if (un[0].family != un[1].family)
+		return 0;
+
+	if (un[0].family == AF_INET
+	    && !IPV4_ADDR_SAME(&un[0].u.prefix4, &un[1].u.prefix4))
+		return 0;
+
+	if (un[0].family == AF_INET6
+	    && !IPV6_ADDR_SAME(&un[0].u.prefix6, &un[1].u.prefix6))
+		return 0;
+
+	return 1;
+}
+
+/*
+ * Derive the RFP cost of a route from its LOCAL_PREF.
+ *
+ * Without a LOCAL_PREF attribute the cost is 255. Otherwise it is
+ * 255 - local_pref, bottoming out at 0 for any local_pref above 255.
+ */
+uint8_t rfapiRfpCost(struct attr *attr)
+{
+	if (!(attr->flag & ATTR_FLAG_BIT(BGP_ATTR_LOCAL_PREF)))
+		return 255;
+
+	if (attr->local_pref > 255)
+		return 0;
+
+	return 255 - attr->local_pref;
+}
+
+/*------------------------------------------
+ * rfapi_extract_l2o
+ *
+ * Find Layer 2 options in an option chain
+ *
+ * The first option of type RFAPI_VN_OPTION_TYPE_L2ADDR that carries at
+ * least 14 octets is decoded. Octet offsets within the option value:
+ *	0-5	MAC address
+ *	6-8	label (upper 20 bits of the three octets)
+ *	10	local NVE id
+ *	11-13	logical network id
+ *
+ * input:
+ *	pHop		option chain
+ *
+ * output:
+ *	l2o		layer 2 options extracted
+ *
+ * return value:
+ *	0		OK
+ *	1		no options found
+ *
+ --------------------------------------------*/
+int rfapi_extract_l2o(
+	struct bgp_tea_options *pHop,	    /* chain of options */
+	struct rfapi_l2address_option *l2o) /* return extracted value */
+{
+	struct bgp_tea_options *p;
+
+	for (p = pHop; p; p = p->next) {
+		const uint8_t *v;
+
+		if (p->type != RFAPI_VN_OPTION_TYPE_L2ADDR)
+			continue;
+
+		/* the decoding below reads through v[13] */
+		if (p->length < 14)
+			continue;
+
+		/*
+		 * Read the value as unsigned octets: through plain char,
+		 * octets >= 0x80 may sign-extend and corrupt the shifted
+		 * sums (and shifting a negative value is undefined).
+		 */
+		v = (const uint8_t *)p->value;
+
+		memcpy(&l2o->macaddr, v, 6);
+
+		l2o->label = ((uint32_t)v[6] << 12) | ((uint32_t)v[7] << 4)
+			     | ((uint32_t)v[8] >> 4);
+
+		l2o->local_nve_id = v[10];
+
+		l2o->logical_net_id = ((uint32_t)v[11] << 16)
+				      | ((uint32_t)v[12] << 8)
+				      | (uint32_t)v[13];
+
+		return 0;
+	}
+	return 1;
+}
+/*
+ * Build an rfapi_next_hop_entry describing route bpi.
+ *
+ * The entry starts from *rprefix, takes its cost from rfapiRfpCost()
+ * and its VN address from the attribute's MP next hop. The UN address
+ * comes from the next hop (MPLS tunnel type), is overridden by a
+ * remote-endpoint encap sub-TLV of length 8 or 20, and falls back to
+ * the UN address cached in bpi->extra when neither supplied one.
+ * For an RD of type RD_TYPE_VNC_ETH a layer-2 address option is
+ * attached as well.
+ *
+ * Returns the new entry, or NULL (after releasing it with
+ * rfapi_free_next_hop_list()) when the VN next hop family or the
+ * cached UN address family is not AF_INET/AF_INET6.
+ */
+static struct rfapi_next_hop_entry *
+rfapiRouteInfo2NextHopEntry(struct rfapi_ip_prefix *rprefix,
+ struct bgp_path_info *bpi, /* route to encode */
+ uint32_t lifetime, /* use this in nhe */
+ struct agg_node *rn) /* req for L2 eth addr */
+{
+ struct rfapi_next_hop_entry *new;
+ int have_vnc_tunnel_un = 0;
+ const struct prefix *p = agg_node_get_prefix(rn);
+
+#ifdef DEBUG_ENCAP_MONITOR
+ vnc_zlog_debug_verbose("%s: entry, bpi %p, rn %p", __func__, bpi, rn);
+#endif
+
+ new = XCALLOC(MTYPE_RFAPI_NEXTHOP, sizeof(struct rfapi_next_hop_entry));
+
+ new->prefix = *rprefix;
+
+ if (bpi->extra
+ && decode_rd_type(bpi->extra->vnc.import.rd.val)
+ == RD_TYPE_VNC_ETH) {
+ /* ethernet */
+
+ struct rfapi_vn_option *vo;
+
+ vo = XCALLOC(MTYPE_RFAPI_VN_OPTION,
+ sizeof(struct rfapi_vn_option));
+
+ vo->type = RFAPI_VN_OPTION_TYPE_L2ADDR;
+
+ memcpy(&vo->v.l2addr.macaddr, &p->u.prefix_eth.octet, ETH_ALEN);
+ /* only low 3 bytes of this are significant */
+ (void)rfapiEcommunityGetLNI(bgp_attr_get_ecommunity(bpi->attr),
+ &vo->v.l2addr.logical_net_id);
+ (void)rfapiEcommunityGetEthernetTag(
+ bgp_attr_get_ecommunity(bpi->attr),
+ &vo->v.l2addr.tag_id);
+
+ /* local_nve_id comes from lower byte of RD type */
+ vo->v.l2addr.local_nve_id = bpi->extra->vnc.import.rd.val[1];
+
+ /* label comes from MP_REACH_NLRI label */
+ vo->v.l2addr.label = decode_label(&bpi->extra->label[0]);
+
+ new->vn_options = vo;
+
+ /*
+ * If there is an auxiliary prefix (i.e., host IP address),
+ * use it as the nexthop prefix instead of the query prefix
+ */
+ if (bpi->extra->vnc.import.aux_prefix.family) {
+ rfapiQprefix2Rprefix(&bpi->extra->vnc.import.aux_prefix,
+ &new->prefix);
+ }
+ }
+
+ bgp_encap_types tun_type = BGP_ENCAP_TYPE_MPLS; /*Default*/
+ new->prefix.cost = rfapiRfpCost(bpi->attr);
+
+ struct bgp_attr_encap_subtlv *pEncap;
+
+ switch (BGP_MP_NEXTHOP_FAMILY(bpi->attr->mp_nexthop_len)) {
+ case AF_INET:
+ new->vn_address.addr_family = AF_INET;
+ new->vn_address.addr.v4 = bpi->attr->mp_nexthop_global_in;
+ break;
+
+ case AF_INET6:
+ new->vn_address.addr_family = AF_INET6;
+ new->vn_address.addr.v6 = bpi->attr->mp_nexthop_global;
+ break;
+
+ default:
+ zlog_warn("%s: invalid vpn nexthop length: %d", __func__,
+ bpi->attr->mp_nexthop_len);
+ rfapi_free_next_hop_list(new);
+ return NULL;
+ }
+
+ for (pEncap = bgp_attr_get_vnc_subtlvs(bpi->attr); pEncap;
+ pEncap = pEncap->next) {
+ switch (pEncap->type) {
+ case BGP_VNC_SUBTLV_TYPE_LIFETIME:
+ /* use configured lifetime, not attr lifetime */
+ break;
+
+ default:
+ zlog_warn("%s: unknown VNC option type %d", __func__,
+ pEncap->type);
+
+ break;
+ }
+ }
+
+ bgp_attr_extcom_tunnel_type(bpi->attr, &tun_type);
+ if (tun_type == BGP_ENCAP_TYPE_MPLS) {
+ struct prefix p; /* shadows the outer p (node prefix) inside this block */
+ /* MPLS carries UN address in next hop */
+ rfapiNexthop2Prefix(bpi->attr, &p);
+ if (p.family != AF_UNSPEC) {
+ rfapiQprefix2Raddr(&p, &new->un_address);
+ have_vnc_tunnel_un = 1;
+ }
+ }
+
+ for (pEncap = bpi->attr->encap_subtlvs; pEncap; pEncap = pEncap->next) {
+ switch (pEncap->type) {
+ case BGP_ENCAP_SUBTLV_TYPE_REMOTE_ENDPOINT:
+ /*
+ * Overrides ENCAP UN address, if any
+ */
+ switch (pEncap->length) {
+
+ case 8:
+ new->un_address.addr_family = AF_INET;
+ memcpy(&new->un_address.addr.v4, pEncap->value,
+ 4);
+ have_vnc_tunnel_un = 1;
+ break;
+
+ case 20:
+ new->un_address.addr_family = AF_INET6;
+ memcpy(&new->un_address.addr.v6, pEncap->value,
+ 16);
+ have_vnc_tunnel_un = 1;
+ break;
+
+ default:
+ zlog_warn(
+ "%s: invalid tunnel subtlv UN addr length (%d) for bpi %p",
+ __func__, pEncap->length, bpi);
+ }
+ break;
+
+ default:
+ zlog_warn("%s: unknown Encap Attribute option type %d",
+ __func__, pEncap->type);
+ break;
+ }
+ }
+
+ new->un_options = rfapi_encap_tlv_to_un_option(bpi->attr);
+
+#ifdef DEBUG_ENCAP_MONITOR
+ vnc_zlog_debug_verbose("%s: line %d: have_vnc_tunnel_un=%d", __func__,
+ __LINE__, have_vnc_tunnel_un);
+#endif
+
+ if (!have_vnc_tunnel_un && bpi->extra) {
+ /*
+ * use cached UN address from ENCAP route
+ */
+ new->un_address.addr_family = bpi->extra->vnc.import.un_family;
+ switch (new->un_address.addr_family) {
+ case AF_INET:
+ new->un_address.addr.v4 =
+ bpi->extra->vnc.import.un.addr4;
+ break;
+ case AF_INET6:
+ new->un_address.addr.v6 =
+ bpi->extra->vnc.import.un.addr6;
+ break;
+ default:
+ zlog_warn("%s: invalid UN addr family (%d) for bpi %p",
+ __func__, new->un_address.addr_family, bpi);
+ rfapi_free_next_hop_list(new);
+ return NULL;
+ }
+ }
+
+ new->lifetime = lifetime; /* caller-supplied value, not the attribute's lifetime sub-TLV */
+ return new;
+}
+
+/*
+ * Report whether an import-table node carries at least one usable route:
+ * a path that is not flagged BGP_PATH_REMOVED, has an "extra" block, and
+ * for which rfapiGetUnAddrOfVpnBi() returns 0.
+ *
+ * Returns 1 if such a path exists, 0 otherwise.
+ */
+int rfapiHasNonRemovedRoutes(struct agg_node *rn)
+{
+	struct bgp_path_info *path = rn->info;
+
+	while (path) {
+		struct prefix un_pfx;
+
+		if (!CHECK_FLAG(path->flags, BGP_PATH_REMOVED) && path->extra
+		    && rfapiGetUnAddrOfVpnBi(path, &un_pfx) == 0)
+			return 1;
+
+		path = path->next;
+	}
+
+	return 0;
+}
+
+#ifdef DEBUG_IT_NODES
+/*
+ * DEBUG FUNCTION
+ *
+ * Log every path attached to an import-table node. For each path the
+ * "nr" value is 1 when the path is not flagged BGP_PATH_REMOVED, has an
+ * "extra" block and rfapiGetUnAddrOfVpnBi() returned 0; otherwise 0.
+ */
+void rfapiDumpNode(struct agg_node *rn)
+{
+	struct bgp_path_info *path;
+
+	vnc_zlog_debug_verbose("%s: rn=%p", __func__, rn);
+
+	for (path = rn->info; path != NULL; path = path->next) {
+		struct prefix un_pfx;
+		int ctrc = rfapiGetUnAddrOfVpnBi(path, &un_pfx);
+		int nr = 0;
+
+		if (!CHECK_FLAG(path->flags, BGP_PATH_REMOVED) && path->extra
+		    && !ctrc)
+			nr = 1;
+
+		vnc_zlog_debug_verbose(
+			" bpi=%p, nr=%d, flags=0x%x, extra=%p, ctrc=%d", path,
+			nr, path->flags, path->extra, ctrc);
+	}
+}
+#endif
+
+/*
+ * Append next-hop entries for the routes attached to one import-table
+ * node to the caller's singly linked list.
+ *
+ * removed selects which paths are considered: nonzero means only paths
+ * flagged BGP_PATH_REMOVED, zero means only paths without that flag.
+ * Paths without an "extra" block, paths matching exclude_vnaddr, paths
+ * whose VN/nexthop prefix was already emitted for this node, and paths
+ * for which no UN address can be obtained are skipped.
+ *
+ * If rfd_rib_node belongs to a table and rfapiRibFTDFilterRecentPrefix()
+ * reports a hit, nothing is added.
+ *
+ * Returns the number of entries appended to *head / *tail.
+ */
+static int rfapiNhlAddNodeRoutes(
+	struct agg_node *rn,		      /* in */
+	struct rfapi_ip_prefix *rprefix,      /* in */
+	uint32_t lifetime,		      /* in */
+	int removed,			      /* in */
+	struct rfapi_next_hop_entry **head,   /* in/out */
+	struct rfapi_next_hop_entry **tail,   /* in/out */
+	struct rfapi_ip_addr *exclude_vnaddr, /* omit routes to same NVE */
+	struct agg_node *rfd_rib_node,	      /* preload this NVE rib node */
+	struct prefix *pfx_target_original)   /* query target */
+{
+	const struct prefix *node_pfx = agg_node_get_prefix(rn);
+	int l2_node = (node_pfx->family == AF_ETHERNET);
+	struct skiplist *seen;
+	struct bgp_path_info *path;
+	int added = 0;
+
+	if (rfd_rib_node) {
+		struct agg_table *rib_table = agg_get_table(rfd_rib_node);
+
+		if (rib_table) {
+			struct rfapi_descriptor *rfd =
+				agg_get_table_info(rib_table);
+
+			if (rfapiRibFTDFilterRecentPrefix(rfd, rn,
+							  pfx_target_original))
+				return 0;
+		}
+	}
+
+	/* VN/nexthop prefixes already emitted for this node */
+	seen = skiplist_new(0, vnc_prefix_cmp, prefix_free_lists);
+
+	for (path = rn->info; path; path = path->next) {
+		struct rfapi_next_hop_entry *entry;
+		struct prefix vn_pfx;
+		struct prefix un_pfx;
+		struct prefix *seen_key;
+		int path_removed =
+			CHECK_FLAG(path->flags, BGP_PATH_REMOVED) ? 1 : 0;
+
+		if (removed && !path_removed) {
+#ifdef DEBUG_RETURNED_NHL
+			vnc_zlog_debug_verbose(
+				"%s: want holddown, this route not holddown, skip",
+				__func__);
+#endif
+			continue;
+		}
+		if (!removed && path_removed)
+			continue;
+
+		if (!path->extra)
+			continue;
+
+		/* excluded VN address? */
+		if (rfapiVpnBiNhEqualsPt(path, exclude_vnaddr))
+			continue;
+
+		/*
+		 * L2 routes keep their semantic nexthop in aux_prefix;
+		 * everything else uses the attribute's MP nexthop.
+		 */
+		if (l2_node)
+			vn_pfx = path->extra->vnc.import.aux_prefix;
+		else
+			rfapiNexthop2Prefix(path->attr, &vn_pfx);
+
+		if (skiplist_search(seen, &vn_pfx, NULL) == 0) {
+#ifdef DEBUG_RETURNED_NHL
+			vnc_zlog_debug_verbose(
+				"%s: already put VN/nexthop %pFX, skip",
+				__func__, &vn_pfx);
+#endif
+			continue;
+		}
+
+		if (rfapiGetUnAddrOfVpnBi(path, &un_pfx) != 0) {
+#ifdef DEBUG_ENCAP_MONITOR
+			vnc_zlog_debug_verbose(
+				"%s: failed to get UN address of this VPN bpi",
+				__func__);
+#endif
+			continue;
+		}
+
+		/* remember this nexthop before trying to build the entry */
+		seen_key = prefix_new();
+		*seen_key = vn_pfx;
+		skiplist_insert(seen, seen_key, seen_key);
+
+		entry = rfapiRouteInfo2NextHopEntry(rprefix, path, lifetime,
+						    rn);
+		if (!entry)
+			continue;
+
+		if (rfapiRibPreloadBi(rfd_rib_node, &vn_pfx, &un_pfx, lifetime,
+				      path)) {
+			/* duplicate filtered by RIB */
+			rfapi_free_next_hop_list(entry);
+			continue;
+		}
+
+		if (*tail)
+			(*tail)->next = entry;
+		else
+			*head = entry;
+		*tail = entry;
+		++added;
+	}
+
+	skiplist_free(seen);
+
+	return added;
+}
+
+
+/*
+ * Helper for rfapiNhlAddSubtree(): add the routes of one child node.
+ *
+ * Non-removed routes are tried first; only when none were added are the
+ * removed (holddown) routes added instead. When rfd_rib_table is given,
+ * the matching RIB node is obtained with agg_node_get() for the duration
+ * of the call and released again afterwards.
+ *
+ * Returns the number of entries appended.
+ */
+static int rfapiNhlAddSubtreeChild(
+	struct agg_node *child,		      /* in */
+	uint32_t lifetime,		      /* in */
+	struct rfapi_next_hop_entry **head,   /* in/out */
+	struct rfapi_next_hop_entry **tail,   /* in/out */
+	struct rfapi_ip_addr *exclude_vnaddr, /* omit routes to same NVE */
+	struct agg_table *rfd_rib_table,      /* preload here */
+	struct prefix *pfx_target_original)   /* query target */
+{
+	const struct prefix *p = agg_node_get_prefix(child);
+	struct rfapi_ip_prefix rprefix;
+	struct agg_node *rib_rn = NULL;
+	int count;
+
+	rfapiQprefix2Rprefix(p, &rprefix);
+	if (rfd_rib_table)
+		rib_rn = agg_node_get(rfd_rib_table, p);
+
+	count = rfapiNhlAddNodeRoutes(child, &rprefix, lifetime, 0, head, tail,
+				      exclude_vnaddr, rib_rn,
+				      pfx_target_original);
+	if (!count) {
+		count = rfapiNhlAddNodeRoutes(child, &rprefix, lifetime, 1,
+					      head, tail, exclude_vnaddr,
+					      rib_rn, pfx_target_original);
+	}
+
+	if (rib_rn)
+		agg_unlock_node(rib_rn);
+
+	return count;
+}
+
+/*
+ * Breadth-first
+ *
+ * Add the routes of both children of rn (left first, then right), then
+ * recurse into the left and right subtrees.
+ *
+ * omit_node is meant for the situation where we are adding a subtree
+ * of a parent of some original requested node. The response already
+ * contains the original requested node, and we don't want to duplicate
+ * its routes in the list, so we skip it if the right or left node
+ * matches (of course, we still travel down its child subtrees).
+ *
+ * Returns the total number of entries appended to *head / *tail.
+ */
+static int rfapiNhlAddSubtree(
+	struct agg_node *rn,		      /* in */
+	uint32_t lifetime,		      /* in */
+	struct rfapi_next_hop_entry **head,   /* in/out */
+	struct rfapi_next_hop_entry **tail,   /* in/out */
+	struct agg_node *omit_node,	      /* in */
+	struct rfapi_ip_addr *exclude_vnaddr, /* omit routes to same NVE */
+	struct agg_table *rfd_rib_table,      /* preload here */
+	struct prefix *pfx_target_original)   /* query target */
+{
+	int rcount = 0;
+
+	/* FIXME: need to find a better way here to work without sticking our
+	 * hands in node->link */
+	if (agg_node_left(rn) && agg_node_left(rn) != omit_node
+	    && agg_node_left(rn)->info) {
+		rcount += rfapiNhlAddSubtreeChild(
+			agg_node_left(rn), lifetime, head, tail,
+			exclude_vnaddr, rfd_rib_table, pfx_target_original);
+	}
+
+	if (agg_node_right(rn) && agg_node_right(rn) != omit_node
+	    && agg_node_right(rn)->info) {
+		rcount += rfapiNhlAddSubtreeChild(
+			agg_node_right(rn), lifetime, head, tail,
+			exclude_vnaddr, rfd_rib_table, pfx_target_original);
+	}
+
+	/* descend even below an omitted or empty child */
+	if (agg_node_left(rn)) {
+		rcount += rfapiNhlAddSubtree(
+			agg_node_left(rn), lifetime, head, tail, omit_node,
+			exclude_vnaddr, rfd_rib_table, pfx_target_original);
+	}
+	if (agg_node_right(rn)) {
+		rcount += rfapiNhlAddSubtree(
+			agg_node_right(rn), lifetime, head, tail, omit_node,
+			exclude_vnaddr, rfd_rib_table, pfx_target_original);
+	}
+
+	return rcount;
+}
+
+/*
+ * Implementation of ROUTE_LIST(node) from RFAPI-Import-Event-Handling.txt
+ *
+ * Construct an rfapi nexthop list based on the routes attached to
+ * the specified node.
+ *
+ * If there are any routes that do NOT have BGP_PATH_REMOVED set,
+ * return those, followed by the routes of the subtree below the node.
+ * If there are ONLY routes with BGP_PATH_REMOVED, then return those, and
+ * also include all the non-removed routes from the nearest less-specific
+ * node that has any (found by walking up through this node's parents),
+ * plus that node's subtree with rn itself omitted, at the end.
+ *
+ * When rfd_rib_table is non-NULL, the RIB node matching each import-table
+ * node is obtained with agg_node_get() while that node is processed and
+ * released with agg_unlock_node() afterwards.
+ *
+ * Returns the head of the list, or NULL if no entry was added.
+ */
+struct rfapi_next_hop_entry *rfapiRouteNode2NextHopList(
+	struct agg_node *rn, uint32_t lifetime, /* put into nexthop entries */
+	struct rfapi_ip_addr *exclude_vnaddr,	/* omit routes to same NVE */
+	struct agg_table *rfd_rib_table,	/* preload here */
+	struct prefix *pfx_target_original)	/* query target */
+{
+	struct rfapi_ip_prefix rprefix;
+	struct rfapi_next_hop_entry *answer = NULL;
+	struct rfapi_next_hop_entry *last = NULL;
+	struct agg_node *parent;
+	const struct prefix *p = agg_node_get_prefix(rn);
+	int count = 0;
+	struct agg_node *rib_rn;
+
+#ifdef DEBUG_RETURNED_NHL
+	/* %pRN is the node-prefix format used elsewhere in this file */
+	vnc_zlog_debug_verbose("%s: called with node pfx=%pRN", __func__, rn);
+	rfapiDebugBacktrace();
+#endif
+
+	rfapiQprefix2Rprefix(p, &rprefix);
+
+	rib_rn = rfd_rib_table ? agg_node_get(rfd_rib_table, p) : NULL;
+
+	/*
+	 * Add non-withdrawn routes at this node
+	 */
+	count = rfapiNhlAddNodeRoutes(rn, &rprefix, lifetime, 0, &answer, &last,
+				      exclude_vnaddr, rib_rn,
+				      pfx_target_original);
+
+	/*
+	 * If the list has at least one entry, only the subtree below this
+	 * node remains to be added; then it's finished
+	 */
+	if (count) {
+		count += rfapiNhlAddSubtree(rn, lifetime, &answer, &last, NULL,
+					    exclude_vnaddr, rfd_rib_table,
+					    pfx_target_original);
+		vnc_zlog_debug_verbose("%s: %d nexthops, answer=%p", __func__,
+				       count, answer);
+#ifdef DEBUG_RETURNED_NHL
+		rfapiPrintNhl(NULL, answer);
+#endif
+		if (rib_rn)
+			agg_unlock_node(rib_rn);
+		return answer;
+	}
+
+	/*
+	 * Add withdrawn routes at this node
+	 */
+	count = rfapiNhlAddNodeRoutes(rn, &rprefix, lifetime, 1, &answer, &last,
+				      exclude_vnaddr, rib_rn,
+				      pfx_target_original);
+	if (rib_rn)
+		agg_unlock_node(rib_rn);
+
+	// rfapiPrintNhl(NULL, answer);
+
+	/*
+	 * walk up the tree until we find a node with non-deleted
+	 * routes, then add them
+	 */
+	for (parent = agg_node_parent(rn); parent;
+	     parent = agg_node_parent(parent)) {
+		if (rfapiHasNonRemovedRoutes(parent)) {
+			break;
+		}
+	}
+
+	/*
+	 * Add non-withdrawn routes from less-specific prefix
+	 */
+	if (parent) {
+		const struct prefix *parent_pfx = agg_node_get_prefix(parent);
+
+		rib_rn = rfd_rib_table
+				 ? agg_node_get(rfd_rib_table, parent_pfx)
+				 : NULL;
+		rfapiQprefix2Rprefix(parent_pfx, &rprefix);
+		count += rfapiNhlAddNodeRoutes(parent, &rprefix, lifetime, 0,
+					       &answer, &last, exclude_vnaddr,
+					       rib_rn, pfx_target_original);
+		/* rn is passed as omit_node: its routes were handled above */
+		count += rfapiNhlAddSubtree(parent, lifetime, &answer, &last,
+					    rn, exclude_vnaddr, rfd_rib_table,
+					    pfx_target_original);
+		if (rib_rn)
+			agg_unlock_node(rib_rn);
+	} else {
+		/*
+		 * There is no parent with non-removed routes. Still need to
+		 * add subtree of original node if it contributed routes to the
+		 * answer.
+		 */
+		if (count)
+			count += rfapiNhlAddSubtree(rn, lifetime, &answer,
+						    &last, rn, exclude_vnaddr,
+						    rfd_rib_table,
+						    pfx_target_original);
+	}
+
+	vnc_zlog_debug_verbose("%s: %d nexthops, answer=%p", __func__, count,
+			       answer);
+#ifdef DEBUG_RETURNED_NHL
+	rfapiPrintNhl(NULL, answer);
+#endif
+	return answer;
+}
+
+/*
+ * Construct nexthop list of all routes in table
+ *
+ * The per-node lists produced by rfapiRouteNode2NextHopList() are chained
+ * together in table-walk order. Returns the head of the combined list
+ * (NULL when no node contributed an entry).
+ */
+struct rfapi_next_hop_entry *rfapiRouteTable2NextHopList(
+	struct agg_table *rt, uint32_t lifetime, /* put into nexthop entries */
+	struct rfapi_ip_addr *exclude_vnaddr,	 /* omit routes to same NVE */
+	struct agg_table *rfd_rib_table, /* preload this NVE rib table */
+	struct prefix *pfx_target_original) /* query target */
+{
+	struct rfapi_next_hop_entry *list_head = NULL;
+	struct rfapi_next_hop_entry *list_tail = NULL;
+	struct agg_node *rn;
+	int total = 0;
+
+	for (rn = agg_route_top(rt); rn; rn = agg_route_next(rn)) {
+		struct rfapi_next_hop_entry *node_list =
+			rfapiRouteNode2NextHopList(rn, lifetime,
+						   exclude_vnaddr,
+						   rfd_rib_table,
+						   pfx_target_original);
+
+		if (list_tail) {
+			list_tail->next = node_list;
+		} else {
+			list_head = list_tail = node_list;
+			if (list_tail)
+				total = 1;
+		}
+
+		/* move to the last entry, counting each step */
+		while (list_tail && list_tail->next) {
+			++total;
+			list_tail = list_tail->next;
+		}
+	}
+
+	vnc_zlog_debug_verbose("%s: returning %d routes", __func__, total);
+	return list_head;
+}
+
+/*
+ * Build the nexthop list for a single node of an L2 (ethernet) import
+ * table: non-holddown routes if there are any, otherwise the holddown
+ * routes. Returns the head of the list (NULL if empty).
+ *
+ * NOTE(review): the first pass hands NULL to rfapiNhlAddNodeRoutes() as
+ * exclude_vnaddr while the holddown pass hands the caller's value; the
+ * asymmetry is preserved here - confirm whether it is intentional.
+ */
+struct rfapi_next_hop_entry *rfapiEthRouteNode2NextHopList(
+	struct agg_node *rn, struct rfapi_ip_prefix *rprefix,
+	uint32_t lifetime,		      /* put into nexthop entries */
+	struct rfapi_ip_addr *exclude_vnaddr, /* omit routes to same NVE */
+	struct agg_table *rfd_rib_table,      /* preload NVE rib table */
+	struct prefix *pfx_target_original)   /* query target */
+{
+	struct rfapi_next_hop_entry *list_head = NULL;
+	struct rfapi_next_hop_entry *list_tail = NULL;
+	struct agg_node *rib_rn = NULL;
+	int added;
+
+	if (rfd_rib_table)
+		rib_rn = agg_node_get(rfd_rib_table, agg_node_get_prefix(rn));
+
+	added = rfapiNhlAddNodeRoutes(rn, rprefix, lifetime, 0, &list_head,
+				      &list_tail, NULL, rib_rn,
+				      pfx_target_original);
+
+#ifdef DEBUG_ENCAP_MONITOR
+	vnc_zlog_debug_verbose("%s: node %p: %d non-holddown routes", __func__,
+			       rn, added);
+#endif
+
+	if (added == 0) {
+		added = rfapiNhlAddNodeRoutes(rn, rprefix, lifetime, 1,
+					      &list_head, &list_tail,
+					      exclude_vnaddr, rib_rn,
+					      pfx_target_original);
+		vnc_zlog_debug_verbose("%s: node %p: %d holddown routes",
+				       __func__, rn, added);
+	}
+
+	if (rib_rn)
+		agg_unlock_node(rib_rn);
+
+#ifdef DEBUG_RETURNED_NHL
+	rfapiPrintNhl(NULL, list_head);
+#endif
+
+	return list_head;
+}
+
+
+/*
+ * Construct nexthop list of all routes in table
+ *
+ * The table walked is the AFI_L2VPN VPN table of the MAC import table
+ * returned by rfapiMacImportTableGet() for logical_net_id on the default
+ * BGP instance. Per-node lists from rfapiEthRouteNode2NextHopList() are
+ * chained in walk order; the head of the combined list is returned.
+ */
+struct rfapi_next_hop_entry *rfapiEthRouteTable2NextHopList(
+	uint32_t logical_net_id, struct rfapi_ip_prefix *rprefix,
+	uint32_t lifetime,		      /* put into nexthop entries */
+	struct rfapi_ip_addr *exclude_vnaddr, /* omit routes to same NVE */
+	struct agg_table *rfd_rib_table,      /* preload NVE rib node */
+	struct prefix *pfx_target_original)   /* query target */
+{
+	struct bgp *bgp = bgp_get_default();
+	struct rfapi_import_table *it =
+		rfapiMacImportTableGet(bgp, logical_net_id);
+	struct agg_table *rt = it->imported_vpn[AFI_L2VPN];
+	struct rfapi_next_hop_entry *list_head = NULL;
+	struct rfapi_next_hop_entry *list_tail = NULL;
+	struct agg_node *rn;
+	int total = 0;
+
+	for (rn = agg_route_top(rt); rn; rn = agg_route_next(rn)) {
+		struct rfapi_next_hop_entry *node_list =
+			rfapiEthRouteNode2NextHopList(
+				rn, rprefix, lifetime, exclude_vnaddr,
+				rfd_rib_table, pfx_target_original);
+
+		if (list_tail) {
+			list_tail->next = node_list;
+		} else {
+			list_head = list_tail = node_list;
+			if (list_tail)
+				total = 1;
+		}
+
+		/* move to the last entry, counting each step */
+		while (list_tail && list_tail->next) {
+			++total;
+			list_tail = list_tail->next;
+		}
+	}
+
+	vnc_zlog_debug_verbose("%s: returning %d routes", __func__, total);
+	return list_head;
+}
+
+/*
+ * Insert a new bpi to the imported route table node,
+ * keeping the list of BPIs sorted best route first
+ *
+ * The new path is placed in front of the first existing path for which
+ * any of the following holds: there is no default BGP instance, the new
+ * path is not removed while the existing one is, or
+ * bgp_path_info_cmp_compatible() returns -1 (new path is better).
+ * Otherwise it goes to the end of the list.
+ */
+static void rfapiBgpInfoAttachSorted(struct agg_node *rn,
+				     struct bgp_path_info *info_new, afi_t afi,
+				     safi_t safi)
+{
+	struct bgp *bgp = bgp_get_default(); /* assume 1 instance for now */
+	struct bgp_path_info *before = NULL;
+	struct bgp_path_info *after = rn->info;
+	char pfx_buf[PREFIX2STR_BUFFER] = {};
+
+	if (VNC_DEBUG(IMPORT_BI_ATTACH)) {
+		vnc_zlog_debug_verbose("%s: info_new->peer=%p", __func__,
+				       info_new->peer);
+		vnc_zlog_debug_verbose("%s: info_new->peer->su_remote=%p",
+				       __func__, info_new->peer->su_remote);
+	}
+
+	while (after) {
+		enum bgp_path_selection_reason reason;
+
+		if (!bgp)
+			break;
+
+		if (!CHECK_FLAG(info_new->flags, BGP_PATH_REMOVED)
+		    && CHECK_FLAG(after->flags, BGP_PATH_REMOVED))
+			break;
+
+		/* -1 if 1st is better */
+		if (bgp_path_info_cmp_compatible(bgp, info_new, after, pfx_buf,
+						 afi, safi, &reason)
+		    == -1)
+			break;
+
+		before = after;
+		after = after->next;
+	}
+	vnc_zlog_debug_verbose("%s: prev=%p, next=%p", __func__, before, after);
+
+	/* splice info_new between "before" and "after" */
+	if (before)
+		before->next = info_new;
+	else
+		rn->info = info_new;
+	info_new->prev = before;
+	info_new->next = after;
+	if (after)
+		after->prev = info_new;
+
+	bgp_attr_intern(info_new->attr);
+}
+
+/*
+ * Unlink a path from the node's doubly-linked path list. The path itself
+ * is not freed and its attr is not un-interned here.
+ */
+static void rfapiBgpInfoDetach(struct agg_node *rn, struct bgp_path_info *bpi)
+{
+	struct bgp_path_info *after = bpi->next;
+	struct bgp_path_info *before = bpi->prev;
+
+	if (after)
+		after->prev = before;
+
+	if (before)
+		before->next = after;
+	else
+		rn->info = after; /* bpi was the head of the list */
+}
+
+/*
+ * For L3-indexed import tables
+ *
+ * Skiplist comparison function: orders paths by peer pointer first and,
+ * for equal peers, by route distinguisher via vnc_prefix_cmp().
+ */
+static int rfapi_bi_peer_rd_cmp(const void *b1, const void *b2)
+{
+	const struct bgp_path_info *lhs = b1;
+	const struct bgp_path_info *rhs = b2;
+
+	/* different peers decide the order on their own */
+	if (lhs->peer != rhs->peer)
+		return (lhs->peer < rhs->peer) ? -1 : 1;
+
+	/* same peer: fall back to the RDs */
+	return vnc_prefix_cmp(
+		(const struct prefix *)&lhs->extra->vnc.import.rd,
+		(const struct prefix *)&rhs->extra->vnc.import.rd);
+}
+
+/*
+ * For L2-indexed import tables
+ * The BPIs in these tables should ALWAYS have an aux_prefix set because
+ * they arrive via IPv4 or IPv6 advertisements.
+ *
+ * Skiplist comparison function: orders by peer pointer, then RD, then
+ * aux_prefix. An aux_prefix with family AF_ETHERNET and prefixlen 1 on
+ * either side acts as a wildcard and compares equal to anything.
+ */
+static int rfapi_bi_peer_rd_aux_cmp(const void *b1, const void *b2)
+{
+	const struct bgp_path_info *lhs = b1;
+	const struct bgp_path_info *rhs = b2;
+	int rd_order;
+
+	/* different peers decide the order on their own */
+	if (lhs->peer != rhs->peer)
+		return (lhs->peer < rhs->peer) ? -1 : 1;
+
+	/* same peer: compare RDs next */
+	rd_order = vnc_prefix_cmp(
+		(const struct prefix *)&lhs->extra->vnc.import.rd,
+		(const struct prefix *)&rhs->extra->vnc.import.rd);
+	if (rd_order != 0)
+		return rd_order;
+
+	/*
+	 * L2 import tables can have multiple entries with the
+	 * same MAC address, same RD, but different L3 addresses.
+	 *
+	 * The AF=ethernet/prefixlen=1 aux_prefix is a magic value for an
+	 * explicit wildcard. It does not appear in bona fide table
+	 * entries, only in the "fake" bpi used as a search probe. Both
+	 * arguments are tested because the probe may be passed on either
+	 * side.
+	 */
+	if (lhs->extra->vnc.import.aux_prefix.family == AF_ETHERNET
+	    && lhs->extra->vnc.import.aux_prefix.prefixlen == 1)
+		return 0;
+	if (rhs->extra->vnc.import.aux_prefix.family == AF_ETHERNET
+	    && rhs->extra->vnc.import.aux_prefix.prefixlen == 1)
+		return 0;
+
+	return vnc_prefix_cmp(&lhs->extra->vnc.import.aux_prefix,
+			      &rhs->extra->vnc.import.aux_prefix);
+}
+
+
+/*
+ * Index on RD and Peer
+ *
+ * Add a path to the node's RD/peer skiplist index, creating the index on
+ * first use. The comparison function depends on the node's prefix
+ * family: AF_ETHERNET nodes also index on aux_prefix.
+ *
+ * The node is locked once when the skiplist is created and once more for
+ * every entry inserted.
+ */
+static void rfapiItBiIndexAdd(struct agg_node *rn, /* Import table VPN node */
+			      struct bgp_path_info *bpi) /* new BPI */
+{
+	struct skiplist *sl;
+	const struct prefix *p;
+	int rc;
+
+	assert(rn);
+	assert(bpi);
+	assert(bpi->extra);
+
+	vnc_zlog_debug_verbose("%s: bpi %p, peer %p, rd %pRDP", __func__, bpi,
+			       bpi->peer, &bpi->extra->vnc.import.rd);
+
+	sl = RFAPI_RDINDEX_W_ALLOC(rn);
+	if (!sl) {
+		p = agg_node_get_prefix(rn);
+		if (AF_ETHERNET == p->family) {
+			sl = skiplist_new(0, rfapi_bi_peer_rd_aux_cmp, NULL);
+		} else {
+			sl = skiplist_new(0, rfapi_bi_peer_rd_cmp, NULL);
+		}
+		RFAPI_IT_EXTRA_GET(rn)->u.vpn.idx_rd = sl;
+		agg_lock_node(rn); /* for skiplist */
+	}
+
+	/*
+	 * Keep the insert outside assert() so the side effect never depends
+	 * on how the assertion macro is compiled.
+	 */
+	rc = skiplist_insert(sl, (void *)bpi, (void *)bpi);
+	assert(!rc);
+	agg_lock_node(rn); /* for skiplist entry */
+
+	/* NB: BPIs in import tables are not refcounted */
+}
+
+/*
+ * Debug aid: log every entry of the node's RD/peer index (path pointer,
+ * peer pointer, RD and aux_prefix). Does nothing if the node has no
+ * index.
+ */
+static void rfapiItBiIndexDump(struct agg_node *rn)
+{
+	struct skiplist *index = RFAPI_RDINDEX(rn);
+	struct bgp_path_info *k;
+	struct bgp_path_info *v;
+	void *cursor = NULL;
+	int rc;
+
+	if (!index)
+		return;
+
+	rc = skiplist_next(index, (void **)&k, (void **)&v, &cursor);
+	while (!rc) {
+		char rd_str[RD_ADDRSTRLEN];
+		char aux_str[PREFIX_STRLEN];
+
+		prefix_rd2str(
+			&k->extra->vnc.import.rd, rd_str, sizeof(rd_str),
+			bgp_get_asnotation(k->peer ? k->peer->bgp : NULL));
+
+		if (k->extra->vnc.import.aux_prefix.family)
+			prefix2str(&k->extra->vnc.import.aux_prefix, aux_str,
+				   sizeof(aux_str));
+		else
+			strlcpy(aux_str, "(none)", sizeof(aux_str));
+
+		vnc_zlog_debug_verbose("bpi %p, peer %p, rd %s, aux_prefix %s",
+				       k, k->peer, rd_str, aux_str);
+
+		rc = skiplist_next(index, (void **)&k, (void **)&v, &cursor);
+	}
+}
+
+/*
+ * Find the path at an import-table node that matches the given RD and
+ * peer and, when aux_prefix is non-NULL, the given aux_prefix.
+ *
+ * Indexes with fewer than three entries are searched by walking the
+ * node's path list; larger ones by probing the skiplist with a fake path.
+ * A NULL aux_prefix is turned into the AF_ETHERNET/prefixlen 1 wildcard
+ * for the skiplist probe.
+ *
+ * Returns the matching path, or NULL if there is no index or no match.
+ */
+static struct bgp_path_info *rfapiItBiIndexSearch(
+	struct agg_node *rn, /* Import table VPN node */
+	struct prefix_rd *prd, struct peer *peer,
+	const struct prefix *aux_prefix) /* optional L3 addr for L2 ITs */
+{
+	struct bgp_path_info bpi_fake = {0};
+	struct bgp_path_info_extra bpi_extra = {0};
+	struct bgp_path_info *found;
+	struct skiplist *index = RFAPI_RDINDEX(rn);
+
+	if (!index)
+		return NULL;
+
+#ifdef DEBUG_BI_SEARCH
+	{
+		char buf_aux_pfx[PREFIX_STRLEN];
+
+		if (aux_prefix) {
+			prefix2str(aux_prefix, buf_aux_pfx,
+				   sizeof(buf_aux_pfx));
+		} else
+			strlcpy(buf_aux_pfx, "(nil)", sizeof(buf_aux_pfx));
+
+		vnc_zlog_debug_verbose(
+			"%s want prd=%pRDP, peer=%p, aux_prefix=%s", __func__,
+			prd, peer, buf_aux_pfx);
+		rfapiItBiIndexDump(rn);
+	}
+#endif
+
+	/* threshold is a WAG */
+	if (index->count < 3) {
+#ifdef DEBUG_BI_SEARCH
+		vnc_zlog_debug_verbose("%s: short list algorithm", __func__);
+#endif
+		/* if short list, linear search might be faster */
+		for (found = rn->info; found; found = found->next) {
+#ifdef DEBUG_BI_SEARCH
+			vnc_zlog_debug_verbose(
+				"%s: bpi has prd=%pRDP, peer=%p", __func__,
+				&found->extra->vnc.import.rd, found->peer);
+#endif
+			if (peer != found->peer)
+				continue;
+			if (prefix_cmp((struct prefix *)&found->extra->vnc
+					       .import.rd,
+				       (struct prefix *)prd))
+				continue;
+
+#ifdef DEBUG_BI_SEARCH
+			vnc_zlog_debug_verbose(
+				"%s: peer and RD same, doing aux_prefix check",
+				__func__);
+#endif
+			if (aux_prefix
+			    && prefix_cmp(aux_prefix,
+					  &found->extra->vnc.import.aux_prefix))
+				continue;
+
+#ifdef DEBUG_BI_SEARCH
+			vnc_zlog_debug_verbose("%s: match", __func__);
+#endif
+			return found;
+		}
+		return NULL;
+	}
+
+	/* build the probe used as skiplist search key */
+	bpi_extra.vnc.import.rd = *prd;
+	if (aux_prefix) {
+		bpi_extra.vnc.import.aux_prefix = *aux_prefix;
+	} else {
+		/* wildcard */
+		bpi_extra.vnc.import.aux_prefix.family = AF_ETHERNET;
+		bpi_extra.vnc.import.aux_prefix.prefixlen = 1;
+	}
+	bpi_fake.peer = peer;
+	bpi_fake.extra = &bpi_extra;
+
+	if (skiplist_search(index, (void *)&bpi_fake, (void *)&found)) {
+#ifdef DEBUG_BI_SEARCH
+		vnc_zlog_debug_verbose("%s: no match", __func__);
+#endif
+		return NULL;
+	}
+
+#ifdef DEBUG_BI_SEARCH
+	vnc_zlog_debug_verbose("%s: matched bpi=%p", __func__, found);
+#endif
+
+	return found;
+}
+
+/*
+ * Remove a path from the node's RD/peer skiplist index and release the
+ * node lock that was taken for the entry. The index must exist and must
+ * contain the path; on a failed delete the index is dumped before the
+ * assertion fires.
+ */
+static void rfapiItBiIndexDel(struct agg_node *rn, /* Import table VPN node */
+			      struct bgp_path_info *bpi) /* old BPI */
+{
+	struct skiplist *index;
+	int del_rc;
+
+	vnc_zlog_debug_verbose("%s: bpi %p, peer %p, rd %pRDP", __func__, bpi,
+			       bpi->peer, &bpi->extra->vnc.import.rd);
+
+	index = RFAPI_RDINDEX(rn);
+	assert(index);
+
+	del_rc = skiplist_delete(index, (void *)(bpi), (void *)bpi);
+	if (del_rc != 0)
+		rfapiItBiIndexDump(rn);
+	assert(!del_rc);
+
+	agg_unlock_node(rn); /* for skiplist entry */
+
+	/* NB: BPIs in import tables are not refcounted */
+}
+
+/*
+ * Add a backreference at the ENCAP node to the VPN route that
+ * refers to it
+ *
+ * The ENCAP node for p is looked up (or created) with agg_node_get(),
+ * which locks it. A new monitor is put at the head of that node's
+ * monitor list and also recorded in vpn_bpi->extra->vnc.import.hme.
+ */
+static void
+rfapiMonitorEncapAdd(struct rfapi_import_table *import_table,
+		     struct prefix *p,		    /* VN address */
+		     struct agg_node *vpn_rn,	    /* VPN node */
+		     struct bgp_path_info *vpn_bpi) /* VPN bpi/route */
+{
+	struct rfapi_monitor_encap *mon;
+	struct agg_node *encap_rn;
+	afi_t afi = family2afi(p->family);
+
+	assert(afi);
+	/* locks encap_rn */
+	encap_rn = agg_node_get(import_table->imported_encap[afi], p);
+	assert(encap_rn);
+
+	mon = XCALLOC(MTYPE_RFAPI_MONITOR_ENCAP, sizeof(*mon));
+	mon->node = vpn_rn;
+	mon->bpi = vpn_bpi;
+	mon->rn = encap_rn;
+
+	/* push onto the front of the encap node's monitor list */
+	mon->next = RFAPI_MONITOR_ENCAP(encap_rn);
+	if (mon->next)
+		mon->next->prev = mon;
+	RFAPI_MONITOR_ENCAP_W_ALLOC(encap_rn) = mon;
+
+	/* for easy lookup when deleting vpn route */
+	vpn_bpi->extra->vnc.import.hme = mon;
+
+	vnc_zlog_debug_verbose(
+		"%s: it=%p, vpn_bpi=%p, afi=%d, encap rn=%p, setting vpn_bpi->extra->vnc.import.hme=%p",
+		__func__, import_table, vpn_bpi, afi, encap_rn, mon);
+
+	RFAPI_CHECK_REFCOUNT(encap_rn, SAFI_ENCAP, 0);
+	bgp_attr_intern(vpn_bpi->attr);
+}
+
+/*
+ * Remove the encap monitor recorded for a VPN path, if there is one:
+ * unlink it from the ENCAP node's monitor list, prune the node's extra
+ * data if possible, release the node lock, free the monitor and clear
+ * the path's hme reference.
+ */
+static void rfapiMonitorEncapDelete(struct bgp_path_info *vpn_bpi)
+{
+	struct rfapi_monitor_encap *mon;
+
+	vnc_zlog_debug_verbose("%s: vpn_bpi=%p", __func__, vpn_bpi);
+
+	if (!vpn_bpi->extra)
+		return;
+
+	mon = vpn_bpi->extra->vnc.import.hme;
+	if (!mon)
+		return;
+
+	vnc_zlog_debug_verbose("%s: hme=%p", __func__, mon);
+
+	/*
+	 * Refcount checking takes too long here, so none is done before
+	 * or after the unlink.
+	 */
+	if (mon->next)
+		mon->next->prev = mon->prev;
+	if (mon->prev)
+		mon->prev->next = mon->next;
+	else
+		RFAPI_MONITOR_ENCAP_W_ALLOC(mon->rn) = mon->next;
+
+	/* see if the struct rfapi_it_extra is empty and can be freed */
+	rfapiMonitorExtraPrune(SAFI_ENCAP, mon->rn);
+
+	agg_unlock_node(mon->rn); /* decr ref count */
+	XFREE(MTYPE_RFAPI_MONITOR_ENCAP, mon);
+	vpn_bpi->extra->vnc.import.hme = NULL;
+}
+
+/*
+ * Timer callback for withdraw
+ *
+ * Final removal of a VPN path from an import table. The event argument
+ * is the struct rfapi_withdraw control block describing the path
+ * (wcb->info), its node (wcb->node) and its import table.
+ *
+ * In order, the callback: removes the path from the node's RD/peer index
+ * and path list and decrements the table's holddown count; notifies the
+ * exterior-route import code; unless response removal is disabled in the
+ * VNC config and unless an equivalent path remains at the node, reports
+ * the deletion to the RIB; removes the path's encap monitor; moves VPN
+ * monitors to a shorter prefix when needed; frees the path; updates the
+ * direct-BGP/zebra exports; and finally unlocks the node and frees the
+ * control block.
+ *
+ * If there is no default BGP instance, or it is being deleted, only the
+ * control block is freed.
+ */
+static void rfapiWithdrawTimerVPN(struct event *t)
+{
+ struct rfapi_withdraw *wcb = EVENT_ARG(t);
+ struct bgp_path_info *bpi = wcb->info;
+ struct bgp *bgp = bgp_get_default();
+ const struct prefix *p;
+ struct rfapi_monitor_vpn *moved;
+ afi_t afi;
+ bool early_exit = false;
+
+ if (bgp == NULL) {
+ vnc_zlog_debug_verbose(
+ "%s: NULL BGP pointer, assume shutdown race condition!!!",
+ __func__);
+ early_exit = true;
+ }
+ if (bgp && CHECK_FLAG(bgp->flags, BGP_FLAG_DELETE_IN_PROGRESS)) {
+ vnc_zlog_debug_verbose(
+ "%s: BGP delete in progress, assume shutdown race condition!!!",
+ __func__);
+ early_exit = true;
+ }
+
+ /*
+ * This callback is responsible for the withdraw object's memory,
+ * so it is freed even when nothing else is done
+ */
+ if (early_exit) {
+ XFREE(MTYPE_RFAPI_WITHDRAW, wcb);
+ return;
+ }
+
+ assert(wcb->node);
+ assert(bpi);
+ assert(wcb->import_table);
+ assert(bpi->extra);
+
+ RFAPI_CHECK_REFCOUNT(wcb->node, SAFI_MPLS_VPN, wcb->lockoffset);
+
+ vnc_zlog_debug_verbose("%s: removing bpi %p at prefix %pRN", __func__,
+ bpi, wcb->node);
+
+ /*
+ * Remove the route (doubly-linked)
+ *
+ * A path that is still flagged valid and is of an interior type
+ * stops counting toward the node's valid_interior_count first.
+ */
+ if (CHECK_FLAG(bpi->flags, BGP_PATH_VALID)
+ && VALID_INTERIOR_TYPE(bpi->type))
+ RFAPI_MONITOR_EXTERIOR(wcb->node)->valid_interior_count--;
+
+ p = agg_node_get_prefix(wcb->node);
+ afi = family2afi(p->family);
+ wcb->import_table->holddown_count[afi] -= 1; /* keep count consistent */
+ rfapiItBiIndexDel(wcb->node, bpi);
+ rfapiBgpInfoDetach(wcb->node, bpi); /* with removed bpi */
+
+ vnc_import_bgp_exterior_del_route_interior(bgp, wcb->import_table,
+ wcb->node, bpi);
+
+
+ /*
+ * If VNC is configured to send response remove messages, AND
+ * if the removed route had a UN address, do response removal
+ * processing.
+ */
+ if (!(bgp->rfapi_cfg->flags
+ & BGP_VNC_CONFIG_RESPONSE_REMOVAL_DISABLE)) {
+
+ int has_valid_duplicate = 0;
+ struct bgp_path_info *bpii;
+
+ /*
+ * First check if there are any OTHER routes at this node
+ * that have the same nexthop and a valid UN address. If
+ * there are (e.g., from other peers), then the route isn't
+ * really gone, so skip sending a response removal message.
+ * (bpi itself was already detached from the list above.)
+ */
+ for (bpii = wcb->node->info; bpii; bpii = bpii->next) {
+ if (rfapiVpnBiSamePtUn(bpi, bpii)) {
+ has_valid_duplicate = 1;
+ break;
+ }
+ }
+
+ vnc_zlog_debug_verbose("%s: has_valid_duplicate=%d", __func__,
+ has_valid_duplicate);
+
+ if (!has_valid_duplicate) {
+ rfapiRibPendingDeleteRoute(bgp, wcb->import_table, afi,
+ wcb->node);
+ }
+ }
+
+ rfapiMonitorEncapDelete(bpi);
+
+ /*
+ * If there are no VPN monitors at this VPN Node A,
+ * we are done
+ */
+ if (!RFAPI_MONITOR_VPN(wcb->node)) {
+ vnc_zlog_debug_verbose("%s: no VPN monitors at this node",
+ __func__);
+ goto done;
+ }
+
+ /*
+ * rfapiMonitorMoveShorter only moves monitors if there are
+ * no remaining valid routes at the current node
+ */
+ moved = rfapiMonitorMoveShorter(wcb->node, 1);
+
+ if (moved) {
+ rfapiMonitorMovedUp(wcb->import_table, wcb->node, moved->node,
+ moved);
+ }
+
+done:
+ /*
+ * Free VPN bpi
+ */
+ rfapiBgpInfoFree(bpi);
+ wcb->info = NULL;
+
+ /*
+ * If route count at this node has gone to 0, withdraw exported prefix
+ */
+ if (!wcb->node->info) {
+ /* see if the struct rfapi_it_extra is empty and can be freed */
+ rfapiMonitorExtraPrune(SAFI_MPLS_VPN, wcb->node);
+ vnc_direct_bgp_del_prefix(bgp, wcb->import_table, wcb->node);
+ vnc_zebra_del_prefix(bgp, wcb->import_table, wcb->node);
+ } else {
+ /*
+ * nexthop change event
+ * vnc_direct_bgp_add_prefix() will recompute the VN addr
+ * ecommunity
+ */
+ vnc_direct_bgp_add_prefix(bgp, wcb->import_table, wcb->node);
+ }
+
+ RFAPI_CHECK_REFCOUNT(wcb->node, SAFI_MPLS_VPN, 1 + wcb->lockoffset);
+ agg_unlock_node(wcb->node); /* decr ref count */
+ XFREE(MTYPE_RFAPI_WITHDRAW, wcb);
+}
+
+/*
+ * This works for multiprotocol extension, but not for plain ol'
+ * unicast IPv4 because that nexthop is stored in attr->nexthop
+ *
+ * Convert the attribute's MP nexthop into a host prefix. *p is zeroed
+ * first; for a nexthop family other than AF_INET/AF_INET6 only the
+ * family field is set.
+ */
+void rfapiNexthop2Prefix(struct attr *attr, struct prefix *p)
+{
+	assert(p);
+	assert(attr);
+
+	memset(p, 0, sizeof(*p));
+	p->family = BGP_MP_NEXTHOP_FAMILY(attr->mp_nexthop_len);
+
+	switch (p->family) {
+	case AF_INET:
+		p->prefixlen = IPV4_MAX_BITLEN;
+		p->u.prefix4 = attr->mp_nexthop_global_in;
+		break;
+
+	case AF_INET6:
+		p->prefixlen = IPV6_MAX_BITLEN;
+		p->u.prefix6 = attr->mp_nexthop_global;
+		break;
+
+	default:
+		vnc_zlog_debug_verbose("%s: Family is unknown = %d", __func__,
+				       p->family);
+	}
+}
+
+/*
+ * Convert a route's next hop into a host prefix.
+ *
+ * For AFI_IP the plain unicast next hop (attr->nexthop) is used; for any
+ * other AFI the multiprotocol next hop is converted by
+ * rfapiNexthop2Prefix().
+ *
+ * *p is zeroed first on both paths (rfapiNexthop2Prefix() does its own
+ * memset), so the bytes of the address union beyond the IPv4 address
+ * never hold stale caller data.
+ */
+void rfapiUnicastNexthop2Prefix(afi_t afi, struct attr *attr, struct prefix *p)
+{
+	if (afi == AFI_IP) {
+		memset(p, 0, sizeof(*p));
+		p->family = AF_INET;
+		p->prefixlen = IPV4_MAX_BITLEN;
+		p->u.prefix4 = attr->nexthop;
+	} else {
+		rfapiNexthop2Prefix(attr, p);
+	}
+}
+
+/*
+ * Compare the addresses of two nexthop prefixes.
+ *
+ * Returns 0 when both prefixes have the same family (AF_INET or
+ * AF_INET6) and the same address, 1 otherwise. A NULL argument, a family
+ * mismatch or an unsupported family all count as "different".
+ */
+static int rfapiAttrNexthopAddrDifferent(struct prefix *p1, struct prefix *p2)
+{
+	if (!p1 || !p2) {
+		vnc_zlog_debug_verbose("%s: p1 or p2 is NULL", __func__);
+		return 1;
+	}
+
+	/*
+	 * Are address families the same?
+	 */
+	if (p1->family != p2->family) {
+		return 1;
+	}
+
+	switch (p1->family) {
+	case AF_INET:
+		if (IPV4_ADDR_SAME(&p1->u.prefix4, &p2->u.prefix4))
+			return 0;
+		break;
+
+	case AF_INET6:
+		if (IPV6_ADDR_SAME(&p1->u.prefix6, &p2->u.prefix6))
+			return 0;
+		break;
+
+	default:
+		/*
+		 * The former assert(1) here could never fire; log the
+		 * unexpected family instead and report "different".
+		 */
+		vnc_zlog_debug_verbose("%s: unsupported address family %d",
+				       __func__, p1->family);
+		break;
+	}
+
+	return 1;
+}
+
+/*
+ * Cache the UN address carried by an ENCAP path (its MP nexthop) in the
+ * import data of a VPN path. An ENCAP nexthop that is neither IPv4 nor
+ * IPv6 is logged and leaves the cached family set to AF_UNSPEC. Nothing
+ * is done when the VPN path or its "extra" block is missing.
+ */
+static void rfapiCopyUnEncap2VPN(struct bgp_path_info *encap_bpi,
+				 struct bgp_path_info *vpn_bpi)
+{
+	int nh_family;
+
+	if (!vpn_bpi || !vpn_bpi->extra) {
+		zlog_warn("%s: no vpn bpi attr/extra, can't copy UN address",
+			  __func__);
+		return;
+	}
+
+	nh_family = BGP_MP_NEXTHOP_FAMILY(encap_bpi->attr->mp_nexthop_len);
+
+	if (nh_family == AF_INET) {
+		/*
+		 * instrumentation to debug segfault of 091127
+		 */
+		vnc_zlog_debug_verbose("%s: vpn_bpi=%p", __func__, vpn_bpi);
+		vnc_zlog_debug_verbose("%s: vpn_bpi->extra=%p", __func__,
+				       vpn_bpi->extra);
+
+		vpn_bpi->extra->vnc.import.un_family = AF_INET;
+		vpn_bpi->extra->vnc.import.un.addr4 =
+			encap_bpi->attr->mp_nexthop_global_in;
+	} else if (nh_family == AF_INET6) {
+		vpn_bpi->extra->vnc.import.un_family = AF_INET6;
+		vpn_bpi->extra->vnc.import.un.addr6 =
+			encap_bpi->attr->mp_nexthop_global;
+	} else {
+		zlog_warn("%s: invalid encap nexthop length: %d", __func__,
+			  encap_bpi->attr->mp_nexthop_len);
+		vpn_bpi->extra->vnc.import.un_family = AF_UNSPEC;
+	}
+}
+
+/*
+ * Refresh the UN address cached in a VPN path after the ENCAP path list
+ * it depends on has changed.
+ *
+ * encap_bpi != NULL: copy its UN address into vpn_bpi and, if the VPN
+ * path was not valid, mark it valid and signal the interior route
+ * addition to import-exterior.
+ *
+ * encap_bpi == NULL: clear the cached UN address; if the VPN path was
+ * valid and rfapiGetVncTunnelUnAddr() fails for its attr, mark it invalid
+ * and signal the interior route withdrawal to import-exterior.
+ *
+ * returns 0 on success, nonzero on error
+ */
+static int
+rfapiWithdrawEncapUpdateCachedUn(struct rfapi_import_table *import_table,
+				 struct bgp_path_info *encap_bpi,
+				 struct agg_node *vpn_rn,
+				 struct bgp_path_info *vpn_bpi)
+{
+	if (encap_bpi) {
+		if (!vpn_bpi) {
+			zlog_warn("%s: missing VPN bpi, can't clear UN addr",
+				  __func__);
+			return 1;
+		}
+
+		rfapiCopyUnEncap2VPN(encap_bpi, vpn_bpi);
+
+		if (CHECK_FLAG(vpn_bpi->flags, BGP_PATH_VALID))
+			return 0;
+
+		SET_FLAG(vpn_bpi->flags, BGP_PATH_VALID);
+		if (VALID_INTERIOR_TYPE(vpn_bpi->type))
+			RFAPI_MONITOR_EXTERIOR(vpn_rn)->valid_interior_count++;
+
+		/* signal interior route addition to import-exterior */
+		vnc_import_bgp_exterior_add_route_interior(
+			bgp_get_default(), import_table, vpn_rn, vpn_bpi);
+		return 0;
+	}
+
+	/*
+	 * No ENCAP path left: clear cached UN address
+	 */
+	if (!vpn_bpi || !vpn_bpi->extra) {
+		zlog_warn("%s: missing VPN bpi/extra, can't clear UN addr",
+			  __func__);
+		return 1;
+	}
+
+	vpn_bpi->extra->vnc.import.un_family = AF_UNSPEC;
+	memset(&vpn_bpi->extra->vnc.import.un, 0,
+	       sizeof(vpn_bpi->extra->vnc.import.un));
+
+	if (!CHECK_FLAG(vpn_bpi->flags, BGP_PATH_VALID))
+		return 0;
+
+	/* a zero return keeps the path valid */
+	if (!rfapiGetVncTunnelUnAddr(vpn_bpi->attr, NULL))
+		return 0;
+
+	UNSET_FLAG(vpn_bpi->flags, BGP_PATH_VALID);
+	if (VALID_INTERIOR_TYPE(vpn_bpi->type))
+		RFAPI_MONITOR_EXTERIOR(vpn_rn)->valid_interior_count--;
+
+	/* signal interior route withdrawal to import-exterior */
+	vnc_import_bgp_exterior_del_route_interior(
+		bgp_get_default(), import_table, vpn_rn, vpn_bpi);
+
+	return 0;
+}
+
+/*
+ * Service routine that disposes of an ENCAP route whose holddown has ended.
+ * It is scheduled as a timer callback by rfapiBiStartWithdrawTimer() and is
+ * also invoked directly with a fabricated event by rfapiExpireEncapNow().
+ *
+ * The event argument is a struct rfapi_withdraw naming the import table,
+ * the ENCAP node and the route. The route is detached and freed. If it was
+ * the first route at the node, every VPN route monitoring the node has its
+ * cached UN address refreshed from the new first route (possibly none) and
+ * the affected VPN nodes are notified.
+ *
+ * Drops one reference on the node and frees the struct rfapi_withdraw.
+ */
+static void rfapiWithdrawTimerEncap(struct event *t)
+{
+	struct rfapi_withdraw *wcb = EVENT_ARG(t);
+	struct bgp_path_info *expiring = wcb->info;
+	struct skiplist *touched_vpn_nodes = skiplist_new(0, NULL, NULL);
+	struct rfapi_monitor_encap *mon;
+	struct agg_node *vpn_node;
+	int head_removed;
+
+	assert(wcb->node);
+	assert(expiring);
+	assert(wcb->import_table);
+
+	RFAPI_CHECK_REFCOUNT(wcb->node, SAFI_ENCAP, 0);
+
+	/* must be sampled before the route is unlinked */
+	head_removed = (wcb->node->info == expiring) ? 1 : 0;
+
+	rfapiBgpInfoDetach(wcb->node, expiring);
+	rfapiBgpInfoFree(expiring);
+
+	if (head_removed) {
+		/*
+		 * Push the new head of the ENCAP chain (NULL if the chain
+		 * is now empty) into each monitoring VPN route and collect
+		 * the VPN nodes that were updated. The skiplist is keyed
+		 * by node, so each node is recorded once.
+		 */
+		for (mon = RFAPI_MONITOR_ENCAP(wcb->node); mon;
+		     mon = mon->next) {
+			if (rfapiWithdrawEncapUpdateCachedUn(
+				    wcb->import_table, wcb->node->info,
+				    mon->node, mon->bpi)
+			    == 0)
+				skiplist_insert(touched_vpn_nodes, mon->node,
+						mon->node);
+		}
+
+		/*
+		 * Drain the list, notifying each collected VPN node.
+		 */
+		while (skiplist_first(touched_vpn_nodes, (void **)&vpn_node,
+				      NULL)
+		       == 0) {
+			if (wcb->node->info) {
+				rfapiMonitorItNodeChanged(wcb->import_table,
+							  vpn_node, NULL);
+			} else {
+				struct rfapi_monitor_vpn *moved =
+					rfapiMonitorMoveShorter(vpn_node, 0);
+
+				if (moved)
+					rfapiMonitorMovedUp(wcb->import_table,
+							    vpn_node,
+							    moved->node, moved);
+			}
+			skiplist_delete_first(touched_vpn_nodes);
+		}
+	}
+
+	RFAPI_CHECK_REFCOUNT(wcb->node, SAFI_ENCAP, 1);
+	agg_unlock_node(wcb->node); /* decr ref count */
+	XFREE(MTYPE_RFAPI_WITHDRAW, wcb);
+	skiplist_free(touched_vpn_nodes);
+}
+
+
+/*
+ * Place a route in holddown: flag it BGP_PATH_REMOVED and schedule
+ * timer_service_func to run after the holddown time computed from the
+ * route's VNC lifetime.
+ *
+ * Used for both VPN and ENCAP routes; the caller passes the service routine
+ * that matches the SAFI. The routine receives a struct rfapi_withdraw that
+ * carries import_table, rn and bpi, and is supposed to free it.
+ *
+ * A route already flagged BGP_PATH_REMOVED is left untouched.
+ */
+static void
+rfapiBiStartWithdrawTimer(struct rfapi_import_table *import_table,
+			  struct agg_node *rn, struct bgp_path_info *bpi,
+			  afi_t afi, safi_t safi,
+			  void (*timer_service_func)(struct event *))
+{
+	/* running sub-second offset, kept across calls */
+	static uint32_t jitter;
+
+	struct rfapi_withdraw *wcb;
+	struct bgp_path_info *head;
+	uint32_t holddown;
+
+	if (CHECK_FLAG(bpi->flags, BGP_PATH_REMOVED)) {
+		/* a timer should already be pending for this route */
+		vnc_zlog_debug_verbose(
+			"%s: already being withdrawn, do nothing", __func__);
+		return;
+	}
+
+	rfapiGetVncLifetime(bpi->attr, &holddown);
+	vnc_zlog_debug_verbose("%s: VNC lifetime is %u", __func__, holddown);
+
+	/* withdrawn routes linger until the timer fires */
+	SET_FLAG(bpi->flags, BGP_PATH_REMOVED);
+
+	holddown = rfapiGetHolddownFromLifetime(holddown);
+	vnc_zlog_debug_verbose("%s: using timeout %u", __func__, holddown);
+
+	/* context handed to the timer service routine */
+	wcb = XCALLOC(MTYPE_RFAPI_WITHDRAW, sizeof(*wcb));
+	wcb->import_table = import_table;
+	wcb->node = rn;
+	wcb->info = bpi;
+	/* NOTE(review): return value is discarded here - confirm intent */
+	bgp_attr_intern(bpi->attr);
+
+	if (VNC_DEBUG(VERBOSE)) {
+		vnc_zlog_debug_verbose(
+			"%s: wcb values: node=%p, info=%p, import_table=%p (bpi follows)",
+			__func__, wcb->node, wcb->info, wcb->import_table);
+		rfapiPrintBi(NULL, bpi);
+	}
+
+	assert(bpi->extra);
+	bpi->extra->vnc.import.timer = NULL;
+
+	if (holddown <= UINT32_MAX / 1001) {
+		uint32_t holddown_msec;
+
+		/*
+		 * Offset successive timers by a millisecond each so they
+		 * are spread out and sortable in the skip list.
+		 */
+		jitter++;
+		if (jitter >= 1000)
+			jitter = 0;
+
+		holddown_msec = (holddown * 1000) + jitter;
+		event_add_timer_msec(bm->master, timer_service_func, wcb,
+				     holddown_msec,
+				     &bpi->extra->vnc.import.timer);
+	} else {
+		/*
+		 * Too large to express in milliseconds without overflow:
+		 * fall back to whole seconds (not expected in practice).
+		 */
+		event_add_timer(bm->master, timer_service_func, wcb, holddown,
+				&bpi->extra->vnc.import.timer);
+	}
+
+	/* re-sort route list (BGP_PATH_REMOVED routes are last) */
+	head = rn->info;
+	if (head->next) {
+		rfapiBgpInfoDetach(rn, bpi);
+		rfapiBgpInfoAttachSorted(rn, bpi, afi, safi);
+	}
+}
+
+/*
+ * Function type shared by the per-SAFI filtered-import handlers
+ * (rfapiBgpInfoFilteredImportVPN, rfapiBgpInfoFilteredImportEncap and
+ * rfapiBgpInfoFilteredImportBadSafi). rfapiBgpInfoFilteredImportFunction()
+ * returns a pointer to a function of this type.
+ *
+ * action is one of the FIF_ACTION_* values; the remaining arguments
+ * describe the route being updated or withdrawn.
+ */
+typedef void(rfapi_bi_filtered_import_f)(struct rfapi_import_table *table,
+ int action, struct peer *peer,
+ void *rfd, const struct prefix *prefix,
+ const struct prefix *aux_prefix,
+ afi_t afi, struct prefix_rd *prd,
+ struct attr *attr, uint8_t type,
+ uint8_t sub_type, uint32_t *label);
+
+
+/*
+ * Run the ENCAP withdraw-timer service routine right away for bpi at rn,
+ * as though its holddown timer had just fired.
+ *
+ * The struct rfapi_withdraw allocated here is freed by
+ * rfapiWithdrawTimerEncap().
+ */
+static void rfapiExpireEncapNow(struct rfapi_import_table *it,
+				struct agg_node *rn, struct bgp_path_info *bpi)
+{
+	struct event fake_timer;
+	struct rfapi_withdraw *expiry;
+
+	memset(&fake_timer, 0, sizeof(fake_timer));
+
+	expiry = XCALLOC(MTYPE_RFAPI_WITHDRAW, sizeof(*expiry));
+	expiry->import_table = it;
+	expiry->node = rn;
+	expiry->info = bpi;
+
+	/* the service routine reads only the event argument */
+	fake_timer.arg = expiry;
+	rfapiWithdrawTimerEncap(&fake_timer); /* frees expiry */
+}
+
+/*
+ * Convert the MP nexthop of attr into a host prefix.
+ *
+ * The address family is derived from attr->mp_nexthop_len. On success
+ * *prefix holds the global nexthop with a full-length mask and 0 is
+ * returned. For any other nexthop length *prefix is left unmodified and
+ * EINVAL is returned.
+ */
+static int rfapiGetNexthop(struct attr *attr, struct prefix *prefix)
+{
+	const int nh_family = BGP_MP_NEXTHOP_FAMILY(attr->mp_nexthop_len);
+
+	if (nh_family == AF_INET) {
+		prefix->family = AF_INET;
+		prefix->prefixlen = IPV4_MAX_BITLEN;
+		prefix->u.prefix4 = attr->mp_nexthop_global_in;
+		return 0;
+	}
+
+	if (nh_family == AF_INET6) {
+		prefix->family = AF_INET6;
+		prefix->prefixlen = IPV6_MAX_BITLEN;
+		prefix->u.prefix6 = attr->mp_nexthop_global;
+		return 0;
+	}
+
+	vnc_zlog_debug_verbose("%s: unknown attr->mp_nexthop_len %d", __func__,
+			       attr->mp_nexthop_len);
+	return EINVAL;
+}
+
+/*
+ * import a bgp_path_info if its route target list intersects with the
+ * import table's route target list
+ *
+ * Handler for ENCAP-SAFI routes. action is one of:
+ *   FIF_ACTION_UPDATE    add the route, replacing an existing route with
+ *                        the same RD and peer
+ *   FIF_ACTION_WITHDRAW  start the holddown timer on the matching route
+ *   FIF_ACTION_KILL      remove the matching route immediately
+ * Any other value trips an assertion.
+ *
+ * Only updates are filtered on route targets and required to carry a
+ * nexthop; for withdraw and kill, RD and peer identify the route.
+ *
+ * After an update, holddown routes at the same node whose nexthop equals
+ * the new route's nexthop are expired at once. If the nexthop of the first
+ * route at the node changed, the VPN routes monitoring this node have
+ * their cached UN address refreshed and their nodes are notified.
+ *
+ * aux_prefix and label are not used for ENCAP routes.
+ */
+static void rfapiBgpInfoFilteredImportEncap(
+	struct rfapi_import_table *import_table, int action, struct peer *peer,
+	void *rfd, /* set for looped back routes */
+	const struct prefix *p,
+	const struct prefix *aux_prefix, /* Unused for encap routes */
+	afi_t afi, struct prefix_rd *prd,
+	struct attr *attr, /* part of bgp_path_info */
+	uint8_t type,	   /* part of bgp_path_info */
+	uint8_t sub_type,  /* part of bgp_path_info */
+	uint32_t *label)   /* part of bgp_path_info */
+{
+	struct agg_table *rt = NULL;
+	struct agg_node *rn;
+	struct bgp_path_info *info_new;
+	struct bgp_path_info *bpi;
+	struct bgp_path_info *next;
+	char buf[BUFSIZ];
+
+	struct prefix p_firstbpi_old;
+	struct prefix p_firstbpi_new;
+	int replacing = 0;
+	const char *action_str = NULL;
+	struct prefix un_prefix;
+
+	struct bgp *bgp;
+	bgp = bgp_get_default(); /* assume 1 instance for now */
+
+	switch (action) {
+	case FIF_ACTION_UPDATE:
+		action_str = "update";
+		break;
+	case FIF_ACTION_WITHDRAW:
+		action_str = "withdraw";
+		break;
+	case FIF_ACTION_KILL:
+		action_str = "kill";
+		break;
+	default:
+		assert(0);
+		break;
+	}
+
+	vnc_zlog_debug_verbose(
+		"%s: entry: %s: prefix %s/%d", __func__, action_str,
+		inet_ntop(p->family, &p->u.prefix, buf, sizeof(buf)),
+		p->prefixlen);
+
+	memset(&p_firstbpi_old, 0, sizeof(p_firstbpi_old));
+	memset(&p_firstbpi_new, 0, sizeof(p_firstbpi_new));
+
+	if (action == FIF_ACTION_UPDATE) {
+		/*
+		 * Compare rt lists. If no intersection, don't import this route
+		 * On a withdraw, peer and RD are sufficient to determine if
+		 * we should act.
+		 */
+		if (!attr || !bgp_attr_get_ecommunity(attr)) {
+
+			vnc_zlog_debug_verbose(
+				"%s: attr, extra, or ecommunity missing, not importing",
+				__func__);
+			return;
+		}
+#ifdef RFAPI_REQUIRE_ENCAP_BEEC
+		if (!rfapiEcommunitiesMatchBeec(
+			    bgp_attr_get_ecommunity(attr))) {
+			vnc_zlog_debug_verbose(
+				"%s: it=%p: no match for BGP Encapsulation ecommunity",
+				__func__, import_table);
+			return;
+		}
+#endif
+		if (!rfapiEcommunitiesIntersect(
+			    import_table->rt_import_list,
+			    bgp_attr_get_ecommunity(attr))) {
+
+			vnc_zlog_debug_verbose(
+				"%s: it=%p: no ecommunity intersection",
+				__func__, import_table);
+			return;
+		}
+
+		/*
+		 * Updates must also have a nexthop address
+		 */
+		memset(&un_prefix, 0,
+		       sizeof(un_prefix)); /* keep valgrind happy */
+		if (rfapiGetNexthop(attr, &un_prefix)) {
+			vnc_zlog_debug_verbose("%s: missing nexthop address",
+					       __func__);
+			return;
+		}
+	}
+
+	/*
+	 * Figure out which radix tree the route would go into
+	 */
+	switch (afi) {
+	case AFI_IP:
+	case AFI_IP6:
+		rt = import_table->imported_encap[afi];
+		break;
+
+	case AFI_UNSPEC:
+	case AFI_L2VPN:
+	case AFI_MAX:
+		flog_err(EC_LIB_DEVELOPMENT, "%s: bad afi %d", __func__, afi);
+		return;
+	}
+
+	/*
+	 * agg_node_lookup returns a node only if there is at least
+	 * one route attached.
+	 */
+	rn = agg_node_lookup(rt, p);
+
+#ifdef DEBUG_ENCAP_MONITOR
+	vnc_zlog_debug_verbose("%s: initial encap lookup(it=%p) rn=%p",
+			       __func__, import_table, rn);
+#endif
+
+	if (rn) {
+
+		RFAPI_CHECK_REFCOUNT(rn, SAFI_ENCAP, 1);
+		agg_unlock_node(rn); /* undo lock in agg_node_lookup */
+
+
+		/*
+		 * capture nexthop of first bpi
+		 */
+		if (rn->info) {
+			rfapiNexthop2Prefix(
+				((struct bgp_path_info *)(rn->info))->attr,
+				&p_firstbpi_old);
+		}
+
+		for (bpi = rn->info; bpi; bpi = bpi->next) {
+
+			/*
+			 * Does this bgp_path_info refer to the same route
+			 * as we are trying to add?
+			 */
+			vnc_zlog_debug_verbose("%s: comparing BPI %p", __func__,
+					       bpi);
+
+
+			/*
+			 * Compare RDs
+			 *
+			 * RD of import table bpi is in
+			 * bpi->extra->vnc.import.rd RD of info_orig is in prd
+			 */
+			if (!bpi->extra) {
+				vnc_zlog_debug_verbose("%s: no bpi->extra",
+						       __func__);
+				continue;
+			}
+			if (prefix_cmp(
+				    (struct prefix *)&bpi->extra->vnc.import.rd,
+				    (struct prefix *)prd)) {
+
+				vnc_zlog_debug_verbose("%s: prd does not match",
+						       __func__);
+				continue;
+			}
+
+			/*
+			 * Compare peers
+			 */
+			if (bpi->peer != peer) {
+				vnc_zlog_debug_verbose(
+					"%s: peer does not match", __func__);
+				continue;
+			}
+
+			vnc_zlog_debug_verbose("%s: found matching bpi",
+					       __func__);
+
+			/* Same route. Delete this bpi, replace with new one */
+
+			if (action == FIF_ACTION_WITHDRAW) {
+
+				vnc_zlog_debug_verbose(
+					"%s: withdrawing at prefix %pRN",
+					__func__, rn);
+
+				rfapiBiStartWithdrawTimer(
+					import_table, rn, bpi, afi, SAFI_ENCAP,
+					rfapiWithdrawTimerEncap);
+
+			} else {
+				vnc_zlog_debug_verbose(
+					"%s: %s at prefix %pRN", __func__,
+					((action == FIF_ACTION_KILL)
+						 ? "killing"
+						 : "replacing"),
+					rn);
+
+				/*
+				 * If this route is waiting to be deleted
+				 * because of
+				 * a previous withdraw, we must cancel its
+				 * timer.
+				 */
+				if (CHECK_FLAG(bpi->flags, BGP_PATH_REMOVED)
+				    && bpi->extra->vnc.import.timer) {
+					struct rfapi_withdraw *wcb = EVENT_ARG(
+						bpi->extra->vnc.import.timer);
+
+					XFREE(MTYPE_RFAPI_WITHDRAW, wcb);
+					EVENT_OFF(bpi->extra->vnc.import.timer);
+				}
+
+				if (action == FIF_ACTION_UPDATE) {
+					rfapiBgpInfoDetach(rn, bpi);
+					rfapiBgpInfoFree(bpi);
+					replacing = 1;
+				} else {
+					/*
+					 * Kill: do export stuff when removing
+					 * bpi. Same steps the holddown
+					 * clean-up below uses.
+					 */
+					rfapiExpireEncapNow(import_table, rn,
+							    bpi);
+				}
+			}
+
+			break;
+		}
+	}
+
+	if (rn)
+		RFAPI_CHECK_REFCOUNT(rn, SAFI_ENCAP, replacing ? 1 : 0);
+
+	/* withdraw and kill are finished; only update continues */
+	if (action == FIF_ACTION_WITHDRAW || action == FIF_ACTION_KILL)
+		return;
+
+	info_new =
+		rfapiBgpInfoCreate(attr, peer, rfd, prd, type, sub_type, NULL);
+
+	if (rn) {
+		if (!replacing)
+			agg_lock_node(rn); /* incr ref count for new BPI */
+	} else {
+		rn = agg_node_get(rt, p);
+	}
+
+	vnc_zlog_debug_verbose("%s: (afi=%d, rn=%p) inserting at prefix %pRN",
+			       __func__, afi, rn, rn);
+
+	rfapiBgpInfoAttachSorted(rn, info_new, afi, SAFI_ENCAP);
+
+	/*
+	 * Delete holddown routes from same NVE. See details in
+	 * rfapiBgpInfoFilteredImportVPN()
+	 */
+	for (bpi = info_new->next; bpi; bpi = next) {
+
+		struct prefix pfx_un;
+
+		/* bpi is freed by the expiry below; fetch successor first */
+		next = bpi->next;
+		if (!CHECK_FLAG(bpi->flags, BGP_PATH_REMOVED))
+			continue;
+
+		/*
+		 * We already match the VN address (it is the prefix
+		 * of the route node)
+		 */
+		if (rfapiGetNexthop(bpi->attr, &pfx_un)
+		    || !prefix_same(&pfx_un, &un_prefix))
+			continue;
+
+		vnc_zlog_debug_verbose(
+			"%s: removing holddown bpi matching NVE of new route",
+			__func__);
+		/*
+		 * The matching loop above treats a bpi without "extra" as
+		 * possible, so guard the dereference here as well.
+		 */
+		if (bpi->extra && bpi->extra->vnc.import.timer) {
+			struct rfapi_withdraw *wcb =
+				EVENT_ARG(bpi->extra->vnc.import.timer);
+
+			XFREE(MTYPE_RFAPI_WITHDRAW, wcb);
+			EVENT_OFF(bpi->extra->vnc.import.timer);
+		}
+		rfapiExpireEncapNow(import_table, rn, bpi);
+	}
+
+	rfapiNexthop2Prefix(((struct bgp_path_info *)(rn->info))->attr,
+			    &p_firstbpi_new);
+
+	/*
+	 * If the nexthop address of the selected Encap route (i.e.,
+	 * the UN address) has changed, then we must update the VPN
+	 * routes that refer to this Encap route and possibly force
+	 * rfapi callbacks.
+	 */
+	if (rfapiAttrNexthopAddrDifferent(&p_firstbpi_old, &p_firstbpi_new)) {
+
+		struct rfapi_monitor_encap *m;
+		struct rfapi_monitor_encap *mnext;
+
+		struct agg_node *referenced_vpn_prefix;
+
+		/*
+		 * Optimized approach: build radix tree on the fly to
+		 * hold list of VPN nodes referenced by the ENCAP monitors
+		 *
+		 * The nodes in this table correspond to prefixes of VPN routes.
+		 * The "info" pointer of the node points to a chain of
+		 * struct rfapi_monitor_encap, each of which refers to a
+		 * specific VPN node.
+		 */
+		struct agg_table *referenced_vpn_table;
+
+		referenced_vpn_table = agg_table_init();
+
+		/*
+		 * iterate over the set of monitors at this ENCAP node.
+		 */
+#ifdef DEBUG_ENCAP_MONITOR
+		vnc_zlog_debug_verbose("%s: examining monitors at rn=%p",
+				       __func__, rn);
+#endif
+		for (m = RFAPI_MONITOR_ENCAP(rn); m; m = m->next) {
+			/* named so it does not shadow the parameter "p" */
+			const struct prefix *vpn_pfx;
+
+			/*
+			 * For each referenced bpi/route, copy the ENCAP route's
+			 * nexthop to the VPN route's cached UN address field
+			 * and set
+			 * the address family of the cached UN address field.
+			 *
+			 * NOTE(review): the copy source is info_new, while
+			 * the change test above compares the first route at
+			 * the node (rn->info) - confirm these are always
+			 * the same route here.
+			 */
+			rfapiCopyUnEncap2VPN(info_new, m->bpi);
+			if (!CHECK_FLAG(m->bpi->flags, BGP_PATH_VALID)) {
+				SET_FLAG(m->bpi->flags, BGP_PATH_VALID);
+				if (VALID_INTERIOR_TYPE(m->bpi->type))
+					RFAPI_MONITOR_EXTERIOR(m->node)
+						->valid_interior_count++;
+				vnc_import_bgp_exterior_add_route_interior(
+					bgp, import_table, m->node, m->bpi);
+			}
+
+			/*
+			 * Build a list of unique VPN nodes referenced by these
+			 * monitors
+			 *
+			 * There could be more than one VPN node here with a
+			 * given
+			 * prefix. Those are currently in an unsorted linear
+			 * list
+			 * per prefix.
+			 */
+			vpn_pfx = agg_node_get_prefix(m->node);
+			referenced_vpn_prefix =
+				agg_node_get(referenced_vpn_table, vpn_pfx);
+			assert(referenced_vpn_prefix);
+			for (mnext = referenced_vpn_prefix->info; mnext;
+			     mnext = mnext->next) {
+
+				if (mnext->node == m->node)
+					break;
+			}
+
+			if (mnext) {
+				/*
+				 * already have an entry for this VPN node
+				 */
+				agg_unlock_node(referenced_vpn_prefix);
+			} else {
+				mnext = XCALLOC(
+					MTYPE_RFAPI_MONITOR_ENCAP,
+					sizeof(struct rfapi_monitor_encap));
+				mnext->node = m->node;
+				mnext->next = referenced_vpn_prefix->info;
+				referenced_vpn_prefix->info = mnext;
+			}
+		}
+
+		/*
+		 * for each VPN node referenced in the ENCAP monitors:
+		 */
+		for (referenced_vpn_prefix =
+			     agg_route_top(referenced_vpn_table);
+		     referenced_vpn_prefix;
+		     referenced_vpn_prefix =
+			     agg_route_next(referenced_vpn_prefix)) {
+
+			while ((m = referenced_vpn_prefix->info)) {
+
+				rfapiMonitorMoveLonger(m->node);
+				rfapiMonitorItNodeChanged(import_table, m->node,
+							  NULL);
+
+				/* unlink, release the node ref, free entry */
+				referenced_vpn_prefix->info = m->next;
+				agg_unlock_node(referenced_vpn_prefix);
+				XFREE(MTYPE_RFAPI_MONITOR_ENCAP, m);
+			}
+		}
+		agg_table_finish(referenced_vpn_table);
+	}
+
+	RFAPI_CHECK_REFCOUNT(rn, SAFI_ENCAP, 0);
+}
+
+/*
+ * Run the VPN withdraw-timer service routine right away for bpi at rn, as
+ * though its holddown timer had just fired.
+ *
+ * lockoffset is stored in the struct rfapi_withdraw handed to
+ * rfapiWithdrawTimerVPN(), which frees that structure.
+ */
+static void rfapiExpireVpnNow(struct rfapi_import_table *it,
+			      struct agg_node *rn, struct bgp_path_info *bpi,
+			      int lockoffset)
+{
+	struct event fake_timer;
+	struct rfapi_withdraw *expiry;
+
+	memset(&fake_timer, 0, sizeof(fake_timer));
+
+	expiry = XCALLOC(MTYPE_RFAPI_WITHDRAW, sizeof(*expiry));
+	expiry->import_table = it;
+	expiry->node = rn;
+	expiry->info = bpi;
+	expiry->lockoffset = lockoffset;
+
+	/* the service routine reads only the event argument */
+	fake_timer.arg = expiry;
+	rfapiWithdrawTimerVPN(&fake_timer); /* frees expiry */
+}
+
+
+/*
+ * import a bgp_path_info if its route target list intersects with the
+ * import table's route target list
+ *
+ * Handler for VPN routes. action selects the operation:
+ *   FIF_ACTION_UPDATE    add the route at prefix p, replacing the route
+ *                        found by rfapiItBiIndexSearch() for prd, peer and
+ *                        aux_prefix if there is one
+ *   FIF_ACTION_WITHDRAW  put the matching route into holddown via
+ *                        rfapiBiStartWithdrawTimer()
+ *   FIF_ACTION_KILL      remove the matching route now via
+ *                        rfapiExpireVpnNow()
+ * Any other value trips an assertion.
+ *
+ * For the CE import table (bgp->rfapi->it_ce) the route-target check is
+ * skipped and a withdraw is converted to a kill.
+ *
+ * After an update, holddown routes at the same node that have the same VN
+ * address and either the same UN address or the same remote peer as the
+ * new route are expired at once.
+ */
+void rfapiBgpInfoFilteredImportVPN(
+ struct rfapi_import_table *import_table, int action, struct peer *peer,
+ void *rfd, /* set for looped back routes */
+ const struct prefix *p,
+ const struct prefix *aux_prefix, /* AFI_L2VPN: optional IP */
+ afi_t afi, struct prefix_rd *prd,
+ struct attr *attr, /* part of bgp_path_info */
+ uint8_t type, /* part of bgp_path_info */
+ uint8_t sub_type, /* part of bgp_path_info */
+ uint32_t *label) /* part of bgp_path_info */
+{
+ struct agg_table *rt = NULL;
+ struct agg_node *rn;
+ struct agg_node *n;
+ struct bgp_path_info *info_new;
+ struct bgp_path_info *bpi;
+ struct bgp_path_info *next;
+ char buf[BUFSIZ];
+ struct prefix vn_prefix;
+ struct prefix un_prefix;
+ int un_prefix_valid = 0;
+ struct agg_node *ern;
+ int replacing = 0;
+ int original_had_routes = 0;
+ struct prefix original_nexthop;
+ const char *action_str = NULL;
+ int is_it_ce = 0;
+
+ struct bgp *bgp;
+ bgp = bgp_get_default(); /* assume 1 instance for now */
+
+ switch (action) {
+ case FIF_ACTION_UPDATE:
+ action_str = "update";
+ break;
+ case FIF_ACTION_WITHDRAW:
+ action_str = "withdraw";
+ break;
+ case FIF_ACTION_KILL:
+ action_str = "kill";
+ break;
+ default:
+ assert(0);
+ break;
+ }
+
+ if (import_table == bgp->rfapi->it_ce)
+ is_it_ce = 1;
+
+ vnc_zlog_debug_verbose("%s: entry: %s%s: prefix %s/%d: it %p, afi %s",
+ __func__, (is_it_ce ? "CE-IT " : ""), action_str,
+ rfapi_ntop(p->family, &p->u.prefix, buf, BUFSIZ),
+ p->prefixlen, import_table, afi2str(afi));
+
+ VNC_ITRCCK;
+
+ /*
+ * Compare rt lists. If no intersection, don't import this route
+ * On a withdraw, peer and RD are sufficient to determine if
+ * we should act.
+ */
+ if (action == FIF_ACTION_UPDATE) {
+ if (!attr || !bgp_attr_get_ecommunity(attr)) {
+
+ vnc_zlog_debug_verbose(
+ "%s: attr, extra, or ecommunity missing, not importing",
+ __func__);
+ return;
+ }
+ if ((import_table != bgp->rfapi->it_ce) &&
+ !rfapiEcommunitiesIntersect(
+ import_table->rt_import_list,
+ bgp_attr_get_ecommunity(attr))) {
+
+ vnc_zlog_debug_verbose(
+ "%s: it=%p: no ecommunity intersection",
+ __func__, import_table);
+ return;
+ }
+
+ memset(&vn_prefix, 0,
+ sizeof(vn_prefix)); /* keep valgrind happy */
+ if (rfapiGetNexthop(attr, &vn_prefix)) {
+ /* missing nexthop address would be a bad, bad thing */
+ vnc_zlog_debug_verbose("%s: missing nexthop", __func__);
+ return;
+ }
+ }
+
+ /*
+ * Figure out which radix tree the route would go into
+ */
+ switch (afi) {
+ case AFI_IP:
+ case AFI_IP6:
+ case AFI_L2VPN:
+ rt = import_table->imported_vpn[afi];
+ break;
+
+ case AFI_UNSPEC:
+ case AFI_MAX:
+ flog_err(EC_LIB_DEVELOPMENT, "%s: bad afi %d", __func__, afi);
+ return;
+ }
+
+ /* clear it */
+ memset(&original_nexthop, 0, sizeof(original_nexthop));
+
+ /*
+ * agg_node_lookup returns a node only if there is at least
+ * one route attached.
+ */
+ rn = agg_node_lookup(rt, p);
+
+ vnc_zlog_debug_verbose("%s: rn=%p", __func__, rn);
+ /* node exists: find and process the bpi this action refers to */
+ if (rn) {
+
+ RFAPI_CHECK_REFCOUNT(rn, SAFI_MPLS_VPN, 1);
+ agg_unlock_node(rn); /* undo lock in agg_node_lookup */
+
+ if (rn->info)
+ original_had_routes = 1;
+
+ if (VNC_DEBUG(VERBOSE)) {
+ vnc_zlog_debug_verbose("%s: showing IT node on entry",
+ __func__);
+ rfapiShowItNode(NULL, rn); /* debug */
+ }
+
+ /*
+ * Look for same route (will have same RD and peer)
+ */
+ bpi = rfapiItBiIndexSearch(rn, prd, peer, aux_prefix);
+
+ if (bpi) {
+
+ /*
+ * This was an old test when we iterated over the
+ * BPIs linearly. Since we're now looking up with
+ * RD and peer, comparing types should not be
+ * needed. Changed to assertion.
+ *
+ * Compare types. Doing so prevents a RFP-originated
+ * route from matching an imported route, for example.
+ */
+ if (VNC_DEBUG(VERBOSE) && bpi->type != type)
+ /* should be handled by RDs, but warn for now */
+ zlog_warn("%s: type mismatch! (bpi=%d, arg=%d)",
+ __func__, bpi->type, type);
+
+ vnc_zlog_debug_verbose("%s: found matching bpi",
+ __func__);
+
+ /*
+ * In the special CE table, withdrawals occur without
+ * holddown
+ */
+ if (import_table == bgp->rfapi->it_ce) {
+ vnc_direct_bgp_del_route_ce(bgp, rn, bpi);
+ if (action == FIF_ACTION_WITHDRAW)
+ action = FIF_ACTION_KILL;
+ }
+
+ if (action == FIF_ACTION_WITHDRAW) {
+
+ int washolddown = CHECK_FLAG(bpi->flags,
+ BGP_PATH_REMOVED);
+
+ vnc_zlog_debug_verbose(
+ "%s: withdrawing at prefix %pRN%s",
+ __func__, rn,
+ (washolddown
+ ? " (already being withdrawn)"
+ : ""));
+
+ VNC_ITRCCK;
+ if (!washolddown) {
+ rfapiBiStartWithdrawTimer(
+ import_table, rn, bpi, afi,
+ SAFI_MPLS_VPN,
+ rfapiWithdrawTimerVPN);
+
+ RFAPI_UPDATE_ITABLE_COUNT(
+ bpi, import_table, afi, -1);
+ import_table->holddown_count[afi] += 1;
+ }
+ VNC_ITRCCK;
+ } else {
+ vnc_zlog_debug_verbose(
+ "%s: %s at prefix %pRN", __func__,
+ ((action == FIF_ACTION_KILL)
+ ? "killing"
+ : "replacing"),
+ rn);
+
+ /*
+ * If this route is waiting to be deleted
+ * because of
+ * a previous withdraw, we must cancel its
+ * timer.
+ */
+ if (CHECK_FLAG(bpi->flags, BGP_PATH_REMOVED)
+ && bpi->extra->vnc.import.timer) {
+ struct rfapi_withdraw *wcb = EVENT_ARG(
+ bpi->extra->vnc.import.timer);
+
+ XFREE(MTYPE_RFAPI_WITHDRAW, wcb);
+ EVENT_OFF(bpi->extra->vnc.import.timer);
+
+ import_table->holddown_count[afi] -= 1;
+ RFAPI_UPDATE_ITABLE_COUNT(
+ bpi, import_table, afi, 1);
+ }
+ /*
+ * decrement remote count (if route is remote)
+ * because
+ * we are going to remove it below
+ */
+ RFAPI_UPDATE_ITABLE_COUNT(bpi, import_table,
+ afi, -1);
+ if (action == FIF_ACTION_UPDATE) {
+ replacing = 1;
+
+ /*
+ * make copy of original nexthop so we
+ * can see if it changed
+ */
+ rfapiGetNexthop(bpi->attr,
+ &original_nexthop);
+
+ /*
+ * remove bpi without doing any export
+ * processing
+ */
+ if (CHECK_FLAG(bpi->flags,
+ BGP_PATH_VALID)
+ && VALID_INTERIOR_TYPE(bpi->type))
+ RFAPI_MONITOR_EXTERIOR(rn)
+ ->valid_interior_count--;
+ rfapiItBiIndexDel(rn, bpi);
+ rfapiBgpInfoDetach(rn, bpi);
+ rfapiMonitorEncapDelete(bpi);
+ vnc_import_bgp_exterior_del_route_interior(
+ bgp, import_table, rn, bpi);
+ rfapiBgpInfoFree(bpi);
+ } else {
+ /* Kill */
+ /*
+ * remove bpi and do export processing
+ */
+ import_table->holddown_count[afi] += 1;
+ rfapiExpireVpnNow(import_table, rn, bpi,
+ 0);
+ }
+ }
+ }
+ }
+
+ if (rn)
+ RFAPI_CHECK_REFCOUNT(rn, SAFI_MPLS_VPN, replacing ? 1 : 0);
+ /* withdraw and kill are finished here; only update continues */
+ if (action == FIF_ACTION_WITHDRAW || action == FIF_ACTION_KILL) {
+ VNC_ITRCCK;
+ return;
+ }
+ /* update: create the bpi that will be inserted at the node */
+ info_new =
+ rfapiBgpInfoCreate(attr, peer, rfd, prd, type, sub_type, label);
+
+ /*
+ * lookup un address in encap table
+ */
+ ern = agg_node_match(import_table->imported_encap[afi], &vn_prefix);
+ if (ern) {
+ rfapiCopyUnEncap2VPN(ern->info, info_new);
+ agg_unlock_node(ern); /* undo lock in agg_node_match */
+ } else {
+ /* Not a big deal, just means VPN route got here first */
+ vnc_zlog_debug_verbose("%s: no encap route for vn addr %pFX",
+ __func__, &vn_prefix);
+ info_new->extra->vnc.import.un_family = AF_UNSPEC;
+ }
+
+ if (rn) {
+ if (!replacing)
+ agg_lock_node(rn);
+ } else {
+ /*
+ * No need to increment reference count, so only "get"
+ * if the node is not there already
+ */
+ rn = agg_node_get(rt, p);
+ }
+
+ /*
+ * For ethernet routes, if there is an accompanying IP address,
+ * save it in the bpi
+ */
+ if ((AFI_L2VPN == afi) && aux_prefix) {
+
+ vnc_zlog_debug_verbose("%s: setting BPI's aux_prefix",
+ __func__);
+ info_new->extra->vnc.import.aux_prefix = *aux_prefix;
+ }
+
+ vnc_zlog_debug_verbose("%s: inserting bpi %p at prefix %pRN #%d",
+ __func__, info_new, rn,
+ agg_node_get_lock_count(rn));
+
+ rfapiBgpInfoAttachSorted(rn, info_new, afi, SAFI_MPLS_VPN);
+ rfapiItBiIndexAdd(rn, info_new);
+ if (!rfapiGetUnAddrOfVpnBi(info_new, NULL)) {
+ if (VALID_INTERIOR_TYPE(info_new->type))
+ RFAPI_MONITOR_EXTERIOR(rn)->valid_interior_count++;
+ SET_FLAG(info_new->flags, BGP_PATH_VALID);
+ }
+ RFAPI_UPDATE_ITABLE_COUNT(info_new, import_table, afi, 1);
+ vnc_import_bgp_exterior_add_route_interior(bgp, import_table, rn,
+ info_new);
+
+ if (import_table == bgp->rfapi->it_ce)
+ vnc_direct_bgp_add_route_ce(bgp, rn, info_new);
+
+ if (VNC_DEBUG(VERBOSE)) {
+ vnc_zlog_debug_verbose("%s: showing IT node", __func__);
+ rfapiShowItNode(NULL, rn); /* debug */
+ }
+
+ rfapiMonitorEncapAdd(import_table, &vn_prefix, rn, info_new);
+
+ if (!rfapiGetUnAddrOfVpnBi(info_new, &un_prefix)) {
+
+ /*
+ * if we have a valid UN address (either via Encap route
+ * or via tunnel attribute), then we should attempt
+ * to move any monitors at less-specific nodes to this node
+ */
+ rfapiMonitorMoveLonger(rn);
+
+ un_prefix_valid = 1;
+ }
+
+ /*
+ * 101129 Enhancement: if we add a route (implication: it is not
+ * in holddown), delete all other routes from this nve at this
+ * node that are in holddown, regardless of peer.
+ *
+ * Reasons it's OK to do that:
+ *
+ * - if the holddown route being deleted originally came from BGP VPN,
+ * it is already gone from BGP (implication of holddown), so there
+ * won't be any added inconsistency with the BGP RIB.
+ *
+ * - once a fresh route is added at a prefix, any routes in holddown
+ * at that prefix will not show up in RFP responses, so deleting
+ * the holddown routes won't affect the contents of responses.
+ *
+ * - lifetimes are supposed to be consistent, so there should not
+ * be a case where the fresh route has a shorter lifetime than
+ * the holddown route, so we don't expect the fresh route to
+ * disappear and complete its holddown time before the existing
+ * holddown routes time out. Therefore, we won't have a situation
+ * where we expect the existing holddown routes to be hidden and
+ * then to reappear sometime later (as holddown routes) in a
+ * RFP response.
+ *
+ * Among other things, this would enable us to skirt the problem
+ * of local holddown routes that refer to NVE descriptors that
+ * have already been closed (if the same NVE triggers a subsequent
+ * rfapi_open(), the new peer is different and doesn't match the
+ * peer of the holddown route, so the stale holddown route still
+ * hangs around until it times out instead of just being replaced
+ * by the fresh route).
+ */
+ /*
+ * We know that the new bpi will have been inserted before any routes
+ * in holddown, so we can skip any that came before it
+ */
+ for (bpi = info_new->next; bpi; bpi = next) {
+
+ struct prefix pfx_vn;
+ struct prefix pfx_un;
+ int un_match = 0;
+ int remote_peer_match = 0;
+
+ next = bpi->next;
+
+ /*
+ * Must be holddown
+ */
+ if (!CHECK_FLAG(bpi->flags, BGP_PATH_REMOVED))
+ continue;
+
+ /*
+ * Must match VN address (nexthop of VPN route)
+ */
+ if (rfapiGetNexthop(bpi->attr, &pfx_vn))
+ continue;
+ if (!prefix_same(&pfx_vn, &vn_prefix))
+ continue;
+
+ if (un_prefix_valid && /* new route UN addr */
+ !rfapiGetUnAddrOfVpnBi(bpi, &pfx_un)
+ && /* old route UN addr */
+ prefix_same(&pfx_un, &un_prefix)) { /* compare */
+ un_match = 1;
+ }
+ if (!RFAPI_LOCAL_BI(bpi) && !RFAPI_LOCAL_BI(info_new) &&
+ sockunion_same(&bpi->peer->connection->su,
+ &info_new->peer->connection->su)) {
+ /* old & new are both remote, same peer */
+ remote_peer_match = 1;
+ }
+
+ if (!un_match && !remote_peer_match)
+ continue;
+
+ vnc_zlog_debug_verbose(
+ "%s: removing holddown bpi matching NVE of new route",
+ __func__);
+ if (bpi->extra->vnc.import.timer) {
+ struct rfapi_withdraw *wcb =
+ EVENT_ARG(bpi->extra->vnc.import.timer);
+
+ XFREE(MTYPE_RFAPI_WITHDRAW, wcb);
+ EVENT_OFF(bpi->extra->vnc.import.timer);
+ }
+ rfapiExpireVpnNow(import_table, rn, bpi, 0);
+ }
+ /* export processing: first route at this node vs. nexthop change */
+ if (!original_had_routes) {
+ /*
+ * We went from 0 usable routes to 1 usable route. Perform the
+ * "Adding a Route" export process.
+ */
+ vnc_direct_bgp_add_prefix(bgp, import_table, rn);
+ vnc_zebra_add_prefix(bgp, import_table, rn);
+ } else {
+ /*
+ * Check for nexthop change event
+ * Note: the prefix_same() test below detects two situations:
+ * 1. route is replaced, new route has different nexthop
+ * 2. new route is added (original_nexthop is 0)
+ */
+ struct prefix new_nexthop;
+
+ rfapiGetNexthop(attr, &new_nexthop);
+ if (!prefix_same(&original_nexthop, &new_nexthop)) {
+ /*
+ * nexthop change event
+ * vnc_direct_bgp_add_prefix() will recompute VN addr
+ * ecommunity
+ */
+ vnc_direct_bgp_add_prefix(bgp, import_table, rn);
+ }
+ }
+
+ if (!(bgp->rfapi_cfg->flags & BGP_VNC_CONFIG_CALLBACK_DISABLE)) {
+ for (n = rn; n; n = agg_node_parent(n)) {
+ // rfapiDoRouteCallback(import_table, n, NULL);
+ }
+ rfapiMonitorItNodeChanged(import_table, rn, NULL);
+ }
+ RFAPI_CHECK_REFCOUNT(rn, SAFI_MPLS_VPN, 0);
+ VNC_ITRCCK;
+}
+
+/*
+ * Placeholder import handler returned by
+ * rfapiBgpInfoFilteredImportFunction() for a SAFI that has no import
+ * handler. It logs a debug message and does nothing with its arguments.
+ */
+static void rfapiBgpInfoFilteredImportBadSafi(
+	struct rfapi_import_table *import_table, int action, struct peer *peer,
+	void *rfd,			 /* set for looped back routes */
+	const struct prefix *p,
+	const struct prefix *aux_prefix, /* AFI_L2VPN: optional IP */
+	afi_t afi, struct prefix_rd *prd,
+	struct attr *attr,		 /* part of bgp_path_info */
+	uint8_t type,			 /* part of bgp_path_info */
+	uint8_t sub_type,		 /* part of bgp_path_info */
+	uint32_t *label)		 /* part of bgp_path_info */
+{
+	/* no import action is defined for this SAFI */
+	vnc_zlog_debug_verbose("%s: Error, bad safi", __func__);
+}
+
+/*
+ * Select the filtered-import handler for a SAFI.
+ *
+ * SAFI_MPLS_VPN and SAFI_ENCAP map to their import handlers. Every other
+ * enumerated SAFI is logged as an error and mapped to
+ * rfapiBgpInfoFilteredImportBadSafi. A value outside the enumeration trips
+ * the assertion at the end.
+ */
+static rfapi_bi_filtered_import_f *
+rfapiBgpInfoFilteredImportFunction(safi_t safi)
+{
+	rfapi_bi_filtered_import_f *handler = NULL;
+
+	/* no default label: every enumerator is listed explicitly */
+	switch (safi) {
+	case SAFI_MPLS_VPN:
+		handler = rfapiBgpInfoFilteredImportVPN;
+		break;
+
+	case SAFI_ENCAP:
+		handler = rfapiBgpInfoFilteredImportEncap;
+		break;
+
+	case SAFI_UNSPEC:
+	case SAFI_UNICAST:
+	case SAFI_MULTICAST:
+	case SAFI_EVPN:
+	case SAFI_LABELED_UNICAST:
+	case SAFI_FLOWSPEC:
+	case SAFI_MAX:
+		/* not expected */
+		flog_err(EC_LIB_DEVELOPMENT, "%s: bad safi %d", __func__, safi);
+		handler = rfapiBgpInfoFilteredImportBadSafi;
+		break;
+	}
+
+	if (handler)
+		return handler;
+
+	assert(!"Reached end of function when we were not expecting to");
+}
+
+/*
+ * Import a route update into the RFAPI import tables of the default BGP
+ * instance.
+ *
+ * peer		peer the route was learned from
+ * rfd		set when looped from RFP/RFAPI
+ * p		route prefix
+ * prd		route distinguisher
+ * attr		route attributes
+ * afi, safi	address family of the route
+ * type, sub_type, label
+ *		passed through to the import handlers
+ *
+ * A SAFI_MPLS_VPN route whose RD type is RD_TYPE_VNC_ETH carries a MAC
+ * address in the RD. It is first imported into the MAC-based import table
+ * selected by the LNI found in its extended communities, with the MAC as
+ * the prefix and p as the auxiliary IP prefix. If the nexthop of such a
+ * route equals p, nothing further is imported.
+ *
+ * Otherwise the route is offered to every import table through the handler
+ * for safi, and VPN routes are additionally handed to
+ * vnc_direct_bgp_rh_add_route() and imported into the CE import table.
+ */
+void rfapiProcessUpdate(struct peer *peer,
+			void *rfd, /* set when looped from RFP/RFAPI */
+			const struct prefix *p, struct prefix_rd *prd,
+			struct attr *attr, afi_t afi, safi_t safi, uint8_t type,
+			uint8_t sub_type, uint32_t *label)
+{
+	struct bgp *bgp;
+	struct rfapi *h;
+	struct rfapi_import_table *it;
+	int has_ip_route = 1;
+	uint32_t lni = 0;
+
+	bgp = bgp_get_default(); /* assume 1 instance for now */
+	assert(bgp);
+
+	h = bgp->rfapi;
+	assert(h);
+
+	/*
+	 * look at high-order byte of RD. FF means MAC
+	 * address is present (VNC L2VPN)
+	 */
+	if ((safi == SAFI_MPLS_VPN)
+	    && (decode_rd_type(prd->val) == RD_TYPE_VNC_ETH)) {
+		struct prefix pfx_mac_buf;
+		struct prefix pfx_nexthop_buf;
+		int rc;
+
+		/*
+		 * Set flag if prefix and nexthop are the same - don't
+		 * add the route to normal IP-based import tables
+		 */
+		if (!rfapiGetNexthop(attr, &pfx_nexthop_buf)
+		    && !prefix_cmp(&pfx_nexthop_buf, p))
+			has_ip_route = 0;
+
+		/* the MAC address is the six bytes following the RD type */
+		memset(&pfx_mac_buf, 0, sizeof(pfx_mac_buf));
+		pfx_mac_buf.family = AF_ETHERNET;
+		pfx_mac_buf.prefixlen = 48;
+		memcpy(&pfx_mac_buf.u.prefix_eth.octet, prd->val + 2, 6);
+
+		/*
+		 * Find rt containing LNI (Logical Network ID), which
+		 * _should_ always be present when mac address is present
+		 */
+		rc = rfapiEcommunityGetLNI(bgp_attr_get_ecommunity(attr), &lni);
+
+		/* lni is uint32_t, so it is printed with %u */
+		vnc_zlog_debug_verbose(
+			"%s: rfapiEcommunityGetLNI returned %d, lni=%u, attr=%p",
+			__func__, rc, lni, attr);
+		if (!rc) {
+			it = rfapiMacImportTableGet(bgp, lni);
+
+			rfapiBgpInfoFilteredImportVPN(
+				it, FIF_ACTION_UPDATE, peer, rfd,
+				&pfx_mac_buf, /* prefix */
+				p,	      /* aux prefix: IP addr */
+				AFI_L2VPN, prd, attr, type, sub_type, label);
+		}
+	}
+
+	if (!has_ip_route)
+		return;
+
+	/*
+	 * Iterate over all import tables; do a filtered import
+	 * for the afi/safi combination
+	 */
+	for (it = h->imports; it; it = it->next) {
+		(*rfapiBgpInfoFilteredImportFunction(safi))(
+			it, FIF_ACTION_UPDATE, peer, rfd, p, /* prefix */
+			NULL, afi, prd, attr, type, sub_type, label);
+	}
+
+	if (safi == SAFI_MPLS_VPN) {
+		vnc_direct_bgp_rh_add_route(bgp, afi, p, peer, attr);
+		rfapiBgpInfoFilteredImportVPN(
+			bgp->rfapi->it_ce, FIF_ACTION_UPDATE, peer, rfd,
+			p, /* prefix */
+			NULL, afi, prd, attr, type, sub_type, label);
+	}
+}
+
+
+/*
+ * rfapiProcessWithdraw
+ *
+ * Offer one route withdraw to the RFAPI import tables. When kill is
+ * non-zero the import functions are invoked with FIF_ACTION_KILL,
+ * otherwise with FIF_ACTION_WITHDRAW. sub_type and label are unused
+ * for withdraws, so 0 and NULL are passed.
+ */
+void rfapiProcessWithdraw(struct peer *peer, void *rfd, const struct prefix *p,
+			  struct prefix_rd *prd, struct attr *attr, afi_t afi,
+			  safi_t safi, uint8_t type, int kill)
+{
+	const int action = kill ? FIF_ACTION_KILL : FIF_ACTION_WITHDRAW;
+	struct rfapi_import_table *it;
+	struct rfapi *h;
+	struct bgp *bgp;
+
+	bgp = bgp_get_default(); /* assume 1 instance for now */
+	assert(bgp);
+
+	h = bgp->rfapi;
+	assert(h);
+
+	/*
+	 * look at high-order byte of RD. FF means MAC
+	 * address is present (VNC L2VPN)
+	 */
+	if (h->import_mac != NULL && safi == SAFI_MPLS_VPN
+	    && decode_rd_type(prd->val) == RD_TYPE_VNC_ETH) {
+		struct prefix mac_pfx;
+		void *cursor = NULL;
+
+		memset(&mac_pfx, 0, sizeof(mac_pfx));
+		mac_pfx.family = AF_ETHERNET;
+		mac_pfx.prefixlen = 48;
+		memcpy(&mac_pfx.u.prefix_eth, prd->val + 2, 6);
+
+		/*
+		 * withdraw does not contain attrs, so we don't have
+		 * access to the route's LNI, which would ordinarily
+		 * select the specific mac-based import table. Instead,
+		 * we must iterate over all mac-based tables and rely
+		 * on the RD to match.
+		 *
+		 * If this approach is too slow, add an index where
+		 * key is {RD, peer} and value is the import table
+		 */
+		while (skiplist_next(h->import_mac, NULL, (void **)&it,
+				     &cursor)
+		       == 0) {
+
+#ifdef DEBUG_L2_EXTRA
+			vnc_zlog_debug_verbose(
+				"%s: calling rfapiBgpInfoFilteredImportVPN(it=%p, afi=AFI_L2VPN)",
+				__func__, it);
+#endif
+
+			rfapiBgpInfoFilteredImportVPN(
+				it, action, peer, rfd, &mac_pfx, /* prefix */
+				p, /* aux_prefix: IP */
+				AFI_L2VPN, prd, attr, type, 0, NULL);
+		}
+	}
+
+	/*
+	 * XXX For the case where the withdraw involves an L2
+	 * route with no IP information, we rely on the lack
+	 * of RT-list intersection to filter out the withdraw
+	 * from the IP-based import tables below
+	 */
+
+	/*
+	 * Offer the withdraw to every IP-based import table using
+	 * the import function that matches safi
+	 */
+	for (it = h->imports; it; it = it->next) {
+		(*rfapiBgpInfoFilteredImportFunction(safi))(
+			it, action, peer, rfd, p, /* prefix */
+			NULL, afi, prd, attr, type, 0, NULL);
+	}
+
+	if (safi != SAFI_MPLS_VPN)
+		return;
+
+	/* TBD the deletion should happen after the lifetime expires */
+	vnc_direct_bgp_rh_del_route(bgp, afi, p, peer);
+
+	/* finally, the CE import table */
+	rfapiBgpInfoFilteredImportVPN(bgp->rfapi->it_ce, action, peer, rfd,
+				      p, /* prefix */
+				      NULL, afi, prd, attr, type, 0, NULL);
+}
+
+/*
+ * TBD optimized withdraw timer algorithm for case of many
+ * routes expiring at the same time due to peer drop.
+ */
+/*
+ * 1. Visit all BPIs in all ENCAP import tables.
+ *
+ * a. If a bpi's peer is the failed peer, remove the bpi.
+ * b. If the removed ENCAP bpi was first in the list of
+ * BPIs at this ENCAP node, loop over all monitors
+ * at this node:
+ *
+ * (1) for each ENCAP monitor, loop over all its
+ * VPN node monitors and set their RFAPI_MON_FLAG_NEEDCALLBACK
+ * flags.
+ *
+ * 2. Visit all BPIs in all VPN import tables.
+ * a. If a bpi's peer is the failed peer, remove the bpi.
+ * b. loop over all the VPN node monitors and set their
+ * RFAPI_MON_FLAG_NEEDCALLBACK flags
+ * c. If there are no BPIs left at this VPN node,
+ *
+ */
+
+
+/*
+ * Start the withdraw timer on every path learned from peer in one
+ * route table (selected by afi/safi) of import_table. Paths already
+ * flagged BGP_PATH_REMOVED are left alone.
+ *
+ * surprise, this gets called from peer_delete(), from rfapi_close()
+ */
+static void rfapiProcessPeerDownRt(struct peer *peer,
+				   struct rfapi_import_table *import_table,
+				   afi_t afi, safi_t safi)
+{
+	void (*expire_func)(struct event *) = NULL;
+	struct agg_table *table = NULL;
+	struct agg_node *node;
+
+	assert(afi == AFI_IP || afi == AFI_IP6);
+
+	VNC_ITRCCK;
+
+	/* pick the route table and the matching timer callback */
+	switch (safi) {
+	case SAFI_MPLS_VPN:
+		expire_func = rfapiWithdrawTimerVPN;
+		table = import_table->imported_vpn[afi];
+		break;
+	case SAFI_ENCAP:
+		expire_func = rfapiWithdrawTimerEncap;
+		table = import_table->imported_encap[afi];
+		break;
+	case SAFI_UNSPEC:
+	case SAFI_UNICAST:
+	case SAFI_MULTICAST:
+	case SAFI_EVPN:
+	case SAFI_LABELED_UNICAST:
+	case SAFI_FLOWSPEC:
+	case SAFI_MAX:
+		/* Suppress uninitialized variable warning */
+		expire_func = NULL;
+		table = NULL;
+		assert(0);
+	}
+
+	for (node = agg_route_top(table); node; node = agg_route_next(node)) {
+		struct bgp_path_info *path;
+
+		for (path = node->info; path; path = path->next) {
+			if (path->peer != peer)
+				continue;
+
+			/* already in holddown, skip */
+			if (CHECK_FLAG(path->flags, BGP_PATH_REMOVED))
+				continue;
+
+			/* only VPN paths are moved to the holddown count */
+			if (safi == SAFI_MPLS_VPN) {
+				RFAPI_UPDATE_ITABLE_COUNT(path, import_table,
+							  afi, -1);
+				import_table->holddown_count[afi] += 1;
+			}
+
+			rfapiBiStartWithdrawTimer(import_table, node, path, afi,
+						  safi, expire_func);
+		}
+	}
+
+	VNC_ITRCCK;
+}
+
+/*
+ * Called when a peer connection drops: every route learned from
+ * that peer has to be removed.
+ *
+ * Current approach is crude. TBD Optimize by setting fewer timers and
+ * grouping withdrawn routes so we can generate callbacks more
+ * efficiently.
+ */
+void rfapiProcessPeerDown(struct peer *peer)
+{
+	static const afi_t afis[] = {AFI_IP, AFI_IP6};
+	static const safi_t safis[] = {SAFI_ENCAP, SAFI_MPLS_VPN};
+	const unsigned int n_afis = sizeof(afis) / sizeof(afis[0]);
+	const unsigned int n_safis = sizeof(safis) / sizeof(safis[0]);
+	struct rfapi_import_table *it;
+	struct rfapi *h;
+	struct bgp *bgp;
+	unsigned int a;
+	unsigned int s;
+
+	/*
+	 * If this peer is a "dummy" peer structure attached to a RFAPI
+	 * nve_descriptor, we don't need to walk the import tables
+	 * because the routes are already withdrawn by rfapi_close()
+	 */
+	if (CHECK_FLAG(peer->flags, PEER_FLAG_IS_RFAPI_HD))
+		return;
+
+	bgp = bgp_get_default(); /* assume 1 instance for now */
+	if (!bgp)
+		return;
+
+	h = bgp->rfapi;
+	assert(h);
+
+	/*
+	 * For each import table, start the withdraw timer on the BPIs
+	 * that match peer: first in the ENCAP tables (IPv4, IPv6),
+	 * then in the VPN tables (IPv4, IPv6).
+	 */
+	for (it = h->imports; it; it = it->next) {
+		for (s = 0; s < n_safis; s++) {
+			for (a = 0; a < n_afis; a++)
+				rfapiProcessPeerDownRt(peer, it, afis[a],
+						       safis[s]);
+		}
+	}
+
+	/* The CE import table is only walked for VPN routes */
+	if (h->it_ce) {
+		for (a = 0; a < n_afis; a++)
+			rfapiProcessPeerDownRt(peer, h->it_ce, afis[a],
+					       SAFI_MPLS_VPN);
+	}
+}
+
+/*
+ * Offer every path of bgp->rib[afi][safi] that is not flagged
+ * BGP_PATH_REMOVED to import table "it" as a FIF_ACTION_UPDATE.
+ * Filtering is left to the import function selected by safi.
+ *
+ * TBD: does this function need additions to match rfapiProcessUpdate()
+ * for, e.g., L2 handling?
+ */
+static void rfapiBgpTableFilteredImport(struct bgp *bgp,
+					struct rfapi_import_table *it,
+					afi_t afi, safi_t safi)
+{
+	struct bgp_dest *rd_dest;
+
+	/* Only these SAFIs have 2-level RIBS */
+	assert(safi == SAFI_MPLS_VPN || safi == SAFI_ENCAP);
+
+	/* outer level: one node per RD, each holding a route table */
+	for (rd_dest = bgp_table_top(bgp->rib[afi][safi]); rd_dest;
+	     rd_dest = bgp_route_next(rd_dest)) {
+		struct bgp_dest *pfx_dest;
+
+		if (!bgp_dest_has_bgp_path_info_data(rd_dest))
+			continue;
+
+		/* inner level: the prefixes below this RD */
+		for (pfx_dest = bgp_table_top(
+			     bgp_dest_get_bgp_table_info(rd_dest));
+		     pfx_dest; pfx_dest = bgp_route_next(pfx_dest)) {
+			struct bgp_path_info *path;
+
+			for (path = bgp_dest_get_bgp_path_info(pfx_dest); path;
+			     path = path->next) {
+				/* stays 0 when the path has no extra data */
+				uint32_t label = 0;
+
+				if (CHECK_FLAG(path->flags, BGP_PATH_REMOVED))
+					continue;
+
+				if (path->extra)
+					label = decode_label(
+						&path->extra->label[0]);
+
+				(*rfapiBgpInfoFilteredImportFunction(safi))(
+					it, /* which import table */
+					FIF_ACTION_UPDATE, path->peer, NULL,
+					bgp_dest_get_prefix(pfx_dest), NULL,
+					afi,
+					(struct prefix_rd *)bgp_dest_get_prefix(
+						rd_dest),
+					path->attr, path->type, path->sub_type,
+					&label);
+			}
+		}
+	}
+}
+
+
+/*
+ * Allocate and initialize the per-bgp-instance rfapi data.
+ *
+ * Also starts the RFP via rfp_start() and stores a new rfapi
+ * configuration in bgp->rfapi_cfg, which must be NULL on entry.
+ * Returns the new rfapi structure.
+ */
+struct rfapi *bgp_rfapi_new(struct bgp *bgp)
+{
+	struct rfapi_rfp_cb_methods *rfp_methods = NULL;
+	struct rfapi_rfp_cfg *rfp_cfg = NULL;
+	struct rfapi_import_table *ce_table;
+	struct rfapi *handle;
+	afi_t afi;
+
+	assert(bgp->rfapi_cfg == NULL);
+
+	handle = XCALLOC(MTYPE_RFAPI, sizeof(struct rfapi));
+
+	for (afi = AFI_IP; afi < AFI_MAX; afi++)
+		handle->un[afi] = agg_table_init();
+
+	/*
+	 * Create the ce import table and fill its VPN tables
+	 * from the current RIBs
+	 */
+	ce_table = XCALLOC(MTYPE_RFAPI_IMPORTTABLE,
+			   sizeof(struct rfapi_import_table));
+	handle->it_ce = ce_table;
+	ce_table->imported_vpn[AFI_IP] = agg_table_init();
+	ce_table->imported_vpn[AFI_IP6] = agg_table_init();
+	ce_table->imported_encap[AFI_IP] = agg_table_init();
+	ce_table->imported_encap[AFI_IP6] = agg_table_init();
+	rfapiBgpTableFilteredImport(bgp, ce_table, AFI_IP, SAFI_MPLS_VPN);
+	rfapiBgpTableFilteredImport(bgp, ce_table, AFI_IP6, SAFI_MPLS_VPN);
+
+	/*
+	 * Work queue used for deferred rfapi_close operations
+	 */
+	handle->deferred_close_q =
+		work_queue_new(bm->master, "rfapi deferred close");
+	handle->deferred_close_q->spec.workfunc = rfapi_deferred_close_workfunc;
+	handle->deferred_close_q->spec.data = handle;
+
+	/* rfp_start() may hand back a configuration and callback methods */
+	handle->rfp = rfp_start(bm->master, &rfp_cfg, &rfp_methods);
+	bgp->rfapi_cfg = bgp_rfapi_cfg_new(rfp_cfg);
+	if (rfp_methods != NULL)
+		handle->rfp_methods = *rfp_methods;
+
+	return handle;
+}
+
+/*
+ * Release the per-bgp-instance rfapi data h: the nexthop-resolution
+ * skiplist, the CE import table, all MAC-based import tables, the
+ * deferred-close work queue, the RFP instance and the un tables.
+ * Does nothing when either argument is NULL.
+ */
+void bgp_rfapi_destroy(struct bgp *bgp, struct rfapi *h)
+{
+	afi_t afi;
+
+	if (bgp == NULL || h == NULL)
+		return;
+
+	if (h->resolve_nve_nexthop) {
+		skiplist_free(h->resolve_nve_nexthop);
+		h->resolve_nve_nexthop = NULL;
+	}
+
+	rfapiImportTableFlush(h->it_ce);
+
+	if (h->import_mac) {
+		struct rfapi_import_table *mac_it;
+		void *cursor = NULL;
+
+		/* flush and free each MAC table, then the list itself */
+		while (!skiplist_next(h->import_mac, NULL, (void **)&mac_it,
+				      &cursor)) {
+			rfapiImportTableFlush(mac_it);
+			XFREE(MTYPE_RFAPI_IMPORTTABLE, mac_it);
+		}
+		skiplist_free(h->import_mac);
+		h->import_mac = NULL;
+	}
+
+	work_queue_free_and_null(&h->deferred_close_q);
+
+	if (h->rfp != NULL)
+		rfp_stop(h->rfp);
+
+	for (afi = AFI_IP; afi < AFI_MAX; afi++)
+		agg_table_finish(h->un[afi]);
+
+	XFREE(MTYPE_RFAPI_IMPORTTABLE, h->it_ce);
+	XFREE(MTYPE_RFAPI, h);
+}
+
+/*
+ * Return the import table whose RT import list matches rt_import_list,
+ * creating it (and filling it from the RIBs) when none exists yet.
+ * The table's refcount is incremented in either case.
+ */
+struct rfapi_import_table *
+rfapiImportTableRefAdd(struct bgp *bgp, struct ecommunity *rt_import_list,
+		       struct rfapi_nve_group_cfg *rfg)
+{
+	struct rfapi_import_table *it;
+	struct rfapi *h = bgp->rfapi;
+
+	assert(h);
+
+	/* look for an existing table with the same RT import list */
+	it = h->imports;
+	while (it && !ecommunity_cmp(it->rt_import_list, rt_import_list))
+		it = it->next;
+
+	vnc_zlog_debug_verbose("%s: matched it=%p", __func__, it);
+
+	if (it == NULL) {
+		afi_t afi;
+
+		/* none found: allocate one at the head of the list */
+		it = XCALLOC(MTYPE_RFAPI_IMPORTTABLE,
+			     sizeof(struct rfapi_import_table));
+		it->next = h->imports;
+		h->imports = it;
+
+		it->rt_import_list = ecommunity_dup(rt_import_list);
+		it->rfg = rfg;
+		it->monitor_exterior_orphans =
+			skiplist_new(0, NULL, prefix_free_lists);
+
+		/*
+		 * fill import route tables from RIBs
+		 *
+		 * Potential area for optimization. If this occurs when
+		 * tables are large (e.g., the operator adds a nve group
+		 * with a new RT list to a running system), it could take
+		 * a while.
+		 */
+		for (afi = AFI_IP; afi < AFI_MAX; ++afi) {
+			it->imported_vpn[afi] = agg_table_init();
+			it->imported_encap[afi] = agg_table_init();
+
+			rfapiBgpTableFilteredImport(bgp, it, afi,
+						    SAFI_MPLS_VPN);
+			rfapiBgpTableFilteredImport(bgp, it, afi, SAFI_ENCAP);
+
+			vnc_import_bgp_exterior_redist_enable_it(bgp, afi, it);
+		}
+	}
+
+	it->refcount += 1;
+
+	return it;
+}
+
+/*
+ * skiplist element free function: bumps the counter that the
+ * element's info field points to, then frees the element
+ */
+static void delete_rem_pfx_na_free(void *na)
+{
+	struct rfapi_nve_addr *nve = na;
+	uint32_t *counter = nve->info;
+
+	++*counter;
+	XFREE(MTYPE_RFAPI_NVE_ADDR, na);
+}
+
+/*
+ * Common deleter for IP and MAC import tables
+ *
+ * Walks it->imported_vpn[afi] for every afi and expires, via
+ * rfapiExpireVpnNow(), each path that passes all of these filters:
+ *
+ * p if set, the node's prefix must equal p (prefix_cmp); afis
+ * other than the one of p->family are skipped entirely
+ * vn if set, the path's nexthop (rfapiGetNexthop) must be
+ * valid and matched by vn (prefix_match)
+ * un if set, the path's UN address (rfapiGetUnAddrOfVpnBi)
+ * must be valid and matched by un (prefix_match)
+ * delete_active if zero, paths not flagged BGP_PATH_REMOVED are kept
+ * delete_holddown if zero, paths flagged BGP_PATH_REMOVED are kept
+ *
+ * Counters (all four pointers are used without a NULL check):
+ * pARcount / pHRcount are incremented once per deleted active /
+ * holddown path.
+ * pAHcount / pHHcount are not incremented here; they are stored in
+ * the info field of each new entry added to uniq_active_nves /
+ * uniq_holddown_nves, presumably so that the skiplist's free
+ * function (see delete_rem_pfx_na_free) can count unique NVEs.
+ */
+static void rfapiDeleteRemotePrefixesIt(
+ struct bgp *bgp, struct rfapi_import_table *it, struct prefix *un,
+ struct prefix *vn, struct prefix *p, int delete_active,
+ int delete_holddown, uint32_t *pARcount, uint32_t *pAHcount,
+ uint32_t *pHRcount, uint32_t *pHHcount,
+ struct skiplist *uniq_active_nves, struct skiplist *uniq_holddown_nves)
+{
+ afi_t afi;
+
+#ifdef DEBUG_L2_EXTRA
+ {
+ char buf_pfx[PREFIX_STRLEN];
+
+ if (p) {
+ prefix2str(p, buf_pfx, sizeof(buf_pfx));
+ } else {
+ buf_pfx[0] = '*';
+ buf_pfx[1] = 0;
+ }
+
+ vnc_zlog_debug_verbose(
+ "%s: entry, p=%s, delete_active=%d, delete_holddown=%d",
+ __func__, buf_pfx, delete_active, delete_holddown);
+ }
+#endif
+
+ for (afi = AFI_IP; afi < AFI_MAX; ++afi) {
+
+ struct agg_table *rt;
+ struct agg_node *rn;
+
+ if (p && (family2afi(p->family) != afi)) {
+ continue;
+ }
+
+ rt = it->imported_vpn[afi];
+ if (!rt) /* no table for this afi in this import table */
+ continue;
+
+ vnc_zlog_debug_verbose("%s: scanning rt for afi=%d", __func__,
+ afi);
+
+ for (rn = agg_route_top(rt); rn; rn = agg_route_next(rn)) {
+ struct bgp_path_info *bpi;
+ struct bgp_path_info *next;
+ const struct prefix *rn_p = agg_node_get_prefix(rn);
+
+ if (p && VNC_DEBUG(IMPORT_DEL_REMOTE))
+ vnc_zlog_debug_any("%s: want %pFX, have %pRN",
+ __func__, p, rn);
+
+ if (p && prefix_cmp(p, rn_p)) /* exact prefix match required */
+ continue;
+
+ vnc_zlog_debug_verbose("%s: rn pfx=%pRN", __func__, rn);
+
+ /* TBD is this valid for afi == AFI_L2VPN? */
+ RFAPI_CHECK_REFCOUNT(rn, SAFI_MPLS_VPN, 1);
+
+ for (bpi = rn->info; bpi; bpi = next) {
+ next = bpi->next; /* fetched before bpi is handed to rfapiExpireVpnNow() below; presumably bpi may be unlinked there - confirm */
+
+ struct prefix qpt;
+ struct prefix qct;
+ int qpt_valid = 0;
+ int qct_valid = 0;
+ int is_active = 0;
+
+ vnc_zlog_debug_verbose("%s: examining bpi %p",
+ __func__, bpi);
+
+ if (!rfapiGetNexthop(bpi->attr, &qpt)) /* zero return: qpt is usable */
+ qpt_valid = 1;
+
+ if (vn) {
+ if (!qpt_valid
+ || !prefix_match(vn, &qpt)) {
+#ifdef DEBUG_L2_EXTRA
+ vnc_zlog_debug_verbose(
+ "%s: continue at vn && !qpt_valid || !prefix_match(vn, &qpt)",
+ __func__);
+#endif
+ continue;
+ }
+ }
+
+ if (!rfapiGetUnAddrOfVpnBi(bpi, &qct)) /* zero return: qct is usable */
+ qct_valid = 1;
+
+ if (un) {
+ if (!qct_valid
+ || !prefix_match(un, &qct)) {
+#ifdef DEBUG_L2_EXTRA
+ vnc_zlog_debug_verbose(
+ "%s: continue at un && !qct_valid || !prefix_match(un, &qct)",
+ __func__);
+#endif
+ continue;
+ }
+ }
+
+
+ /*
+ * Blow bpi away: it passed the vn/un filters
+ */
+ /*
+ * If this route is waiting to be deleted
+ * because of
+ * a previous withdraw, we must cancel its
+ * timer.
+ *
+ * The timer's accounting is reversed here
+ * (holddown_count -1, itable count +1) and
+ * the same adjustments are applied again,
+ * in the opposite direction, by the common
+ * code before rfapiExpireVpnNow() below.
+ */
+ if (CHECK_FLAG(bpi->flags, BGP_PATH_REMOVED)) {
+ if (!delete_holddown)
+ continue;
+ if (bpi->extra->vnc.import.timer) {
+ struct rfapi_withdraw *wcb =
+ EVENT_ARG(
+ bpi->extra->vnc
+ .import
+ .timer);
+
+ wcb->import_table
+ ->holddown_count[afi] -=
+ 1;
+ RFAPI_UPDATE_ITABLE_COUNT(
+ bpi, wcb->import_table,
+ afi, 1);
+ XFREE(MTYPE_RFAPI_WITHDRAW,
+ wcb);
+ EVENT_OFF(bpi->extra->vnc.import
+ .timer);
+ }
+ } else {
+ if (!delete_active)
+ continue;
+ is_active = 1;
+ }
+
+ vnc_zlog_debug_verbose(
+ "%s: deleting bpi %p (qct_valid=%d, qpt_valid=%d, delete_holddown=%d, delete_active=%d)",
+ __func__, bpi, qct_valid, qpt_valid,
+ delete_holddown, delete_active);
+
+
+ /*
+ * add nve to list: the {un, vn} pair is
+ * inserted only when skiplist_search()
+ * does not find it already present
+ */
+ if (qct_valid && qpt_valid) {
+
+ struct rfapi_nve_addr na;
+ struct rfapi_nve_addr *nap;
+
+ memset(&na, 0, sizeof(na));
+ assert(!rfapiQprefix2Raddr(&qct,
+ &na.un));
+ assert(!rfapiQprefix2Raddr(&qpt,
+ &na.vn));
+
+ if (skiplist_search(
+ (is_active
+ ? uniq_active_nves
+ : uniq_holddown_nves),
+ &na, (void **)&nap)) {
+ char line[BUFSIZ];
+
+ nap = XCALLOC(
+ MTYPE_RFAPI_NVE_ADDR,
+ sizeof(struct
+ rfapi_nve_addr));
+ *nap = na;
+ nap->info = is_active
+ ? pAHcount
+ : pHHcount;
+ skiplist_insert(
+ (is_active
+ ? uniq_active_nves
+ : uniq_holddown_nves),
+ nap, nap);
+
+ rfapiNveAddr2Str(nap, line,
+ BUFSIZ); /* NOTE(review): line is not read afterwards in this function */
+ }
+ }
+
+ vnc_direct_bgp_rh_del_route(bgp, afi, rn_p,
+ bpi->peer);
+
+ RFAPI_UPDATE_ITABLE_COUNT(bpi, it, afi, -1);
+ it->holddown_count[afi] += 1;
+ rfapiExpireVpnNow(it, rn, bpi, 1);
+
+ vnc_zlog_debug_verbose(
+ "%s: incrementing count (is_active=%d)",
+ __func__, is_active);
+
+ if (is_active)
+ ++*pARcount;
+ else
+ ++*pHRcount;
+ }
+ }
+ }
+}
+
+
+/*------------------------------------------
+ * rfapiDeleteRemotePrefixes
+ *
+ * UI helper: For use by the "clear vnc prefixes" command
+ *
+ * input:
+ *	un		if set, tunnel must match this prefix
+ *	vn		if set, nexthop prefix must match this prefix
+ *	p		if set, prefix must match this prefix
+ *	arg_it		if set, only this table is visited in the IP
+ *			import table pass (the L2 tables are still
+ *			scanned, see below)
+ *	delete_active	passed to rfapiDeleteRemotePrefixesIt()
+ *	delete_holddown	passed to rfapiDeleteRemotePrefixesIt()
+ *
+ * output (each pointer may be NULL; nothing is written when no
+ * default bgp instance exists)
+ *	pARcount	number of active routes deleted
+ *	pAHcount	number of active nves deleted
+ *	pHRcount	number of holddown routes deleted
+ *	pHHcount	number of holddown nves deleted
+ *
+ * return value:
+ *	void
+ --------------------------------------------*/
+void rfapiDeleteRemotePrefixes(struct prefix *un, struct prefix *vn,
+			       struct prefix *p,
+			       struct rfapi_import_table *arg_it,
+			       int delete_active, int delete_holddown,
+			       uint32_t *pARcount, uint32_t *pAHcount,
+			       uint32_t *pHRcount, uint32_t *pHHcount)
+{
+	uint32_t n_active_routes = 0;
+	uint32_t n_active_nves = 0;
+	uint32_t n_holddown_routes = 0;
+	uint32_t n_holddown_nves = 0;
+	struct skiplist *holddown_nves;
+	struct skiplist *active_nves;
+	struct rfapi_import_table *it;
+	struct rfapi *h;
+	struct bgp *bgp;
+
+	VNC_ITRCCK;
+
+	bgp = bgp_get_default(); /* assume 1 instance for now */
+	/* If no bgp instantiated yet, no vnc prefixes exist */
+	if (!bgp)
+		return;
+
+	h = bgp->rfapi;
+	assert(h);
+
+	holddown_nves =
+		skiplist_new(0, rfapi_nve_addr_cmp, delete_rem_pfx_na_free);
+	active_nves =
+		skiplist_new(0, rfapi_nve_addr_cmp, delete_rem_pfx_na_free);
+
+	/*
+	 * Delete from the IP import tables: either just arg_it,
+	 * or every table on h->imports
+	 */
+	it = arg_it ? arg_it : h->imports;
+	while (it) {
+		vnc_zlog_debug_verbose(
+			"%s: calling rfapiDeleteRemotePrefixesIt() on (IP) import %p",
+			__func__, it);
+
+		rfapiDeleteRemotePrefixesIt(bgp, it, un, vn, p, delete_active,
+					    delete_holddown, &n_active_routes,
+					    &n_active_nves, &n_holddown_routes,
+					    &n_holddown_nves, active_nves,
+					    holddown_nves);
+
+		it = arg_it ? NULL : it->next;
+	}
+
+	/*
+	 * Now the L2 import tables, unless p names a non-ethernet
+	 * prefix. This pass does not look at arg_it.
+	 */
+	if (h->import_mac && (!p || p->family == AF_ETHERNET)) {
+		void *cursor = NULL;
+
+		while (!skiplist_next(h->import_mac, NULL, (void **)&it,
+				      &cursor)) {
+			vnc_zlog_debug_verbose(
+				"%s: calling rfapiDeleteRemotePrefixesIt() on import_mac %p",
+				__func__, it);
+
+			rfapiDeleteRemotePrefixesIt(
+				bgp, it, un, vn, p, delete_active,
+				delete_holddown, &n_active_routes,
+				&n_active_nves, &n_holddown_routes,
+				&n_holddown_nves, active_nves, holddown_nves);
+		}
+	}
+
+	/*
+	 * our custom element freeing function above counts as it deletes,
+	 * so the nve counts are only final after these two calls
+	 */
+	skiplist_free(holddown_nves);
+	skiplist_free(active_nves);
+
+	if (pARcount)
+		*pARcount = n_active_routes;
+	if (pAHcount)
+		*pAHcount = n_active_nves;
+	if (pHRcount)
+		*pHRcount = n_holddown_routes;
+	if (pHHcount)
+		*pHHcount = n_holddown_nves;
+
+	VNC_ITRCCK;
+}
+
+/*------------------------------------------
+ * rfapiCountAllItRoutes
+ *
+ * UI helper: sum the per-table route counters of all import tables
+ * (every afi of each IP import table, AFI_L2VPN of each L2 table)
+ *
+ * input:
+ *
+ * output (each pointer may be NULL)
+ *	pALRcount	count of active local routes
+ *	pARRcount	count of active remote routes
+ *	pHRcount	count of holddown routes
+ *	pIRcount	count of direct imported routes
+ *
+ * return value:
+ *	void
+ --------------------------------------------*/
+void rfapiCountAllItRoutes(int *pALRcount, /* active local routes */
+			   int *pARRcount, /* active remote routes */
+			   int *pHRcount,  /* holddown routes */
+			   int *pIRcount)  /* imported routes */
+{
+	int sum_local = 0;
+	int sum_remote = 0;
+	int sum_holddown = 0;
+	int sum_imported = 0;
+	struct rfapi_import_table *it;
+	struct rfapi *h;
+	struct bgp *bgp;
+	afi_t afi;
+
+	bgp = bgp_get_default(); /* assume 1 instance for now */
+	assert(bgp);
+
+	h = bgp->rfapi;
+	assert(h);
+
+	/* IP import tables: add up the counters of every afi */
+	for (it = h->imports; it; it = it->next) {
+		for (afi = AFI_IP; afi < AFI_MAX; ++afi) {
+			sum_local += it->local_count[afi];
+			sum_remote += it->remote_count[afi];
+			sum_holddown += it->holddown_count[afi];
+			sum_imported += it->imported_count[afi];
+		}
+	}
+
+	/* L2 import tables: only the AFI_L2VPN counters are used */
+	if (h->import_mac) {
+		void *cursor = NULL;
+
+		while (!skiplist_next(h->import_mac, NULL, (void **)&it,
+				      &cursor)) {
+			sum_local += it->local_count[AFI_L2VPN];
+			sum_remote += it->remote_count[AFI_L2VPN];
+			sum_holddown += it->holddown_count[AFI_L2VPN];
+			sum_imported += it->imported_count[AFI_L2VPN];
+		}
+	}
+
+	if (pALRcount)
+		*pALRcount = sum_local;
+	if (pARRcount)
+		*pARRcount = sum_remote;
+	if (pHRcount)
+		*pHRcount = sum_holddown;
+	if (pIRcount)
+		*pIRcount = sum_imported;
+}
+
+/*------------------------------------------
+ * rfapiGetHolddownFromLifetime
+ *
+ * calculate holddown value based on lifetime
+ *
+ * input:
+ *	lifetime	lifetime
+ *
+ * return value:
+ *	Holddown value based on lifetime, holddown_factor,
+ *	and RFAPI_LIFETIME_INFINITE_WITHDRAW_DELAY
+ *
+ * The factor is a percentage: it is taken from the default bgp
+ * instance's rfapi configuration when one exists, otherwise
+ * RFAPI_RFP_CFG_DEFAULT_HOLDDOWN_FACTOR is used.
+ --------------------------------------------*/
+/* hold down time maxes out at RFAPI_LIFETIME_INFINITE_WITHDRAW_DELAY */
+uint32_t rfapiGetHolddownFromLifetime(uint32_t lifetime)
+{
+	uint32_t factor;
+	uint64_t holddown = lifetime;
+	struct bgp *bgp;
+
+	bgp = bgp_get_default();
+	if (bgp && bgp->rfapi_cfg)
+		factor = bgp->rfapi_cfg->rfp_cfg.holddown_factor;
+	else
+		factor = RFAPI_RFP_CFG_DEFAULT_HOLDDOWN_FACTOR;
+
+	/*
+	 * Scale in 64 bits: lifetime * factor can exceed UINT32_MAX
+	 * (e.g. an infinite lifetime with a factor below 100), and a
+	 * wrapped 32-bit product would yield a bogus, too-small holddown.
+	 */
+	if (factor < 100 || lifetime < RFAPI_LIFETIME_INFINITE_WITHDRAW_DELAY)
+		holddown = (uint64_t)lifetime * factor / 100;
+
+	if (holddown < RFAPI_LIFETIME_INFINITE_WITHDRAW_DELAY)
+		return (uint32_t)holddown;
+
+	return RFAPI_LIFETIME_INFINITE_WITHDRAW_DELAY;
+}