diff options
author | Daniel Baumann <daniel.baumann@progress-linux.org> | 2024-04-09 13:16:35 +0000 |
---|---|---|
committer | Daniel Baumann <daniel.baumann@progress-linux.org> | 2024-04-09 13:16:35 +0000 |
commit | e2bbf175a2184bd76f6c54ccf8456babeb1a46fc (patch) | |
tree | f0b76550d6e6f500ada964a3a4ee933a45e5a6f1 /bgpd/rfapi/rfapi_import.c | |
parent | Initial commit. (diff) | |
download | frr-e2bbf175a2184bd76f6c54ccf8456babeb1a46fc.tar.xz frr-e2bbf175a2184bd76f6c54ccf8456babeb1a46fc.zip |
Adding upstream version 9.1.upstream/9.1
Signed-off-by: Daniel Baumann <daniel.baumann@progress-linux.org>
Diffstat (limited to 'bgpd/rfapi/rfapi_import.c')
-rw-r--r-- | bgpd/rfapi/rfapi_import.c | 4818 |
1 files changed, 4818 insertions, 0 deletions
diff --git a/bgpd/rfapi/rfapi_import.c b/bgpd/rfapi/rfapi_import.c new file mode 100644 index 0000000..a93e186 --- /dev/null +++ b/bgpd/rfapi/rfapi_import.c @@ -0,0 +1,4818 @@ +// SPDX-License-Identifier: GPL-2.0-or-later +/* + * Copyright 2009-2016, LabN Consulting, L.L.C. + */ + +/* + * File: rfapi_import.c + * Purpose: Handle import of routes from BGP to RFAPI + */ + +#include "lib/zebra.h" +#include "lib/prefix.h" +#include "lib/agg_table.h" +#include "lib/vty.h" +#include "lib/memory.h" +#include "lib/log.h" +#include "lib/skiplist.h" +#include "frrevent.h" +#include "lib/stream.h" +#include "lib/lib_errors.h" + +#include "bgpd/bgpd.h" +#include "bgpd/bgp_ecommunity.h" +#include "bgpd/bgp_attr.h" +#include "bgpd/bgp_route.h" +#include "bgpd/bgp_mplsvpn.h" /* prefix_rd2str() */ +#include "bgpd/bgp_vnc_types.h" +#include "bgpd/bgp_rd.h" + +#include "bgpd/rfapi/rfapi.h" +#include "bgpd/rfapi/bgp_rfapi_cfg.h" +#include "bgpd/rfapi/rfapi_backend.h" +#include "bgpd/rfapi/rfapi_import.h" +#include "bgpd/rfapi/rfapi_private.h" +#include "bgpd/rfapi/rfapi_monitor.h" +#include "bgpd/rfapi/rfapi_nve_addr.h" +#include "bgpd/rfapi/rfapi_vty.h" +#include "bgpd/rfapi/vnc_export_bgp.h" +#include "bgpd/rfapi/vnc_export_bgp_p.h" +#include "bgpd/rfapi/vnc_zebra.h" +#include "bgpd/rfapi/vnc_import_bgp.h" +#include "bgpd/rfapi/vnc_import_bgp_p.h" +#include "bgpd/rfapi/rfapi_rib.h" +#include "bgpd/rfapi/rfapi_encap_tlv.h" +#include "bgpd/rfapi/vnc_debug.h" + +#ifdef HAVE_GLIBC_BACKTRACE +/* for backtrace and friends */ +#include <execinfo.h> +#endif /* HAVE_GLIBC_BACKTRACE */ + +#undef DEBUG_MONITOR_MOVE_SHORTER +#undef DEBUG_RETURNED_NHL +#undef DEBUG_ROUTE_COUNTERS +#undef DEBUG_ENCAP_MONITOR +#undef DEBUG_L2_EXTRA +#undef DEBUG_IT_NODES +#undef DEBUG_BI_SEARCH + +/* + * Allocated for each withdraw timer instance; freed when the timer + * expires or is canceled + */ +struct rfapi_withdraw { + struct rfapi_import_table *import_table; + struct agg_node *node; + struct bgp_path_info 
*info; + safi_t safi; /* used only for bulk operations */ + /* + * For import table node reference count checking (i.e., debugging). + * Normally when a timer expires, lockoffset should be 0. However, if + * the timer expiration function is called directly (e.g., + * rfapiExpireVpnNow), the node could be locked by a preceding + * agg_route_top() or agg_route_next() in a loop, so we need to pass + * this value in. + */ + int lockoffset; +}; + +/* + * DEBUG FUNCTION + * It's evil and fiendish. It's compiler-dependent. + * ? Might need LDFLAGS -rdynamic to produce all function names + */ +void rfapiDebugBacktrace(void) +{ +#ifdef HAVE_GLIBC_BACKTRACE +#define RFAPI_DEBUG_BACKTRACE_NENTRIES 200 + void *buf[RFAPI_DEBUG_BACKTRACE_NENTRIES]; + char **syms; + size_t i; + size_t size; + + size = backtrace(buf, RFAPI_DEBUG_BACKTRACE_NENTRIES); + syms = backtrace_symbols(buf, size); + + for (i = 0; i < size && i < RFAPI_DEBUG_BACKTRACE_NENTRIES; ++i) { + vnc_zlog_debug_verbose("backtrace[%2zu]: %s", i, syms[i]); + } + + free(syms); +#else +#endif +} + +/* + * DEBUG FUNCTION + * Count remote routes and compare with actively-maintained values. + * Abort if they disagree. 
+ */ +void rfapiCheckRouteCount(void) +{ + struct bgp *bgp = bgp_get_default(); + struct rfapi *h; + struct rfapi_import_table *it; + afi_t afi; + + assert(bgp); + + h = bgp->rfapi; + assert(h); + + for (it = h->imports; it; it = it->next) { + for (afi = AFI_IP; afi < AFI_MAX; ++afi) { + + struct agg_table *rt; + struct agg_node *rn; + + int holddown_count = 0; + int imported_count = 0; + int remote_count = 0; + + rt = it->imported_vpn[afi]; + + for (rn = agg_route_top(rt); rn; + rn = agg_route_next(rn)) { + struct bgp_path_info *bpi; + struct bgp_path_info *next; + + for (bpi = rn->info; bpi; bpi = next) { + next = bpi->next; + + if (CHECK_FLAG(bpi->flags, + BGP_PATH_REMOVED)) { + ++holddown_count; + + } else { + if (!RFAPI_LOCAL_BI(bpi)) { + if (RFAPI_DIRECT_IMPORT_BI( + bpi)) { + ++imported_count; + } else { + ++remote_count; + } + } + } + } + } + + if (it->holddown_count[afi] != holddown_count) { + vnc_zlog_debug_verbose( + "%s: it->holddown_count %d != holddown_count %d", + __func__, it->holddown_count[afi], + holddown_count); + assert(0); + } + if (it->remote_count[afi] != remote_count) { + vnc_zlog_debug_verbose( + "%s: it->remote_count %d != remote_count %d", + __func__, it->remote_count[afi], + remote_count); + assert(0); + } + if (it->imported_count[afi] != imported_count) { + vnc_zlog_debug_verbose( + "%s: it->imported_count %d != imported_count %d", + __func__, it->imported_count[afi], + imported_count); + assert(0); + } + } + } +} + +#ifdef DEBUG_ROUTE_COUNTERS +#define VNC_ITRCCK do {rfapiCheckRouteCount();} while (0) +#else +#define VNC_ITRCCK +#endif + +/* + * Validate reference count for a node in an import table + * + * Normally lockoffset is 0 for nodes in quiescent state. However, + * agg_unlock_node will delete the node if it is called when + * node->lock == 1, and we have to validate the refcount before + * the node is deleted. In this case, we specify lockoffset 1. 
+ */ +void rfapiCheckRefcount(struct agg_node *rn, safi_t safi, int lockoffset) +{ + unsigned int count_bpi = 0; + unsigned int count_monitor = 0; + struct bgp_path_info *bpi; + struct rfapi_monitor_encap *hme; + struct rfapi_monitor_vpn *hmv; + + for (bpi = rn->info; bpi; bpi = bpi->next) + ++count_bpi; + + + if (rn->aggregate) { + ++count_monitor; /* rfapi_it_extra */ + + switch (safi) { + void *cursor; + int rc; + + case SAFI_ENCAP: + for (hme = RFAPI_MONITOR_ENCAP(rn); hme; + hme = hme->next) + ++count_monitor; + break; + + case SAFI_MPLS_VPN: + + for (hmv = RFAPI_MONITOR_VPN(rn); hmv; hmv = hmv->next) + ++count_monitor; + + if (RFAPI_MONITOR_EXTERIOR(rn)->source) { + ++count_monitor; /* sl */ + cursor = NULL; + for (rc = skiplist_next( + RFAPI_MONITOR_EXTERIOR(rn)->source, + NULL, NULL, &cursor); + !rc; + rc = skiplist_next( + RFAPI_MONITOR_EXTERIOR(rn)->source, + NULL, NULL, &cursor)) { + + ++count_monitor; /* sl entry */ + } + } + break; + + case SAFI_UNSPEC: + case SAFI_UNICAST: + case SAFI_MULTICAST: + case SAFI_EVPN: + case SAFI_LABELED_UNICAST: + case SAFI_FLOWSPEC: + case SAFI_MAX: + assert(!"Passed in safi should be impossible"); + } + } + + if (count_bpi + count_monitor + lockoffset + != agg_node_get_lock_count(rn)) { + vnc_zlog_debug_verbose( + "%s: count_bpi=%d, count_monitor=%d, lockoffset=%d, rn->lock=%d", + __func__, count_bpi, count_monitor, lockoffset, + agg_node_get_lock_count(rn)); + assert(0); + } +} + +/* + * Perform deferred rfapi_close operations that were queued + * during callbacks. 
+ */ +static wq_item_status rfapi_deferred_close_workfunc(struct work_queue *q, + void *data) +{ + struct rfapi_descriptor *rfd = data; + struct rfapi *h = q->spec.data; + + assert(!(h->flags & RFAPI_INCALLBACK)); + rfapi_close(rfd); + vnc_zlog_debug_verbose("%s: completed deferred close on handle %p", + __func__, rfd); + return WQ_SUCCESS; +} + +/* + * Extract layer 2 option from Encap TLVS in BGP attrs + */ +int rfapiGetL2o(struct attr *attr, struct rfapi_l2address_option *l2o) +{ + if (attr) { + struct bgp_attr_encap_subtlv *pEncap; + + for (pEncap = bgp_attr_get_vnc_subtlvs(attr); pEncap; + pEncap = pEncap->next) { + + if (pEncap->type == BGP_VNC_SUBTLV_TYPE_RFPOPTION) { + if (pEncap->value[0] + == RFAPI_VN_OPTION_TYPE_L2ADDR) { + + if (pEncap->value[1] == 14) { + memcpy(l2o->macaddr.octet, + pEncap->value + 2, + ETH_ALEN); + l2o->label = + ((pEncap->value[10] + >> 4) + & 0x0f) + + ((pEncap->value[9] + << 4) + & 0xff0) + + ((pEncap->value[8] + << 12) + & 0xff000); + + l2o->local_nve_id = + pEncap->value[12]; + + l2o->logical_net_id = + (pEncap->value[15] + & 0xff) + + ((pEncap->value[14] + << 8) + & 0xff00) + + ((pEncap->value[13] + << 16) + & 0xff0000); + } + + return 0; + } + } + } + } + + return ENOENT; +} + +/* + * Extract the lifetime from the Tunnel Encap attribute of a route in + * an import table + */ +int rfapiGetVncLifetime(struct attr *attr, uint32_t *lifetime) +{ + struct bgp_attr_encap_subtlv *pEncap; + + *lifetime = RFAPI_INFINITE_LIFETIME; /* default to infinite */ + + if (attr) { + + for (pEncap = bgp_attr_get_vnc_subtlvs(attr); pEncap; + pEncap = pEncap->next) { + + if (pEncap->type + == BGP_VNC_SUBTLV_TYPE_LIFETIME) { /* lifetime */ + if (pEncap->length == 4) { + memcpy(lifetime, pEncap->value, 4); + *lifetime = ntohl(*lifetime); + return 0; + } + } + } + } + + return ENOENT; +} + +/* + * Look for UN address in Encap attribute + */ +int rfapiGetVncTunnelUnAddr(struct attr *attr, struct prefix *p) +{ + struct bgp_attr_encap_subtlv *pEncap; + 
bgp_encap_types tun_type = BGP_ENCAP_TYPE_MPLS;/*Default tunnel type*/ + + bgp_attr_extcom_tunnel_type(attr, &tun_type); + if (tun_type == BGP_ENCAP_TYPE_MPLS) { + if (!p) + return 0; + /* MPLS carries UN address in next hop */ + rfapiNexthop2Prefix(attr, p); + if (p->family != AF_UNSPEC) + return 0; + + return ENOENT; + } + if (attr) { + for (pEncap = attr->encap_subtlvs; pEncap; + pEncap = pEncap->next) { + + if (pEncap->type + == BGP_ENCAP_SUBTLV_TYPE_REMOTE_ENDPOINT) { /* un + addr + */ + switch (pEncap->length) { + case 8: + if (p) { + p->family = AF_INET; + p->prefixlen = IPV4_MAX_BITLEN; + memcpy(p->u.val, pEncap->value, + 4); + } + return 0; + + case 20: + if (p) { + p->family = AF_INET6; + p->prefixlen = IPV6_MAX_BITLEN; + memcpy(p->u.val, pEncap->value, + 16); + } + return 0; + } + } + } + } + + return ENOENT; +} + +/* + * Get UN address wherever it might be + */ +int rfapiGetUnAddrOfVpnBi(struct bgp_path_info *bpi, struct prefix *p) +{ + /* If it's in this route's VNC attribute, we're done */ + if (!rfapiGetVncTunnelUnAddr(bpi->attr, p)) + return 0; + /* + * Otherwise, see if it's cached from a corresponding ENCAP SAFI + * advertisement + */ + if (bpi->extra) { + switch (bpi->extra->vnc.import.un_family) { + case AF_INET: + if (p) { + p->family = bpi->extra->vnc.import.un_family; + p->u.prefix4 = bpi->extra->vnc.import.un.addr4; + p->prefixlen = IPV4_MAX_BITLEN; + } + return 0; + case AF_INET6: + if (p) { + p->family = bpi->extra->vnc.import.un_family; + p->u.prefix6 = bpi->extra->vnc.import.un.addr6; + p->prefixlen = IPV6_MAX_BITLEN; + } + return 0; + default: + if (p) + p->family = AF_UNSPEC; +#ifdef DEBUG_ENCAP_MONITOR + vnc_zlog_debug_verbose( + "%s: bpi->extra->vnc.import.un_family is 0, no UN addr", + __func__); +#endif + break; + } + } + + return ENOENT; +} + + +/* + * Make a new bgp_path_info from gathered parameters + */ +static struct bgp_path_info *rfapiBgpInfoCreate(struct attr *attr, + struct peer *peer, void *rfd, + struct prefix_rd *prd, + 
uint8_t type, uint8_t sub_type, + uint32_t *label) +{ + struct bgp_path_info *new; + + new = info_make(type, sub_type, 0, peer, attr, NULL); + + new->attr = bgp_attr_intern(attr); + + bgp_path_info_extra_get(new); + if (prd) { + new->extra->vnc.import.rd = *prd; + new->extra->vnc.import.create_time = monotime(NULL); + } + if (label) + encode_label(*label, &new->extra->label[0]); + + peer_lock(peer); + + return new; +} + +/* + * Frees bgp_path_info as used in import tables (parts are not + * allocated exactly the way they are in the main RIBs) + */ +static void rfapiBgpInfoFree(struct bgp_path_info *goner) +{ + if (!goner) + return; + + if (goner->peer) { + vnc_zlog_debug_verbose("%s: calling peer_unlock(%p), #%d", + __func__, goner->peer, + goner->peer->lock); + peer_unlock(goner->peer); + } + + bgp_attr_unintern(&goner->attr); + + if (goner->extra) + bgp_path_info_extra_free(&goner->extra); + XFREE(MTYPE_BGP_ROUTE, goner); +} + +struct rfapi_import_table *rfapiMacImportTableGetNoAlloc(struct bgp *bgp, + uint32_t lni) +{ + struct rfapi *h; + struct rfapi_import_table *it = NULL; + uintptr_t lni_as_ptr = lni; + + h = bgp->rfapi; + if (!h) + return NULL; + + if (!h->import_mac) + return NULL; + + if (skiplist_search(h->import_mac, (void *)lni_as_ptr, (void **)&it)) + return NULL; + + return it; +} + +struct rfapi_import_table *rfapiMacImportTableGet(struct bgp *bgp, uint32_t lni) +{ + struct rfapi *h; + struct rfapi_import_table *it = NULL; + uintptr_t lni_as_ptr = lni; + + h = bgp->rfapi; + assert(h); + + if (!h->import_mac) { + /* default cmp is good enough for LNI */ + h->import_mac = skiplist_new(0, NULL, NULL); + } + + if (skiplist_search(h->import_mac, (void *)lni_as_ptr, (void **)&it)) { + + struct ecommunity *enew; + struct ecommunity_val eval; + afi_t afi; + + it = XCALLOC(MTYPE_RFAPI_IMPORTTABLE, + sizeof(struct rfapi_import_table)); + /* set RT list of new import table based on LNI */ + memset((char *)&eval, 0, sizeof(eval)); + eval.val[0] = 0; /* VNC 
L2VPN */ + eval.val[1] = 2; /* VNC L2VPN */ + eval.val[5] = (lni >> 16) & 0xff; + eval.val[6] = (lni >> 8) & 0xff; + eval.val[7] = (lni >> 0) & 0xff; + + enew = ecommunity_new(); + ecommunity_add_val(enew, &eval, false, false); + it->rt_import_list = enew; + + for (afi = AFI_IP; afi < AFI_MAX; ++afi) { + it->imported_vpn[afi] = agg_table_init(); + it->imported_encap[afi] = agg_table_init(); + } + + it->l2_logical_net_id = lni; + + skiplist_insert(h->import_mac, (void *)lni_as_ptr, it); + } + + assert(it); + return it; +} + +/* + * Implement MONITOR_MOVE_SHORTER(original_node) from + * RFAPI-Import-Event-Handling.txt + * + * Returns pointer to the list of moved monitors + */ +static struct rfapi_monitor_vpn * +rfapiMonitorMoveShorter(struct agg_node *original_vpn_node, int lockoffset) +{ + struct bgp_path_info *bpi; + struct agg_node *par; + struct rfapi_monitor_vpn *m; + struct rfapi_monitor_vpn *mlast; + struct rfapi_monitor_vpn *moved; + int movecount = 0; + int parent_already_refcounted = 0; + + RFAPI_CHECK_REFCOUNT(original_vpn_node, SAFI_MPLS_VPN, lockoffset); + +#ifdef DEBUG_MONITOR_MOVE_SHORTER + { + vnc_zlog_debug_verbose("%s: called with node pfx=%pFX", + __func__, &original_vpn_node->p); + } +#endif + + /* + * 1. If there is at least one bpi (either regular route or + * route marked as withdrawn, with a pending timer) at + * original_node with a valid UN address, we're done. Return. + */ + for (bpi = original_vpn_node->info; bpi; bpi = bpi->next) { + struct prefix pfx; + + if (!rfapiGetUnAddrOfVpnBi(bpi, &pfx)) { +#ifdef DEBUG_MONITOR_MOVE_SHORTER + vnc_zlog_debug_verbose( + "%s: have valid UN at original node, no change", + __func__); +#endif + return NULL; + } + } + + /* + * 2. Travel up the tree (toward less-specific prefixes) from + * original_node to find the first node that has at least + * one route (even if it is only a withdrawn route) with a + * valid UN address. Call this node "Node P." 
+ */ + for (par = agg_node_parent(original_vpn_node); par; + par = agg_node_parent(par)) { + for (bpi = par->info; bpi; bpi = bpi->next) { + struct prefix pfx; + if (!rfapiGetUnAddrOfVpnBi(bpi, &pfx)) { + break; + } + } + if (bpi) + break; + } + + if (par) { + RFAPI_CHECK_REFCOUNT(par, SAFI_MPLS_VPN, 0); + } + + /* + * If no less-specific routes, try to use the 0/0 node + */ + if (!par) { + const struct prefix *p; + /* this isn't necessarily 0/0 */ + par = agg_route_table_top(original_vpn_node); + + if (par) + p = agg_node_get_prefix(par); + /* + * If we got the top node but it wasn't 0/0, + * ignore it + */ + if (par && p->prefixlen) { + agg_unlock_node(par); /* maybe free */ + par = NULL; + } + + if (par) { + ++parent_already_refcounted; + } + } + + /* + * Create 0/0 node if it isn't there + */ + if (!par) { + struct prefix pfx_default; + const struct prefix *p = agg_node_get_prefix(original_vpn_node); + + memset(&pfx_default, 0, sizeof(pfx_default)); + pfx_default.family = p->family; + + /* creates default node if none exists */ + par = agg_node_get(agg_get_table(original_vpn_node), + &pfx_default); + ++parent_already_refcounted; + } + + /* + * 3. Move each of the monitors found at original_node to Node P. + * These are "Moved Monitors." 
+ * + */ + + /* + * Attach at end so that the list pointer we return points + * only to the moved routes + */ + for (m = RFAPI_MONITOR_VPN(par), mlast = NULL; m; + mlast = m, m = m->next) + ; + + if (mlast) { + moved = mlast->next = RFAPI_MONITOR_VPN(original_vpn_node); + } else { + moved = RFAPI_MONITOR_VPN_W_ALLOC(par) = + RFAPI_MONITOR_VPN(original_vpn_node); + } + if (RFAPI_MONITOR_VPN( + original_vpn_node)) /* check agg, so not allocated */ + RFAPI_MONITOR_VPN_W_ALLOC(original_vpn_node) = NULL; + + /* + * update the node pointers on the monitors + */ + for (m = moved; m; m = m->next) { + ++movecount; + m->node = par; + } + + RFAPI_CHECK_REFCOUNT(par, SAFI_MPLS_VPN, + parent_already_refcounted - movecount); + while (movecount > parent_already_refcounted) { + agg_lock_node(par); + ++parent_already_refcounted; + } + while (movecount < parent_already_refcounted) { + /* unlikely, but code defensively */ + agg_unlock_node(par); + --parent_already_refcounted; + } + RFAPI_CHECK_REFCOUNT(original_vpn_node, SAFI_MPLS_VPN, + movecount + lockoffset); + while (movecount--) { + agg_unlock_node(original_vpn_node); + } + +#ifdef DEBUG_MONITOR_MOVE_SHORTER + { + vnc_zlog_debug_verbose("%s: moved to node pfx=%pFX", __func__, + &par->p); + } +#endif + + + return moved; +} + +/* + * Implement MONITOR_MOVE_LONGER(new_node) from + * RFAPI-Import-Event-Handling.txt + */ +static void rfapiMonitorMoveLonger(struct agg_node *new_vpn_node) +{ + struct rfapi_monitor_vpn *monitor; + struct rfapi_monitor_vpn *mlast; + struct bgp_path_info *bpi; + struct agg_node *par; + const struct prefix *new_vpn_node_p = agg_node_get_prefix(new_vpn_node); + + RFAPI_CHECK_REFCOUNT(new_vpn_node, SAFI_MPLS_VPN, 0); + + /* + * Make sure we have at least one valid route at the new node + */ + for (bpi = new_vpn_node->info; bpi; bpi = bpi->next) { + struct prefix pfx; + if (!rfapiGetUnAddrOfVpnBi(bpi, &pfx)) + break; + } + + if (!bpi) { + vnc_zlog_debug_verbose( + "%s: no valid routes at node %p, so not 
attempting moves", + __func__, new_vpn_node); + return; + } + + /* + * Find first parent node that has monitors + */ + for (par = agg_node_parent(new_vpn_node); par; + par = agg_node_parent(par)) { + if (RFAPI_MONITOR_VPN(par)) + break; + } + + if (!par) { + vnc_zlog_debug_verbose( + "%s: no parent nodes with monitors, done", __func__); + return; + } + + /* + * Check each of these monitors to see of their longest-match + * is now the updated node. Move any such monitors to the more- + * specific updated node + */ + for (mlast = NULL, monitor = RFAPI_MONITOR_VPN(par); monitor;) { + /* + * If new longest match for monitor prefix is the new + * route's prefix, move monitor to new route's prefix + */ + if (prefix_match(new_vpn_node_p, &monitor->p)) { + /* detach */ + if (mlast) { + mlast->next = monitor->next; + } else { + RFAPI_MONITOR_VPN_W_ALLOC(par) = monitor->next; + } + + + /* attach */ + monitor->next = RFAPI_MONITOR_VPN(new_vpn_node); + RFAPI_MONITOR_VPN_W_ALLOC(new_vpn_node) = monitor; + monitor->node = new_vpn_node; + + agg_lock_node(new_vpn_node); /* incr refcount */ + + monitor = mlast ? 
mlast->next : RFAPI_MONITOR_VPN(par); + + RFAPI_CHECK_REFCOUNT(par, SAFI_MPLS_VPN, 1); + /* decr refcount after we're done with par as this might + * free it */ + agg_unlock_node(par); + + continue; + } + mlast = monitor; + monitor = monitor->next; + } + + RFAPI_CHECK_REFCOUNT(new_vpn_node, SAFI_MPLS_VPN, 0); +} + + +static void rfapiBgpInfoChainFree(struct bgp_path_info *bpi) +{ + struct bgp_path_info *next; + + while (bpi) { + + /* + * If there is a timer waiting to delete this bpi, cancel + * the timer and delete immediately + */ + if (CHECK_FLAG(bpi->flags, BGP_PATH_REMOVED) + && bpi->extra->vnc.import.timer) { + struct rfapi_withdraw *wcb = + EVENT_ARG(bpi->extra->vnc.import.timer); + + XFREE(MTYPE_RFAPI_WITHDRAW, wcb); + EVENT_OFF(bpi->extra->vnc.import.timer); + } + + next = bpi->next; + bpi->next = NULL; + rfapiBgpInfoFree(bpi); + bpi = next; + } +} + +static void rfapiImportTableFlush(struct rfapi_import_table *it) +{ + afi_t afi; + + /* + * Free ecommunity + */ + ecommunity_free(&it->rt_import_list); + it->rt_import_list = NULL; + + for (afi = AFI_IP; afi < AFI_MAX; ++afi) { + + struct agg_node *rn; + struct agg_table *at; + + at = it->imported_vpn[afi]; + if (at) { + for (rn = agg_route_top(at); rn; + rn = agg_route_next(rn)) { + /* + * Each route_node has: + * aggregate: points to rfapi_it_extra with + * monitor chain(s) + * info: points to chain of bgp_path_info + */ + /* free bgp_path_info and its children */ + rfapiBgpInfoChainFree(rn->info); + rn->info = NULL; + + rfapiMonitorExtraFlush(SAFI_MPLS_VPN, rn); + } + agg_table_finish(at); + } + + if (at) { + at = it->imported_encap[afi]; + for (rn = agg_route_top(at); rn; + rn = agg_route_next(rn)) { + /* free bgp_path_info and its children */ + rfapiBgpInfoChainFree(rn->info); + rn->info = NULL; + + rfapiMonitorExtraFlush(SAFI_ENCAP, rn); + } + agg_table_finish(at); + } + } + if (it->monitor_exterior_orphans) { + skiplist_free(it->monitor_exterior_orphans); + } +} + +void 
rfapiImportTableRefDelByIt(struct bgp *bgp, + struct rfapi_import_table *it_target) +{ + struct rfapi *h; + struct rfapi_import_table *it; + struct rfapi_import_table *prev = NULL; + + assert(it_target); + + h = bgp->rfapi; + assert(h); + + for (it = h->imports; it; prev = it, it = it->next) { + if (it == it_target) + break; + } + + assert(it); + assert(it->refcount); + + it->refcount -= 1; + + if (!it->refcount) { + if (prev) { + prev->next = it->next; + } else { + h->imports = it->next; + } + rfapiImportTableFlush(it); + XFREE(MTYPE_RFAPI_IMPORTTABLE, it); + } +} + +#ifdef RFAPI_REQUIRE_ENCAP_BEEC +/* + * Look for magic BGP Encapsulation Extended Community value + * Format in RFC 5512 Sect. 4.5 + */ +static int rfapiEcommunitiesMatchBeec(struct ecommunity *ecom, + bgp_encap_types type) +{ + int i; + + if (!ecom) + return 0; + + for (i = 0; i < (ecom->size * ECOMMUNITY_SIZE); i += ECOMMUNITY_SIZE) { + + uint8_t *ep; + + ep = ecom->val + i; + + if (ep[0] == ECOMMUNITY_ENCODE_OPAQUE + && ep[1] == ECOMMUNITY_OPAQUE_SUBTYPE_ENCAP + && ep[6] == ((type && 0xff00) >> 8) + && ep[7] == (type & 0xff)) { + + return 1; + } + } + return 0; +} +#endif + +int rfapiEcommunitiesIntersect(struct ecommunity *e1, struct ecommunity *e2) +{ + uint32_t i, j; + + if (!e1 || !e2) + return 0; + + { + char *s1, *s2; + s1 = ecommunity_ecom2str(e1, ECOMMUNITY_FORMAT_DISPLAY, 0); + s2 = ecommunity_ecom2str(e2, ECOMMUNITY_FORMAT_DISPLAY, 0); + vnc_zlog_debug_verbose("%s: e1[%s], e2[%s]", __func__, s1, s2); + XFREE(MTYPE_ECOMMUNITY_STR, s1); + XFREE(MTYPE_ECOMMUNITY_STR, s2); + } + + for (i = 0; i < e1->size; ++i) { + for (j = 0; j < e2->size; ++j) { + if (!memcmp(e1->val + (i * ECOMMUNITY_SIZE), + e2->val + (j * ECOMMUNITY_SIZE), + ECOMMUNITY_SIZE)) { + + return 1; + } + } + } + return 0; +} + +int rfapiEcommunityGetLNI(struct ecommunity *ecom, uint32_t *lni) +{ + if (ecom) { + uint32_t i; + + for (i = 0; i < ecom->size; ++i) { + uint8_t *p = ecom->val + (i * ECOMMUNITY_SIZE); + + if ((*(p + 0) 
== 0x00) && (*(p + 1) == 0x02)) { + + *lni = (*(p + 5) << 16) | (*(p + 6) << 8) + | (*(p + 7)); + return 0; + } + } + } + return ENOENT; +} + +int rfapiEcommunityGetEthernetTag(struct ecommunity *ecom, uint16_t *tag_id) +{ + struct bgp *bgp = bgp_get_default(); + *tag_id = 0; /* default to untagged */ + if (ecom) { + uint32_t i; + + for (i = 0; i < ecom->size; ++i) { + as_t as = 0; + int encode = 0; + const uint8_t *p = ecom->val + (i * ECOMMUNITY_SIZE); + + /* High-order octet of type. */ + encode = *p++; + + if (*p++ == ECOMMUNITY_ROUTE_TARGET) { + if (encode == ECOMMUNITY_ENCODE_AS4) { + p = ptr_get_be32(p, &as); + } else if (encode == ECOMMUNITY_ENCODE_AS) { + as = (*p++ << 8); + as |= (*p++); + p += 2; /* skip next two, tag/vid + always in lowest bytes */ + } + if (as == bgp->as) { + *tag_id = *p++ << 8; + *tag_id |= (*p++); + return 0; + } + } + } + } + return ENOENT; +} + +static int rfapiVpnBiNhEqualsPt(struct bgp_path_info *bpi, + struct rfapi_ip_addr *hpt) +{ + uint8_t family; + + if (!hpt || !bpi) + return 0; + + family = BGP_MP_NEXTHOP_FAMILY(bpi->attr->mp_nexthop_len); + + if (hpt->addr_family != family) + return 0; + + switch (family) { + case AF_INET: + if (bpi->attr->mp_nexthop_global_in.s_addr + != hpt->addr.v4.s_addr) + return 0; + break; + + case AF_INET6: + if (IPV6_ADDR_CMP(&bpi->attr->mp_nexthop_global, &hpt->addr.v6)) + return 0; + break; + + default: + return 0; + } + + return 1; +} + + +/* + * Compare 2 VPN BIs. 
Return true if they have the same VN and UN addresses + */ +static int rfapiVpnBiSamePtUn(struct bgp_path_info *bpi1, + struct bgp_path_info *bpi2) +{ + struct prefix pfx_un1; + struct prefix pfx_un2; + + if (!bpi1 || !bpi2) + return 0; + + /* + * VN address comparisons + */ + + if (BGP_MP_NEXTHOP_FAMILY(bpi1->attr->mp_nexthop_len) + != BGP_MP_NEXTHOP_FAMILY(bpi2->attr->mp_nexthop_len)) { + return 0; + } + + switch (BGP_MP_NEXTHOP_FAMILY(bpi1->attr->mp_nexthop_len)) { + case AF_INET: + if (bpi1->attr->mp_nexthop_global_in.s_addr + != bpi2->attr->mp_nexthop_global_in.s_addr) + return 0; + break; + + case AF_INET6: + if (IPV6_ADDR_CMP(&bpi1->attr->mp_nexthop_global, + &bpi2->attr->mp_nexthop_global)) + return 0; + break; + + default: + return 0; + } + + memset(&pfx_un1, 0, sizeof(pfx_un1)); + memset(&pfx_un2, 0, sizeof(pfx_un2)); + + /* + * UN address comparisons + */ + if (rfapiGetVncTunnelUnAddr(bpi1->attr, &pfx_un1)) { + if (bpi1->extra) { + pfx_un1.family = bpi1->extra->vnc.import.un_family; + switch (bpi1->extra->vnc.import.un_family) { + case AF_INET: + pfx_un1.u.prefix4 = + bpi1->extra->vnc.import.un.addr4; + break; + case AF_INET6: + pfx_un1.u.prefix6 = + bpi1->extra->vnc.import.un.addr6; + break; + default: + pfx_un1.family = AF_UNSPEC; + break; + } + } + } + + if (rfapiGetVncTunnelUnAddr(bpi2->attr, &pfx_un2)) { + if (bpi2->extra) { + pfx_un2.family = bpi2->extra->vnc.import.un_family; + switch (bpi2->extra->vnc.import.un_family) { + case AF_INET: + pfx_un2.u.prefix4 = + bpi2->extra->vnc.import.un.addr4; + break; + case AF_INET6: + pfx_un2.u.prefix6 = + bpi2->extra->vnc.import.un.addr6; + break; + default: + pfx_un2.family = AF_UNSPEC; + break; + } + } + } + + if (pfx_un1.family == AF_UNSPEC || pfx_un2.family == AF_UNSPEC) + return 0; + + if (pfx_un1.family != pfx_un2.family) + return 0; + + switch (pfx_un1.family) { + case AF_INET: + if (!IPV4_ADDR_SAME(&pfx_un1.u.prefix4, &pfx_un2.u.prefix4)) + return 0; + break; + case AF_INET6: + if 
(!IPV6_ADDR_SAME(&pfx_un1.u.prefix6, &pfx_un2.u.prefix6)) + return 0; + break; + } + + + return 1; +} + +uint8_t rfapiRfpCost(struct attr *attr) +{ + if (attr->flag & ATTR_FLAG_BIT(BGP_ATTR_LOCAL_PREF)) { + if (attr->local_pref > 255) { + return 0; + } + return 255 - attr->local_pref; + } + + return 255; +} + +/*------------------------------------------ + * rfapi_extract_l2o + * + * Find Layer 2 options in an option chain + * + * input: + * pHop option chain + * + * output: + * l2o layer 2 options extracted + * + * return value: + * 0 OK + * 1 no options found + * + --------------------------------------------*/ +int rfapi_extract_l2o( + struct bgp_tea_options *pHop, /* chain of options */ + struct rfapi_l2address_option *l2o) /* return extracted value */ +{ + struct bgp_tea_options *p; + + for (p = pHop; p; p = p->next) { + if ((p->type == RFAPI_VN_OPTION_TYPE_L2ADDR) + && (p->length >= 8)) { + + char *v = p->value; + + memcpy(&l2o->macaddr, v, 6); + + l2o->label = ((v[6] << 12) & 0xff000) + + ((v[7] << 4) & 0xff0) + + ((v[8] >> 4) & 0xf); + + l2o->local_nve_id = (uint8_t)v[10]; + + l2o->logical_net_id = + (v[11] << 16) + (v[12] << 8) + (v[13] << 0); + + return 0; + } + } + return 1; +} + +static struct rfapi_next_hop_entry * +rfapiRouteInfo2NextHopEntry(struct rfapi_ip_prefix *rprefix, + struct bgp_path_info *bpi, /* route to encode */ + uint32_t lifetime, /* use this in nhe */ + struct agg_node *rn) /* req for L2 eth addr */ +{ + struct rfapi_next_hop_entry *new; + int have_vnc_tunnel_un = 0; + const struct prefix *p = agg_node_get_prefix(rn); + +#ifdef DEBUG_ENCAP_MONITOR + vnc_zlog_debug_verbose("%s: entry, bpi %p, rn %p", __func__, bpi, rn); +#endif + + new = XCALLOC(MTYPE_RFAPI_NEXTHOP, sizeof(struct rfapi_next_hop_entry)); + + new->prefix = *rprefix; + + if (bpi->extra + && decode_rd_type(bpi->extra->vnc.import.rd.val) + == RD_TYPE_VNC_ETH) { + /* ethernet */ + + struct rfapi_vn_option *vo; + + vo = XCALLOC(MTYPE_RFAPI_VN_OPTION, + sizeof(struct 
rfapi_vn_option)); + + vo->type = RFAPI_VN_OPTION_TYPE_L2ADDR; + + memcpy(&vo->v.l2addr.macaddr, &p->u.prefix_eth.octet, ETH_ALEN); + /* only low 3 bytes of this are significant */ + (void)rfapiEcommunityGetLNI(bgp_attr_get_ecommunity(bpi->attr), + &vo->v.l2addr.logical_net_id); + (void)rfapiEcommunityGetEthernetTag( + bgp_attr_get_ecommunity(bpi->attr), + &vo->v.l2addr.tag_id); + + /* local_nve_id comes from lower byte of RD type */ + vo->v.l2addr.local_nve_id = bpi->extra->vnc.import.rd.val[1]; + + /* label comes from MP_REACH_NLRI label */ + vo->v.l2addr.label = decode_label(&bpi->extra->label[0]); + + new->vn_options = vo; + + /* + * If there is an auxiliary prefix (i.e., host IP address), + * use it as the nexthop prefix instead of the query prefix + */ + if (bpi->extra->vnc.import.aux_prefix.family) { + rfapiQprefix2Rprefix(&bpi->extra->vnc.import.aux_prefix, + &new->prefix); + } + } + + bgp_encap_types tun_type = BGP_ENCAP_TYPE_MPLS; /*Default*/ + new->prefix.cost = rfapiRfpCost(bpi->attr); + + struct bgp_attr_encap_subtlv *pEncap; + + switch (BGP_MP_NEXTHOP_FAMILY(bpi->attr->mp_nexthop_len)) { + case AF_INET: + new->vn_address.addr_family = AF_INET; + new->vn_address.addr.v4 = bpi->attr->mp_nexthop_global_in; + break; + + case AF_INET6: + new->vn_address.addr_family = AF_INET6; + new->vn_address.addr.v6 = bpi->attr->mp_nexthop_global; + break; + + default: + zlog_warn("%s: invalid vpn nexthop length: %d", __func__, + bpi->attr->mp_nexthop_len); + rfapi_free_next_hop_list(new); + return NULL; + } + + for (pEncap = bgp_attr_get_vnc_subtlvs(bpi->attr); pEncap; + pEncap = pEncap->next) { + switch (pEncap->type) { + case BGP_VNC_SUBTLV_TYPE_LIFETIME: + /* use configured lifetime, not attr lifetime */ + break; + + default: + zlog_warn("%s: unknown VNC option type %d", __func__, + pEncap->type); + + break; + } + } + + bgp_attr_extcom_tunnel_type(bpi->attr, &tun_type); + if (tun_type == BGP_ENCAP_TYPE_MPLS) { + struct prefix p; + /* MPLS carries UN address in next 
/*
 * Report whether an import-table node holds any usable route.
 *
 * A route is "usable" when it is NOT flagged BGP_PATH_REMOVED (i.e., not
 * in withdraw holddown), has per-path extra info, and its UN address can
 * be resolved via rfapiGetUnAddrOfVpnBi().
 *
 * Returns 1 if at least one usable route exists at the node, 0 otherwise.
 */
int rfapiHasNonRemovedRoutes(struct agg_node *rn)
{
	struct bgp_path_info *bpi;

	for (bpi = rn->info; bpi; bpi = bpi->next) {
		struct prefix pfx;

		/* not withdrawn, has extra, and UN address resolves */
		if (!CHECK_FLAG(bpi->flags, BGP_PATH_REMOVED)
		    && (bpi->extra && !rfapiGetUnAddrOfVpnBi(bpi, &pfx))) {

			return 1;
		}
	}
	return 0;
}

#ifdef DEBUG_IT_NODES
/*
 * DEBUG FUNCTION
 *
 * Dump every path at an import-table node, computing for each the same
 * "non-removed" predicate used by rfapiHasNonRemovedRoutes() (shown as
 * nr=1/0) along with flags, extra pointer, and the UN-lookup result.
 */
void rfapiDumpNode(struct agg_node *rn)
{
	struct bgp_path_info *bpi;

	vnc_zlog_debug_verbose("%s: rn=%p", __func__, rn);
	for (bpi = rn->info; bpi; bpi = bpi->next) {
		struct prefix pfx;
		int ctrc = rfapiGetUnAddrOfVpnBi(bpi, &pfx);
		int nr;

		if (!CHECK_FLAG(bpi->flags, BGP_PATH_REMOVED)
		    && (bpi->extra && !ctrc)) {

			nr = 1;
		} else {
			nr = 0;
		}

		vnc_zlog_debug_verbose(
			" bpi=%p, nr=%d, flags=0x%x, extra=%p, ctrc=%d", bpi,
			nr, bpi->flags, bpi->extra, ctrc);
	}
}
#endif

/*
 * Append rfapi_next_hop_entry items for the routes at one node to the
 * caller's list (head/tail), deduplicating by VN address (nexthop).
 *
 * rn			node whose routes are converted
 * rprefix		prefix reported in each generated nexthop entry
 * lifetime		lifetime stored in each generated entry
 * removed		if nonzero, select ONLY holddown (BGP_PATH_REMOVED)
 *			routes; if zero, only non-holddown routes
 * head/tail		singly-linked result list being built (in/out)
 * exclude_vnaddr	skip routes whose VN address matches (same NVE)
 * rfd_rib_node		optional NVE RIB node to preload; also enables the
 *			FTD "recently sent" filter
 * pfx_target_original	original query target, used by the FTD filter
 *
 * Returns the number of entries appended.
 */
static int rfapiNhlAddNodeRoutes(
	struct agg_node *rn,		      /* in */
	struct rfapi_ip_prefix *rprefix,      /* in */
	uint32_t lifetime,		      /* in */
	int removed,			      /* in */
	struct rfapi_next_hop_entry **head,   /* in/out */
	struct rfapi_next_hop_entry **tail,   /* in/out */
	struct rfapi_ip_addr *exclude_vnaddr, /* omit routes to same NVE */
	struct agg_node *rfd_rib_node,	      /* preload this NVE rib node */
	struct prefix *pfx_target_original)   /* query target */
{
	struct bgp_path_info *bpi;
	struct rfapi_next_hop_entry *new;
	struct prefix pfx_un;
	struct skiplist *seen_nexthops;
	int count = 0;
	const struct prefix *p = agg_node_get_prefix(rn);
	int is_l2 = (p->family == AF_ETHERNET);

	if (rfd_rib_node) {
		struct agg_table *atable = agg_get_table(rfd_rib_node);
		struct rfapi_descriptor *rfd;

		if (atable) {
			rfd = agg_get_table_info(atable);

			/* suppress prefixes recently sent to this NVE */
			if (rfapiRibFTDFilterRecentPrefix(rfd, rn,
							  pfx_target_original))
				return 0;
		}
	}

	/* tracks VN addresses already emitted, so each appears only once */
	seen_nexthops =
		skiplist_new(0, vnc_prefix_cmp, prefix_free_lists);

	for (bpi = rn->info; bpi; bpi = bpi->next) {

		struct prefix pfx_vn;
		struct prefix *newpfx;

		/* honor the removed/non-removed selection */
		if (removed && !CHECK_FLAG(bpi->flags, BGP_PATH_REMOVED)) {
#ifdef DEBUG_RETURNED_NHL
			vnc_zlog_debug_verbose(
				"%s: want holddown, this route not holddown, skip",
				__func__);
#endif
			continue;
		}
		if (!removed && CHECK_FLAG(bpi->flags, BGP_PATH_REMOVED)) {
			continue;
		}

		if (!bpi->extra) {
			continue;
		}

		/*
		 * Check for excluded VN address
		 */
		if (rfapiVpnBiNhEqualsPt(bpi, exclude_vnaddr))
			continue;

		/*
		 * Check for VN address (nexthop) copied already
		 */
		if (is_l2) {
			/* L2 routes: semantic nexthop in aux_prefix; VN addr
			 * ain't it */
			pfx_vn = bpi->extra->vnc.import.aux_prefix;
		} else {
			rfapiNexthop2Prefix(bpi->attr, &pfx_vn);
		}
		/* skiplist_search returns 0 on hit: VN already emitted */
		if (!skiplist_search(seen_nexthops, &pfx_vn, NULL)) {
#ifdef DEBUG_RETURNED_NHL
			vnc_zlog_debug_verbose(
				"%s: already put VN/nexthop %pFX, skip",
				__func__, &pfx_vn);
#endif
			continue;
		}

		if (rfapiGetUnAddrOfVpnBi(bpi, &pfx_un)) {
#ifdef DEBUG_ENCAP_MONITOR
			vnc_zlog_debug_verbose(
				"%s: failed to get UN address of this VPN bpi",
				__func__);
#endif
			continue;
		}

		/* remember this VN addr; skiplist owns the copy and frees
		 * it via prefix_free_lists at skiplist_free() below */
		newpfx = prefix_new();
		*newpfx = pfx_vn;
		skiplist_insert(seen_nexthops, newpfx, newpfx);

		new = rfapiRouteInfo2NextHopEntry(rprefix, bpi, lifetime, rn);
		if (new) {
			if (rfapiRibPreloadBi(rfd_rib_node, &pfx_vn, &pfx_un,
					      lifetime, bpi)) {
				/* duplicate filtered by RIB */
				rfapi_free_next_hop_list(new);
				new = NULL;
			}
		}

		if (new) {
			/* append to caller's list */
			if (*tail) {
				(*tail)->next = new;
			} else {
				*head = new;
			}
			*tail = new;
			++count;
		}
	}

	skiplist_free(seen_nexthops);

	return count;
}
/*
 * Breadth-first collection of nexthop entries from the two child
 * subtrees of a node (children first, then recursion into their
 * subtrees).
 *
 * omit_node is meant for the situation where we are adding a subtree
 * of a parent of some original requested node. The response already
 * contains the original requested node, and we don't want to duplicate
 * its routes in the list, so we skip it if the right or left node
 * matches (of course, we still travel down its child subtrees).
 *
 * For each child: try non-holddown routes first; only if none exist
 * fall back to holddown routes (mirrors the ROUTE_LIST semantics).
 *
 * Returns the number of entries appended to head/tail.
 */
static int rfapiNhlAddSubtree(
	struct agg_node *rn,		      /* in */
	uint32_t lifetime,		      /* in */
	struct rfapi_next_hop_entry **head,   /* in/out */
	struct rfapi_next_hop_entry **tail,   /* in/out */
	struct agg_node *omit_node,	      /* in */
	struct rfapi_ip_addr *exclude_vnaddr, /* omit routes to same NVE */
	struct agg_table *rfd_rib_table,      /* preload here */
	struct prefix *pfx_target_original)   /* query target */
{
	struct rfapi_ip_prefix rprefix;
	int rcount = 0;

	/* FIXME: need to find a better way here to work without sticking our
	 * hands in node->link */
	if (agg_node_left(rn) && agg_node_left(rn) != omit_node) {
		if (agg_node_left(rn)->info) {
			const struct prefix *p =
				agg_node_get_prefix(agg_node_left(rn));
			int count = 0;
			struct agg_node *rib_rn = NULL;

			rfapiQprefix2Rprefix(p, &rprefix);
			if (rfd_rib_table)
				rib_rn = agg_node_get(rfd_rib_table, p);

			/* non-holddown routes first ... */
			count = rfapiNhlAddNodeRoutes(
				agg_node_left(rn), &rprefix, lifetime, 0, head,
				tail, exclude_vnaddr, rib_rn,
				pfx_target_original);
			if (!count) {
				/* ... fall back to holddown routes */
				count = rfapiNhlAddNodeRoutes(
					agg_node_left(rn), &rprefix, lifetime,
					1, head, tail, exclude_vnaddr, rib_rn,
					pfx_target_original);
			}
			rcount += count;
			if (rib_rn)
				agg_unlock_node(rib_rn);
		}
	}

	if (agg_node_right(rn) && agg_node_right(rn) != omit_node) {
		if (agg_node_right(rn)->info) {
			const struct prefix *p =
				agg_node_get_prefix(agg_node_right(rn));
			int count = 0;
			struct agg_node *rib_rn = NULL;

			rfapiQprefix2Rprefix(p, &rprefix);
			if (rfd_rib_table)
				rib_rn = agg_node_get(rfd_rib_table, p);

			count = rfapiNhlAddNodeRoutes(
				agg_node_right(rn), &rprefix, lifetime, 0, head,
				tail, exclude_vnaddr, rib_rn,
				pfx_target_original);
			if (!count) {
				count = rfapiNhlAddNodeRoutes(
					agg_node_right(rn), &rprefix, lifetime,
					1, head, tail, exclude_vnaddr, rib_rn,
					pfx_target_original);
			}
			rcount += count;
			if (rib_rn)
				agg_unlock_node(rib_rn);
		}
	}

	/* recurse; note omit_node is still honored below this level only
	 * by the direct-child check above, while subtrees are traversed */
	if (agg_node_left(rn)) {
		rcount += rfapiNhlAddSubtree(
			agg_node_left(rn), lifetime, head, tail, omit_node,
			exclude_vnaddr, rfd_rib_table, pfx_target_original);
	}
	if (agg_node_right(rn)) {
		rcount += rfapiNhlAddSubtree(
			agg_node_right(rn), lifetime, head, tail, omit_node,
			exclude_vnaddr, rfd_rib_table, pfx_target_original);
	}

	return rcount;
}
agg_node_get(rfd_rib_table, p) : NULL; + + /* + * Add non-withdrawn routes at this node + */ + count = rfapiNhlAddNodeRoutes(rn, &rprefix, lifetime, 0, &answer, &last, + exclude_vnaddr, rib_rn, + pfx_target_original); + + /* + * If the list has at least one entry, it's finished + */ + if (count) { + count += rfapiNhlAddSubtree(rn, lifetime, &answer, &last, NULL, + exclude_vnaddr, rfd_rib_table, + pfx_target_original); + vnc_zlog_debug_verbose("%s: %d nexthops, answer=%p", __func__, + count, answer); +#ifdef DEBUG_RETURNED_NHL + rfapiPrintNhl(NULL, answer); +#endif + if (rib_rn) + agg_unlock_node(rib_rn); + return answer; + } + + /* + * Add withdrawn routes at this node + */ + count = rfapiNhlAddNodeRoutes(rn, &rprefix, lifetime, 1, &answer, &last, + exclude_vnaddr, rib_rn, + pfx_target_original); + if (rib_rn) + agg_unlock_node(rib_rn); + + // rfapiPrintNhl(NULL, answer); + + /* + * walk up the tree until we find a node with non-deleted + * routes, then add them + */ + for (parent = agg_node_parent(rn); parent; + parent = agg_node_parent(parent)) { + if (rfapiHasNonRemovedRoutes(parent)) { + break; + } + } + + /* + * Add non-withdrawn routes from less-specific prefix + */ + if (parent) { + const struct prefix *p = agg_node_get_prefix(parent); + + rib_rn = rfd_rib_table ? agg_node_get(rfd_rib_table, p) : NULL; + rfapiQprefix2Rprefix(p, &rprefix); + count += rfapiNhlAddNodeRoutes(parent, &rprefix, lifetime, 0, + &answer, &last, exclude_vnaddr, + rib_rn, pfx_target_original); + count += rfapiNhlAddSubtree(parent, lifetime, &answer, &last, + rn, exclude_vnaddr, rfd_rib_table, + pfx_target_original); + if (rib_rn) + agg_unlock_node(rib_rn); + } else { + /* + * There is no parent with non-removed routes. Still need to + * add subtree of original node if it contributed routes to the + * answer. 
+ */ + if (count) + count += rfapiNhlAddSubtree(rn, lifetime, &answer, + &last, rn, exclude_vnaddr, + rfd_rib_table, + pfx_target_original); + } + + vnc_zlog_debug_verbose("%s: %d nexthops, answer=%p", __func__, count, + answer); +#ifdef DEBUG_RETURNED_NHL + rfapiPrintNhl(NULL, answer); +#endif + return answer; +} + +/* + * Construct nexthop list of all routes in table + */ +struct rfapi_next_hop_entry *rfapiRouteTable2NextHopList( + struct agg_table *rt, uint32_t lifetime, /* put into nexthop entries */ + struct rfapi_ip_addr *exclude_vnaddr, /* omit routes to same NVE */ + struct agg_table *rfd_rib_table, /* preload this NVE rib table */ + struct prefix *pfx_target_original) /* query target */ +{ + struct agg_node *rn; + struct rfapi_next_hop_entry *biglist = NULL; + struct rfapi_next_hop_entry *nhl; + struct rfapi_next_hop_entry *tail = NULL; + int count = 0; + + for (rn = agg_route_top(rt); rn; rn = agg_route_next(rn)) { + + nhl = rfapiRouteNode2NextHopList(rn, lifetime, exclude_vnaddr, + rfd_rib_table, + pfx_target_original); + if (!tail) { + tail = biglist = nhl; + if (tail) + count = 1; + } else { + tail->next = nhl; + } + if (tail) { + while (tail->next) { + ++count; + tail = tail->next; + } + } + } + + vnc_zlog_debug_verbose("%s: returning %d routes", __func__, count); + return biglist; +} + +struct rfapi_next_hop_entry *rfapiEthRouteNode2NextHopList( + struct agg_node *rn, struct rfapi_ip_prefix *rprefix, + uint32_t lifetime, /* put into nexthop entries */ + struct rfapi_ip_addr *exclude_vnaddr, /* omit routes to same NVE */ + struct agg_table *rfd_rib_table, /* preload NVE rib table */ + struct prefix *pfx_target_original) /* query target */ +{ + int count = 0; + struct rfapi_next_hop_entry *answer = NULL; + struct rfapi_next_hop_entry *last = NULL; + struct agg_node *rib_rn; + + rib_rn = rfd_rib_table + ? 
agg_node_get(rfd_rib_table, agg_node_get_prefix(rn)) + : NULL; + + count = rfapiNhlAddNodeRoutes(rn, rprefix, lifetime, 0, &answer, &last, + NULL, rib_rn, pfx_target_original); + +#ifdef DEBUG_ENCAP_MONITOR + vnc_zlog_debug_verbose("%s: node %p: %d non-holddown routes", __func__, + rn, count); +#endif + + if (!count) { + count = rfapiNhlAddNodeRoutes(rn, rprefix, lifetime, 1, &answer, + &last, exclude_vnaddr, rib_rn, + pfx_target_original); + vnc_zlog_debug_verbose("%s: node %p: %d holddown routes", + __func__, rn, count); + } + + if (rib_rn) + agg_unlock_node(rib_rn); + +#ifdef DEBUG_RETURNED_NHL + rfapiPrintNhl(NULL, answer); +#endif + + return answer; +} + + +/* + * Construct nexthop list of all routes in table + */ +struct rfapi_next_hop_entry *rfapiEthRouteTable2NextHopList( + uint32_t logical_net_id, struct rfapi_ip_prefix *rprefix, + uint32_t lifetime, /* put into nexthop entries */ + struct rfapi_ip_addr *exclude_vnaddr, /* omit routes to same NVE */ + struct agg_table *rfd_rib_table, /* preload NVE rib node */ + struct prefix *pfx_target_original) /* query target */ +{ + struct rfapi_import_table *it; + struct bgp *bgp = bgp_get_default(); + struct agg_table *rt; + struct agg_node *rn; + struct rfapi_next_hop_entry *biglist = NULL; + struct rfapi_next_hop_entry *nhl; + struct rfapi_next_hop_entry *tail = NULL; + int count = 0; + + + it = rfapiMacImportTableGet(bgp, logical_net_id); + rt = it->imported_vpn[AFI_L2VPN]; + + for (rn = agg_route_top(rt); rn; rn = agg_route_next(rn)) { + + nhl = rfapiEthRouteNode2NextHopList( + rn, rprefix, lifetime, exclude_vnaddr, rfd_rib_table, + pfx_target_original); + if (!tail) { + tail = biglist = nhl; + if (tail) + count = 1; + } else { + tail->next = nhl; + } + if (tail) { + while (tail->next) { + ++count; + tail = tail->next; + } + } + } + + vnc_zlog_debug_verbose("%s: returning %d routes", __func__, count); + return biglist; +} + +/* + * Insert a new bpi to the imported route table node, + * keeping the list of BPIs 
/*
 * Insert a new bpi to the imported route table node,
 * keeping the list of BPIs sorted best route first.
 *
 * Ordering rules (first match wins a break, i.e., insert before "next"):
 *  - no default bgp instance: insert at head
 *  - non-removed routes sort ahead of BGP_PATH_REMOVED (holddown) routes
 *  - otherwise by bgp_path_info_cmp_compatible() (-1 means info_new is
 *    better)
 */
static void rfapiBgpInfoAttachSorted(struct agg_node *rn,
				     struct bgp_path_info *info_new, afi_t afi,
				     safi_t safi)
{
	struct bgp *bgp;
	struct bgp_path_info *prev;
	struct bgp_path_info *next;
	char pfx_buf[PREFIX2STR_BUFFER] = {};


	bgp = bgp_get_default(); /* assume 1 instance for now */

	if (VNC_DEBUG(IMPORT_BI_ATTACH)) {
		vnc_zlog_debug_verbose("%s: info_new->peer=%p", __func__,
				       info_new->peer);
		vnc_zlog_debug_verbose("%s: info_new->peer->su_remote=%p",
				       __func__, info_new->peer->su_remote);
	}

	/* walk until we find the first element info_new should precede */
	for (prev = NULL, next = rn->info; next;
	     prev = next, next = next->next) {
		enum bgp_path_selection_reason reason;

		if (!bgp
		    || (!CHECK_FLAG(info_new->flags, BGP_PATH_REMOVED)
			&& CHECK_FLAG(next->flags, BGP_PATH_REMOVED))
		    || bgp_path_info_cmp_compatible(bgp, info_new, next,
						    pfx_buf, afi, safi,
						    &reason)
			       == -1) { /* -1 if 1st is better */
			break;
		}
	}
	vnc_zlog_debug_verbose("%s: prev=%p, next=%p", __func__, prev, next);
	/* doubly-linked splice between prev and next */
	if (prev) {
		prev->next = info_new;
	} else {
		rn->info = info_new;
	}
	info_new->prev = prev;
	info_new->next = next;
	if (next)
		next->prev = info_new;
	/* bump attr refcount for the list's reference; NOTE(review): the
	 * interned pointer returned by bgp_attr_intern() is discarded here —
	 * presumably attr is already interned so only the refcount matters;
	 * confirm against callers */
	bgp_attr_intern(info_new->attr);
}

/*
 * Unlink a bpi from a node's doubly-linked path list.  Does NOT free
 * the bpi or release its attr (see commented-out unintern below).
 */
static void rfapiBgpInfoDetach(struct agg_node *rn, struct bgp_path_info *bpi)
{
	/*
	 * Remove the route (doubly-linked)
	 */
	// bgp_attr_unintern (&bpi->attr);
	if (bpi->next)
		bpi->next->prev = bpi->prev;
	if (bpi->prev)
		bpi->prev->next = bpi->next;
	else
		rn->info = bpi->next;
}

/*
 * For L3-indexed import tables
 *
 * Skiplist comparator: order by peer pointer first, then by RD
 * (compared as a prefix).
 */
static int rfapi_bi_peer_rd_cmp(const void *b1, const void *b2)
{
	const struct bgp_path_info *bpi1 = b1;
	const struct bgp_path_info *bpi2 = b2;

	/*
	 * Compare peers
	 */
	if (bpi1->peer < bpi2->peer)
		return -1;
	if (bpi1->peer > bpi2->peer)
		return 1;

	/*
	 * compare RDs
	 */
	return vnc_prefix_cmp(
		(const struct prefix *)&bpi1->extra->vnc.import.rd,
		(const struct prefix *)&bpi2->extra->vnc.import.rd);
}
/*
 * For L2-indexed import tables
 * The BPIs in these tables should ALWAYS have an aux_prefix set because
 * they arrive via IPv4 or IPv6 advertisements.
 *
 * Skiplist comparator: peer pointer, then RD, then aux_prefix (the L3
 * address carried with an L2 route).
 */
static int rfapi_bi_peer_rd_aux_cmp(const void *b1, const void *b2)
{
	const struct bgp_path_info *bpi1 = b1;
	const struct bgp_path_info *bpi2 = b2;
	int rc;

	/*
	 * Compare peers
	 */
	if (bpi1->peer < bpi2->peer)
		return -1;
	if (bpi1->peer > bpi2->peer)
		return 1;

	/*
	 * compare RDs
	 */
	rc = vnc_prefix_cmp((struct prefix *)&bpi1->extra->vnc.import.rd,
			    (struct prefix *)&bpi2->extra->vnc.import.rd);
	if (rc) {
		return rc;
	}

	/*
	 * L2 import tables can have multiple entries with the
	 * same MAC address, same RD, but different L3 addresses.
	 *
	 * Use presence of aux_prefix with AF=ethernet and prefixlen=1
	 * as magic value to signify explicit wildcarding of the aux_prefix.
	 * This magic value will not appear in bona fide bpi entries in
	 * the import table, but is allowed in the "fake" bpi used to
	 * probe the table when searching.  (We have to test both b1 and b2
	 * because there is no guarantee of the order the test key and
	 * the real key will be passed)
	 */
	if ((bpi1->extra->vnc.import.aux_prefix.family == AF_ETHERNET
	     && (bpi1->extra->vnc.import.aux_prefix.prefixlen == 1))
	    || (bpi2->extra->vnc.import.aux_prefix.family == AF_ETHERNET
		&& (bpi2->extra->vnc.import.aux_prefix.prefixlen == 1))) {

		/*
		 * wildcard aux address specified
		 */
		return 0;
	}

	return vnc_prefix_cmp(&bpi1->extra->vnc.import.aux_prefix,
			      &bpi2->extra->vnc.import.aux_prefix);
}


/*
 * Index on RD and Peer
 *
 * Add a bpi to the per-node RD index skiplist, creating the skiplist on
 * first use (L2 nodes get the aux-aware comparator).  Each of the
 * skiplist itself and each entry holds an agg_lock_node() reference.
 */
static void rfapiItBiIndexAdd(struct agg_node *rn, /* Import table VPN node */
			      struct bgp_path_info *bpi) /* new BPI */
{
	struct skiplist *sl;
	const struct prefix *p;

	assert(rn);
	assert(bpi);
	assert(bpi->extra);

	vnc_zlog_debug_verbose("%s: bpi %p, peer %p, rd %pRDP", __func__, bpi,
			       bpi->peer, &bpi->extra->vnc.import.rd);

	sl = RFAPI_RDINDEX_W_ALLOC(rn);
	if (!sl) {
		/* first entry at this node: create the index, choosing the
		 * comparator by address family of the node's prefix */
		p = agg_node_get_prefix(rn);
		if (AF_ETHERNET == p->family) {
			sl = skiplist_new(0, rfapi_bi_peer_rd_aux_cmp, NULL);
		} else {
			sl = skiplist_new(0, rfapi_bi_peer_rd_cmp, NULL);
		}
		RFAPI_IT_EXTRA_GET(rn)->u.vpn.idx_rd = sl;
		agg_lock_node(rn); /* for skiplist */
	}
	assert(!skiplist_insert(sl, (void *)bpi, (void *)bpi));
	agg_lock_node(rn); /* for skiplist entry */

	/* NB: BPIs in import tables are not refcounted */
}

/*
 * Debug helper: log every (peer, RD, aux_prefix) key in a node's RD
 * index skiplist.
 */
static void rfapiItBiIndexDump(struct agg_node *rn)
{
	struct skiplist *sl;
	void *cursor = NULL;
	struct bgp_path_info *k;
	struct bgp_path_info *v;
	int rc;

	sl = RFAPI_RDINDEX(rn);
	if (!sl)
		return;

	for (rc = skiplist_next(sl, (void **)&k, (void **)&v, &cursor); !rc;
	     rc = skiplist_next(sl, (void **)&k, (void **)&v, &cursor)) {

		char buf[RD_ADDRSTRLEN];
		char buf_aux_pfx[PREFIX_STRLEN];

		prefix_rd2str(
			&k->extra->vnc.import.rd, buf, sizeof(buf),
			bgp_get_asnotation(k->peer ? k->peer->bgp : NULL));
		if (k->extra->vnc.import.aux_prefix.family) {
			prefix2str(&k->extra->vnc.import.aux_prefix,
				   buf_aux_pfx, sizeof(buf_aux_pfx));
		} else
			strlcpy(buf_aux_pfx, "(none)", sizeof(buf_aux_pfx));

		vnc_zlog_debug_verbose("bpi %p, peer %p, rd %s, aux_prefix %s",
				       k, k->peer, buf, buf_aux_pfx);
	}
}
break; + } + } + } + return bpi_result; + } + + bpi_fake.peer = peer; + bpi_fake.extra = &bpi_extra; + bpi_fake.extra->vnc.import.rd = *prd; + if (aux_prefix) { + bpi_fake.extra->vnc.import.aux_prefix = *aux_prefix; + } else { + /* wildcard */ + bpi_fake.extra->vnc.import.aux_prefix.family = AF_ETHERNET; + bpi_fake.extra->vnc.import.aux_prefix.prefixlen = 1; + } + + rc = skiplist_search(sl, (void *)&bpi_fake, (void *)&bpi_result); + + if (rc) { +#ifdef DEBUG_BI_SEARCH + vnc_zlog_debug_verbose("%s: no match", __func__); +#endif + return NULL; + } + +#ifdef DEBUG_BI_SEARCH + vnc_zlog_debug_verbose("%s: matched bpi=%p", __func__, bpi_result); +#endif + + return bpi_result; +} + +static void rfapiItBiIndexDel(struct agg_node *rn, /* Import table VPN node */ + struct bgp_path_info *bpi) /* old BPI */ +{ + struct skiplist *sl; + int rc; + + vnc_zlog_debug_verbose("%s: bpi %p, peer %p, rd %pRDP", __func__, bpi, + bpi->peer, &bpi->extra->vnc.import.rd); + + sl = RFAPI_RDINDEX(rn); + assert(sl); + + rc = skiplist_delete(sl, (void *)(bpi), (void *)bpi); + if (rc) { + rfapiItBiIndexDump(rn); + } + assert(!rc); + + agg_unlock_node(rn); /* for skiplist entry */ + + /* NB: BPIs in import tables are not refcounted */ +} + +/* + * Add a backreference at the ENCAP node to the VPN route that + * refers to it + */ +static void +rfapiMonitorEncapAdd(struct rfapi_import_table *import_table, + struct prefix *p, /* VN address */ + struct agg_node *vpn_rn, /* VPN node */ + struct bgp_path_info *vpn_bpi) /* VPN bpi/route */ +{ + afi_t afi = family2afi(p->family); + struct agg_node *rn; + struct rfapi_monitor_encap *m; + + assert(afi); + rn = agg_node_get(import_table->imported_encap[afi], p); /* locks rn */ + assert(rn); + + m = XCALLOC(MTYPE_RFAPI_MONITOR_ENCAP, + sizeof(struct rfapi_monitor_encap)); + + m->node = vpn_rn; + m->bpi = vpn_bpi; + m->rn = rn; + + /* insert to encap node's list */ + m->next = RFAPI_MONITOR_ENCAP(rn); + if (m->next) + m->next->prev = m; + 
RFAPI_MONITOR_ENCAP_W_ALLOC(rn) = m; + + /* for easy lookup when deleting vpn route */ + vpn_bpi->extra->vnc.import.hme = m; + + vnc_zlog_debug_verbose( + "%s: it=%p, vpn_bpi=%p, afi=%d, encap rn=%p, setting vpn_bpi->extra->vnc.import.hme=%p", + __func__, import_table, vpn_bpi, afi, rn, m); + + RFAPI_CHECK_REFCOUNT(rn, SAFI_ENCAP, 0); + bgp_attr_intern(vpn_bpi->attr); +} + +static void rfapiMonitorEncapDelete(struct bgp_path_info *vpn_bpi) +{ + /* + * Remove encap monitor + */ + vnc_zlog_debug_verbose("%s: vpn_bpi=%p", __func__, vpn_bpi); + if (vpn_bpi->extra) { + struct rfapi_monitor_encap *hme = + vpn_bpi->extra->vnc.import.hme; + + if (hme) { + + vnc_zlog_debug_verbose("%s: hme=%p", __func__, hme); + + /* Refcount checking takes too long here */ + // RFAPI_CHECK_REFCOUNT(hme->rn, SAFI_ENCAP, 0); + if (hme->next) + hme->next->prev = hme->prev; + if (hme->prev) + hme->prev->next = hme->next; + else + RFAPI_MONITOR_ENCAP_W_ALLOC(hme->rn) = + hme->next; + /* Refcount checking takes too long here */ + // RFAPI_CHECK_REFCOUNT(hme->rn, SAFI_ENCAP, 1); + + /* see if the struct rfapi_it_extra is empty and can be + * freed */ + rfapiMonitorExtraPrune(SAFI_ENCAP, hme->rn); + + agg_unlock_node(hme->rn); /* decr ref count */ + XFREE(MTYPE_RFAPI_MONITOR_ENCAP, hme); + vpn_bpi->extra->vnc.import.hme = NULL; + } + } +} + +/* + * Timer callback for withdraw + */ +static void rfapiWithdrawTimerVPN(struct event *t) +{ + struct rfapi_withdraw *wcb = EVENT_ARG(t); + struct bgp_path_info *bpi = wcb->info; + struct bgp *bgp = bgp_get_default(); + const struct prefix *p; + struct rfapi_monitor_vpn *moved; + afi_t afi; + bool early_exit = false; + + if (bgp == NULL) { + vnc_zlog_debug_verbose( + "%s: NULL BGP pointer, assume shutdown race condition!!!", + __func__); + early_exit = true; + } + if (bgp && CHECK_FLAG(bgp->flags, BGP_FLAG_DELETE_IN_PROGRESS)) { + vnc_zlog_debug_verbose( + "%s: BGP delete in progress, assume shutdown race condition!!!", + __func__); + early_exit = true; + } 
+ + /* This callback is responsible for the withdraw object's memory */ + if (early_exit) { + XFREE(MTYPE_RFAPI_WITHDRAW, wcb); + return; + } + + assert(wcb->node); + assert(bpi); + assert(wcb->import_table); + assert(bpi->extra); + + RFAPI_CHECK_REFCOUNT(wcb->node, SAFI_MPLS_VPN, wcb->lockoffset); + + vnc_zlog_debug_verbose("%s: removing bpi %p at prefix %pRN", __func__, + bpi, wcb->node); + + /* + * Remove the route (doubly-linked) + */ + if (CHECK_FLAG(bpi->flags, BGP_PATH_VALID) + && VALID_INTERIOR_TYPE(bpi->type)) + RFAPI_MONITOR_EXTERIOR(wcb->node)->valid_interior_count--; + + p = agg_node_get_prefix(wcb->node); + afi = family2afi(p->family); + wcb->import_table->holddown_count[afi] -= 1; /* keep count consistent */ + rfapiItBiIndexDel(wcb->node, bpi); + rfapiBgpInfoDetach(wcb->node, bpi); /* with removed bpi */ + + vnc_import_bgp_exterior_del_route_interior(bgp, wcb->import_table, + wcb->node, bpi); + + + /* + * If VNC is configured to send response remove messages, AND + * if the removed route had a UN address, do response removal + * processing. + */ + if (!(bgp->rfapi_cfg->flags + & BGP_VNC_CONFIG_RESPONSE_REMOVAL_DISABLE)) { + + int has_valid_duplicate = 0; + struct bgp_path_info *bpii; + + /* + * First check if there are any OTHER routes at this node + * that have the same nexthop and a valid UN address. If + * there are (e.g., from other peers), then the route isn't + * really gone, so skip sending a response removal message. 
/*
 * This works for multiprotocol extension, but not for plain ol'
 * unicast IPv4 because that nexthop is stored in attr->nexthop
 *
 * Convert the MP_REACH nexthop in attr to a host-length struct prefix.
 * On an unknown nexthop family, p is left zeroed except p->family
 * (which holds the unrecognized value) and a debug message is logged.
 */
void rfapiNexthop2Prefix(struct attr *attr, struct prefix *p)
{
	assert(p);
	assert(attr);

	memset(p, 0, sizeof(struct prefix));

	/* note: assignment inside switch sets p->family as a side effect */
	switch (p->family = BGP_MP_NEXTHOP_FAMILY(attr->mp_nexthop_len)) {
	case AF_INET:
		p->u.prefix4 = attr->mp_nexthop_global_in;
		p->prefixlen = IPV4_MAX_BITLEN;
		break;

	case AF_INET6:
		p->u.prefix6 = attr->mp_nexthop_global;
		p->prefixlen = IPV6_MAX_BITLEN;
		break;

	default:
		vnc_zlog_debug_verbose("%s: Family is unknown = %d", __func__,
				       p->family);
	}
}

/*
 * Like rfapiNexthop2Prefix(), but for AFI_IP reads the plain unicast
 * nexthop from attr->nexthop instead of the MP_REACH fields.
 */
void rfapiUnicastNexthop2Prefix(afi_t afi, struct attr *attr, struct prefix *p)
{
	if (afi == AFI_IP) {
		p->family = AF_INET;
		p->prefixlen = IPV4_MAX_BITLEN;
		p->u.prefix4 = attr->nexthop;
	} else {
		rfapiNexthop2Prefix(attr, p);
	}
}

/*
 * Compare two nexthop prefixes by address only (prefixlen ignored).
 *
 * Returns 0 if the addresses are the same family and equal; 1 if they
 * differ, either argument is NULL, or the family is not IPv4/IPv6.
 */
static int rfapiAttrNexthopAddrDifferent(struct prefix *p1, struct prefix *p2)
{
	if (!p1 || !p2) {
		vnc_zlog_debug_verbose("%s: p1 or p2 is NULL", __func__);
		return 1;
	}

	/*
	 * Are address families the same?
	 */
	if (p1->family != p2->family) {
		return 1;
	}

	switch (p1->family) {
	case AF_INET:
		if (IPV4_ADDR_SAME(&p1->u.prefix4, &p2->u.prefix4))
			return 0;
		break;

	case AF_INET6:
		if (IPV6_ADDR_SAME(&p1->u.prefix6, &p2->u.prefix6))
			return 0;
		break;

	default:
		/* NOTE(review): assert(1) is a no-op; this was presumably
		 * meant to be assert(0) to trap unexpected families — as
		 * written, unknown families silently fall through and
		 * report "different".  Confirm intent before changing. */
		assert(1);
	}

	return 1;
}

/*
 * Copy the UN (underlay network) address from an ENCAP route's MP
 * nexthop into the cached un_family/un fields of a VPN bpi's extra.
 * On an unrecognized nexthop length, the cached family is cleared to
 * AF_UNSPEC and a warning is logged.
 */
static void rfapiCopyUnEncap2VPN(struct bgp_path_info *encap_bpi,
				 struct bgp_path_info *vpn_bpi)
{
	if (!vpn_bpi || !vpn_bpi->extra) {
		zlog_warn("%s: no vpn bpi attr/extra, can't copy UN address",
			  __func__);
		return;
	}

	switch (BGP_MP_NEXTHOP_FAMILY(encap_bpi->attr->mp_nexthop_len)) {
	case AF_INET:

		/*
		 * instrumentation to debug segfault of 091127
		 */
		vnc_zlog_debug_verbose("%s: vpn_bpi=%p", __func__, vpn_bpi);
		vnc_zlog_debug_verbose("%s: vpn_bpi->extra=%p", __func__,
				       vpn_bpi->extra);

		vpn_bpi->extra->vnc.import.un_family = AF_INET;
		vpn_bpi->extra->vnc.import.un.addr4 =
			encap_bpi->attr->mp_nexthop_global_in;
		break;

	case AF_INET6:
		vpn_bpi->extra->vnc.import.un_family = AF_INET6;
		vpn_bpi->extra->vnc.import.un.addr6 =
			encap_bpi->attr->mp_nexthop_global;
		break;

	default:
		zlog_warn("%s: invalid encap nexthop length: %d", __func__,
			  encap_bpi->attr->mp_nexthop_len);
		vpn_bpi->extra->vnc.import.un_family = AF_UNSPEC;
		break;
	}
}
encap_bpi->attr->mp_nexthop_len); + vpn_bpi->extra->vnc.import.un_family = AF_UNSPEC; + break; + } +} + +/* + * returns 0 on success, nonzero on error + */ +static int +rfapiWithdrawEncapUpdateCachedUn(struct rfapi_import_table *import_table, + struct bgp_path_info *encap_bpi, + struct agg_node *vpn_rn, + struct bgp_path_info *vpn_bpi) +{ + if (!encap_bpi) { + + /* + * clear cached UN address + */ + if (!vpn_bpi || !vpn_bpi->extra) { + zlog_warn( + "%s: missing VPN bpi/extra, can't clear UN addr", + __func__); + return 1; + } + vpn_bpi->extra->vnc.import.un_family = AF_UNSPEC; + memset(&vpn_bpi->extra->vnc.import.un, 0, + sizeof(vpn_bpi->extra->vnc.import.un)); + if (CHECK_FLAG(vpn_bpi->flags, BGP_PATH_VALID)) { + if (rfapiGetVncTunnelUnAddr(vpn_bpi->attr, NULL)) { + UNSET_FLAG(vpn_bpi->flags, BGP_PATH_VALID); + if (VALID_INTERIOR_TYPE(vpn_bpi->type)) + RFAPI_MONITOR_EXTERIOR(vpn_rn) + ->valid_interior_count--; + /* signal interior route withdrawal to + * import-exterior */ + vnc_import_bgp_exterior_del_route_interior( + bgp_get_default(), import_table, vpn_rn, + vpn_bpi); + } + } + + } else { + if (!vpn_bpi) { + zlog_warn("%s: missing VPN bpi, can't clear UN addr", + __func__); + return 1; + } + rfapiCopyUnEncap2VPN(encap_bpi, vpn_bpi); + if (!CHECK_FLAG(vpn_bpi->flags, BGP_PATH_VALID)) { + SET_FLAG(vpn_bpi->flags, BGP_PATH_VALID); + if (VALID_INTERIOR_TYPE(vpn_bpi->type)) + RFAPI_MONITOR_EXTERIOR(vpn_rn) + ->valid_interior_count++; + /* signal interior route withdrawal to import-exterior + */ + vnc_import_bgp_exterior_add_route_interior( + bgp_get_default(), import_table, vpn_rn, + vpn_bpi); + } + } + return 0; +} + +static void rfapiWithdrawTimerEncap(struct event *t) +{ + struct rfapi_withdraw *wcb = EVENT_ARG(t); + struct bgp_path_info *bpi = wcb->info; + int was_first_route = 0; + struct rfapi_monitor_encap *em; + struct skiplist *vpn_node_sl = skiplist_new(0, NULL, NULL); + + assert(wcb->node); + assert(bpi); + assert(wcb->import_table); + + 
	/* (tail of the SAFI_ENCAP withdraw-timer handler; the function
	 * header precedes this chunk) */
	RFAPI_CHECK_REFCOUNT(wcb->node, SAFI_ENCAP, 0);

	if (wcb->node->info == bpi)
		was_first_route = 1;

	/*
	 * Remove the route/bpi and free it
	 */
	rfapiBgpInfoDetach(wcb->node, bpi);
	rfapiBgpInfoFree(bpi);

	/* Only the head-of-chain encap route feeds cached UN addresses
	 * to monitoring VPN routes, so nothing else to do otherwise. */
	if (!was_first_route)
		goto done;

	for (em = RFAPI_MONITOR_ENCAP(wcb->node); em; em = em->next) {

		/*
		 * Update monitoring VPN BPIs with new encap info at the
		 * head of the encap bpi chain (which could be NULL after
		 * removing the expiring bpi above)
		 */
		if (rfapiWithdrawEncapUpdateCachedUn(wcb->import_table,
						     wcb->node->info, em->node,
						     em->bpi))
			continue;

		/*
		 * Build a list of unique VPN nodes referenced by these
		 * monitors.
		 * Use a skiplist for speed.
		 */
		skiplist_insert(vpn_node_sl, em->node, em->node);
	}


	/*
	 * for each VPN node referenced in the ENCAP monitors:
	 */
	struct agg_node *rn;
	while (!skiplist_first(vpn_node_sl, (void **)&rn, NULL)) {
		if (!wcb->node->info) {
			/* encap chain is now empty: VPN monitors can no
			 * longer match here; move them to a shorter
			 * (less specific) prefix and notify */
			struct rfapi_monitor_vpn *moved;

			moved = rfapiMonitorMoveShorter(rn, 0);
			if (moved) {
				// rfapiDoRouteCallback(wcb->import_table,
				// moved->node, moved);
				rfapiMonitorMovedUp(wcb->import_table, rn,
						    moved->node, moved);
			}
		} else {
			// rfapiDoRouteCallback(wcb->import_table, rn, NULL);
			rfapiMonitorItNodeChanged(wcb->import_table, rn, NULL);
		}
		skiplist_delete_first(vpn_node_sl);
	}

done:
	RFAPI_CHECK_REFCOUNT(wcb->node, SAFI_ENCAP, 1);
	agg_unlock_node(wcb->node); /* decr ref count */
	XFREE(MTYPE_RFAPI_WITHDRAW, wcb);
	skiplist_free(vpn_node_sl);
}


/*
 * Mark a route as being withdrawn (BGP_PATH_REMOVED) and arm a holddown
 * timer that deletes it later.  Works for both VPN and ENCAP routes;
 * timer_service_func is different in each case.
 *
 * The allocated rfapi_withdraw control block (wcb) is handed to the
 * timer; timer_service_func is responsible for freeing it.
 */
static void
rfapiBiStartWithdrawTimer(struct rfapi_import_table *import_table,
			  struct agg_node *rn, struct bgp_path_info *bpi,
			  afi_t afi, safi_t safi,
			  void (*timer_service_func)(struct event *))
{
	uint32_t lifetime;
	struct rfapi_withdraw *wcb;

	if (CHECK_FLAG(bpi->flags, BGP_PATH_REMOVED)) {
		/*
		 * Already on the path to being withdrawn,
		 * should already have a timer set up to
		 * delete it.
		 */
		vnc_zlog_debug_verbose(
			"%s: already being withdrawn, do nothing", __func__);
		return;
	}

	rfapiGetVncLifetime(bpi->attr, &lifetime);
	vnc_zlog_debug_verbose("%s: VNC lifetime is %u", __func__, lifetime);

	/*
	 * withdrawn routes get to hang around for a while
	 */
	SET_FLAG(bpi->flags, BGP_PATH_REMOVED);

	/* set timer to remove the route later */
	lifetime = rfapiGetHolddownFromLifetime(lifetime);
	vnc_zlog_debug_verbose("%s: using timeout %u", __func__, lifetime);

	/*
	 * Stash import_table, node, and info for use by timer
	 * service routine, which is supposed to free the wcb.
	 */
	wcb = XCALLOC(MTYPE_RFAPI_WITHDRAW, sizeof(struct rfapi_withdraw));
	wcb->node = rn;
	wcb->info = bpi;
	wcb->import_table = import_table;
	/* NOTE(review): return value discarded — presumably called only to
	 * bump the interned attr's refcount so it survives until the timer
	 * fires; confirm against bgp_attr_intern() semantics */
	bgp_attr_intern(bpi->attr);

	if (VNC_DEBUG(VERBOSE)) {
		vnc_zlog_debug_verbose(
			"%s: wcb values: node=%p, info=%p, import_table=%p (bpi follows)",
			__func__, wcb->node, wcb->info, wcb->import_table);
		rfapiPrintBi(NULL, bpi);
	}


	assert(bpi->extra);
	/* lifetime * 1000 + jitter must fit in uint32_t; fall back to
	 * whole-second granularity for absurdly large lifetimes */
	if (lifetime > UINT32_MAX / 1001) {
		/* sub-optimal case, but will probably never happen */
		bpi->extra->vnc.import.timer = NULL;
		event_add_timer(bm->master, timer_service_func, wcb, lifetime,
				&bpi->extra->vnc.import.timer);
	} else {
		static uint32_t jitter;
		uint32_t lifetime_msec;

		/*
		 * the goal here is to spread out the timers so they are
		 * sortable in the skip list
		 */
		if (++jitter >= 1000)
			jitter = 0;

		lifetime_msec = (lifetime * 1000) + jitter;

		bpi->extra->vnc.import.timer = NULL;
		event_add_timer_msec(bm->master, timer_service_func, wcb,
				     lifetime_msec,
				     &bpi->extra->vnc.import.timer);
	}

	/* re-sort route list (BGP_PATH_REMOVED routes are last) */
	if (((struct bgp_path_info *)rn->info)->next) {
		rfapiBgpInfoDetach(rn, bpi);
		rfapiBgpInfoAttachSorted(rn, bpi, afi, safi);
	}
}


/*
 * Common signature of the per-SAFI filtered-import functions; see
 * rfapiBgpInfoFilteredImportFunction() for the selector.
 */
typedef void(rfapi_bi_filtered_import_f)(struct rfapi_import_table *table,
					 int action, struct peer *peer,
					 void *rfd, const struct prefix *prefix,
					 const struct prefix *aux_prefix,
					 afi_t afi, struct prefix_rd *prd,
					 struct attr *attr, uint8_t type,
					 uint8_t sub_type, uint32_t *label);


/*
 * Expire an encap route immediately: fabricate the event that the
 * withdraw timer would have delivered and invoke the timer handler
 * directly.  The handler frees the wcb allocated here.
 */
static void rfapiExpireEncapNow(struct rfapi_import_table *it,
				struct agg_node *rn, struct bgp_path_info *bpi)
{
	struct rfapi_withdraw *wcb;
	struct event t;

	/*
	 * pretend we're an expiring timer
	 */
	wcb = XCALLOC(MTYPE_RFAPI_WITHDRAW, sizeof(struct rfapi_withdraw));
	wcb->info = bpi;
	wcb->node = rn;
	wcb->import_table = it;
	memset(&t, 0, sizeof(t));
	t.arg = wcb;
	rfapiWithdrawTimerEncap(&t); /* frees wcb */
}

/*
 * Extract the MP nexthop from an attr into a host prefix (full-length
 * /32 or /128).  Returns 0 on success, EINVAL if the nexthop length
 * maps to neither AF_INET nor AF_INET6; *prefix is untouched on error.
 */
static int rfapiGetNexthop(struct attr *attr, struct prefix *prefix)
{
	switch (BGP_MP_NEXTHOP_FAMILY(attr->mp_nexthop_len)) {
	case AF_INET:
		prefix->family = AF_INET;
		prefix->prefixlen = IPV4_MAX_BITLEN;
		prefix->u.prefix4 = attr->mp_nexthop_global_in;
		break;
	case AF_INET6:
		prefix->family = AF_INET6;
		prefix->prefixlen = IPV6_MAX_BITLEN;
		prefix->u.prefix6 = attr->mp_nexthop_global;
		break;
	default:
		vnc_zlog_debug_verbose("%s: unknown attr->mp_nexthop_len %d",
				       __func__, attr->mp_nexthop_len);
		return EINVAL;
	}
	return 0;
}

/*
 * Import an encap (SAFI_ENCAP) bgp_path_info into this import table if
 * its route target list intersects with the import table's route
 * target list.  action is one of FIF_ACTION_{UPDATE,WITHDRAW,KILL}.
 */
static void rfapiBgpInfoFilteredImportEncap(
	struct rfapi_import_table *import_table, int action, struct peer *peer,
	void *rfd, /* set for looped back routes */
	const struct prefix *p,
	const struct prefix *aux_prefix, /* Unused for encap routes */
	afi_t afi, struct prefix_rd *prd,
	struct attr *attr, /* part of bgp_path_info */
	uint8_t type,	   /* part of bgp_path_info */
	uint8_t sub_type,  /* part of bgp_path_info */
	uint32_t *label)   /* part of bgp_path_info */
{
	struct agg_table *rt = NULL;
	struct agg_node *rn;
	struct bgp_path_info *info_new;
	struct bgp_path_info *bpi;
	struct bgp_path_info *next;
	char buf[BUFSIZ];

struct prefix p_firstbpi_old; + struct prefix p_firstbpi_new; + int replacing = 0; + const char *action_str = NULL; + struct prefix un_prefix; + + struct bgp *bgp; + bgp = bgp_get_default(); /* assume 1 instance for now */ + + switch (action) { + case FIF_ACTION_UPDATE: + action_str = "update"; + break; + case FIF_ACTION_WITHDRAW: + action_str = "withdraw"; + break; + case FIF_ACTION_KILL: + action_str = "kill"; + break; + default: + assert(0); + break; + } + + vnc_zlog_debug_verbose( + "%s: entry: %s: prefix %s/%d", __func__, action_str, + inet_ntop(p->family, &p->u.prefix, buf, sizeof(buf)), + p->prefixlen); + + memset(&p_firstbpi_old, 0, sizeof(p_firstbpi_old)); + memset(&p_firstbpi_new, 0, sizeof(p_firstbpi_new)); + + if (action == FIF_ACTION_UPDATE) { + /* + * Compare rt lists. If no intersection, don't import this route + * On a withdraw, peer and RD are sufficient to determine if + * we should act. + */ + if (!attr || !bgp_attr_get_ecommunity(attr)) { + + vnc_zlog_debug_verbose( + "%s: attr, extra, or ecommunity missing, not importing", + __func__); + return; + } +#ifdef RFAPI_REQUIRE_ENCAP_BEEC + if (!rfapiEcommunitiesMatchBeec( + bgp_attr_get_ecommunity(attr))) { + vnc_zlog_debug_verbose( + "%s: it=%p: no match for BGP Encapsulation ecommunity", + __func__, import_table); + return; + } +#endif + if (!rfapiEcommunitiesIntersect( + import_table->rt_import_list, + bgp_attr_get_ecommunity(attr))) { + + vnc_zlog_debug_verbose( + "%s: it=%p: no ecommunity intersection", + __func__, import_table); + return; + } + + /* + * Updates must also have a nexthop address + */ + memset(&un_prefix, 0, + sizeof(un_prefix)); /* keep valgrind happy */ + if (rfapiGetNexthop(attr, &un_prefix)) { + vnc_zlog_debug_verbose("%s: missing nexthop address", + __func__); + return; + } + } + + /* + * Figure out which radix tree the route would go into + */ + switch (afi) { + case AFI_IP: + case AFI_IP6: + rt = import_table->imported_encap[afi]; + break; + + case AFI_UNSPEC: + case 
AFI_L2VPN: + case AFI_MAX: + flog_err(EC_LIB_DEVELOPMENT, "%s: bad afi %d", __func__, afi); + return; + } + + /* + * agg_node_lookup returns a node only if there is at least + * one route attached. + */ + rn = agg_node_lookup(rt, p); + +#ifdef DEBUG_ENCAP_MONITOR + vnc_zlog_debug_verbose("%s: initial encap lookup(it=%p) rn=%p", + __func__, import_table, rn); +#endif + + if (rn) { + + RFAPI_CHECK_REFCOUNT(rn, SAFI_ENCAP, 1); + agg_unlock_node(rn); /* undo lock in agg_node_lookup */ + + + /* + * capture nexthop of first bpi + */ + if (rn->info) { + rfapiNexthop2Prefix( + ((struct bgp_path_info *)(rn->info))->attr, + &p_firstbpi_old); + } + + for (bpi = rn->info; bpi; bpi = bpi->next) { + + /* + * Does this bgp_path_info refer to the same route + * as we are trying to add? + */ + vnc_zlog_debug_verbose("%s: comparing BPI %p", __func__, + bpi); + + + /* + * Compare RDs + * + * RD of import table bpi is in + * bpi->extra->vnc.import.rd RD of info_orig is in prd + */ + if (!bpi->extra) { + vnc_zlog_debug_verbose("%s: no bpi->extra", + __func__); + continue; + } + if (prefix_cmp( + (struct prefix *)&bpi->extra->vnc.import.rd, + (struct prefix *)prd)) { + + vnc_zlog_debug_verbose("%s: prd does not match", + __func__); + continue; + } + + /* + * Compare peers + */ + if (bpi->peer != peer) { + vnc_zlog_debug_verbose( + "%s: peer does not match", __func__); + continue; + } + + vnc_zlog_debug_verbose("%s: found matching bpi", + __func__); + + /* Same route. Delete this bpi, replace with new one */ + + if (action == FIF_ACTION_WITHDRAW) { + + vnc_zlog_debug_verbose( + "%s: withdrawing at prefix %pRN", + __func__, rn); + + rfapiBiStartWithdrawTimer( + import_table, rn, bpi, afi, SAFI_ENCAP, + rfapiWithdrawTimerEncap); + + } else { + vnc_zlog_debug_verbose( + "%s: %s at prefix %pRN", __func__, + ((action == FIF_ACTION_KILL) + ? "killing" + : "replacing"), + rn); + + /* + * If this route is waiting to be deleted + * because of + * a previous withdraw, we must cancel its + * timer. 
+ */ + if (CHECK_FLAG(bpi->flags, BGP_PATH_REMOVED) + && bpi->extra->vnc.import.timer) { + struct rfapi_withdraw *wcb = EVENT_ARG( + bpi->extra->vnc.import.timer); + + XFREE(MTYPE_RFAPI_WITHDRAW, wcb); + EVENT_OFF(bpi->extra->vnc.import.timer); + } + + if (action == FIF_ACTION_UPDATE) { + rfapiBgpInfoDetach(rn, bpi); + rfapiBgpInfoFree(bpi); + replacing = 1; + } else { + /* + * Kill: do export stuff when removing + * bpi + */ + struct rfapi_withdraw *wcb; + struct event t; + + /* + * pretend we're an expiring timer + */ + wcb = XCALLOC( + MTYPE_RFAPI_WITHDRAW, + sizeof(struct rfapi_withdraw)); + wcb->info = bpi; + wcb->node = rn; + wcb->import_table = import_table; + memset(&t, 0, sizeof(t)); + t.arg = wcb; + rfapiWithdrawTimerEncap( + &t); /* frees wcb */ + } + } + + break; + } + } + + if (rn) + RFAPI_CHECK_REFCOUNT(rn, SAFI_ENCAP, replacing ? 1 : 0); + + if (action == FIF_ACTION_WITHDRAW || action == FIF_ACTION_KILL) + return; + + info_new = + rfapiBgpInfoCreate(attr, peer, rfd, prd, type, sub_type, NULL); + + if (rn) { + if (!replacing) + agg_lock_node(rn); /* incr ref count for new BPI */ + } else { + rn = agg_node_get(rt, p); + } + + vnc_zlog_debug_verbose("%s: (afi=%d, rn=%p) inserting at prefix %pRN", + __func__, afi, rn, rn); + + rfapiBgpInfoAttachSorted(rn, info_new, afi, SAFI_ENCAP); + + /* + * Delete holddown routes from same NVE. 
See details in + * rfapiBgpInfoFilteredImportVPN() + */ + for (bpi = info_new->next; bpi; bpi = next) { + + struct prefix pfx_un; + int un_match = 0; + + next = bpi->next; + if (!CHECK_FLAG(bpi->flags, BGP_PATH_REMOVED)) + continue; + + /* + * We already match the VN address (it is the prefix + * of the route node) + */ + + if (!rfapiGetNexthop(bpi->attr, &pfx_un) + && prefix_same(&pfx_un, &un_prefix)) { + + un_match = 1; + } + + if (!un_match) + continue; + + vnc_zlog_debug_verbose( + "%s: removing holddown bpi matching NVE of new route", + __func__); + if (bpi->extra->vnc.import.timer) { + struct rfapi_withdraw *wcb = + EVENT_ARG(bpi->extra->vnc.import.timer); + + XFREE(MTYPE_RFAPI_WITHDRAW, wcb); + EVENT_OFF(bpi->extra->vnc.import.timer); + } + rfapiExpireEncapNow(import_table, rn, bpi); + } + + rfapiNexthop2Prefix(((struct bgp_path_info *)(rn->info))->attr, + &p_firstbpi_new); + + /* + * If the nexthop address of the selected Encap route (i.e., + * the UN address) has changed, then we must update the VPN + * routes that refer to this Encap route and possibly force + * rfapi callbacks. + */ + if (rfapiAttrNexthopAddrDifferent(&p_firstbpi_old, &p_firstbpi_new)) { + + struct rfapi_monitor_encap *m; + struct rfapi_monitor_encap *mnext; + + struct agg_node *referenced_vpn_prefix; + + /* + * Optimized approach: build radix tree on the fly to + * hold list of VPN nodes referenced by the ENCAP monitors + * + * The nodes in this table correspond to prefixes of VPN routes. + * The "info" pointer of the node points to a chain of + * struct rfapi_monitor_encap, each of which refers to a + * specific VPN node. + */ + struct agg_table *referenced_vpn_table; + + referenced_vpn_table = agg_table_init(); + +/* + * iterate over the set of monitors at this ENCAP node. 
+ */ +#ifdef DEBUG_ENCAP_MONITOR + vnc_zlog_debug_verbose("%s: examining monitors at rn=%p", + __func__, rn); +#endif + for (m = RFAPI_MONITOR_ENCAP(rn); m; m = m->next) { + const struct prefix *p; + + /* + * For each referenced bpi/route, copy the ENCAP route's + * nexthop to the VPN route's cached UN address field + * and set + * the address family of the cached UN address field. + */ + rfapiCopyUnEncap2VPN(info_new, m->bpi); + if (!CHECK_FLAG(m->bpi->flags, BGP_PATH_VALID)) { + SET_FLAG(m->bpi->flags, BGP_PATH_VALID); + if (VALID_INTERIOR_TYPE(m->bpi->type)) + RFAPI_MONITOR_EXTERIOR(m->node) + ->valid_interior_count++; + vnc_import_bgp_exterior_add_route_interior( + bgp, import_table, m->node, m->bpi); + } + + /* + * Build a list of unique VPN nodes referenced by these + * monitors + * + * There could be more than one VPN node here with a + * given + * prefix. Those are currently in an unsorted linear + * list + * per prefix. + */ + p = agg_node_get_prefix(m->node); + referenced_vpn_prefix = + agg_node_get(referenced_vpn_table, p); + assert(referenced_vpn_prefix); + for (mnext = referenced_vpn_prefix->info; mnext; + mnext = mnext->next) { + + if (mnext->node == m->node) + break; + } + + if (mnext) { + /* + * already have an entry for this VPN node + */ + agg_unlock_node(referenced_vpn_prefix); + } else { + mnext = XCALLOC( + MTYPE_RFAPI_MONITOR_ENCAP, + sizeof(struct rfapi_monitor_encap)); + mnext->node = m->node; + mnext->next = referenced_vpn_prefix->info; + referenced_vpn_prefix->info = mnext; + } + } + + /* + * for each VPN node referenced in the ENCAP monitors: + */ + for (referenced_vpn_prefix = + agg_route_top(referenced_vpn_table); + referenced_vpn_prefix; + referenced_vpn_prefix = + agg_route_next(referenced_vpn_prefix)) { + + while ((m = referenced_vpn_prefix->info)) { + + struct agg_node *n; + + rfapiMonitorMoveLonger(m->node); + for (n = m->node; n; n = agg_node_parent(n)) { + // rfapiDoRouteCallback(import_table, n, + // NULL); + } + 
rfapiMonitorItNodeChanged(import_table, m->node, + NULL); + + referenced_vpn_prefix->info = m->next; + agg_unlock_node(referenced_vpn_prefix); + XFREE(MTYPE_RFAPI_MONITOR_ENCAP, m); + } + } + agg_table_finish(referenced_vpn_table); + } + + RFAPI_CHECK_REFCOUNT(rn, SAFI_ENCAP, 0); +} + +static void rfapiExpireVpnNow(struct rfapi_import_table *it, + struct agg_node *rn, struct bgp_path_info *bpi, + int lockoffset) +{ + struct rfapi_withdraw *wcb; + struct event t; + + /* + * pretend we're an expiring timer + */ + wcb = XCALLOC(MTYPE_RFAPI_WITHDRAW, sizeof(struct rfapi_withdraw)); + wcb->info = bpi; + wcb->node = rn; + wcb->import_table = it; + wcb->lockoffset = lockoffset; + memset(&t, 0, sizeof(t)); + t.arg = wcb; + rfapiWithdrawTimerVPN(&t); /* frees wcb */ +} + + +/* + * import a bgp_path_info if its route target list intersects with the + * import table's route target list + */ +void rfapiBgpInfoFilteredImportVPN( + struct rfapi_import_table *import_table, int action, struct peer *peer, + void *rfd, /* set for looped back routes */ + const struct prefix *p, + const struct prefix *aux_prefix, /* AFI_L2VPN: optional IP */ + afi_t afi, struct prefix_rd *prd, + struct attr *attr, /* part of bgp_path_info */ + uint8_t type, /* part of bgp_path_info */ + uint8_t sub_type, /* part of bgp_path_info */ + uint32_t *label) /* part of bgp_path_info */ +{ + struct agg_table *rt = NULL; + struct agg_node *rn; + struct agg_node *n; + struct bgp_path_info *info_new; + struct bgp_path_info *bpi; + struct bgp_path_info *next; + char buf[BUFSIZ]; + struct prefix vn_prefix; + struct prefix un_prefix; + int un_prefix_valid = 0; + struct agg_node *ern; + int replacing = 0; + int original_had_routes = 0; + struct prefix original_nexthop; + const char *action_str = NULL; + int is_it_ce = 0; + + struct bgp *bgp; + bgp = bgp_get_default(); /* assume 1 instance for now */ + + switch (action) { + case FIF_ACTION_UPDATE: + action_str = "update"; + break; + case FIF_ACTION_WITHDRAW: + 
action_str = "withdraw"; + break; + case FIF_ACTION_KILL: + action_str = "kill"; + break; + default: + assert(0); + break; + } + + if (import_table == bgp->rfapi->it_ce) + is_it_ce = 1; + + vnc_zlog_debug_verbose("%s: entry: %s%s: prefix %s/%d: it %p, afi %s", + __func__, (is_it_ce ? "CE-IT " : ""), action_str, + rfapi_ntop(p->family, &p->u.prefix, buf, BUFSIZ), + p->prefixlen, import_table, afi2str(afi)); + + VNC_ITRCCK; + + /* + * Compare rt lists. If no intersection, don't import this route + * On a withdraw, peer and RD are sufficient to determine if + * we should act. + */ + if (action == FIF_ACTION_UPDATE) { + if (!attr || !bgp_attr_get_ecommunity(attr)) { + + vnc_zlog_debug_verbose( + "%s: attr, extra, or ecommunity missing, not importing", + __func__); + return; + } + if ((import_table != bgp->rfapi->it_ce) && + !rfapiEcommunitiesIntersect( + import_table->rt_import_list, + bgp_attr_get_ecommunity(attr))) { + + vnc_zlog_debug_verbose( + "%s: it=%p: no ecommunity intersection", + __func__, import_table); + return; + } + + memset(&vn_prefix, 0, + sizeof(vn_prefix)); /* keep valgrind happy */ + if (rfapiGetNexthop(attr, &vn_prefix)) { + /* missing nexthop address would be a bad, bad thing */ + vnc_zlog_debug_verbose("%s: missing nexthop", __func__); + return; + } + } + + /* + * Figure out which radix tree the route would go into + */ + switch (afi) { + case AFI_IP: + case AFI_IP6: + case AFI_L2VPN: + rt = import_table->imported_vpn[afi]; + break; + + case AFI_UNSPEC: + case AFI_MAX: + flog_err(EC_LIB_DEVELOPMENT, "%s: bad afi %d", __func__, afi); + return; + } + + /* clear it */ + memset(&original_nexthop, 0, sizeof(original_nexthop)); + + /* + * agg_node_lookup returns a node only if there is at least + * one route attached. 
+ */ + rn = agg_node_lookup(rt, p); + + vnc_zlog_debug_verbose("%s: rn=%p", __func__, rn); + + if (rn) { + + RFAPI_CHECK_REFCOUNT(rn, SAFI_MPLS_VPN, 1); + agg_unlock_node(rn); /* undo lock in agg_node_lookup */ + + if (rn->info) + original_had_routes = 1; + + if (VNC_DEBUG(VERBOSE)) { + vnc_zlog_debug_verbose("%s: showing IT node on entry", + __func__); + rfapiShowItNode(NULL, rn); /* debug */ + } + + /* + * Look for same route (will have same RD and peer) + */ + bpi = rfapiItBiIndexSearch(rn, prd, peer, aux_prefix); + + if (bpi) { + + /* + * This was an old test when we iterated over the + * BPIs linearly. Since we're now looking up with + * RD and peer, comparing types should not be + * needed. Changed to assertion. + * + * Compare types. Doing so prevents a RFP-originated + * route from matching an imported route, for example. + */ + if (VNC_DEBUG(VERBOSE) && bpi->type != type) + /* should be handled by RDs, but warn for now */ + zlog_warn("%s: type mismatch! (bpi=%d, arg=%d)", + __func__, bpi->type, type); + + vnc_zlog_debug_verbose("%s: found matching bpi", + __func__); + + /* + * In the special CE table, withdrawals occur without + * holddown + */ + if (import_table == bgp->rfapi->it_ce) { + vnc_direct_bgp_del_route_ce(bgp, rn, bpi); + if (action == FIF_ACTION_WITHDRAW) + action = FIF_ACTION_KILL; + } + + if (action == FIF_ACTION_WITHDRAW) { + + int washolddown = CHECK_FLAG(bpi->flags, + BGP_PATH_REMOVED); + + vnc_zlog_debug_verbose( + "%s: withdrawing at prefix %pRN%s", + __func__, rn, + (washolddown + ? " (already being withdrawn)" + : "")); + + VNC_ITRCCK; + if (!washolddown) { + rfapiBiStartWithdrawTimer( + import_table, rn, bpi, afi, + SAFI_MPLS_VPN, + rfapiWithdrawTimerVPN); + + RFAPI_UPDATE_ITABLE_COUNT( + bpi, import_table, afi, -1); + import_table->holddown_count[afi] += 1; + } + VNC_ITRCCK; + } else { + vnc_zlog_debug_verbose( + "%s: %s at prefix %pRN", __func__, + ((action == FIF_ACTION_KILL) + ? 
"killing" + : "replacing"), + rn); + + /* + * If this route is waiting to be deleted + * because of + * a previous withdraw, we must cancel its + * timer. + */ + if (CHECK_FLAG(bpi->flags, BGP_PATH_REMOVED) + && bpi->extra->vnc.import.timer) { + struct rfapi_withdraw *wcb = EVENT_ARG( + bpi->extra->vnc.import.timer); + + XFREE(MTYPE_RFAPI_WITHDRAW, wcb); + EVENT_OFF(bpi->extra->vnc.import.timer); + + import_table->holddown_count[afi] -= 1; + RFAPI_UPDATE_ITABLE_COUNT( + bpi, import_table, afi, 1); + } + /* + * decrement remote count (if route is remote) + * because + * we are going to remove it below + */ + RFAPI_UPDATE_ITABLE_COUNT(bpi, import_table, + afi, -1); + if (action == FIF_ACTION_UPDATE) { + replacing = 1; + + /* + * make copy of original nexthop so we + * can see if it changed + */ + rfapiGetNexthop(bpi->attr, + &original_nexthop); + + /* + * remove bpi without doing any export + * processing + */ + if (CHECK_FLAG(bpi->flags, + BGP_PATH_VALID) + && VALID_INTERIOR_TYPE(bpi->type)) + RFAPI_MONITOR_EXTERIOR(rn) + ->valid_interior_count--; + rfapiItBiIndexDel(rn, bpi); + rfapiBgpInfoDetach(rn, bpi); + rfapiMonitorEncapDelete(bpi); + vnc_import_bgp_exterior_del_route_interior( + bgp, import_table, rn, bpi); + rfapiBgpInfoFree(bpi); + } else { + /* Kill */ + /* + * remove bpi and do export processing + */ + import_table->holddown_count[afi] += 1; + rfapiExpireVpnNow(import_table, rn, bpi, + 0); + } + } + } + } + + if (rn) + RFAPI_CHECK_REFCOUNT(rn, SAFI_MPLS_VPN, replacing ? 
1 : 0); + + if (action == FIF_ACTION_WITHDRAW || action == FIF_ACTION_KILL) { + VNC_ITRCCK; + return; + } + + info_new = + rfapiBgpInfoCreate(attr, peer, rfd, prd, type, sub_type, label); + + /* + * lookup un address in encap table + */ + ern = agg_node_match(import_table->imported_encap[afi], &vn_prefix); + if (ern) { + rfapiCopyUnEncap2VPN(ern->info, info_new); + agg_unlock_node(ern); /* undo lock in route_note_match */ + } else { + /* Not a big deal, just means VPN route got here first */ + vnc_zlog_debug_verbose("%s: no encap route for vn addr %pFX", + __func__, &vn_prefix); + info_new->extra->vnc.import.un_family = AF_UNSPEC; + } + + if (rn) { + if (!replacing) + agg_lock_node(rn); + } else { + /* + * No need to increment reference count, so only "get" + * if the node is not there already + */ + rn = agg_node_get(rt, p); + } + + /* + * For ethernet routes, if there is an accompanying IP address, + * save it in the bpi + */ + if ((AFI_L2VPN == afi) && aux_prefix) { + + vnc_zlog_debug_verbose("%s: setting BPI's aux_prefix", + __func__); + info_new->extra->vnc.import.aux_prefix = *aux_prefix; + } + + vnc_zlog_debug_verbose("%s: inserting bpi %p at prefix %pRN #%d", + __func__, info_new, rn, + agg_node_get_lock_count(rn)); + + rfapiBgpInfoAttachSorted(rn, info_new, afi, SAFI_MPLS_VPN); + rfapiItBiIndexAdd(rn, info_new); + if (!rfapiGetUnAddrOfVpnBi(info_new, NULL)) { + if (VALID_INTERIOR_TYPE(info_new->type)) + RFAPI_MONITOR_EXTERIOR(rn)->valid_interior_count++; + SET_FLAG(info_new->flags, BGP_PATH_VALID); + } + RFAPI_UPDATE_ITABLE_COUNT(info_new, import_table, afi, 1); + vnc_import_bgp_exterior_add_route_interior(bgp, import_table, rn, + info_new); + + if (import_table == bgp->rfapi->it_ce) + vnc_direct_bgp_add_route_ce(bgp, rn, info_new); + + if (VNC_DEBUG(VERBOSE)) { + vnc_zlog_debug_verbose("%s: showing IT node", __func__); + rfapiShowItNode(NULL, rn); /* debug */ + } + + rfapiMonitorEncapAdd(import_table, &vn_prefix, rn, info_new); + + if 
(!rfapiGetUnAddrOfVpnBi(info_new, &un_prefix)) { + + /* + * if we have a valid UN address (either via Encap route + * or via tunnel attribute), then we should attempt + * to move any monitors at less-specific nodes to this node + */ + rfapiMonitorMoveLonger(rn); + + un_prefix_valid = 1; + } + + /* + * 101129 Enhancement: if we add a route (implication: it is not + * in holddown), delete all other routes from this nve at this + * node that are in holddown, regardless of peer. + * + * Reasons it's OK to do that: + * + * - if the holddown route being deleted originally came from BGP VPN, + * it is already gone from BGP (implication of holddown), so there + * won't be any added inconsistency with the BGP RIB. + * + * - once a fresh route is added at a prefix, any routes in holddown + * at that prefix will not show up in RFP responses, so deleting + * the holddown routes won't affect the contents of responses. + * + * - lifetimes are supposed to be consistent, so there should not + * be a case where the fresh route has a shorter lifetime than + * the holddown route, so we don't expect the fresh route to + * disappear and complete its holddown time before the existing + * holddown routes time out. Therefore, we won't have a situation + * where we expect the existing holddown routes to be hidden and + * then to reappear sometime later (as holddown routes) in a + * RFP response. + * + * Among other things, this would enable us to skirt the problem + * of local holddown routes that refer to NVE descriptors that + * have already been closed (if the same NVE triggers a subsequent + * rfapi_open(), the new peer is different and doesn't match the + * peer of the holddown route, so the stale holddown route still + * hangs around until it times out instead of just being replaced + * by the fresh route). 
+ */ + /* + * We know that the new bpi will have been inserted before any routes + * in holddown, so we can skip any that came before it + */ + for (bpi = info_new->next; bpi; bpi = next) { + + struct prefix pfx_vn; + struct prefix pfx_un; + int un_match = 0; + int remote_peer_match = 0; + + next = bpi->next; + + /* + * Must be holddown + */ + if (!CHECK_FLAG(bpi->flags, BGP_PATH_REMOVED)) + continue; + + /* + * Must match VN address (nexthop of VPN route) + */ + if (rfapiGetNexthop(bpi->attr, &pfx_vn)) + continue; + if (!prefix_same(&pfx_vn, &vn_prefix)) + continue; + + if (un_prefix_valid && /* new route UN addr */ + !rfapiGetUnAddrOfVpnBi(bpi, &pfx_un) + && /* old route UN addr */ + prefix_same(&pfx_un, &un_prefix)) { /* compare */ + un_match = 1; + } + if (!RFAPI_LOCAL_BI(bpi) && !RFAPI_LOCAL_BI(info_new) && + sockunion_same(&bpi->peer->connection->su, + &info_new->peer->connection->su)) { + /* old & new are both remote, same peer */ + remote_peer_match = 1; + } + + if (!un_match && !remote_peer_match) + continue; + + vnc_zlog_debug_verbose( + "%s: removing holddown bpi matching NVE of new route", + __func__); + if (bpi->extra->vnc.import.timer) { + struct rfapi_withdraw *wcb = + EVENT_ARG(bpi->extra->vnc.import.timer); + + XFREE(MTYPE_RFAPI_WITHDRAW, wcb); + EVENT_OFF(bpi->extra->vnc.import.timer); + } + rfapiExpireVpnNow(import_table, rn, bpi, 0); + } + + if (!original_had_routes) { + /* + * We went from 0 usable routes to 1 usable route. Perform the + * "Adding a Route" export process. + */ + vnc_direct_bgp_add_prefix(bgp, import_table, rn); + vnc_zebra_add_prefix(bgp, import_table, rn); + } else { + /* + * Check for nexthop change event + * Note: the prefix_same() test below detects two situations: + * 1. route is replaced, new route has different nexthop + * 2. 
	 *       new route is added (original_nexthop is 0)
		 */
		struct prefix new_nexthop;

		rfapiGetNexthop(attr, &new_nexthop);
		if (!prefix_same(&original_nexthop, &new_nexthop)) {
			/*
			 * nexthop change event
			 * vnc_direct_bgp_add_prefix() will recompute VN addr
			 * ecommunity
			 */
			vnc_direct_bgp_add_prefix(bgp, import_table, rn);
		}
	}

	/* notify RFAPI monitors of the change unless callbacks are
	 * disabled by configuration */
	if (!(bgp->rfapi_cfg->flags & BGP_VNC_CONFIG_CALLBACK_DISABLE)) {
		for (n = rn; n; n = agg_node_parent(n)) {
			// rfapiDoRouteCallback(import_table, n, NULL);
		}
		rfapiMonitorItNodeChanged(import_table, rn, NULL);
	}
	RFAPI_CHECK_REFCOUNT(rn, SAFI_MPLS_VPN, 0);
	VNC_ITRCCK;
}

/*
 * Fallback import function returned for SAFIs we do not import;
 * logs and discards the route.  Signature matches
 * rfapi_bi_filtered_import_f so it can be dispatched uniformly.
 */
static void rfapiBgpInfoFilteredImportBadSafi(
	struct rfapi_import_table *import_table, int action, struct peer *peer,
	void *rfd, /* set for looped back routes */
	const struct prefix *p,
	const struct prefix *aux_prefix, /* AFI_L2VPN: optional IP */
	afi_t afi, struct prefix_rd *prd,
	struct attr *attr, /* part of bgp_path_info */
	uint8_t type,	   /* part of bgp_path_info */
	uint8_t sub_type,  /* part of bgp_path_info */
	uint32_t *label)   /* part of bgp_path_info */
{
	vnc_zlog_debug_verbose("%s: Error, bad safi", __func__);
}

/*
 * Select the per-SAFI filtered-import function.  Only SAFI_MPLS_VPN
 * and SAFI_ENCAP are importable; any other SAFI gets the logging stub.
 */
static rfapi_bi_filtered_import_f *
rfapiBgpInfoFilteredImportFunction(safi_t safi)
{
	switch (safi) {
	case SAFI_MPLS_VPN:
		return rfapiBgpInfoFilteredImportVPN;

	case SAFI_ENCAP:
		return rfapiBgpInfoFilteredImportEncap;

	case SAFI_UNSPEC:
	case SAFI_UNICAST:
	case SAFI_MULTICAST:
	case SAFI_EVPN:
	case SAFI_LABELED_UNICAST:
	case SAFI_FLOWSPEC:
	case SAFI_MAX:
		/* not expected */
		flog_err(EC_LIB_DEVELOPMENT, "%s: bad safi %d", __func__, safi);
		return rfapiBgpInfoFilteredImportBadSafi;
	}

	assert(!"Reached end of function when we were not expecting to");
}

/*
 * Entry point for importing a BGP route update into the RFAPI import
 * tables.  Handles the VNC L2VPN case (MAC address encoded in the RD)
 * by also importing into the per-LNI MAC import table, then runs the
 * filtered import against every IP-based import table.
 */
void rfapiProcessUpdate(struct peer *peer,
			void *rfd, /* set when looped from RFP/RFAPI */
			const struct prefix *p, struct prefix_rd *prd,
			struct attr *attr, afi_t afi, safi_t safi, uint8_t type,
			uint8_t sub_type,
			uint32_t *label)
{
	struct bgp *bgp;
	struct rfapi *h;
	struct rfapi_import_table *it;
	int has_ip_route = 1; /* cleared for pure-L2 routes (prefix == nexthop) */
	uint32_t lni = 0;

	bgp = bgp_get_default(); /* assume 1 instance for now */
	assert(bgp);

	h = bgp->rfapi;
	assert(h);

	/*
	 * look at high-order byte of RD. FF means MAC
	 * address is present (VNC L2VPN)
	 */
	if ((safi == SAFI_MPLS_VPN)
	    && (decode_rd_type(prd->val) == RD_TYPE_VNC_ETH)) {
		struct prefix pfx_mac_buf;
		struct prefix pfx_nexthop_buf;
		int rc;

		/*
		 * Set flag if prefix and nexthop are the same - don't
		 * add the route to normal IP-based import tables
		 */
		if (!rfapiGetNexthop(attr, &pfx_nexthop_buf)) {
			if (!prefix_cmp(&pfx_nexthop_buf, p)) {
				has_ip_route = 0;
			}
		}

		/* build an AF_ETHERNET prefix from the 6 MAC bytes that
		 * follow the 2-byte RD type field */
		memset(&pfx_mac_buf, 0, sizeof(pfx_mac_buf));
		pfx_mac_buf.family = AF_ETHERNET;
		pfx_mac_buf.prefixlen = 48;
		memcpy(&pfx_mac_buf.u.prefix_eth.octet, prd->val + 2, 6);

		/*
		 * Find rt containing LNI (Logical Network ID), which
		 * _should_ always be present when mac address is present
		 */
		rc = rfapiEcommunityGetLNI(bgp_attr_get_ecommunity(attr), &lni);

		vnc_zlog_debug_verbose(
			"%s: rfapiEcommunityGetLNI returned %d, lni=%d, attr=%p",
			__func__, rc, lni, attr);
		if (!rc) {
			it = rfapiMacImportTableGet(bgp, lni);

			rfapiBgpInfoFilteredImportVPN(
				it, FIF_ACTION_UPDATE, peer, rfd,
				&pfx_mac_buf, /* prefix */
				p,	      /* aux prefix: IP addr */
				AFI_L2VPN, prd, attr, type, sub_type, label);
		}
	}

	if (!has_ip_route)
		return;

	/*
	 * Iterate over all import tables; do a filtered import
	 * for the afi/safi combination
	 */
	for (it = h->imports; it; it = it->next) {
		(*rfapiBgpInfoFilteredImportFunction(safi))(
			it, FIF_ACTION_UPDATE, peer, rfd, p, /* prefix */
			NULL, afi, prd, attr, type, sub_type, label);
	}

	if (safi == SAFI_MPLS_VPN) {
		vnc_direct_bgp_rh_add_route(bgp, afi, p, peer, attr);
		/* the special CE table is not on the h->imports list */
		rfapiBgpInfoFilteredImportVPN(
			bgp->rfapi->it_ce, FIF_ACTION_UPDATE, peer, rfd,
			p, /* prefix */
			NULL, afi, prd,
			attr, type, sub_type, label);
	}
}


/*
 * Entry point for withdrawing a BGP route from the RFAPI import
 * tables.  kill != 0 removes the route immediately (FIF_ACTION_KILL)
 * instead of starting the normal holddown (FIF_ACTION_WITHDRAW).
 */
void rfapiProcessWithdraw(struct peer *peer, void *rfd, const struct prefix *p,
			  struct prefix_rd *prd, struct attr *attr, afi_t afi,
			  safi_t safi, uint8_t type, int kill)
{
	struct bgp *bgp;
	struct rfapi *h;
	struct rfapi_import_table *it;

	bgp = bgp_get_default(); /* assume 1 instance for now */
	assert(bgp);

	h = bgp->rfapi;
	assert(h);

	/*
	 * look at high-order byte of RD. FF means MAC
	 * address is present (VNC L2VPN)
	 */
	if (h->import_mac != NULL && safi == SAFI_MPLS_VPN
	    && decode_rd_type(prd->val) == RD_TYPE_VNC_ETH) {
		struct prefix pfx_mac_buf;
		void *cursor = NULL;
		int rc;

		memset(&pfx_mac_buf, 0, sizeof(pfx_mac_buf));
		pfx_mac_buf.family = AF_ETHERNET;
		pfx_mac_buf.prefixlen = 48;
		/* NOTE(review): copies the 6 MAC bytes from RD bytes 2..7;
		 * rfapiProcessUpdate() writes &...prefix_eth.octet — same
		 * destination address, just spelled differently */
		memcpy(&pfx_mac_buf.u.prefix_eth, prd->val + 2, 6);

		/*
		 * withdraw does not contain attrs, so we don't have
		 * access to the route's LNI, which would ordinarily
		 * select the specific mac-based import table. Instead,
		 * we must iterate over all mac-based tables and rely
		 * on the RD to match.
		 *
		 * If this approach is too slow, add an index where
		 * key is {RD, peer} and value is the import table
		 */
		for (rc = skiplist_next(h->import_mac, NULL, (void **)&it,
					&cursor);
		     rc == 0; rc = skiplist_next(h->import_mac, NULL,
						 (void **)&it, &cursor)) {

#ifdef DEBUG_L2_EXTRA
			vnc_zlog_debug_verbose(
				"%s: calling rfapiBgpInfoFilteredImportVPN(it=%p, afi=AFI_L2VPN)",
				__func__, it);
#endif

			rfapiBgpInfoFilteredImportVPN(
				it,
				(kill ? FIF_ACTION_KILL : FIF_ACTION_WITHDRAW),
				peer, rfd, &pfx_mac_buf, /* prefix */
				p,			 /* aux_prefix: IP */
				AFI_L2VPN, prd, attr, type, 0,
				NULL); /* sub_type & label unused for withdraw
					*/
		}
	}

	/*
	 * XXX For the case where the withdraw involves an L2
	 * route with no IP information, we rely on the lack
	 * of RT-list intersection to filter out the withdraw
	 * from the IP-based import tables below
	 */

	/*
	 * Iterate over all import tables; do a filtered import
	 * for the afi/safi combination
	 */

	for (it = h->imports; it; it = it->next) {
		(*rfapiBgpInfoFilteredImportFunction(safi))(
			it, (kill ? FIF_ACTION_KILL : FIF_ACTION_WITHDRAW),
			peer, rfd, p, /* prefix */
			NULL, afi, prd, attr, type, 0,
			NULL); /* sub_type & label unused for withdraw */
	}

	/* TBD the deletion should happen after the lifetime expires */
	if (safi == SAFI_MPLS_VPN)
		vnc_direct_bgp_rh_del_route(bgp, afi, p, peer);

	if (safi == SAFI_MPLS_VPN) {
		rfapiBgpInfoFilteredImportVPN(
			bgp->rfapi->it_ce,
			(kill ? FIF_ACTION_KILL : FIF_ACTION_WITHDRAW), peer,
			rfd, p, /* prefix */
			NULL, afi, prd, attr, type, 0,
			NULL); /* sub_type & label unused for withdraw */
	}
}

/*
 * TBD optimized withdraw timer algorithm for case of many
 * routes expiring at the same time due to peer drop.
 */
/*
 * 1. Visit all BPIs in all ENCAP import tables.
 *
 *    a. If a bpi's peer is the failed peer, remove the bpi.
 *    b. If the removed ENCAP bpi was first in the list of
 *       BPIs at this ENCAP node, loop over all monitors
 *       at this node:
 *
 *       (1) for each ENCAP monitor, loop over all its
 *           VPN node monitors and set their RFAPI_MON_FLAG_NEEDCALLBACK
 *           flags.
 *
 * 2. Visit all BPIs in all VPN import tables.
 *    a. If a bpi's peer is the failed peer, remove the bpi.
 *    b. loop over all the VPN node monitors and set their
 *       RFAPI_MON_FLAG_NEEDCALLBACK flags
 *    c.
 *       If there are no BPIs left at this VPN node,
 *
 */


/* surprise, this gets called from peer_delete(), from rfapi_close() */
/*
 * Start withdraw (holddown) timers on every path entry owned by `peer`
 * in one import-table RIB: the VPN RIB or the ENCAP RIB for one afi.
 * Entries already flagged BGP_PATH_REMOVED are in holddown and skipped.
 */
static void rfapiProcessPeerDownRt(struct peer *peer,
				   struct rfapi_import_table *import_table,
				   afi_t afi, safi_t safi)
{
	struct agg_node *rn;
	struct bgp_path_info *bpi;
	struct agg_table *rt = NULL;
	void (*timer_service_func)(struct event *) = NULL;

	assert(afi == AFI_IP || afi == AFI_IP6);

	VNC_ITRCCK;

	/* Select the RIB and the timer callback matching the safi */
	switch (safi) {
	case SAFI_MPLS_VPN:
		rt = import_table->imported_vpn[afi];
		timer_service_func = rfapiWithdrawTimerVPN;
		break;
	case SAFI_ENCAP:
		rt = import_table->imported_encap[afi];
		timer_service_func = rfapiWithdrawTimerEncap;
		break;
	case SAFI_UNSPEC:
	case SAFI_UNICAST:
	case SAFI_MULTICAST:
	case SAFI_EVPN:
	case SAFI_LABELED_UNICAST:
	case SAFI_FLOWSPEC:
	case SAFI_MAX:
		/* Suppress uninitialized variable warning */
		rt = NULL;
		timer_service_func = NULL;
		assert(0);
	}

	for (rn = agg_route_top(rt); rn; rn = agg_route_next(rn)) {
		for (bpi = rn->info; bpi; bpi = bpi->next) {
			if (bpi->peer == peer) {

				if (CHECK_FLAG(bpi->flags, BGP_PATH_REMOVED)) {
					/* already in holddown, skip */
					continue;
				}

				/*
				 * Move the entry from the active counters
				 * into the holddown count (VPN tables only)
				 */
				if (safi == SAFI_MPLS_VPN) {
					RFAPI_UPDATE_ITABLE_COUNT(
						bpi, import_table, afi, -1);
					import_table->holddown_count[afi] += 1;
				}
				rfapiBiStartWithdrawTimer(import_table, rn, bpi,
							  afi, safi,
							  timer_service_func);
			}
		}
	}
	VNC_ITRCCK;
}

/*
 * This gets called when a peer connection drops. We have to remove
 * all the routes from this peer.
 *
 * Current approach is crude. TBD Optimize by setting fewer timers and
 * grouping withdrawn routes so we can generate callbacks more
 * efficiently.
+ */ +void rfapiProcessPeerDown(struct peer *peer) +{ + struct bgp *bgp; + struct rfapi *h; + struct rfapi_import_table *it; + + /* + * If this peer is a "dummy" peer structure atached to a RFAPI + * nve_descriptor, we don't need to walk the import tables + * because the routes are already withdrawn by rfapi_close() + */ + if (CHECK_FLAG(peer->flags, PEER_FLAG_IS_RFAPI_HD)) + return; + + /* + * 1. Visit all BPIs in all ENCAP import tables. + * Start withdraw timer on the BPIs that match peer. + * + * 2. Visit All BPIs in all VPN import tables. + * Start withdraw timer on the BPIs that match peer. + */ + + bgp = bgp_get_default(); /* assume 1 instance for now */ + if (!bgp) + return; + + h = bgp->rfapi; + assert(h); + + for (it = h->imports; it; it = it->next) { + rfapiProcessPeerDownRt(peer, it, AFI_IP, SAFI_ENCAP); + rfapiProcessPeerDownRt(peer, it, AFI_IP6, SAFI_ENCAP); + rfapiProcessPeerDownRt(peer, it, AFI_IP, SAFI_MPLS_VPN); + rfapiProcessPeerDownRt(peer, it, AFI_IP6, SAFI_MPLS_VPN); + } + + if (h->it_ce) { + rfapiProcessPeerDownRt(peer, h->it_ce, AFI_IP, SAFI_MPLS_VPN); + rfapiProcessPeerDownRt(peer, h->it_ce, AFI_IP6, SAFI_MPLS_VPN); + } +} + +/* + * Import an entire RIB (for an afi/safi) to an import table RIB, + * filtered according to the import table's RT list + * + * TBD: does this function need additions to match rfapiProcessUpdate() + * for, e.g., L2 handling? 
 */
/*
 * bgp		BGP instance whose rib[afi][safi] is the source
 * it		destination import table (RT filtering happens in the
 *		per-safi filtered-import function)
 */
static void rfapiBgpTableFilteredImport(struct bgp *bgp,
					struct rfapi_import_table *it,
					afi_t afi, safi_t safi)
{
	struct bgp_dest *dest1;
	struct bgp_dest *dest2;

	/* Only these SAFIs have 2-level RIBS */
	assert(safi == SAFI_MPLS_VPN || safi == SAFI_ENCAP);

	/*
	 * Now visit all the rd nodes and the nodes of all the
	 * route tables attached to them, and import the routes
	 * if they have matching route targets
	 */
	for (dest1 = bgp_table_top(bgp->rib[afi][safi]); dest1;
	     dest1 = bgp_route_next(dest1)) {

		if (bgp_dest_has_bgp_path_info_data(dest1)) {

			for (dest2 = bgp_table_top(
				     bgp_dest_get_bgp_table_info(dest1));
			     dest2; dest2 = bgp_route_next(dest2)) {

				struct bgp_path_info *bpi;

				for (bpi = bgp_dest_get_bgp_path_info(dest2);
				     bpi; bpi = bpi->next) {
					uint32_t label = 0;

					if (CHECK_FLAG(bpi->flags,
						       BGP_PATH_REMOVED))
						continue;

					if (bpi->extra)
						label = decode_label(
							&bpi->extra->label[0]);
					/* level 1 dest holds the RD */
					(*rfapiBgpInfoFilteredImportFunction(
						safi))(
						it, /* which import table */
						FIF_ACTION_UPDATE, bpi->peer,
						NULL,
						bgp_dest_get_prefix(dest2),
						NULL, afi,
						(struct prefix_rd *)
							bgp_dest_get_prefix(
								dest1),
						bpi->attr, bpi->type,
						bpi->sub_type, &label);
				}
			}
		}
	}
}


/* per-bgp-instance rfapi data */
/*
 * Allocate and initialize the per-BGP-instance rfapi: per-afi UN
 * tables, the CE import table (seeded from the VPN RIBs), the
 * deferred-close work queue, and the RFP instance/config.
 */
struct rfapi *bgp_rfapi_new(struct bgp *bgp)
{
	struct rfapi *h;
	afi_t afi;
	struct rfapi_rfp_cfg *cfg = NULL;
	struct rfapi_rfp_cb_methods *cbm = NULL;

	assert(bgp->rfapi_cfg == NULL);

	h = XCALLOC(MTYPE_RFAPI, sizeof(struct rfapi));

	for (afi = AFI_IP; afi < AFI_MAX; afi++) {
		h->un[afi] = agg_table_init();
	}

	/*
	 * initialize the ce import table
	 */
	h->it_ce = XCALLOC(MTYPE_RFAPI_IMPORTTABLE,
			   sizeof(struct rfapi_import_table));
	h->it_ce->imported_vpn[AFI_IP] = agg_table_init();
	h->it_ce->imported_vpn[AFI_IP6] = agg_table_init();
	h->it_ce->imported_encap[AFI_IP] = agg_table_init();
	h->it_ce->imported_encap[AFI_IP6] = agg_table_init();
	rfapiBgpTableFilteredImport(bgp, h->it_ce, AFI_IP, SAFI_MPLS_VPN);
	rfapiBgpTableFilteredImport(bgp, h->it_ce, AFI_IP6, SAFI_MPLS_VPN);

	/*
	 * Set up work queue for deferred rfapi_close operations
	 */
	h->deferred_close_q =
		work_queue_new(bm->master, "rfapi deferred close");
	h->deferred_close_q->spec.workfunc = rfapi_deferred_close_workfunc;
	h->deferred_close_q->spec.data = h;

	h->rfp = rfp_start(bm->master, &cfg, &cbm);
	bgp->rfapi_cfg = bgp_rfapi_cfg_new(cfg);
	if (cbm != NULL) {
		h->rfp_methods = *cbm;
	}
	return h;
}

/*
 * Tear down a per-BGP-instance rfapi created by bgp_rfapi_new():
 * flush the CE and MAC import tables, free the deferred-close queue,
 * stop the RFP, and release the per-afi UN tables.
 */
void bgp_rfapi_destroy(struct bgp *bgp, struct rfapi *h)
{
	afi_t afi;

	if (bgp == NULL || h == NULL)
		return;

	if (h->resolve_nve_nexthop) {
		skiplist_free(h->resolve_nve_nexthop);
		h->resolve_nve_nexthop = NULL;
	}

	rfapiImportTableFlush(h->it_ce);

	if (h->import_mac) {
		struct rfapi_import_table *it;
		void *cursor;
		int rc;

		for (cursor = NULL,
		    rc = skiplist_next(h->import_mac, NULL, (void **)&it,
				       &cursor);
		     !rc; rc = skiplist_next(h->import_mac, NULL, (void **)&it,
					     &cursor)) {

			rfapiImportTableFlush(it);
			XFREE(MTYPE_RFAPI_IMPORTTABLE, it);
		}
		skiplist_free(h->import_mac);
		h->import_mac = NULL;
	}

	work_queue_free_and_null(&h->deferred_close_q);

	if (h->rfp != NULL)
		rfp_stop(h->rfp);

	for (afi = AFI_IP; afi < AFI_MAX; afi++) {
		agg_table_finish(h->un[afi]);
	}

	XFREE(MTYPE_RFAPI_IMPORTTABLE, h->it_ce);
	XFREE(MTYPE_RFAPI, h);
}

/*
 * Return the import table whose RT import list matches rt_import_list,
 * creating and populating a new one (from the VPN and ENCAP RIBs) if
 * none matches.  Bumps the table's refcount in either case.
 */
struct rfapi_import_table *
rfapiImportTableRefAdd(struct bgp *bgp, struct ecommunity *rt_import_list,
		       struct rfapi_nve_group_cfg *rfg)
{
	struct rfapi *h;
	struct rfapi_import_table *it;
	afi_t afi;

	h = bgp->rfapi;
	assert(h);

	for (it = h->imports; it; it = it->next) {
		if (ecommunity_cmp(it->rt_import_list, rt_import_list))
			break;
	}

	vnc_zlog_debug_verbose("%s: matched it=%p", __func__, it);

	if (!it) {
		it = XCALLOC(MTYPE_RFAPI_IMPORTTABLE,
			     sizeof(struct rfapi_import_table));
		it->next = h->imports;
		h->imports = it;

		it->rt_import_list = ecommunity_dup(rt_import_list);
		it->rfg = rfg;
		it->monitor_exterior_orphans =
			skiplist_new(0, NULL, prefix_free_lists);

		/*
		 * fill import route tables from RIBs
		 *
		 * Potential area for optimization. If this occurs when
		 * tables are large (e.g., the operator adds a nve group
		 * with a new RT list to a running system), it could take
		 * a while.
		 *
		 */
		for (afi = AFI_IP; afi < AFI_MAX; ++afi) {

			it->imported_vpn[afi] = agg_table_init();
			it->imported_encap[afi] = agg_table_init();

			rfapiBgpTableFilteredImport(bgp, it, afi,
						    SAFI_MPLS_VPN);
			rfapiBgpTableFilteredImport(bgp, it, afi, SAFI_ENCAP);

			vnc_import_bgp_exterior_redist_enable_it(bgp, afi, it);
		}
	}

	it->refcount += 1;

	return it;
}

/*
 * skiplist element free function
 * Bumps the deletion counter stashed in na->info, then frees the entry.
 */
static void delete_rem_pfx_na_free(void *na)
{
	uint32_t *pCounter = ((struct rfapi_nve_addr *)na)->info;

	*pCounter += 1;
	XFREE(MTYPE_RFAPI_NVE_ADDR, na);
}

/*
 * Common deleter for IP and MAC import tables
 *
 * un/vn/p	optional filters (tunnel addr, nexthop, prefix)
 * pARcount..pHHcount	counters incremented for routes/nves deleted
 * uniq_*_nves	skiplists used to count each NVE only once
 */
static void rfapiDeleteRemotePrefixesIt(
	struct bgp *bgp, struct rfapi_import_table *it, struct prefix *un,
	struct prefix *vn, struct prefix *p, int delete_active,
	int delete_holddown, uint32_t *pARcount, uint32_t *pAHcount,
	uint32_t *pHRcount, uint32_t *pHHcount,
	struct skiplist *uniq_active_nves, struct skiplist *uniq_holddown_nves)
{
	afi_t afi;

#ifdef DEBUG_L2_EXTRA
	{
		char buf_pfx[PREFIX_STRLEN];

		if (p) {
			prefix2str(p, buf_pfx, sizeof(buf_pfx));
		} else {
			buf_pfx[0] = '*';
			buf_pfx[1] = 0;
		}

		vnc_zlog_debug_verbose(
			"%s: entry, p=%s, delete_active=%d, delete_holddown=%d",
			__func__, buf_pfx, delete_active, delete_holddown);
	}
#endif

	for (afi = AFI_IP; afi < AFI_MAX; ++afi) {

		struct agg_table *rt;
		struct agg_node *rn;

		if (p && (family2afi(p->family) != afi)) {
			continue;
		}

		rt = it->imported_vpn[afi];
		if (!rt)
			continue;

		vnc_zlog_debug_verbose("%s: scanning rt for afi=%d", __func__,
				       afi);

		for (rn = agg_route_top(rt); rn; rn = agg_route_next(rn)) {
			struct bgp_path_info *bpi;
			struct bgp_path_info *next;
			const struct prefix *rn_p = agg_node_get_prefix(rn);

			if (p && VNC_DEBUG(IMPORT_DEL_REMOTE))
				vnc_zlog_debug_any("%s: want %pFX, have %pRN",
						   __func__, p, rn);

			if (p && prefix_cmp(p, rn_p))
				continue;

			vnc_zlog_debug_verbose("%s: rn pfx=%pRN", __func__, rn);

			/* TBD is this valid for afi == AFI_L2VPN? */
			RFAPI_CHECK_REFCOUNT(rn, SAFI_MPLS_VPN, 1);

			for (bpi = rn->info; bpi; bpi = next) {
				next = bpi->next;

				struct prefix qpt;
				struct prefix qct;
				int qpt_valid = 0;
				int qct_valid = 0;
				int is_active = 0;

				vnc_zlog_debug_verbose("%s: examining bpi %p",
						       __func__, bpi);

				if (!rfapiGetNexthop(bpi->attr, &qpt))
					qpt_valid = 1;

				if (vn) {
					if (!qpt_valid
					    || !prefix_match(vn, &qpt)) {
#ifdef DEBUG_L2_EXTRA
						vnc_zlog_debug_verbose(
							"%s: continue at vn && !qpt_valid || !prefix_match(vn, &qpt)",
							__func__);
#endif
						continue;
					}
				}

				if (!rfapiGetUnAddrOfVpnBi(bpi, &qct))
					qct_valid = 1;

				if (un) {
					if (!qct_valid
					    || !prefix_match(un, &qct)) {
#ifdef DEBUG_L2_EXTRA
						vnc_zlog_debug_verbose(
							"%s: continue at un && !qct_valid || !prefix_match(un, &qct)",
							__func__);
#endif
						continue;
					}
				}


				/*
				 * Blow bpi away
				 */
				/*
				 * If this route is waiting to be deleted
				 * because of
				 * a previous withdraw, we must cancel its
				 * timer.
				 */
				if (CHECK_FLAG(bpi->flags, BGP_PATH_REMOVED)) {
					if (!delete_holddown)
						continue;
					if (bpi->extra->vnc.import.timer) {
						struct rfapi_withdraw *wcb =
							EVENT_ARG(
								bpi->extra->vnc
									.import
									.timer);

						wcb->import_table
							->holddown_count[afi] -=
							1;
						RFAPI_UPDATE_ITABLE_COUNT(
							bpi, wcb->import_table,
							afi, 1);
						XFREE(MTYPE_RFAPI_WITHDRAW,
						      wcb);
						EVENT_OFF(bpi->extra->vnc.import
								  .timer);
					}
				} else {
					if (!delete_active)
						continue;
					is_active = 1;
				}

				vnc_zlog_debug_verbose(
					"%s: deleting bpi %p (qct_valid=%d, qpt_valid=%d, delete_holddown=%d, delete_active=%d)",
					__func__, bpi, qct_valid, qpt_valid,
					delete_holddown, delete_active);


				/*
				 * add nve to list
				 */
				if (qct_valid && qpt_valid) {

					struct rfapi_nve_addr na;
					struct rfapi_nve_addr *nap;

					memset(&na, 0, sizeof(na));
					assert(!rfapiQprefix2Raddr(&qct,
								   &na.un));
					assert(!rfapiQprefix2Raddr(&qpt,
								   &na.vn));

					if (skiplist_search(
						    (is_active
							     ? uniq_active_nves
							     : uniq_holddown_nves),
						    &na, (void **)&nap)) {
						char line[BUFSIZ];

						nap = XCALLOC(
							MTYPE_RFAPI_NVE_ADDR,
							sizeof(struct
							       rfapi_nve_addr));
						*nap = na;
						nap->info = is_active
								    ? pAHcount
								    : pHHcount;
						skiplist_insert(
							(is_active
								 ? uniq_active_nves
								 : uniq_holddown_nves),
							nap, nap);

						rfapiNveAddr2Str(nap, line,
								 BUFSIZ);
					}
				}

				vnc_direct_bgp_rh_del_route(bgp, afi, rn_p,
							    bpi->peer);

				RFAPI_UPDATE_ITABLE_COUNT(bpi, it, afi, -1);
				it->holddown_count[afi] += 1;
				rfapiExpireVpnNow(it, rn, bpi, 1);

				vnc_zlog_debug_verbose(
					"%s: incrementing count (is_active=%d)",
					__func__, is_active);

				if (is_active)
					++*pARcount;
				else
					++*pHRcount;
			}
		}
	}
}


/*
 * For use by the "clear vnc prefixes" command
 */
/*------------------------------------------
 * rfapiDeleteRemotePrefixes
 *
 * UI helper: For use by the "clear vnc prefixes" command
 *
 * input:
 *	un		if set, tunnel must match this prefix
 *	vn		if set, nexthop prefix must match this prefix
 *	p		if set, prefix must match this prefix
 *	it		if set, only look in this import table
 *
 * output
 *	pARcount	number of active routes deleted
 *	pAHcount	number of active nves deleted
 *	pHRcount	number of holddown routes deleted
 *	pHHcount	number of holddown nves deleted
 *
 * return value:
 *	void
 --------------------------------------------*/
void rfapiDeleteRemotePrefixes(struct prefix *un, struct prefix *vn,
			       struct prefix *p,
			       struct rfapi_import_table *arg_it,
			       int delete_active, int delete_holddown,
			       uint32_t *pARcount, uint32_t *pAHcount,
			       uint32_t *pHRcount, uint32_t *pHHcount)
{
	struct bgp *bgp;
	struct rfapi *h;
	struct rfapi_import_table *it;
	uint32_t deleted_holddown_route_count = 0;
	uint32_t deleted_active_route_count = 0;
	uint32_t deleted_holddown_nve_count = 0;
	uint32_t deleted_active_nve_count = 0;
	struct skiplist *uniq_holddown_nves;
	struct skiplist *uniq_active_nves;

	VNC_ITRCCK;

	bgp = bgp_get_default(); /* assume 1 instance for now */
	/* If no bgp instantiated yet, no vnc prefixes exist */
	if (!bgp)
		return;

	h = bgp->rfapi;
	assert(h);

	uniq_holddown_nves =
		skiplist_new(0, rfapi_nve_addr_cmp, delete_rem_pfx_na_free);
	uniq_active_nves =
		skiplist_new(0, rfapi_nve_addr_cmp, delete_rem_pfx_na_free);

	/*
	 * Iterate over all import tables; do a filtered import
	 * for the afi/safi combination
	 */

	if (arg_it)
		it = arg_it;
	else
		it = h->imports;
	for (; it;) {

		vnc_zlog_debug_verbose(
			"%s: calling rfapiDeleteRemotePrefixesIt() on (IP) import %p",
			__func__, it);

		rfapiDeleteRemotePrefixesIt(
			bgp, it, un, vn, p, delete_active, delete_holddown,
			&deleted_active_route_count, &deleted_active_nve_count,
			&deleted_holddown_route_count,
			&deleted_holddown_nve_count, uniq_active_nves,
			uniq_holddown_nves);

		/* arg_it restricts the walk to a single table */
		if (arg_it)
			it = NULL;
		else
			it = it->next;
	}

	/*
	 * Now iterate over L2 import tables
	 */
	if (h->import_mac && !(p && (p->family != AF_ETHERNET))) {

		void *cursor = NULL;
		int rc;

		for (cursor = NULL,
		    rc = skiplist_next(h->import_mac, NULL, (void **)&it,
				       &cursor);
		     !rc; rc = skiplist_next(h->import_mac, NULL, (void **)&it,
					     &cursor)) {

			vnc_zlog_debug_verbose(
				"%s: calling rfapiDeleteRemotePrefixesIt() on import_mac %p",
				__func__, it);

			rfapiDeleteRemotePrefixesIt(
				bgp, it, un, vn, p, delete_active,
				delete_holddown, &deleted_active_route_count,
				&deleted_active_nve_count,
				&deleted_holddown_route_count,
				&deleted_holddown_nve_count, uniq_active_nves,
				uniq_holddown_nves);
		}
	}

	/*
	 * our custom element freeing function above counts as it deletes
	 */
	skiplist_free(uniq_holddown_nves);
	skiplist_free(uniq_active_nves);

	if (pARcount)
		*pARcount = deleted_active_route_count;
	if (pAHcount)
		*pAHcount = deleted_active_nve_count;
	if (pHRcount)
		*pHRcount = deleted_holddown_route_count;
	if (pHHcount)
		*pHHcount = deleted_holddown_nve_count;

	VNC_ITRCCK;
}

/*------------------------------------------
 * rfapiCountRemoteRoutes
 *
 * UI helper: count VRF routes from BGP side
 *
 * input:
 *
 * output
 *	pALRcount	count of active local routes
 *	pARRcount	count of active remote
routes
 *	pHRcount	count of holddown routes
 *	pIRcount	count of direct imported routes
 *
 * return value:
 *	void
 --------------------------------------------*/
void rfapiCountAllItRoutes(int *pALRcount, /* active local routes */
			   int *pARRcount, /* active remote routes */
			   int *pHRcount,  /* holddown routes */
			   int *pIRcount)  /* imported routes */
{
	struct bgp *bgp;
	struct rfapi *h;
	struct rfapi_import_table *it;
	afi_t afi;

	int total_active_local = 0;
	int total_active_remote = 0;
	int total_holddown = 0;
	int total_imported = 0;

	bgp = bgp_get_default(); /* assume 1 instance for now */
	assert(bgp);

	h = bgp->rfapi;
	assert(h);

	/*
	 * Iterate over all import tables; do a filtered import
	 * for the afi/safi combination
	 */

	for (it = h->imports; it; it = it->next) {

		for (afi = AFI_IP; afi < AFI_MAX; ++afi) {

			total_active_local += it->local_count[afi];
			total_active_remote += it->remote_count[afi];
			total_holddown += it->holddown_count[afi];
			total_imported += it->imported_count[afi];
		}
	}

	void *cursor;
	int rc;

	/* MAC import tables keep their counters under AFI_L2VPN */
	if (h->import_mac) {
		for (cursor = NULL,
		    rc = skiplist_next(h->import_mac, NULL, (void **)&it,
				       &cursor);
		     !rc; rc = skiplist_next(h->import_mac, NULL, (void **)&it,
					     &cursor)) {

			total_active_local += it->local_count[AFI_L2VPN];
			total_active_remote += it->remote_count[AFI_L2VPN];
			total_holddown += it->holddown_count[AFI_L2VPN];
			total_imported += it->imported_count[AFI_L2VPN];
		}
	}


	if (pALRcount) {
		*pALRcount = total_active_local;
	}
	if (pARRcount) {
		*pARRcount = total_active_remote;
	}
	if (pHRcount) {
		*pHRcount = total_holddown;
	}
	if (pIRcount) {
		*pIRcount = total_imported;
	}
}

/*------------------------------------------
 * rfapiGetHolddownFromLifetime
 *
 * calculate holddown value based on lifetime
 *
 * input:
 *	lifetime	lifetime
 *
 * return value:
 *	Holddown value based on lifetime, holddown_factor,
 *	and
RFAPI_LIFETIME_INFINITE_WITHDRAW_DELAY + * + --------------------------------------------*/ +/* hold down time maxes out at RFAPI_LIFETIME_INFINITE_WITHDRAW_DELAY */ +uint32_t rfapiGetHolddownFromLifetime(uint32_t lifetime) +{ + uint32_t factor; + struct bgp *bgp; + + bgp = bgp_get_default(); + if (bgp && bgp->rfapi_cfg) + factor = bgp->rfapi_cfg->rfp_cfg.holddown_factor; + else + factor = RFAPI_RFP_CFG_DEFAULT_HOLDDOWN_FACTOR; + + if (factor < 100 || lifetime < RFAPI_LIFETIME_INFINITE_WITHDRAW_DELAY) + lifetime = lifetime * factor / 100; + if (lifetime < RFAPI_LIFETIME_INFINITE_WITHDRAW_DELAY) + return lifetime; + else + return RFAPI_LIFETIME_INFINITE_WITHDRAW_DELAY; +} |