// SPDX-License-Identifier: GPL-2.0-or-later /* * * Copyright 2009-2016, LabN Consulting, L.L.C. * */ #include "lib/zebra.h" #include "lib/prefix.h" #include "lib/agg_table.h" #include "lib/vty.h" #include "lib/memory.h" #include "lib/routemap.h" #include "lib/log.h" #include "lib/linklist.h" #include "lib/command.h" #include "lib/stream.h" #include "lib/ringbuf.h" #include "lib/lib_errors.h" #include "bgpd/bgpd.h" #include "bgpd/bgp_ecommunity.h" #include "bgpd/bgp_attr.h" #include "bgpd/rfapi/bgp_rfapi_cfg.h" #include "bgpd/rfapi/rfapi.h" #include "bgpd/rfapi/rfapi_backend.h" #include "bgpd/bgp_route.h" #include "bgpd/bgp_mplsvpn.h" #include "bgpd/bgp_aspath.h" #include "bgpd/bgp_advertise.h" #include "bgpd/bgp_vnc_types.h" #include "bgpd/bgp_zebra.h" #include "bgpd/rfapi/rfapi_import.h" #include "bgpd/rfapi/rfapi_private.h" #include "bgpd/rfapi/rfapi_monitor.h" #include "bgpd/rfapi/rfapi_vty.h" #include "bgpd/rfapi/vnc_export_bgp.h" #include "bgpd/rfapi/vnc_export_bgp_p.h" #include "bgpd/rfapi/vnc_zebra.h" #include "bgpd/rfapi/vnc_import_bgp.h" #include "bgpd/rfapi/rfapi_rib.h" #include "bgpd/rfapi/rfapi_ap.h" #include "bgpd/rfapi/rfapi_encap_tlv.h" #include "bgpd/rfapi/vnc_debug.h" #define DEBUG_CLEANUP 0 struct ethaddr rfapi_ethaddr0 = {{0}}; #define DEBUG_RFAPI_STR "RF API debugging/testing command\n" const char *rfapi_error_str(int code) { switch (code) { case 0: return "Success"; case ENXIO: return "BGP or VNC not configured"; case ENOENT: return "No match"; case EEXIST: return "Handle already open"; case ENOMSG: return "Incomplete configuration"; case EAFNOSUPPORT: return "Invalid address family"; case EDEADLK: return "Called from within a callback procedure"; case EBADF: return "Invalid handle"; case EINVAL: return "Invalid argument"; case ESTALE: return "Stale descriptor"; default: return "Unknown error"; } } /*------------------------------------------ * rfapi_get_response_lifetime_default * * Returns the default lifetime for a response. 
* rfp_start_val value returned by rfp_start or * NULL (=use default instance) * * input: * None * * output: * * return value: The bgp instance default lifetime for a response. --------------------------------------------*/ int rfapi_get_response_lifetime_default(void *rfp_start_val) { struct bgp *bgp = rfapi_bgp_lookup_by_rfp(rfp_start_val); if (bgp) return bgp->rfapi_cfg->default_response_lifetime; return BGP_VNC_DEFAULT_RESPONSE_LIFETIME_DEFAULT; } /*------------------------------------------ * rfapi_is_vnc_configured * * Returns if VNC is configured * * input: * rfp_start_val value returned by rfp_start or * NULL (=use default instance) * * output: * * return value: If VNC is configured for the bgpd instance * 0 Success * ENXIO VNC not configured --------------------------------------------*/ int rfapi_is_vnc_configured(void *rfp_start_val) { struct bgp *bgp = rfapi_bgp_lookup_by_rfp(rfp_start_val); if (bgp_rfapi_is_vnc_configured(bgp) == 0) return 0; return ENXIO; } /*------------------------------------------ * rfapi_get_vn_addr * * Get the virtual network address used by an NVE based on it's RFD * * input: * rfd: rfapi descriptor returned by rfapi_open or rfapi_create_generic * * output: * * return value: * vn NVE virtual network address *------------------------------------------*/ struct rfapi_ip_addr *rfapi_get_vn_addr(void *rfd) { struct rfapi_descriptor *rrfd = (struct rfapi_descriptor *)rfd; return &rrfd->vn_addr; } /*------------------------------------------ * rfapi_get_un_addr * * Get the underlay network address used by an NVE based on it's RFD * * input: * rfd: rfapi descriptor returned by rfapi_open or rfapi_create_generic * * output: * * return value: * un NVE underlay network address *------------------------------------------*/ struct rfapi_ip_addr *rfapi_get_un_addr(void *rfd) { struct rfapi_descriptor *rrfd = (struct rfapi_descriptor *)rfd; return &rrfd->un_addr; } int rfapi_ip_addr_cmp(struct rfapi_ip_addr *a1, struct rfapi_ip_addr *a2) { if 
(a1->addr_family != a2->addr_family) return a1->addr_family - a2->addr_family; if (a1->addr_family == AF_INET) { return IPV4_ADDR_CMP(&a1->addr.v4, &a2->addr.v4); } if (a1->addr_family == AF_INET6) { return IPV6_ADDR_CMP(&a1->addr.v6, &a2->addr.v6); } assert(1); /* NOTREACHED */ return 1; } static int rfapi_find_node(struct bgp *bgp, struct rfapi_ip_addr *vn_addr, struct rfapi_ip_addr *un_addr, struct agg_node **node) { struct rfapi *h; struct prefix p; struct agg_node *rn; int rc; afi_t afi; if (!bgp) { return ENXIO; } h = bgp->rfapi; if (!h) { return ENXIO; } afi = family2afi(un_addr->addr_family); if (!afi) { return EAFNOSUPPORT; } if ((rc = rfapiRaddr2Qprefix(un_addr, &p))) return rc; rn = agg_node_lookup(h->un[afi], &p); if (!rn) return ENOENT; agg_unlock_node(rn); *node = rn; return 0; } int rfapi_find_rfd(struct bgp *bgp, struct rfapi_ip_addr *vn_addr, struct rfapi_ip_addr *un_addr, struct rfapi_descriptor **rfd) { struct agg_node *rn; int rc; rc = rfapi_find_node(bgp, vn_addr, un_addr, &rn); if (rc) return rc; for (*rfd = (struct rfapi_descriptor *)(rn->info); *rfd; *rfd = (*rfd)->next) { if (!rfapi_ip_addr_cmp(&(*rfd)->vn_addr, vn_addr)) break; } if (!*rfd) return ENOENT; return 0; } /*------------------------------------------ * rfapi_find_handle * * input: * un underlay network address * vn virtual network address * * output: * pHandle pointer to location to store handle * * return value: * 0 Success * ENOENT no matching handle * ENXIO BGP or VNC not configured *------------------------------------------*/ static int rfapi_find_handle(struct bgp *bgp, struct rfapi_ip_addr *vn_addr, struct rfapi_ip_addr *un_addr, rfapi_handle *handle) { struct rfapi_descriptor **rfd; rfd = (struct rfapi_descriptor **)handle; return rfapi_find_rfd(bgp, vn_addr, un_addr, rfd); } static int rfapi_find_handle_vty(struct vty *vty, struct rfapi_ip_addr *vn_addr, struct rfapi_ip_addr *un_addr, rfapi_handle *handle) { struct bgp *bgp; struct rfapi_descriptor **rfd; bgp = 
bgp_get_default(); /* assume 1 instance for now */ rfd = (struct rfapi_descriptor **)handle; return rfapi_find_rfd(bgp, vn_addr, un_addr, rfd); } static int is_valid_rfd(struct rfapi_descriptor *rfd) { rfapi_handle hh; if (!rfd || rfd->bgp == NULL) return 0; if (CHECK_FLAG( rfd->flags, RFAPI_HD_FLAG_IS_VRF)) /* assume VRF/internal are valid */ return 1; if (rfapi_find_handle(rfd->bgp, &rfd->vn_addr, &rfd->un_addr, &hh)) return 0; if (rfd != hh) return 0; return 1; } /* * check status of descriptor */ int rfapi_check(void *handle) { struct rfapi_descriptor *rfd = (struct rfapi_descriptor *)handle; rfapi_handle hh; int rc; if (!rfd || rfd->bgp == NULL) return EINVAL; if (CHECK_FLAG( rfd->flags, RFAPI_HD_FLAG_IS_VRF)) /* assume VRF/internal are valid */ return 0; if ((rc = rfapi_find_handle(rfd->bgp, &rfd->vn_addr, &rfd->un_addr, &hh))) return rc; if (rfd != hh) return ENOENT; if (!rfd->rfg) return ESTALE; return 0; } void del_vnc_route(struct rfapi_descriptor *rfd, struct peer *peer, /* rfd->peer for RFP regs */ struct bgp *bgp, safi_t safi, const struct prefix *p, struct prefix_rd *prd, uint8_t type, uint8_t sub_type, struct rfapi_nexthop *lnh, int kill) { afi_t afi; /* of the VN address */ struct bgp_dest *bn; struct bgp_path_info *bpi; struct prefix_rd prd0; afi = family2afi(p->family); assert(afi == AFI_IP || afi == AFI_IP6); if (safi == SAFI_ENCAP) { memset(&prd0, 0, sizeof(prd0)); prd0.family = AF_UNSPEC; prd0.prefixlen = 64; prd = &prd0; } bn = bgp_afi_node_get(bgp->rib[afi][safi], afi, safi, p, prd); vnc_zlog_debug_verbose( "%s: peer=%p, prefix=%pFX, prd=%pRDP afi=%d, safi=%d bn=%p, bn->info=%p", __func__, peer, p, prd, afi, safi, bn, (bn ? bgp_dest_get_bgp_path_info(bn) : NULL)); for (bpi = (bn ? 
bgp_dest_get_bgp_path_info(bn) : NULL); bpi; bpi = bpi->next) { vnc_zlog_debug_verbose( "%s: trying bpi=%p, bpi->peer=%p, bpi->type=%d, bpi->sub_type=%d, bpi->extra->vnc.export.rfapi_handle=%p, local_pref=%" PRIu64, __func__, bpi, bpi->peer, bpi->type, bpi->sub_type, (bpi->extra ? bpi->extra->vnc->vnc.export.rfapi_handle : NULL), CHECK_FLAG(bpi->attr->flag, ATTR_FLAG_BIT(BGP_ATTR_LOCAL_PREF) ? bpi->attr->local_pref : 0)); if (bpi->peer == peer && bpi->type == type && bpi->sub_type == sub_type && bpi->extra && bpi->extra->vnc->vnc.export.rfapi_handle == (void *)rfd) { vnc_zlog_debug_verbose("%s: matched it", __func__); break; } } if (lnh) { /* * lnh set means to JUST delete the local nexthop from this * route. Leave the route itself in place. * TBD add return code reporting of success/failure */ if (!bpi || !bpi->extra || !bpi->extra->vnc->vnc.export.local_nexthops) { /* * no local nexthops */ vnc_zlog_debug_verbose( "%s: lnh list already empty at prefix %pFX", __func__, p); goto done; } /* * look for it */ struct listnode *node; struct rfapi_nexthop *pLnh = NULL; for (ALL_LIST_ELEMENTS_RO(bpi->extra->vnc->vnc.export .local_nexthops, node, pLnh)) { if (prefix_same(&pLnh->addr, &lnh->addr)) { break; } } if (pLnh) { listnode_delete(bpi->extra->vnc->vnc.export.local_nexthops, pLnh); /* silly rabbit, listnode_delete doesn't invoke * list->del on data */ rfapi_nexthop_free(pLnh); } else { vnc_zlog_debug_verbose("%s: desired lnh not found %pFX", __func__, p); } goto done; } /* * loop back to import tables * Do this before removing from BGP RIB because rfapiProcessWithdraw * might refer to it */ rfapiProcessWithdraw(peer, rfd, p, prd, NULL, afi, safi, type, kill); if (bpi) { vnc_zlog_debug_verbose( "%s: Found route (safi=%d) to delete at prefix %pFX", __func__, safi, p); if (safi == SAFI_MPLS_VPN) { struct bgp_dest *pdest = NULL; struct bgp_table *table = NULL; pdest = bgp_node_get(bgp->rib[afi][safi], (struct prefix *)prd); table = bgp_dest_get_bgp_table_info(pdest); if 
(table) vnc_import_bgp_del_vnc_host_route_mode_resolve_nve( bgp, prd, table, p, bpi); bgp_dest_unlock_node(pdest); } /* * Delete local_nexthops list */ if (bpi->extra && bpi->extra->vnc->vnc.export.local_nexthops) list_delete(&bpi->extra->vnc->vnc.export.local_nexthops); bgp_aggregate_decrement(bgp, p, bpi, afi, safi); bgp_path_info_delete(bn, bpi); bgp_process(bgp, bn, bpi, afi, safi); } else { vnc_zlog_debug_verbose( "%s: Couldn't find route (safi=%d) at prefix %pFX", __func__, safi, p); } done: bgp_dest_unlock_node(bn); } struct rfapi_nexthop *rfapi_nexthop_new(struct rfapi_nexthop *copyme) { struct rfapi_nexthop *new = XCALLOC(MTYPE_RFAPI_NEXTHOP, sizeof(struct rfapi_nexthop)); if (copyme) *new = *copyme; return new; } void rfapi_nexthop_free(void *p) { struct rfapi_nexthop *goner = p; XFREE(MTYPE_RFAPI_NEXTHOP, goner); } struct rfapi_vn_option *rfapi_vn_options_dup(struct rfapi_vn_option *existing) { struct rfapi_vn_option *p; struct rfapi_vn_option *head = NULL; struct rfapi_vn_option *tail = NULL; for (p = existing; p; p = p->next) { struct rfapi_vn_option *new; new = XCALLOC(MTYPE_RFAPI_VN_OPTION, sizeof(struct rfapi_vn_option)); *new = *p; new->next = NULL; if (tail) (tail)->next = new; tail = new; if (!head) { head = new; } } return head; } void rfapi_un_options_free(struct rfapi_un_option *p) { struct rfapi_un_option *next; while (p) { next = p->next; XFREE(MTYPE_RFAPI_UN_OPTION, p); p = next; } } void rfapi_vn_options_free(struct rfapi_vn_option *p) { struct rfapi_vn_option *next; while (p) { next = p->next; XFREE(MTYPE_RFAPI_VN_OPTION, p); p = next; } } /* Based on bgp_redistribute_add() */ void add_vnc_route(struct rfapi_descriptor *rfd, /* cookie, VPN UN addr, peer */ struct bgp *bgp, int safi, const struct prefix *p, struct prefix_rd *prd, struct rfapi_ip_addr *nexthop, uint32_t *local_pref, uint32_t *lifetime, /* NULL => dont send lifetime */ struct bgp_tea_options *rfp_options, struct rfapi_un_option *options_un, struct rfapi_vn_option 
*options_vn, struct ecommunity *rt_export_list, /* Copied, not consumed */ uint32_t *med, /* NULL => don't set med */ uint32_t *label, /* low order 3 bytes */ uint8_t type, uint8_t sub_type, /* RFP, NORMAL or REDIST */ int flags) { afi_t afi; /* of the VN address */ struct bgp_labels bgp_labels = {}; struct bgp_path_info *new; struct bgp_path_info *bpi; struct bgp_dest *bn; struct attr attr = {0}; struct attr *new_attr; uint32_t label_val; struct bgp_attr_encap_subtlv *encaptlv; char buf[PREFIX_STRLEN]; struct rfapi_nexthop *lnh = NULL; /* local nexthop */ struct rfapi_vn_option *vo; struct rfapi_l2address_option *l2o = NULL; struct rfapi_ip_addr *un_addr = &rfd->un_addr; bgp_encap_types TunnelType = BGP_ENCAP_TYPE_RESERVED; struct bgp_redist *red; if (safi == SAFI_ENCAP && !(bgp->rfapi_cfg->flags & BGP_VNC_CONFIG_ADV_UN_METHOD_ENCAP)) { /* * Encap mode not enabled. UN addresses will be communicated * via VNC Tunnel subtlv instead. */ vnc_zlog_debug_verbose( "%s: encap mode not enabled, not adding SAFI_ENCAP route", __func__); return; } for (vo = options_vn; vo; vo = vo->next) { if (RFAPI_VN_OPTION_TYPE_L2ADDR == vo->type) { l2o = &vo->v.l2addr; if (RFAPI_0_ETHERADDR(&l2o->macaddr)) l2o = NULL; /* not MAC resolution */ } if (RFAPI_VN_OPTION_TYPE_LOCAL_NEXTHOP == vo->type) { lnh = &vo->v.local_nexthop; } } if (label && *label != MPLS_INVALID_LABEL) label_val = *label; else label_val = MPLS_LABEL_IMPLICIT_NULL; afi = family2afi(p->family); assert(afi == AFI_IP || afi == AFI_IP6); vnc_zlog_debug_verbose("%s: afi=%s, safi=%s", __func__, afi2str(afi), safi2str(safi)); /* Make default attribute. 
Produces already-interned attr.aspath */ /* Cripes, the memory management of attributes is byzantine */ bgp_attr_default_set(&attr, bgp, BGP_ORIGIN_INCOMPLETE); /* * At this point: * attr: static * extra: dynamically allocated, owned by attr * aspath: points to interned hash from aspath hash table */ /* * Route-specific un_options get added to the VPN SAFI * advertisement tunnel encap attribute. (the per-NVE * "default" un_options are put into the 1-per-NVE ENCAP * SAFI advertisement). The VPN SAFI also gets the * default un_options if there are no route-specific options. */ if (options_un) { struct rfapi_un_option *uo; for (uo = options_un; uo; uo = uo->next) { if (RFAPI_UN_OPTION_TYPE_TUNNELTYPE == uo->type) { TunnelType = rfapi_tunneltype_option_to_tlv( bgp, un_addr, &uo->v.tunnel, &attr, l2o != NULL); } } } else { /* * Add encap attr * These are the NVE-specific "default" un_options which are * put into the 1-per-NVE ENCAP advertisement. */ if (rfd->default_tunneltype_option.type) { TunnelType = rfapi_tunneltype_option_to_tlv( bgp, un_addr, &rfd->default_tunneltype_option, &attr, l2o != NULL); } else /* create default for local addse */ if (type == ZEBRA_ROUTE_BGP && sub_type == BGP_ROUTE_RFP) TunnelType = rfapi_tunneltype_option_to_tlv( bgp, un_addr, NULL, &attr, l2o != NULL); } if (TunnelType == BGP_ENCAP_TYPE_MPLS) { if (safi == SAFI_ENCAP) { /* Encap SAFI not used with MPLS */ vnc_zlog_debug_verbose( "%s: mpls tunnel type, encap safi omitted", __func__); aspath_unintern(&attr.aspath); /* Unintern original. */ return; } } if (local_pref) { attr.local_pref = *local_pref; attr.flag |= ATTR_FLAG_BIT(BGP_ATTR_LOCAL_PREF); } if (med) { attr.med = *med; attr.flag |= ATTR_FLAG_BIT(BGP_ATTR_MULTI_EXIT_DISC); } /* override default weight assigned by bgp_attr_default_set() */ attr.weight = rfd->peer ? 
rfd->peer->weight[afi][safi] : 0; /* * NB: ticket 81: do not reset attr.aspath here because it would * cause iBGP peers to drop route */ /* * Set originator ID for routes imported from BGP directly. * These routes could be synthetic, and therefore could * reuse the peer pointers of the routes they are derived * from. Setting the originator ID to "us" prevents the * wrong originator ID from being sent when this route is * sent from a route reflector. */ if (type == ZEBRA_ROUTE_BGP_DIRECT || type == ZEBRA_ROUTE_BGP_DIRECT_EXT) { attr.flag |= ATTR_FLAG_BIT(BGP_ATTR_ORIGINATOR_ID); attr.originator_id = bgp->router_id; } /* Set up vnc attribute (sub-tlv for Prefix Lifetime) */ if (lifetime && *lifetime != RFAPI_INFINITE_LIFETIME) { uint32_t lt; encaptlv = XCALLOC(MTYPE_ENCAP_TLV, sizeof(struct bgp_attr_encap_subtlv) + 4); encaptlv->type = BGP_VNC_SUBTLV_TYPE_LIFETIME; /* prefix lifetime */ encaptlv->length = 4; lt = htonl(*lifetime); memcpy(encaptlv->value, <, 4); bgp_attr_set_vnc_subtlvs(&attr, encaptlv); vnc_zlog_debug_verbose( "%s: set Encap Attr Prefix Lifetime to %d", __func__, *lifetime); } /* add rfp options to vnc attr */ if (rfp_options) { if (flags & RFAPI_AHR_RFPOPT_IS_VNCTLV) { struct bgp_attr_encap_subtlv *vnc_subtlvs = bgp_attr_get_vnc_subtlvs(&attr); /* * this flag means we're passing a pointer to an * existing encap tlv chain which we should copy. 
* It's a hack to avoid adding yet another argument * to add_vnc_route() */ encaptlv = encap_tlv_dup( (struct bgp_attr_encap_subtlv *)rfp_options); if (vnc_subtlvs) vnc_subtlvs->next = encaptlv; else bgp_attr_set_vnc_subtlvs(&attr, encaptlv); } else { struct bgp_tea_options *hop; /* XXX max of one tlv present so far from above code */ struct bgp_attr_encap_subtlv *tail = bgp_attr_get_vnc_subtlvs(&attr); for (hop = rfp_options; hop; hop = hop->next) { /* * Construct subtlv */ encaptlv = XCALLOC( MTYPE_ENCAP_TLV, sizeof(struct bgp_attr_encap_subtlv) + 2 + hop->length); encaptlv->type = BGP_VNC_SUBTLV_TYPE_RFPOPTION; /* RFP option */ encaptlv->length = 2 + hop->length; *((uint8_t *)(encaptlv->value) + 0) = hop->type; *((uint8_t *)(encaptlv->value) + 1) = hop->length; memcpy(((uint8_t *)encaptlv->value) + 2, hop->value, hop->length); /* * add to end of subtlv chain */ if (tail) tail->next = encaptlv; else bgp_attr_set_vnc_subtlvs(&attr, encaptlv); tail = encaptlv; } } } /* * At this point: * attr: static * extra: dynamically allocated, owned by attr * vnc_subtlvs: dynamic chain, length 1 * aspath: points to interned hash from aspath hash table */ bgp_attr_set_ecommunity(&attr, ecommunity_new()); assert(bgp_attr_get_ecommunity(&attr)); if (TunnelType != BGP_ENCAP_TYPE_MPLS && TunnelType != BGP_ENCAP_TYPE_RESERVED) { /* * Add BGP Encapsulation Extended Community. Format described in * section 4.5 of RFC 5512. * Always include when not MPLS type, to disambiguate this case. 
*/ struct ecommunity_val beec; memset(&beec, 0, sizeof(beec)); beec.val[0] = ECOMMUNITY_ENCODE_OPAQUE; beec.val[1] = ECOMMUNITY_OPAQUE_SUBTYPE_ENCAP; beec.val[6] = ((TunnelType) >> 8) & 0xff; beec.val[7] = (TunnelType)&0xff; ecommunity_add_val(bgp_attr_get_ecommunity(&attr), &beec, false, false); } /* * Add extended community attributes to match rt export list */ if (rt_export_list) { bgp_attr_set_ecommunity( &attr, ecommunity_merge(bgp_attr_get_ecommunity(&attr), rt_export_list)); } struct ecommunity *ecomm = bgp_attr_get_ecommunity(&attr); if (!ecomm->size) { ecommunity_free(&ecomm); bgp_attr_set_ecommunity(&attr, NULL); } vnc_zlog_debug_verbose("%s: attr.ecommunity=%p", __func__, ecomm); /* * At this point: * attr: static * extra: dynamically allocated, owned by attr * vnc_subtlvs: dynamic chain, length 1 * ecommunity: dynamic 2-part * aspath: points to interned hash from aspath hash table */ /* stuff nexthop in attr_extra; which field depends on IPv4 or IPv6 */ switch (nexthop->addr_family) { case AF_INET: /* * set this field to prevent bgp_route.c code from setting * mp_nexthop_global_in to self */ attr.nexthop.s_addr = nexthop->addr.v4.s_addr; attr.mp_nexthop_global_in = nexthop->addr.v4; attr.mp_nexthop_len = BGP_ATTR_NHLEN_IPV4; break; case AF_INET6: attr.mp_nexthop_global = nexthop->addr.v6; attr.mp_nexthop_len = BGP_ATTR_NHLEN_IPV6_GLOBAL; break; default: assert(0); } prefix2str(p, buf, sizeof(buf)); /* * At this point: * * attr: static * extra: dynamically allocated, owned by attr * vnc_subtlvs: dynamic chain, length 1 * ecommunity: dynamic 2-part * aspath: points to interned hash from aspath hash table */ red = bgp_redist_lookup(bgp, afi, type, 0); if (red && red->redist_metric_flag) { attr.med = red->redist_metric; attr.flag |= ATTR_FLAG_BIT(BGP_ATTR_MULTI_EXIT_DISC); } bn = bgp_afi_node_get(bgp->rib[afi][safi], afi, safi, p, prd); /* * bgp_attr_intern creates a new reference to a cached * attribute, but leaves the following bits of trash: * - old attr 
* - old attr->extra (free via bgp_attr_extra_free(attr)) * * Note that it frees the original attr->extra->ecommunity * but leaves the new attribute pointing to the ORIGINAL * vnc options (which therefore we needn't free from the * static attr) */ new_attr = bgp_attr_intern(&attr); aspath_unintern(&attr.aspath); /* Unintern original. */ /* * At this point: * * attr: static * extra: dynamically allocated, owned by attr * vnc_subtlvs: dynamic chain, length 1 * ecommunity: POINTS TO INTERNED ecom, THIS REF NOT COUNTED * * new_attr: an attr that is part of the hash table, distinct * from attr which is static. * extra: dynamically allocated, owned by new_attr (in hash table) * vnc_subtlvs: POINTS TO SAME dynamic chain AS attr * ecommunity: POINTS TO interned/refcounted dynamic 2-part AS attr * aspath: POINTS TO interned/refcounted hashed block */ for (bpi = bgp_dest_get_bgp_path_info(bn); bpi; bpi = bpi->next) { /* probably only need to check * bpi->extra->vnc->vnc.export.rfapi_handle */ if (bpi->peer == rfd->peer && bpi->type == type && bpi->sub_type == sub_type && bpi->extra && bpi->extra->vnc->vnc.export.rfapi_handle == (void *)rfd) { break; } } if (bpi) { /* * Adding new local_nexthop, which does not by itself change * what is advertised via BGP */ if (lnh) { if (!bpi->extra->vnc->vnc.export.local_nexthops) { /* TBD make arrangements to free when needed */ bpi->extra->vnc->vnc.export.local_nexthops = list_new(); bpi->extra->vnc->vnc.export.local_nexthops->del = rfapi_nexthop_free; } /* * already present? 
*/ struct listnode *node; struct rfapi_nexthop *pLnh = NULL; for (ALL_LIST_ELEMENTS_RO(bpi->extra->vnc->vnc.export .local_nexthops, node, pLnh)) { if (prefix_same(&pLnh->addr, &lnh->addr)) { break; } } /* * Not present, add new one */ if (!pLnh) { pLnh = rfapi_nexthop_new(lnh); listnode_add(bpi->extra->vnc->vnc.export .local_nexthops, pLnh); } } if (attrhash_cmp(bpi->attr, new_attr) && !CHECK_FLAG(bpi->flags, BGP_PATH_REMOVED)) { bgp_attr_unintern(&new_attr); bgp_dest_unlock_node(bn); vnc_zlog_debug_any( "%s: Found route (safi=%d) at prefix %s, no change", __func__, safi, buf); goto done; } else { /* The attribute is changed. */ bgp_path_info_set_flag(bn, bpi, BGP_PATH_ATTR_CHANGED); if (safi == SAFI_MPLS_VPN) { struct bgp_dest *pdest = NULL; struct bgp_table *table = NULL; pdest = bgp_node_get(bgp->rib[afi][safi], (struct prefix *)prd); table = bgp_dest_get_bgp_table_info(pdest); if (table) vnc_import_bgp_del_vnc_host_route_mode_resolve_nve( bgp, prd, table, p, bpi); bgp_dest_unlock_node(pdest); } /* Rewrite BGP route information. */ if (CHECK_FLAG(bpi->flags, BGP_PATH_REMOVED)) bgp_path_info_restore(bn, bpi); else bgp_aggregate_decrement(bgp, p, bpi, afi, safi); bgp_attr_unintern(&bpi->attr); bpi->attr = new_attr; bpi->uptime = monotime(NULL); if (safi == SAFI_MPLS_VPN) { struct bgp_dest *pdest = NULL; struct bgp_table *table = NULL; pdest = bgp_node_get(bgp->rib[afi][safi], (struct prefix *)prd); table = bgp_dest_get_bgp_table_info(pdest); if (table) vnc_import_bgp_add_vnc_host_route_mode_resolve_nve( bgp, prd, table, p, bpi); bgp_dest_unlock_node(pdest); } /* Process change. 
*/ bgp_aggregate_increment(bgp, p, bpi, afi, safi); bgp_process(bgp, bn, bpi, afi, safi); bgp_dest_unlock_node(bn); vnc_zlog_debug_any( "%s: Found route (safi=%d) at prefix %s, changed attr", __func__, safi, buf); goto done; } } new = info_make(type, sub_type, 0, rfd->peer, new_attr, NULL); SET_FLAG(new->flags, BGP_PATH_VALID); /* save backref to rfapi handle */ bgp_path_info_extra_get(new); new->extra->vnc = XCALLOC(MTYPE_BGP_ROUTE_EXTRA_VNC, sizeof(struct bgp_path_info_extra_vnc)); new->extra->vnc->vnc.export.rfapi_handle = (void *)rfd; encode_label(label_val, &bgp_labels.label[0]); bgp_labels.num_labels = 1; new->extra->labels = bgp_labels_intern(&bgp_labels); /* debug */ if (VNC_DEBUG(VERBOSE)) { vnc_zlog_debug_verbose("%s: printing BPI", __func__); rfapiPrintBi(NULL, new); } bgp_aggregate_increment(bgp, p, new, afi, safi); bgp_path_info_add(bn, new); if (safi == SAFI_MPLS_VPN) { struct bgp_dest *pdest = NULL; struct bgp_table *table = NULL; pdest = bgp_node_get(bgp->rib[afi][safi], (struct prefix *)prd); table = bgp_dest_get_bgp_table_info(pdest); if (table) vnc_import_bgp_add_vnc_host_route_mode_resolve_nve( bgp, prd, table, p, new); bgp_dest_unlock_node(pdest); encode_label(label_val, &bn->local_label); } bgp_dest_unlock_node(bn); bgp_process(bgp, bn, new, afi, safi); vnc_zlog_debug_any( "%s: Added route (safi=%s) at prefix %s (bn=%p, prd=%pRDP)", __func__, safi2str(safi), buf, bn, prd); done: /* Loop back to import tables */ rfapiProcessUpdate(rfd->peer, rfd, p, prd, new_attr, afi, safi, type, sub_type, &label_val); vnc_zlog_debug_verbose("%s: looped back import route (safi=%d)", __func__, safi); } uint32_t rfp_cost_to_localpref(uint8_t cost) { return 255 - cost; } static void rfapiTunnelRouteAnnounce(struct bgp *bgp, struct rfapi_descriptor *rfd, uint32_t *pLifetime) { struct prefix_rd prd; struct prefix pfx_vn; int rc; uint32_t local_pref = rfp_cost_to_localpref(0); rc = rfapiRaddr2Qprefix(&(rfd->vn_addr), &pfx_vn); assert(!rc); /* * Construct route 
distinguisher = 0 */ memset(&prd, 0, sizeof(prd)); prd.family = AF_UNSPEC; prd.prefixlen = 64; add_vnc_route(rfd, /* rfapi descr, for export list & backref */ bgp, /* which bgp instance */ SAFI_ENCAP, /* which SAFI */ &pfx_vn, /* prefix to advertise */ &prd, /* route distinguisher to use */ &rfd->un_addr, /* nexthop */ &local_pref, pLifetime, /* max lifetime of child VPN routes */ NULL, /* no rfp options for ENCAP safi */ NULL, /* rfp un options */ NULL, /* rfp vn options */ rfd->rt_export_list, NULL, /* med */ NULL, /* label: default */ ZEBRA_ROUTE_BGP, BGP_ROUTE_RFP, 0); } /*********************************************************************** * RFP processing behavior configuration ***********************************************************************/ /*------------------------------------------ * rfapi_rfp_set_configuration * * This is used to change rfapi's processing behavior based on * RFP requirements. * * input: * rfp_start_val value returned by rfp_start * rfapi_rfp_cfg Pointer to configuration structure * * output: * none * * return value: * 0 Success * ENXIO Unabled to locate configured BGP/VNC --------------------------------------------*/ int rfapi_rfp_set_configuration(void *rfp_start_val, struct rfapi_rfp_cfg *new) { struct rfapi_rfp_cfg *rcfg; struct bgp *bgp; bgp = rfapi_bgp_lookup_by_rfp(rfp_start_val); if (!new || !bgp || !bgp->rfapi_cfg) return ENXIO; rcfg = &bgp->rfapi_cfg->rfp_cfg; rcfg->download_type = new->download_type; rcfg->ftd_advertisement_interval = new->ftd_advertisement_interval; rcfg->holddown_factor = new->holddown_factor; if (rcfg->use_updated_response != new->use_updated_response) { rcfg->use_updated_response = new->use_updated_response; if (rcfg->use_updated_response) rfapiMonitorCallbacksOn(bgp); else rfapiMonitorCallbacksOff(bgp); } if (rcfg->use_removes != new->use_removes) { rcfg->use_removes = new->use_removes; if (rcfg->use_removes) rfapiMonitorResponseRemovalOn(bgp); else rfapiMonitorResponseRemovalOff(bgp); } return 
0; } /*------------------------------------------ * rfapi_rfp_set_cb_methods * * Change registered callback functions for asynchronous notifications * from RFAPI to the RFP client. * * input: * rfp_start_val value returned by rfp_start * methods Pointer to struct rfapi_rfp_cb_methods containing * pointers to callback methods as described above * * return value: * 0 Success * ENXIO BGP or VNC not configured *------------------------------------------*/ int rfapi_rfp_set_cb_methods(void *rfp_start_val, struct rfapi_rfp_cb_methods *methods) { struct rfapi *h; struct bgp *bgp; bgp = rfapi_bgp_lookup_by_rfp(rfp_start_val); if (!bgp) return ENXIO; h = bgp->rfapi; if (!h) return ENXIO; h->rfp_methods = *methods; return 0; } /*********************************************************************** * NVE Sessions ***********************************************************************/ /* * Caller must supply an already-allocated rfd with the "caller" * fields already set (vn_addr, un_addr, callback, cookie) * The advertised_prefixes[] array elements should be NULL to * have this function set them to newly-allocated radix trees. */ static int rfapi_open_inner(struct rfapi_descriptor *rfd, struct bgp *bgp, struct rfapi *h, struct rfapi_nve_group_cfg *rfg) { int ret; if (h->flags & RFAPI_INCALLBACK) return EDEADLK; /* * Fill in configured fields */ /* * If group's RD is specified as "auto", then fill in based * on NVE's VN address */ rfd->rd = rfg->rd; if (rfd->rd.family == AF_UNIX) { ret = rfapi_set_autord_from_vn(&rfd->rd, &rfd->vn_addr); if (ret != 0) return ret; } rfd->rt_export_list = (rfg->rt_export_list) ? 
ecommunity_dup(rfg->rt_export_list) : NULL; rfd->response_lifetime = rfg->response_lifetime; rfd->rfg = rfg; /* * Fill in BGP peer structure */ rfd->peer = peer_new(bgp); rfd->peer->connection->status = Established; /* keep bgp core happy */ bgp_peer_connection_buffers_free(rfd->peer->connection); { /* base code assumes have valid host pointer */ char buf[INET6_ADDRSTRLEN]; buf[0] = 0; if (rfd->vn_addr.addr_family == AF_INET) { inet_ntop(AF_INET, &rfd->vn_addr.addr.v4, buf, sizeof(buf)); } else if (rfd->vn_addr.addr_family == AF_INET6) { inet_ntop(AF_INET6, &rfd->vn_addr.addr.v6, buf, sizeof(buf)); } rfd->peer->host = XSTRDUP(MTYPE_BGP_PEER_HOST, buf); } /* Mark peer as belonging to HD */ SET_FLAG(rfd->peer->flags, PEER_FLAG_IS_RFAPI_HD); /* * Set min prefix lifetime to max value so it will get set * upon first rfapi_register() */ rfd->min_prefix_lifetime = UINT32_MAX; /* * Allocate response tables if needed */ #define RFD_RTINIT_AFI(rh, ary, afi) \ do { \ if (!ary[afi]) { \ ary[afi] = agg_table_init(); \ agg_set_table_info(ary[afi], rh); \ } \ } while (0) #define RFD_RTINIT(rh, ary) \ do { \ RFD_RTINIT_AFI(rh, ary, AFI_IP); \ RFD_RTINIT_AFI(rh, ary, AFI_IP6); \ RFD_RTINIT_AFI(rh, ary, AFI_L2VPN); \ } while (0) RFD_RTINIT(rfd, rfd->rib); RFD_RTINIT(rfd, rfd->rib_pending); RFD_RTINIT(rfd, rfd->rsp_times); /* * Link to Import Table */ rfd->import_table = rfg->rfapi_import_table; rfd->import_table->refcount += 1; rfapiApInit(&rfd->advertised); /* * add this NVE descriptor to the list of NVEs in the NVE group */ if (!rfg->nves) { rfg->nves = list_new(); } listnode_add(rfg->nves, rfd); vnc_direct_bgp_add_nve(bgp, rfd); vnc_zebra_add_nve(bgp, rfd); return 0; } /* moved from rfapi_register */ int rfapi_init_and_open(struct bgp *bgp, struct rfapi_descriptor *rfd, struct rfapi_nve_group_cfg *rfg) { struct rfapi *h = bgp->rfapi; char buf_vn[BUFSIZ]; char buf_un[BUFSIZ]; afi_t afi_vn, afi_un; struct prefix pfx_un; struct agg_node *rn; rfd->open_time = monotime(NULL); if 
(rfg->type == RFAPI_GROUP_CFG_VRF) SET_FLAG(rfd->flags, RFAPI_HD_FLAG_IS_VRF); rfapiRfapiIpAddr2Str(&rfd->vn_addr, buf_vn, BUFSIZ); rfapiRfapiIpAddr2Str(&rfd->un_addr, buf_un, BUFSIZ); vnc_zlog_debug_verbose("%s: new RFD with VN=%s UN=%s cookie=%p", __func__, buf_vn, buf_un, rfd->cookie); if (rfg->type != RFAPI_GROUP_CFG_VRF) /* unclear if needed for VRF */ { listnode_add(&h->descriptors, rfd); if (h->descriptors.count > h->stat.max_descriptors) { h->stat.max_descriptors = h->descriptors.count; } /* * attach to UN radix tree */ afi_vn = family2afi(rfd->vn_addr.addr_family); afi_un = family2afi(rfd->un_addr.addr_family); assert(afi_vn && afi_un); assert(!rfapiRaddr2Qprefix(&rfd->un_addr, &pfx_un)); rn = agg_node_get(h->un[afi_un], &pfx_un); assert(rn); rfd->next = rn->info; rn->info = rfd; rfd->un_node = rn; } return rfapi_open_inner(rfd, bgp, h, rfg); } struct rfapi_vn_option *rfapiVnOptionsDup(struct rfapi_vn_option *orig) { struct rfapi_vn_option *head = NULL; struct rfapi_vn_option *tail = NULL; struct rfapi_vn_option *vo = NULL; for (vo = orig; vo; vo = vo->next) { struct rfapi_vn_option *new; new = XCALLOC(MTYPE_RFAPI_VN_OPTION, sizeof(struct rfapi_vn_option)); memcpy(new, vo, sizeof(struct rfapi_vn_option)); new->next = NULL; if (tail) { tail->next = new; } else { head = tail = new; } } return head; } struct rfapi_un_option *rfapiUnOptionsDup(struct rfapi_un_option *orig) { struct rfapi_un_option *head = NULL; struct rfapi_un_option *tail = NULL; struct rfapi_un_option *uo = NULL; for (uo = orig; uo; uo = uo->next) { struct rfapi_un_option *new; new = XCALLOC(MTYPE_RFAPI_UN_OPTION, sizeof(struct rfapi_un_option)); memcpy(new, uo, sizeof(struct rfapi_un_option)); new->next = NULL; if (tail) { tail->next = new; } else { head = tail = new; } } return head; } struct bgp_tea_options *rfapiOptionsDup(struct bgp_tea_options *orig) { struct bgp_tea_options *head = NULL; struct bgp_tea_options *tail = NULL; struct bgp_tea_options *hop = NULL; for (hop = orig; hop; hop 
= hop->next) { struct bgp_tea_options *new; new = XCALLOC(MTYPE_BGP_TEA_OPTIONS, sizeof(struct bgp_tea_options)); memcpy(new, hop, sizeof(struct bgp_tea_options)); new->next = NULL; if (hop->value) { new->value = XCALLOC(MTYPE_BGP_TEA_OPTIONS_VALUE, hop->length); memcpy(new->value, hop->value, hop->length); } if (tail) { tail->next = new; } else { head = tail = new; } } return head; } void rfapiFreeBgpTeaOptionChain(struct bgp_tea_options *p) { struct bgp_tea_options *next; while (p) { next = p->next; XFREE(MTYPE_BGP_TEA_OPTIONS_VALUE, p->value); XFREE(MTYPE_BGP_TEA_OPTIONS, p); p = next; } } void rfapiAdbFree(struct rfapi_adb *adb) { XFREE(MTYPE_RFAPI_ADB, adb); } static int rfapi_query_inner(void *handle, struct rfapi_ip_addr *target, struct rfapi_l2address_option *l2o, /* may be NULL */ struct rfapi_next_hop_entry **ppNextHopEntry) { afi_t afi; struct prefix p; struct prefix p_original; struct agg_node *rn; struct rfapi_descriptor *rfd = (struct rfapi_descriptor *)handle; struct bgp *bgp = rfd->bgp; struct rfapi_next_hop_entry *pNHE = NULL; struct rfapi_ip_addr *self_vn_addr = NULL; int eth_is_0 = 0; int use_eth_resolution = 0; struct rfapi_next_hop_entry *i_nhe; /* preemptive */ if (!bgp) { vnc_zlog_debug_verbose("%s: No BGP instance, returning ENXIO", __func__); return ENXIO; } if (!bgp->rfapi) { vnc_zlog_debug_verbose("%s: No RFAPI instance, returning ENXIO", __func__); return ENXIO; } if (bgp->rfapi->flags & RFAPI_INCALLBACK) { vnc_zlog_debug_verbose( "%s: Called during calback, returning EDEADLK", __func__); return EDEADLK; } if (!is_valid_rfd(rfd)) { vnc_zlog_debug_verbose("%s: invalid handle, returning EBADF", __func__); return EBADF; } rfd->rsp_counter++; /* dedup: identify this generation */ rfd->rsp_time = monotime(NULL); /* response content dedup */ rfd->ftd_last_allowed_time = monotime(NULL) - bgp->rfapi_cfg->rfp_cfg.ftd_advertisement_interval; if (l2o) { if (!memcmp(l2o->macaddr.octet, rfapi_ethaddr0.octet, ETH_ALEN)) { eth_is_0 = 1; } /* per t/c 
Paul/Lou 151022 */ if (!eth_is_0 || l2o->logical_net_id) { use_eth_resolution = 1; } } if (ppNextHopEntry) *ppNextHopEntry = NULL; /* * Save original target in prefix form. In case of L2-based queries, * p_original will be modified to reflect the L2 target */ assert(!rfapiRaddr2Qprefix(target, &p_original)); if (bgp->rfapi_cfg->rfp_cfg.download_type == RFAPI_RFP_DOWNLOAD_FULL) { /* convert query to 0/0 when full-table download is enabled */ memset((char *)&p, 0, sizeof(p)); p.family = target->addr_family; } else { p = p_original; } { char *s; vnc_zlog_debug_verbose("%s(rfd=%p, target=%pFX, ppNextHop=%p)", __func__, rfd, &p, ppNextHopEntry); s = ecommunity_ecom2str(rfd->import_table->rt_import_list, ECOMMUNITY_FORMAT_ROUTE_MAP, 0); vnc_zlog_debug_verbose( "%s rfd->import_table=%p, rfd->import_table->rt_import_list: %s", __func__, rfd->import_table, s); XFREE(MTYPE_ECOMMUNITY_STR, s); } afi = family2afi(p.family); assert(afi); if (CHECK_FLAG(bgp->rfapi_cfg->flags, BGP_VNC_CONFIG_FILTER_SELF_FROM_RSP)) { self_vn_addr = &rfd->vn_addr; } if (use_eth_resolution) { uint32_t logical_net_id = l2o->logical_net_id; struct ecommunity *l2com; /* * fix up p_original to contain L2 address */ rfapiL2o2Qprefix(l2o, &p_original); l2com = bgp_rfapi_get_ecommunity_by_lni_label( bgp, 1, logical_net_id, l2o->label); if (l2com) { uint8_t *v = l2com->val; logical_net_id = (v[5] << 16) + (v[6] << 8) + (v[7]); } /* * Ethernet/L2-based lookup * * Always returns IT node corresponding to route */ if (RFAPI_RFP_DOWNLOAD_FULL == bgp->rfapi_cfg->rfp_cfg.download_type) { eth_is_0 = 1; } rn = rfapiMonitorEthAdd( bgp, rfd, (eth_is_0 ? 
&rfapi_ethaddr0 : &l2o->macaddr), logical_net_id); if (eth_is_0) { struct rfapi_ip_prefix rprefix; memset(&rprefix, 0, sizeof(rprefix)); rprefix.prefix.addr_family = target->addr_family; if (target->addr_family == AF_INET) { rprefix.length = IPV4_MAX_BITLEN; } else { rprefix.length = IPV6_MAX_BITLEN; } pNHE = rfapiEthRouteTable2NextHopList( logical_net_id, &rprefix, rfd->response_lifetime, self_vn_addr, rfd->rib[afi], &p_original); goto done; } } else { /* * IP-based lookup */ rn = rfapiMonitorAdd(bgp, rfd, &p); /* * If target address is 0, this request is special: means to * return ALL routes in the table * * Monitors for All-Routes queries get put on a special list, * not in the VPN tree */ if (RFAPI_0_PREFIX(&p)) { vnc_zlog_debug_verbose("%s: 0-prefix", __func__); /* * Generate nexthop list for caller */ pNHE = rfapiRouteTable2NextHopList( rfd->import_table->imported_vpn[afi], rfd->response_lifetime, self_vn_addr, rfd->rib[afi], &p_original); goto done; } if (rn) { agg_lock_node(rn); /* so we can unlock below */ } else { /* * returns locked node. Don't unlock yet because the * unlock * might free it before we're done with it. This * situation * could occur when rfapiMonitorGetAttachNode() returns * a * newly-created default node. 
*/ rn = rfapiMonitorGetAttachNode(rfd, &p); } } assert(rn); if (!rn->info) { agg_unlock_node(rn); vnc_zlog_debug_verbose( "%s: VPN route not found, returning ENOENT", __func__); return ENOENT; } if (VNC_DEBUG(RFAPI_QUERY)) { rfapiShowImportTable(NULL, "query", rfd->import_table->imported_vpn[afi], 1); } if (use_eth_resolution) { struct rfapi_ip_prefix rprefix; memset(&rprefix, 0, sizeof(rprefix)); rprefix.prefix.addr_family = target->addr_family; if (target->addr_family == AF_INET) { rprefix.length = IPV4_MAX_BITLEN; } else { rprefix.length = IPV6_MAX_BITLEN; } pNHE = rfapiEthRouteNode2NextHopList( rn, &rprefix, rfd->response_lifetime, self_vn_addr, rfd->rib[afi], &p_original); } else { /* * Generate answer to query */ pNHE = rfapiRouteNode2NextHopList(rn, rfd->response_lifetime, self_vn_addr, rfd->rib[afi], &p_original); } agg_unlock_node(rn); done: if (ppNextHopEntry) { /* only count if caller gets it */ ++bgp->rfapi->response_immediate_count; } if (!pNHE) { vnc_zlog_debug_verbose("%s: NO NHEs, returning ENOENT", __func__); return ENOENT; } /* * count nexthops for statistics */ for (i_nhe = pNHE; i_nhe; i_nhe = i_nhe->next) { ++rfd->stat_count_nh_reachable; } if (ppNextHopEntry) { *ppNextHopEntry = pNHE; } else { rfapi_free_next_hop_list(pNHE); } vnc_zlog_debug_verbose("%s: success", __func__); return 0; } /* * support on-the-fly reassignment of an already-open nve to a new * nve-group in the event that its original nve-group is * administratively deleted. 
*/
static int rfapi_open_rfd(struct rfapi_descriptor *rfd, struct bgp *bgp)
{
	struct rfapi_nve_group_cfg *group;
	struct rfapi_cfg *cfg;
	struct rfapi *rfapi;
	struct prefix vn_prefix;
	struct prefix un_prefix;
	int status;

	/* both the RFAPI instance and its configuration are required */
	rfapi = bgp->rfapi;
	cfg = bgp->rfapi_cfg;
	if (!rfapi || !cfg)
		return ENXIO;

	/* addresses stored in the descriptor must convert cleanly */
	status = rfapiRaddr2Qprefix(&rfd->vn_addr, &vn_prefix);
	assert(!status);
	status = rfapiRaddr2Qprefix(&rfd->un_addr, &un_prefix);
	assert(!status);

	/* locate the nve group config block matching this VN/UN pair */
	group = bgp_rfapi_cfg_match_group(cfg, &vn_prefix, &un_prefix);
	if (group == NULL)
		return ENOENT;

	/* the group must provide an export RT list and an import table */
	if (group->rt_export_list == NULL || group->rfapi_import_table == NULL)
		return ENOMSG;

	status = rfapi_open_inner(rfd, bgp, rfapi, group);
	if (status != 0)
		return status;

	/* re-advertise registered routes, now as part of the new NVE-group */
	rfapiApReadvertiseAll(bgp, rfd);

	/* re-attach callbacks to the import table unless they are disabled */
	if ((bgp->rfapi_cfg->flags & BGP_VNC_CONFIG_CALLBACK_DISABLE) == 0)
		rfapiMonitorAttachImportHd(rfd);

	return 0;
}

/*------------------------------------------
 * rfapi_open
 *
 * This function initializes a NVE record and associates it with
 * the specified VN and underlay network addresses
 *
 * input:
 *	rfp_start_val	value returned by rfp_start
 *	vn		NVE virtual network address
 *
 *	un		NVE underlay network address
 *
 *	default_options	Default options to use on registrations.
 *			For now only tunnel type is supported.
 *			May be overridden per-prefix in rfapi_register().
 *			Caller owns (rfapi_open() does not free)
 *
 *	response_cb	Pointer to next hop list update callback function or
 *			NULL when no callbacks are desired.
 *
 *	userdata	Passed to subsequent response_cb invocations.
 *
 * output:
 *	response_lifetime The length of time that responses sent to this
 *			NVE are valid.
 *
 *	pHandle		pointer to location to store rfapi handle. The
 *			handle must be passed on subsequent rfapi_ calls.
* * * return value: * 0 Success * EEXIST NVE with this {vn,un} already open * ENOENT No matching nve group config * ENOMSG Matched nve group config was incomplete * ENXIO BGP or VNC not configured * EAFNOSUPPORT Matched nve group specifies auto-assignment of RD, * but underlay network address is not IPv4 * EDEADLK Called from within a callback procedure *------------------------------------------*/ int rfapi_open(void *rfp_start_val, struct rfapi_ip_addr *vn, struct rfapi_ip_addr *un, struct rfapi_un_option *default_options, uint32_t *response_lifetime, void *userdata, /* callback cookie */ rfapi_handle *pHandle) { struct bgp *bgp; struct rfapi *h; struct rfapi_descriptor *rfd; struct rfapi_cfg *hc; struct rfapi_nve_group_cfg *rfg; struct prefix pfx_vn; struct prefix pfx_un; int rc; rfapi_handle hh = NULL; int reusing_provisional = 0; { char buf[2][INET_ADDRSTRLEN]; vnc_zlog_debug_verbose( "%s: VN=%s UN=%s", __func__, rfapiRfapiIpAddr2Str(vn, buf[0], INET_ADDRSTRLEN), rfapiRfapiIpAddr2Str(un, buf[1], INET_ADDRSTRLEN)); } assert(pHandle); *pHandle = NULL; bgp = rfapi_bgp_lookup_by_rfp(rfp_start_val); if (!bgp) return ENXIO; h = bgp->rfapi; if (!h) return ENXIO; hc = bgp->rfapi_cfg; if (!hc) return ENXIO; if (h->flags & RFAPI_INCALLBACK) return EDEADLK; rc = rfapiRaddr2Qprefix(vn, &pfx_vn); assert(!rc); rc = rfapiRaddr2Qprefix(un, &pfx_un); assert(!rc); /* * already have a descriptor with VN and UN? */ if (!rfapi_find_handle(bgp, vn, un, &hh)) { /* * we might have set up a handle for static routes before * this NVE was opened. 
In that case, reuse the handle */ rfd = hh; if (!CHECK_FLAG(rfd->flags, RFAPI_HD_FLAG_PROVISIONAL)) { return EEXIST; } /* * reuse provisional descriptor * hh is not NULL */ reusing_provisional = 1; } /* * Find the matching nve group config block */ rfg = bgp_rfapi_cfg_match_group(hc, &pfx_vn, &pfx_un); if (!rfg) { ++h->stat.count_unknown_nves; { char buf[2][INET_ADDRSTRLEN]; zlog_notice("%s: no matching group VN=%s UN=%s", __func__, rfapiRfapiIpAddr2Str(vn, buf[0], INET_ADDRSTRLEN), rfapiRfapiIpAddr2Str(un, buf[1], INET_ADDRSTRLEN)); } return ENOENT; } /* * check nve group config block for required values */ if (!rfg->rt_export_list || !rfg->rfapi_import_table) { ++h->stat.count_unknown_nves; return ENOMSG; } /* * If group config specifies auto-rd assignment, check that * VN address is IPv4|v6 so we don't fail in rfapi_open_inner(). * Check here so we don't need to unwind memory allocations, &c. */ if ((rfg->rd.family == AF_UNIX) && (vn->addr_family != AF_INET) && (vn->addr_family != AF_INET6)) { return EAFNOSUPPORT; } if (hh) { /* * reusing provisional rfd */ rfd = hh; } else { rfd = XCALLOC(MTYPE_RFAPI_DESC, sizeof(struct rfapi_descriptor)); } rfd->bgp = bgp; if (default_options) { struct rfapi_un_option *p; for (p = default_options; p; p = p->next) { if ((RFAPI_UN_OPTION_TYPE_PROVISIONAL == p->type)) { rfd->flags |= RFAPI_HD_FLAG_PROVISIONAL; } if ((RFAPI_UN_OPTION_TYPE_TUNNELTYPE == p->type)) { rfd->default_tunneltype_option = p->v.tunnel; } } } /* * Fill in caller fields */ rfd->vn_addr = *vn; rfd->un_addr = *un; rfd->cookie = userdata; if (!reusing_provisional) { rc = rfapi_init_and_open(bgp, rfd, rfg); /* * This can fail only if the VN address is IPv6 and the group * specified auto-assignment of RDs, which only works for v4, * and the check above should catch it. * * Another failure possibility is that we were called * during an rfapi callback. Also checked above. 
*/ assert(!rc); } if (response_lifetime) *response_lifetime = rfd->response_lifetime; *pHandle = rfd; return 0; } /* * For use with debug functions */ static int rfapi_set_response_cb(struct rfapi_descriptor *rfd, rfapi_response_cb_t *response_cb) { if (!is_valid_rfd(rfd)) return EBADF; rfd->response_cb = response_cb; return 0; } /* * rfapi_close_inner * * Does almost all the work of rfapi_close, except: * 1. preserves the descriptor (doesn't free it) * 2. preserves the prefix query list (i.e., rfd->mon list) * 3. preserves the advertised prefix list (rfd->advertised) * 4. preserves the rib and rib_pending tables * * The purpose of organizing it this way is to support on-the-fly * reassignment of an already-open nve to a new nve-group in the * event that its original nve-group is administratively deleted. */ static int rfapi_close_inner(struct rfapi_descriptor *rfd, struct bgp *bgp) { int rc; struct prefix pfx_vn; struct prefix_rd prd; /* currently always 0 for VN->UN */ if (!is_valid_rfd(rfd)) return EBADF; rc = rfapiRaddr2Qprefix(&rfd->vn_addr, &pfx_vn); assert(!rc); /* should never have bad AF in stored vn address */ /* * update exported routes to reflect disappearance of this NVE as * nexthop */ vnc_direct_bgp_del_nve(bgp, rfd); vnc_zebra_del_nve(bgp, rfd); /* * unlink this HD's monitors from import table */ rfapiMonitorDetachImportHd(rfd); /* * Unlink from Import Table * NB rfd->import_table will be NULL if we are closing a stale * descriptor */ if (rfd->import_table) rfapiImportTableRefDelByIt(bgp, rfd->import_table); rfd->import_table = NULL; /* * Construct route distinguisher */ memset(&prd, 0, sizeof(prd)); prd = rfd->rd; prd.family = AF_UNSPEC; prd.prefixlen = 64; /* * withdraw tunnel */ del_vnc_route(rfd, rfd->peer, bgp, SAFI_ENCAP, &pfx_vn, /* prefix being advertised */ &prd, /* route distinguisher to use (0 for ENCAP) */ ZEBRA_ROUTE_BGP, BGP_ROUTE_RFP, NULL, 0); /* no kill */ /* * Construct route distinguisher for VPN routes */ prd = rfd->rd; 
prd.family = AF_UNSPEC; prd.prefixlen = 64; /* * find all VPN routes associated with this rfd and delete them, too */ rfapiApWithdrawAll(bgp, rfd); /* * remove this nve descriptor from the list of nves * associated with the nve group */ if (rfd->rfg) { listnode_delete(rfd->rfg->nves, rfd); rfd->rfg = NULL; /* XXX mark as orphaned/stale */ } if (rfd->rt_export_list) ecommunity_free(&rfd->rt_export_list); rfd->rt_export_list = NULL; /* * free peer structure (possibly delayed until its * refcount reaches zero) */ if (rfd->peer) { vnc_zlog_debug_verbose("%s: calling peer_delete(%p), #%d", __func__, rfd->peer, rfd->peer->lock); peer_delete(rfd->peer); } rfd->peer = NULL; return 0; } int rfapi_close(void *handle) { struct rfapi_descriptor *rfd = (struct rfapi_descriptor *)handle; int rc; struct agg_node *node; struct bgp *bgp; struct rfapi *h; vnc_zlog_debug_verbose("%s: rfd=%p", __func__, rfd); #ifdef RFAPI_WHO_IS_CALLING_ME zlog_backtrace(LOG_INFO); #endif bgp = rfd->bgp; if (!bgp) return ENXIO; h = bgp->rfapi; if (!h) return ENXIO; if (!is_valid_rfd(rfd)) return EBADF; if (h->flags & RFAPI_INCALLBACK) { /* * Queue these close requests for processing after callback * is finished */ if (!CHECK_FLAG(rfd->flags, RFAPI_HD_FLAG_CLOSING_ADMINISTRATIVELY)) { work_queue_add(h->deferred_close_q, handle); vnc_zlog_debug_verbose( "%s: added handle %p to deferred close queue", __func__, handle); } return 0; } if (CHECK_FLAG(rfd->flags, RFAPI_HD_FLAG_CLOSING_ADMINISTRATIVELY)) { vnc_zlog_debug_verbose("%s administrative close rfd=%p", __func__, rfd); if (h->rfp_methods.close_cb) { vnc_zlog_debug_verbose( "%s calling close callback rfd=%p", __func__, rfd); /* * call the callback fairly early so that it can still * lookup un/vn * from handle, etc. * * NB RFAPI_INCALLBACK is tested above, so if we reach * this point * we are not already in the context of a callback. 
*/ h->flags |= RFAPI_INCALLBACK; (*h->rfp_methods.close_cb)(handle, EIDRM); h->flags &= ~RFAPI_INCALLBACK; } } if (rfd->rfg) { /* * Orphaned descriptors have already done this part, so do * only for non-orphaned descriptors. */ if ((rc = rfapi_close_inner(rfd, bgp))) return rc; } /* * Remove descriptor from UN index * (remove from chain at node) */ rc = rfapi_find_node(bgp, &rfd->vn_addr, &rfd->un_addr, &node); if (!rc) { struct rfapi_descriptor *hh; if (node->info == rfd) { node->info = rfd->next; } else { for (hh = node->info; hh; hh = hh->next) { if (hh->next == rfd) { hh->next = rfd->next; break; } } } agg_unlock_node(node); } /* * remove from descriptor list */ listnode_delete(&h->descriptors, rfd); /* * Delete monitor list items and free monitor structures */ (void)rfapiMonitorDelHd(rfd); /* * release advertised prefix data */ rfapiApRelease(&rfd->advertised); /* * Release RFP callback RIB */ rfapiRibFree(rfd); /* * free descriptor */ memset(rfd, 0, sizeof(struct rfapi_descriptor)); XFREE(MTYPE_RFAPI_DESC, rfd); return 0; } /* * Reopen a nve descriptor. If the descriptor's NVE-group * does not exist (e.g., if it has been administratively removed), * reassignment to a new NVE-group is attempted. * * If NVE-group reassignment fails, the descriptor becomes "stale" * (rfd->rfg == NULL implies "stale:). The only permissible API operation * on a stale descriptor is rfapi_close(). Any other rfapi_* API operation * on the descriptor will return ESTALE. * * Reopening a descriptor is a potentially expensive operation, because * it involves withdrawing any routes advertised by the NVE, withdrawing * the NVE's route queries, and then re-adding them all after a new * NVE-group is assigned. There are also possible route-export affects * caused by deleting and then adding the NVE: advertised prefixes * and nexthop lists for exported routes can turn over. 
*/
int rfapi_reopen(struct rfapi_descriptor *rfd, struct bgp *bgp)
{
	struct rfapi *rfapi;
	int status;

	/* tear down everything except the descriptor itself */
	status = rfapi_close_inner(rfd, bgp);
	if (status != 0)
		return status;

	/* try to attach the descriptor to a (possibly different) NVE-group */
	status = rfapi_open_rfd(rfd, bgp);
	if (status == 0)
		return 0;

	/* reassignment failed: tell the RFP, if it registered a callback */
	rfapi = bgp->rfapi;
	assert(rfapi != NULL && !CHECK_FLAG(rfapi->flags, RFAPI_INCALLBACK));

	if (!CHECK_FLAG(rfd->flags, RFAPI_HD_FLAG_CLOSING_ADMINISTRATIVELY)
	    || !rfapi || !rfapi->rfp_methods.close_cb)
		return status;

	/*
	 * NB RFAPI_INCALLBACK is tested above, so if we reach
	 * this point
	 * we are not already in the context of a callback.
	 */
	rfapi->flags |= RFAPI_INCALLBACK;
	(*rfapi->rfp_methods.close_cb)((rfapi_handle)rfd, ESTALE);
	rfapi->flags &= ~RFAPI_INCALLBACK;

	return status;
}

/*********************************************************************** * NVE Routes ***********************************************************************/ /* * Announce reachability to this prefix via the NVE */ int rfapi_register(void *handle, struct rfapi_ip_prefix *prefix, uint32_t lifetime, /* host byte order */ struct rfapi_un_option *options_un, struct rfapi_vn_option *options_vn, rfapi_register_action action) { struct rfapi_descriptor *rfd = (struct rfapi_descriptor *)handle; struct bgp *bgp; struct prefix p; struct prefix *pfx_ip = NULL; struct prefix_rd prd; afi_t afi; struct prefix pfx_mac_buf; struct prefix *pfx_mac = NULL; struct prefix pfx_vn_buf; const char *action_str = NULL; uint32_t *label = NULL; struct rfapi_vn_option *vo; struct rfapi_l2address_option *l2o = NULL; struct prefix_rd *prd_override = NULL; switch (action) { case RFAPI_REGISTER_ADD: action_str = "add"; break; case RFAPI_REGISTER_WITHDRAW: action_str = "withdraw"; break; case RFAPI_REGISTER_KILL: action_str = "kill"; break; default: assert(0); break; } /* * Inspect VN options */ for (vo = options_vn; vo; vo = vo->next) { if (RFAPI_VN_OPTION_TYPE_L2ADDR == vo->type) { l2o = &vo->v.l2addr; } if (RFAPI_VN_OPTION_TYPE_INTERNAL_RD == vo->type) { prd_override = &vo->v.internal_rd; } } /********************************************************************* *
advertise prefix *********************************************************************/ /* * set
based on