/* Routing Information Base. * Copyright (C) 1997, 98, 99, 2001 Kunihiro Ishiguro * * This file is part of GNU Zebra. * * GNU Zebra is free software; you can redistribute it and/or modify it * under the terms of the GNU General Public License as published by the * Free Software Foundation; either version 2, or (at your option) any * later version. * * GNU Zebra is distributed in the hope that it will be useful, but * WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU * General Public License for more details. * * You should have received a copy of the GNU General Public License along * with this program; see the file COPYING; if not, write to the Free Software * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA */ #include #include "command.h" #include "if.h" #include "linklist.h" #include "log.h" #include "memory.h" #include "mpls.h" #include "nexthop.h" #include "prefix.h" #include "prefix.h" #include "routemap.h" #include "sockunion.h" #include "srcdest_table.h" #include "table.h" #include "thread.h" #include "vrf.h" #include "workqueue.h" #include "nexthop_group_private.h" #include "frr_pthread.h" #include "printfrr.h" #include "frrscript.h" #include "zebra/zebra_router.h" #include "zebra/connected.h" #include "zebra/debug.h" #include "zebra/interface.h" #include "zebra/redistribute.h" #include "zebra/rib.h" #include "zebra/rt.h" #include "zebra/zapi_msg.h" #include "zebra/zebra_errors.h" #include "zebra/zebra_ns.h" #include "zebra/zebra_rnh.h" #include "zebra/zebra_routemap.h" #include "zebra/zebra_vrf.h" #include "zebra/zebra_vxlan.h" #include "zebra/zapi_msg.h" #include "zebra/zebra_dplane.h" #include "zebra/zebra_evpn_mh.h" #include "zebra/zebra_script.h" DEFINE_MGROUP(ZEBRA, "zebra"); DEFINE_MTYPE(ZEBRA, RE, "Route Entry"); DEFINE_MTYPE_STATIC(ZEBRA, RIB_DEST, "RIB destination"); DEFINE_MTYPE_STATIC(ZEBRA, RIB_UPDATE_CTX, "Rib update context object"); DEFINE_MTYPE_STATIC(ZEBRA, WQ_WRAPPER, "WQ wrapper"); /* * Event, list, and mutex for delivery of dataplane results */ static pthread_mutex_t dplane_mutex; static struct thread *t_dplane; static struct dplane_ctx_q rib_dplane_q; DEFINE_HOOK(rib_update, (struct route_node * rn, const char *reason), (rn, reason)); DEFINE_HOOK(rib_shutdown, (struct route_node * rn), (rn)); /* Meta Q's specific names */ enum meta_queue_indexes { META_QUEUE_NHG, META_QUEUE_EVPN, META_QUEUE_EARLY_ROUTE, META_QUEUE_EARLY_LABEL, META_QUEUE_CONNECTED, META_QUEUE_KERNEL, META_QUEUE_STATIC, META_QUEUE_NOTBGP, META_QUEUE_BGP, META_QUEUE_OTHER, }; /* Each route type's string and default distance value. */ static const struct { int key; uint8_t distance; enum meta_queue_indexes meta_q_map; } route_info[ZEBRA_ROUTE_MAX] = { [ZEBRA_ROUTE_NHG] = {ZEBRA_ROUTE_NHG, 255 /* Unneeded for nhg's */, META_QUEUE_NHG}, [ZEBRA_ROUTE_SYSTEM] = {ZEBRA_ROUTE_SYSTEM, 0, META_QUEUE_KERNEL}, [ZEBRA_ROUTE_KERNEL] = {ZEBRA_ROUTE_KERNEL, 0, META_QUEUE_KERNEL}, [ZEBRA_ROUTE_CONNECT] = {ZEBRA_ROUTE_CONNECT, 0, META_QUEUE_CONNECTED}, [ZEBRA_ROUTE_STATIC] = {ZEBRA_ROUTE_STATIC, 1, META_QUEUE_STATIC}, [ZEBRA_ROUTE_RIP] = {ZEBRA_ROUTE_RIP, 120, META_QUEUE_NOTBGP}, [ZEBRA_ROUTE_RIPNG] = {ZEBRA_ROUTE_RIPNG, 120, META_QUEUE_NOTBGP}, [ZEBRA_ROUTE_OSPF] = {ZEBRA_ROUTE_OSPF, 110, META_QUEUE_NOTBGP}, [ZEBRA_ROUTE_OSPF6] = {ZEBRA_ROUTE_OSPF6, 110, META_QUEUE_NOTBGP}, [ZEBRA_ROUTE_ISIS] = {ZEBRA_ROUTE_ISIS, 115, META_QUEUE_NOTBGP}, [ZEBRA_ROUTE_BGP] = {ZEBRA_ROUTE_BGP, 20 /* IBGP is 200. */, META_QUEUE_BGP}, [ZEBRA_ROUTE_PIM] = {ZEBRA_ROUTE_PIM, 255, META_QUEUE_OTHER}, [ZEBRA_ROUTE_EIGRP] = {ZEBRA_ROUTE_EIGRP, 90, META_QUEUE_NOTBGP}, [ZEBRA_ROUTE_NHRP] = {ZEBRA_ROUTE_NHRP, 10, META_QUEUE_NOTBGP}, [ZEBRA_ROUTE_HSLS] = {ZEBRA_ROUTE_HSLS, 255, META_QUEUE_OTHER}, [ZEBRA_ROUTE_OLSR] = {ZEBRA_ROUTE_OLSR, 255, META_QUEUE_OTHER}, [ZEBRA_ROUTE_TABLE] = {ZEBRA_ROUTE_TABLE, 150, META_QUEUE_STATIC}, [ZEBRA_ROUTE_LDP] = {ZEBRA_ROUTE_LDP, 150, META_QUEUE_OTHER}, [ZEBRA_ROUTE_VNC] = {ZEBRA_ROUTE_VNC, 20, META_QUEUE_BGP}, [ZEBRA_ROUTE_VNC_DIRECT] = {ZEBRA_ROUTE_VNC_DIRECT, 20, META_QUEUE_BGP}, [ZEBRA_ROUTE_VNC_DIRECT_RH] = {ZEBRA_ROUTE_VNC_DIRECT_RH, 20, META_QUEUE_BGP}, [ZEBRA_ROUTE_BGP_DIRECT] = {ZEBRA_ROUTE_BGP_DIRECT, 20, META_QUEUE_BGP}, [ZEBRA_ROUTE_BGP_DIRECT_EXT] = {ZEBRA_ROUTE_BGP_DIRECT_EXT, 20, META_QUEUE_BGP}, [ZEBRA_ROUTE_BABEL] = {ZEBRA_ROUTE_BABEL, 100, META_QUEUE_NOTBGP}, [ZEBRA_ROUTE_SHARP] = {ZEBRA_ROUTE_SHARP, 150, META_QUEUE_OTHER}, [ZEBRA_ROUTE_PBR] = {ZEBRA_ROUTE_PBR, 200, META_QUEUE_OTHER}, [ZEBRA_ROUTE_BFD] = {ZEBRA_ROUTE_BFD, 255, META_QUEUE_OTHER}, [ZEBRA_ROUTE_OPENFABRIC] = {ZEBRA_ROUTE_OPENFABRIC, 115, META_QUEUE_NOTBGP}, [ZEBRA_ROUTE_VRRP] = {ZEBRA_ROUTE_VRRP, 255, META_QUEUE_OTHER}, [ZEBRA_ROUTE_SRTE] = {ZEBRA_ROUTE_SRTE, 255, META_QUEUE_OTHER}, [ZEBRA_ROUTE_ALL] = {ZEBRA_ROUTE_ALL, 255, META_QUEUE_OTHER}, /* Any new route type added to zebra, should be mirrored here */ /* no entry/default: 150 */ }; /* Wrapper struct for nhg workqueue items; a 'ctx' is an incoming update * from the OS, and an 'nhe' is a nhe update. */ struct wq_nhg_wrapper { int type; union { struct nhg_ctx *ctx; struct nhg_hash_entry *nhe; } u; }; #define WQ_NHG_WRAPPER_TYPE_CTX 0x01 #define WQ_NHG_WRAPPER_TYPE_NHG 0x02 /* Wrapper structs for evpn/vxlan workqueue items. */ struct wq_evpn_wrapper { int type; bool add_p; vrf_id_t vrf_id; bool esr_rxed; uint8_t df_alg; uint16_t df_pref; uint32_t flags; uint32_t seq; esi_t esi; vni_t vni; struct ipaddr ip; struct ethaddr macaddr; struct prefix prefix; struct in_addr vtep_ip; }; #define WQ_EVPN_WRAPPER_TYPE_VRFROUTE 0x01 #define WQ_EVPN_WRAPPER_TYPE_REM_ES 0x02 #define WQ_EVPN_WRAPPER_TYPE_REM_MACIP 0x03 #define WQ_EVPN_WRAPPER_TYPE_REM_VTEP 0x04 enum wq_label_types { WQ_LABEL_FTN_UNINSTALL, WQ_LABEL_LABELS_PROCESS, }; struct wq_label_wrapper { enum wq_label_types type; vrf_id_t vrf_id; struct prefix p; enum lsp_types_t ltype; uint8_t route_type; uint8_t route_instance; bool add_p; struct zapi_labels zl; int afi; }; static void rib_addnode(struct route_node *rn, struct route_entry *re, int process); /* %pRN is already a printer for route_nodes that just prints the prefix */ #ifdef _FRR_ATTRIBUTE_PRINTFRR #pragma FRR printfrr_ext "%pZN" (struct route_node *) #endif static const char *subqueue2str(enum meta_queue_indexes index) { switch (index) { case META_QUEUE_NHG: return "NHG Objects"; case META_QUEUE_EVPN: return "EVPN/VxLan Objects"; case META_QUEUE_EARLY_ROUTE: return "Early Route Processing"; case META_QUEUE_EARLY_LABEL: return "Early Label Handling"; case META_QUEUE_CONNECTED: return "Connected Routes"; case META_QUEUE_KERNEL: return "Kernel Routes"; case META_QUEUE_STATIC: return "Static Routes"; case META_QUEUE_NOTBGP: return "RIP/OSPF/ISIS/EIGRP/NHRP Routes"; case META_QUEUE_BGP: return "BGP Routes"; case META_QUEUE_OTHER: return "Other Routes"; } return "Unknown"; } printfrr_ext_autoreg_p("ZN", printfrr_zebra_node); static ssize_t printfrr_zebra_node(struct fbuf *buf, struct printfrr_eargs *ea, const void *ptr) { struct route_node *rn = (struct route_node *)ptr; ssize_t rv = 0; /* just the table number? */ if (ea->fmt[0] == 't') { rib_dest_t *dest; struct route_entry *re = NULL; ea->fmt++; if (!rn) return bputch(buf, '!'); dest = rib_dest_from_rnode(rn); if (dest) re = re_list_first(&dest->routes); if (re) rv += bprintfrr(buf, "%u", re->table); else rv += bputch(buf, '?'); } else { char cbuf[PREFIX_STRLEN * 2 + 6]; struct rib_table_info *info; if (!rn) return bputs(buf, "{(route_node *) NULL}"); srcdest_rnode2str(rn, cbuf, sizeof(cbuf)); rv += bputs(buf, cbuf); info = srcdest_rnode_table_info(rn); if (info->safi == SAFI_MULTICAST) rv += bputs(buf, " (MRIB)"); } return rv; } #define rnode_debug(node, vrf_id, msg, ...) \ zlog_debug("%s: (%u:%pZNt):%pZN: " msg, __func__, vrf_id, node, node, \ ##__VA_ARGS__) #define rnode_info(node, vrf_id, msg, ...) \ zlog_info("%s: (%u:%pZNt):%pZN: " msg, __func__, vrf_id, node, node, \ ##__VA_ARGS__) static char *_dump_re_status(const struct route_entry *re, char *buf, size_t len) { if (re->status == 0) { snprintfrr(buf, len, "None "); return buf; } snprintfrr( buf, len, "%s%s%s%s%s%s%s", CHECK_FLAG(re->status, ROUTE_ENTRY_REMOVED) ? "Removed " : "", CHECK_FLAG(re->status, ROUTE_ENTRY_CHANGED) ? "Changed " : "", CHECK_FLAG(re->status, ROUTE_ENTRY_LABELS_CHANGED) ? "Label Changed " : "", CHECK_FLAG(re->status, ROUTE_ENTRY_QUEUED) ? "Queued " : "", CHECK_FLAG(re->status, ROUTE_ENTRY_INSTALLED) ? "Installed " : "", CHECK_FLAG(re->status, ROUTE_ENTRY_FAILED) ? "Failed " : "", CHECK_FLAG(re->status, ROUTE_ENTRY_USE_FIB_NHG) ? "Fib NHG " : ""); return buf; } uint8_t route_distance(int type) { uint8_t distance; if ((unsigned)type >= array_size(route_info)) distance = 150; else distance = route_info[type].distance; return distance; } int is_zebra_valid_kernel_table(uint32_t table_id) { #ifdef linux if ((table_id == RT_TABLE_UNSPEC) || (table_id == RT_TABLE_LOCAL) || (table_id == RT_TABLE_COMPAT)) return 0; #endif return 1; } int is_zebra_main_routing_table(uint32_t table_id) { if (table_id == RT_TABLE_MAIN) return 1; return 0; } int zebra_check_addr(const struct prefix *p) { if (p->family == AF_INET) { uint32_t addr; addr = p->u.prefix4.s_addr; addr = ntohl(addr); if (IPV4_NET127(addr) || IN_CLASSD(addr) || IPV4_LINKLOCAL(addr)) return 0; } if (p->family == AF_INET6) { if (IN6_IS_ADDR_LOOPBACK(&p->u.prefix6)) return 0; if (IN6_IS_ADDR_LINKLOCAL(&p->u.prefix6)) return 0; } return 1; } static void route_entry_attach_ref(struct route_entry *re, struct nhg_hash_entry *new) { re->nhe = new; re->nhe_id = new->id; re->nhe_installed_id = 0; zebra_nhg_increment_ref(new); } /* Replace (if 'new_nhghe') or clear (if that's NULL) an re's nhe. */ int route_entry_update_nhe(struct route_entry *re, struct nhg_hash_entry *new_nhghe) { int ret = 0; struct nhg_hash_entry *old_nhg = NULL; if (new_nhghe == NULL) { old_nhg = re->nhe; re->nhe_id = 0; re->nhe_installed_id = 0; re->nhe = NULL; goto done; } if ((re->nhe_id != 0) && re->nhe && (re->nhe != new_nhghe)) { /* Capture previous nhg, if any */ old_nhg = re->nhe; route_entry_attach_ref(re, new_nhghe); } else if (!re->nhe) /* This is the first time it's being attached */ route_entry_attach_ref(re, new_nhghe); done: /* Detach / deref previous nhg */ if (old_nhg) zebra_nhg_decrement_ref(old_nhg); return ret; } void rib_handle_nhg_replace(struct nhg_hash_entry *old_entry, struct nhg_hash_entry *new_entry) { struct zebra_router_table *zrt; struct route_node *rn; struct route_entry *re, *next; if (IS_ZEBRA_DEBUG_RIB_DETAILED || IS_ZEBRA_DEBUG_NHG_DETAIL) zlog_debug("%s: replacing routes nhe (%u) OLD %p NEW %p", __func__, new_entry->id, new_entry, old_entry); /* We have to do them ALL */ RB_FOREACH (zrt, zebra_router_table_head, &zrouter.tables) { for (rn = route_top(zrt->table); rn; rn = srcdest_route_next(rn)) { RNODE_FOREACH_RE_SAFE (rn, re, next) { if (re->nhe && re->nhe == old_entry) route_entry_update_nhe(re, new_entry); } } } } struct route_entry *rib_match(afi_t afi, safi_t safi, vrf_id_t vrf_id, const union g_addr *addr, struct route_node **rn_out) { struct prefix p; struct route_table *table; struct route_node *rn; struct route_entry *match = NULL; /* Lookup table. */ table = zebra_vrf_table(afi, safi, vrf_id); if (!table) return 0; memset(&p, 0, sizeof(p)); p.family = afi; if (afi == AFI_IP) { p.u.prefix4 = addr->ipv4; p.prefixlen = IPV4_MAX_BITLEN; } else { p.u.prefix6 = addr->ipv6; p.prefixlen = IPV6_MAX_BITLEN; } rn = route_node_match(table, &p); while (rn) { rib_dest_t *dest; route_unlock_node(rn); dest = rib_dest_from_rnode(rn); if (dest && dest->selected_fib && !CHECK_FLAG(dest->selected_fib->status, ROUTE_ENTRY_REMOVED)) match = dest->selected_fib; /* If there is no selected route or matched route is EGP, go up tree. */ if (!match) { do { rn = rn->parent; } while (rn && rn->info == NULL); if (rn) route_lock_node(rn); } else { if (match->type != ZEBRA_ROUTE_CONNECT) { if (!CHECK_FLAG(match->status, ROUTE_ENTRY_INSTALLED)) return NULL; } if (rn_out) *rn_out = rn; return match; } } return NULL; } struct route_entry *rib_match_ipv4_multicast(vrf_id_t vrf_id, struct in_addr addr, struct route_node **rn_out) { struct route_entry *re = NULL, *mre = NULL, *ure = NULL; struct route_node *m_rn = NULL, *u_rn = NULL; union g_addr gaddr = {.ipv4 = addr}; switch (zrouter.ipv4_multicast_mode) { case MCAST_MRIB_ONLY: return rib_match(AFI_IP, SAFI_MULTICAST, vrf_id, &gaddr, rn_out); case MCAST_URIB_ONLY: return rib_match(AFI_IP, SAFI_UNICAST, vrf_id, &gaddr, rn_out); case MCAST_NO_CONFIG: case MCAST_MIX_MRIB_FIRST: re = mre = rib_match(AFI_IP, SAFI_MULTICAST, vrf_id, &gaddr, &m_rn); if (!mre) re = ure = rib_match(AFI_IP, SAFI_UNICAST, vrf_id, &gaddr, &u_rn); break; case MCAST_MIX_DISTANCE: mre = rib_match(AFI_IP, SAFI_MULTICAST, vrf_id, &gaddr, &m_rn); ure = rib_match(AFI_IP, SAFI_UNICAST, vrf_id, &gaddr, &u_rn); if (mre && ure) re = ure->distance < mre->distance ? ure : mre; else if (mre) re = mre; else if (ure) re = ure; break; case MCAST_MIX_PFXLEN: mre = rib_match(AFI_IP, SAFI_MULTICAST, vrf_id, &gaddr, &m_rn); ure = rib_match(AFI_IP, SAFI_UNICAST, vrf_id, &gaddr, &u_rn); if (mre && ure) re = u_rn->p.prefixlen > m_rn->p.prefixlen ? ure : mre; else if (mre) re = mre; else if (ure) re = ure; break; } if (rn_out) *rn_out = (re == mre) ? m_rn : u_rn; if (IS_ZEBRA_DEBUG_RIB) { char buf[BUFSIZ]; inet_ntop(AF_INET, &addr, buf, BUFSIZ); zlog_debug("%s: %s: vrf: %s(%u) found %s, using %s", __func__, buf, vrf_id_to_name(vrf_id), vrf_id, mre ? (ure ? "MRIB+URIB" : "MRIB") : ure ? "URIB" : "nothing", re == ure ? "URIB" : re == mre ? "MRIB" : "none"); } return re; } struct route_entry *rib_match_ipv6_multicast(vrf_id_t vrf_id, struct in6_addr addr, struct route_node **rn_out) { struct route_entry *re = NULL, *mre = NULL, *ure = NULL; struct route_node *m_rn = NULL, *u_rn = NULL; union g_addr gaddr = {.ipv6 = addr}; switch (zrouter.ipv4_multicast_mode) { case MCAST_MRIB_ONLY: return rib_match(AFI_IP6, SAFI_MULTICAST, vrf_id, &gaddr, rn_out); case MCAST_URIB_ONLY: return rib_match(AFI_IP6, SAFI_UNICAST, vrf_id, &gaddr, rn_out); case MCAST_NO_CONFIG: case MCAST_MIX_MRIB_FIRST: re = mre = rib_match(AFI_IP6, SAFI_MULTICAST, vrf_id, &gaddr, &m_rn); if (!mre) re = ure = rib_match(AFI_IP6, SAFI_UNICAST, vrf_id, &gaddr, &u_rn); break; case MCAST_MIX_DISTANCE: mre = rib_match(AFI_IP6, SAFI_MULTICAST, vrf_id, &gaddr, &m_rn); ure = rib_match(AFI_IP6, SAFI_UNICAST, vrf_id, &gaddr, &u_rn); if (mre && ure) re = ure->distance < mre->distance ? ure : mre; else if (mre) re = mre; else if (ure) re = ure; break; case MCAST_MIX_PFXLEN: mre = rib_match(AFI_IP6, SAFI_MULTICAST, vrf_id, &gaddr, &m_rn); ure = rib_match(AFI_IP6, SAFI_UNICAST, vrf_id, &gaddr, &u_rn); if (mre && ure) re = u_rn->p.prefixlen > m_rn->p.prefixlen ? ure : mre; else if (mre) re = mre; else if (ure) re = ure; break; } if (rn_out) *rn_out = (re == mre) ? m_rn : u_rn; if (IS_ZEBRA_DEBUG_RIB) zlog_debug("%s: %pI6: vrf: %s(%u) found %s, using %s", __func__, &addr, vrf_id_to_name(vrf_id), vrf_id, mre ? (ure ? "MRIB+URIB" : "MRIB") : ure ? "URIB" : "nothing", re == ure ? "URIB" : re == mre ? "MRIB" : "none"); return re; } struct route_entry *rib_lookup_ipv4(struct prefix_ipv4 *p, vrf_id_t vrf_id) { struct route_table *table; struct route_node *rn; struct route_entry *match = NULL; rib_dest_t *dest; /* Lookup table. */ table = zebra_vrf_table(AFI_IP, SAFI_UNICAST, vrf_id); if (!table) return 0; rn = route_node_lookup(table, (struct prefix *)p); /* No route for this prefix. */ if (!rn) return NULL; /* Unlock node. */ route_unlock_node(rn); dest = rib_dest_from_rnode(rn); if (dest && dest->selected_fib && !CHECK_FLAG(dest->selected_fib->status, ROUTE_ENTRY_REMOVED)) match = dest->selected_fib; if (!match) return NULL; if (match->type == ZEBRA_ROUTE_CONNECT) return match; if (CHECK_FLAG(match->status, ROUTE_ENTRY_INSTALLED)) return match; return NULL; } /* * Is this RIB labeled-unicast? It must be of type BGP and all paths * (nexthops) must have a label. */ int zebra_rib_labeled_unicast(struct route_entry *re) { struct nexthop *nexthop = NULL; if (re->type != ZEBRA_ROUTE_BGP) return 0; for (ALL_NEXTHOPS(re->nhe->nhg, nexthop)) if (!nexthop->nh_label || !nexthop->nh_label->num_labels) return 0; return 1; } /* Update flag indicates whether this is a "replace" or not. Currently, this * is only used for IPv4. */ void rib_install_kernel(struct route_node *rn, struct route_entry *re, struct route_entry *old) { struct nexthop *nexthop; struct rib_table_info *info = srcdest_rnode_table_info(rn); struct zebra_vrf *zvrf = vrf_info_lookup(re->vrf_id); const struct prefix *p, *src_p; enum zebra_dplane_result ret; rib_dest_t *dest = rib_dest_from_rnode(rn); srcdest_rnode_prefixes(rn, &p, &src_p); if (info->safi != SAFI_UNICAST) { for (ALL_NEXTHOPS(re->nhe->nhg, nexthop)) SET_FLAG(nexthop->flags, NEXTHOP_FLAG_FIB); return; } /* * Install the resolved nexthop object first. */ zebra_nhg_install_kernel(re->nhe); /* * If this is a replace to a new RE let the originator of the RE * know that they've lost */ if (old && (old != re) && (old->type != re->type)) zsend_route_notify_owner(rn, old, ZAPI_ROUTE_BETTER_ADMIN_WON, info->afi, info->safi); /* Update fib selection */ dest->selected_fib = re; /* * Make sure we update the FPM any time we send new information to * the kernel. */ hook_call(rib_update, rn, "installing in kernel"); /* Send add or update */ if (old) ret = dplane_route_update(rn, re, old); else ret = dplane_route_add(rn, re); switch (ret) { case ZEBRA_DPLANE_REQUEST_QUEUED: SET_FLAG(re->status, ROUTE_ENTRY_QUEUED); if (old) { SET_FLAG(old->status, ROUTE_ENTRY_QUEUED); /* Free old FIB nexthop group */ UNSET_FLAG(old->status, ROUTE_ENTRY_USE_FIB_NHG); if (old->fib_ng.nexthop) { nexthops_free(old->fib_ng.nexthop); old->fib_ng.nexthop = NULL; } } if (zvrf) zvrf->installs_queued++; break; case ZEBRA_DPLANE_REQUEST_FAILURE: { flog_err(EC_ZEBRA_DP_INSTALL_FAIL, "%u:%u:%pRN: Failed to enqueue dataplane install", re->vrf_id, re->table, rn); break; } case ZEBRA_DPLANE_REQUEST_SUCCESS: if (zvrf) zvrf->installs++; break; } return; } /* Uninstall the route from kernel. */ void rib_uninstall_kernel(struct route_node *rn, struct route_entry *re) { struct nexthop *nexthop; struct rib_table_info *info = srcdest_rnode_table_info(rn); struct zebra_vrf *zvrf = vrf_info_lookup(re->vrf_id); if (info->safi != SAFI_UNICAST) { UNSET_FLAG(re->status, ROUTE_ENTRY_INSTALLED); for (ALL_NEXTHOPS(re->nhe->nhg, nexthop)) UNSET_FLAG(nexthop->flags, NEXTHOP_FLAG_FIB); return; } /* * Make sure we update the FPM any time we send new information to * the dataplane. */ hook_call(rib_update, rn, "uninstalling from kernel"); switch (dplane_route_delete(rn, re)) { case ZEBRA_DPLANE_REQUEST_QUEUED: if (zvrf) zvrf->removals_queued++; break; case ZEBRA_DPLANE_REQUEST_FAILURE: flog_err(EC_ZEBRA_DP_INSTALL_FAIL, "%u:%pRN: Failed to enqueue dataplane uninstall", re->vrf_id, rn); break; case ZEBRA_DPLANE_REQUEST_SUCCESS: if (zvrf) zvrf->removals++; break; } return; } /* * rib_can_delete_dest * * Returns true if the given dest can be deleted from the table. */ static int rib_can_delete_dest(rib_dest_t *dest) { if (re_list_first(&dest->routes)) { return 0; } /* * Unresolved rnh's are stored on the default route's list * * dest->rnode can also be the source prefix node in an * ipv6 sourcedest table. Fortunately the prefix of a * source prefix node can never be the default prefix. */ if (is_default_prefix(&dest->rnode->p)) return 0; /* * Don't delete the dest if we have to update the FPM about this * prefix. */ if (CHECK_FLAG(dest->flags, RIB_DEST_UPDATE_FPM) || CHECK_FLAG(dest->flags, RIB_DEST_SENT_TO_FPM)) return 0; return 1; } void zebra_rib_evaluate_rn_nexthops(struct route_node *rn, uint32_t seq, bool rt_delete) { rib_dest_t *dest = rib_dest_from_rnode(rn); struct rnh *rnh; /* * We are storing the rnh's associated withb * the tracked nexthop as a list of the rn's. * Unresolved rnh's are placed at the top * of the tree list.( 0.0.0.0/0 for v4 and 0::0/0 for v6 ) * As such for each rn we need to walk up the tree * and see if any rnh's need to see if they * would match a more specific route */ while (rn) { if (IS_ZEBRA_DEBUG_NHT_DETAILED) zlog_debug( "%s: %pRN Being examined for Nexthop Tracking Count: %zd", __func__, rn, dest ? rnh_list_count(&dest->nht) : 0); if (rt_delete && (!dest || !rnh_list_count(&dest->nht))) { if (IS_ZEBRA_DEBUG_NHT_DETAILED) zlog_debug("%pRN has no tracking NHTs. Bailing", rn); break; } if (!dest) { rn = rn->parent; if (rn) dest = rib_dest_from_rnode(rn); continue; } /* * If we have any rnh's stored in the nht list * then we know that this route node was used for * nht resolution and as such we need to call the * nexthop tracking evaluation code */ frr_each_safe(rnh_list, &dest->nht, rnh) { struct zebra_vrf *zvrf = zebra_vrf_lookup_by_id(rnh->vrf_id); struct prefix *p = &rnh->node->p; if (IS_ZEBRA_DEBUG_NHT_DETAILED) zlog_debug( "%s(%u):%pRN has Nexthop(%pRN) depending on it, evaluating %u:%u", zvrf_name(zvrf), zvrf_id(zvrf), rn, rnh->node, seq, rnh->seqno); /* * If we have evaluated this node on this pass * already, due to following the tree up * then we know that we can move onto the next * rnh to process. * * Additionally we call zebra_evaluate_rnh * when we gc the dest. In this case we know * that there must be no other re's where * we were originally as such we know that * that sequence number is ok to respect. */ if (rnh->seqno == seq) { if (IS_ZEBRA_DEBUG_NHT_DETAILED) zlog_debug( " Node processed and moved already"); continue; } rnh->seqno = seq; zebra_evaluate_rnh(zvrf, family2afi(p->family), 0, p, rnh->safi); } rn = rn->parent; if (rn) dest = rib_dest_from_rnode(rn); } } /* * rib_gc_dest * * Garbage collect the rib dest corresponding to the given route node * if appropriate. * * Returns true if the dest was deleted, false otherwise. */ int rib_gc_dest(struct route_node *rn) { rib_dest_t *dest; dest = rib_dest_from_rnode(rn); if (!dest) return 0; if (!rib_can_delete_dest(dest)) return 0; if (IS_ZEBRA_DEBUG_RIB) { struct zebra_vrf *zvrf; zvrf = rib_dest_vrf(dest); rnode_debug(rn, zvrf_id(zvrf), "removing dest from table"); } zebra_rib_evaluate_rn_nexthops(rn, zebra_router_get_next_sequence(), true); dest->rnode = NULL; rnh_list_fini(&dest->nht); XFREE(MTYPE_RIB_DEST, dest); rn->info = NULL; /* * Release the one reference that we keep on the route node. */ route_unlock_node(rn); return 1; } void zebra_rtable_node_cleanup(struct route_table *table, struct route_node *node) { struct route_entry *re, *next; RNODE_FOREACH_RE_SAFE (node, re, next) { rib_unlink(node, re); } if (node->info) { rib_dest_t *dest = node->info; /* Remove from update queue of FPM module */ hook_call(rib_shutdown, node); rnh_list_fini(&dest->nht); XFREE(MTYPE_RIB_DEST, node->info); } } static void rib_process_add_fib(struct zebra_vrf *zvrf, struct route_node *rn, struct route_entry *new) { hook_call(rib_update, rn, "new route selected"); /* Update real nexthop. This may actually determine if nexthop is active * or not. */ if (!nexthop_group_active_nexthop_num(&(new->nhe->nhg))) { UNSET_FLAG(new->status, ROUTE_ENTRY_CHANGED); return; } if (IS_ZEBRA_DEBUG_RIB) zlog_debug("%s(%u:%u):%pRN: Adding route rn %p, re %p (%s)", zvrf_name(zvrf), zvrf_id(zvrf), new->table, rn, rn, new, zebra_route_string(new->type)); /* If labeled-unicast route, install transit LSP. */ if (zebra_rib_labeled_unicast(new)) zebra_mpls_lsp_install(zvrf, rn, new); rib_install_kernel(rn, new, NULL); UNSET_FLAG(new->status, ROUTE_ENTRY_CHANGED); } static void rib_process_del_fib(struct zebra_vrf *zvrf, struct route_node *rn, struct route_entry *old) { hook_call(rib_update, rn, "removing existing route"); /* Uninstall from kernel. */ if (IS_ZEBRA_DEBUG_RIB) zlog_debug("%s(%u:%u):%pRN: Deleting route rn %p, re %p (%s)", zvrf_name(zvrf), zvrf_id(zvrf), old->table, rn, rn, old, zebra_route_string(old->type)); /* If labeled-unicast route, uninstall transit LSP. */ if (zebra_rib_labeled_unicast(old)) zebra_mpls_lsp_uninstall(zvrf, rn, old); rib_uninstall_kernel(rn, old); /* Update nexthop for route, reset changed flag. */ /* Note: this code also handles the Linux case when an interface goes * down, causing the kernel to delete routes without sending DELROUTE * notifications */ if (RIB_KERNEL_ROUTE(old)) SET_FLAG(old->status, ROUTE_ENTRY_REMOVED); else UNSET_FLAG(old->status, ROUTE_ENTRY_CHANGED); } static void rib_process_update_fib(struct zebra_vrf *zvrf, struct route_node *rn, struct route_entry *old, struct route_entry *new) { int nh_active = 0; /* * We have to install or update if a new route has been selected or * something has changed. */ if (new != old || CHECK_FLAG(new->status, ROUTE_ENTRY_CHANGED)) { hook_call(rib_update, rn, "updating existing route"); /* Update the nexthop; we could determine here that nexthop is * inactive. */ if (nexthop_group_active_nexthop_num(&(new->nhe->nhg))) nh_active = 1; /* If nexthop is active, install the selected route, if * appropriate. If * the install succeeds, cleanup flags for prior route, if * different from * newly selected. */ if (nh_active) { if (IS_ZEBRA_DEBUG_RIB) { if (new != old) zlog_debug( "%s(%u:%u):%pRN: Updating route rn %p, re %p (%s) old %p (%s)", zvrf_name(zvrf), zvrf_id(zvrf), new->table, rn, rn, new, zebra_route_string(new->type), old, zebra_route_string(old->type)); else zlog_debug( "%s(%u:%u):%pRN: Updating route rn %p, re %p (%s)", zvrf_name(zvrf), zvrf_id(zvrf), new->table, rn, rn, new, zebra_route_string(new->type)); } /* If labeled-unicast route, uninstall transit LSP. */ if (zebra_rib_labeled_unicast(old)) zebra_mpls_lsp_uninstall(zvrf, rn, old); /* * Non-system route should be installed. * If labeled-unicast route, install transit * LSP. */ if (zebra_rib_labeled_unicast(new)) zebra_mpls_lsp_install(zvrf, rn, new); rib_install_kernel(rn, new, old); } /* * If nexthop for selected route is not active or install * failed, we * may need to uninstall and delete for redistribution. */ if (!nh_active) { if (IS_ZEBRA_DEBUG_RIB) { if (new != old) zlog_debug( "%s(%u:%u):%pRN: Deleting route rn %p, re %p (%s) old %p (%s) - nexthop inactive", zvrf_name(zvrf), zvrf_id(zvrf), new->table, rn, rn, new, zebra_route_string(new->type), old, zebra_route_string(old->type)); else zlog_debug( "%s(%u:%u):%pRN: Deleting route rn %p, re %p (%s) - nexthop inactive", zvrf_name(zvrf), zvrf_id(zvrf), new->table, rn, rn, new, zebra_route_string(new->type)); } /* * When we have gotten to this point * the new route entry has no nexthops * that are usable and as such we need * to remove the old route, but only * if we were the one who installed * the old route */ if (!RIB_SYSTEM_ROUTE(old)) { /* If labeled-unicast route, uninstall transit * LSP. */ if (zebra_rib_labeled_unicast(old)) zebra_mpls_lsp_uninstall(zvrf, rn, old); rib_uninstall_kernel(rn, old); } } } else { /* * Same route selected; check if in the FIB and if not, * re-install. This is housekeeping code to deal with * race conditions in kernel with linux netlink reporting * interface up before IPv4 or IPv6 protocol is ready * to add routes. */ if (!CHECK_FLAG(new->status, ROUTE_ENTRY_INSTALLED) || RIB_SYSTEM_ROUTE(new)) rib_install_kernel(rn, new, NULL); } /* Update prior route. */ if (new != old) UNSET_FLAG(old->status, ROUTE_ENTRY_CHANGED); /* Clear changed flag. */ UNSET_FLAG(new->status, ROUTE_ENTRY_CHANGED); } /* Check if 'alternate' RIB entry is better than 'current'. */ static struct route_entry *rib_choose_best(struct route_entry *current, struct route_entry *alternate) { if (current == NULL) return alternate; /* filter route selection in following order: * - connected beats other types * - if both connected, loopback or vrf wins * - lower distance beats higher * - lower metric beats higher for equal distance * - last, hence oldest, route wins tie break. */ /* Connected routes. Check to see if either are a vrf * or loopback interface. If not, pick the last connected * route of the set of lowest metric connected routes. */ if (alternate->type == ZEBRA_ROUTE_CONNECT) { if (current->type != ZEBRA_ROUTE_CONNECT) return alternate; /* both are connected. are either loop or vrf? */ struct nexthop *nexthop = NULL; for (ALL_NEXTHOPS(alternate->nhe->nhg, nexthop)) { struct interface *ifp = if_lookup_by_index( nexthop->ifindex, alternate->vrf_id); if (ifp && if_is_loopback(ifp)) return alternate; } for (ALL_NEXTHOPS(current->nhe->nhg, nexthop)) { struct interface *ifp = if_lookup_by_index( nexthop->ifindex, current->vrf_id); if (ifp && if_is_loopback(ifp)) return current; } /* Neither are loop or vrf so pick best metric */ if (alternate->metric <= current->metric) return alternate; return current; } if (current->type == ZEBRA_ROUTE_CONNECT) return current; /* higher distance loses */ if (alternate->distance < current->distance) return alternate; if (current->distance < alternate->distance) return current; /* metric tie-breaks equal distance */ if (alternate->metric <= current->metric) return alternate; return current; } /* Core function for processing routing information base. */ static void rib_process(struct route_node *rn) { struct route_entry *re; struct route_entry *next; struct route_entry *old_selected = NULL; struct route_entry *new_selected = NULL; struct route_entry *old_fib = NULL; struct route_entry *new_fib = NULL; struct route_entry *best = NULL; rib_dest_t *dest; struct zebra_vrf *zvrf = NULL; struct vrf *vrf; vrf_id_t vrf_id = VRF_UNKNOWN; assert(rn); dest = rib_dest_from_rnode(rn); /* * We have an enqueued node with nothing to process here * let's just finish up and return; */ if (!dest) return; zvrf = rib_dest_vrf(dest); vrf_id = zvrf_id(zvrf); vrf = vrf_lookup_by_id(vrf_id); /* * we can have rn's that have a NULL info pointer * (dest). As such let's not let the deref happen * additionally we know RNODE_FOREACH_RE_SAFE * will not iterate so we are ok. */ if (IS_ZEBRA_DEBUG_RIB_DETAILED) { struct route_entry *re = re_list_first(&dest->routes); zlog_debug("%s(%u:%u):%pRN: Processing rn %p", VRF_LOGNAME(vrf), vrf_id, re->table, rn, rn); } old_fib = dest->selected_fib; RNODE_FOREACH_RE_SAFE (rn, re, next) { if (IS_ZEBRA_DEBUG_RIB_DETAILED) { char flags_buf[128]; char status_buf[128]; zlog_debug( "%s(%u:%u):%pRN: Examine re %p (%s) status: %sflags: %sdist %d metric %d", VRF_LOGNAME(vrf), vrf_id, re->table, rn, re, zebra_route_string(re->type), _dump_re_status(re, status_buf, sizeof(status_buf)), zclient_dump_route_flags(re->flags, flags_buf, sizeof(flags_buf)), re->distance, re->metric); } /* Currently selected re. */ if (CHECK_FLAG(re->flags, ZEBRA_FLAG_SELECTED)) { assert(old_selected == NULL); old_selected = re; } /* Skip deleted entries from selection */ if (CHECK_FLAG(re->status, ROUTE_ENTRY_REMOVED)) continue; /* * If the route entry has changed, verify/resolve * the nexthops associated with the entry. * * In any event if we have nexthops that are not active * then we cannot use this particular route entry so * skip it. */ if (CHECK_FLAG(re->status, ROUTE_ENTRY_CHANGED)) { if (!nexthop_active_update(rn, re)) { const struct prefix *p; struct rib_table_info *info; if (re->type == ZEBRA_ROUTE_TABLE) { /* XXX: HERE BE DRAGONS!!!!! * In all honesty, I have not yet * figured out what this part does or * why the ROUTE_ENTRY_CHANGED test * above is correct or why we need to * delete a route here, and also not * whether this concerns both selected * and fib route, or only selected * or only fib * * This entry was denied by the 'ip * protocol * table' route-map, we need to delete * it */ if (re != old_selected) { if (IS_ZEBRA_DEBUG_RIB) zlog_debug( "%s: %s(%u):%pRN: imported via import-table but denied by the ip protocol table route-map", __func__, VRF_LOGNAME( vrf), vrf_id, rn); rib_unlink(rn, re); } else SET_FLAG(re->status, ROUTE_ENTRY_REMOVED); } info = srcdest_rnode_table_info(rn); srcdest_rnode_prefixes(rn, &p, NULL); zsend_route_notify_owner( rn, re, ZAPI_ROUTE_FAIL_INSTALL, info->afi, info->safi); continue; } } else { /* * If the re has not changed and the nhg we have is * not usable, then we cannot use this route entry * for consideration, as that the route will just * not install if it is selected. */ if (!nexthop_group_active_nexthop_num(&re->nhe->nhg)) continue; } /* Infinite distance. */ if (re->distance == DISTANCE_INFINITY && re->type != ZEBRA_ROUTE_KERNEL) { UNSET_FLAG(re->status, ROUTE_ENTRY_CHANGED); continue; } if (CHECK_FLAG(re->flags, ZEBRA_FLAG_FIB_OVERRIDE)) { best = rib_choose_best(new_fib, re); if (new_fib && best != new_fib) UNSET_FLAG(new_fib->status, ROUTE_ENTRY_CHANGED); new_fib = best; } else { best = rib_choose_best(new_selected, re); if (new_selected && best != new_selected) UNSET_FLAG(new_selected->status, ROUTE_ENTRY_CHANGED); new_selected = best; } if (best != re) UNSET_FLAG(re->status, ROUTE_ENTRY_CHANGED); } /* RNODE_FOREACH_RE */ /* If no FIB override route, use the selected route also for FIB */ if (new_fib == NULL) new_fib = new_selected; /* After the cycle is finished, the following pointers will be set: * old_selected --- RE entry currently having SELECTED * new_selected --- RE entry that is newly SELECTED * old_fib --- RE entry currently in kernel FIB * new_fib --- RE entry that is newly to be in kernel FIB * * new_selected will get SELECTED flag, and is going to be redistributed * the zclients. new_fib (which can be new_selected) will be installed * in kernel. */ if (IS_ZEBRA_DEBUG_RIB_DETAILED) { struct route_entry *entry; entry = old_selected ? old_selected : new_selected ? new_selected : old_fib ? old_fib : new_fib ? new_fib : NULL; zlog_debug( "%s(%u:%u):%pRN: After processing: old_selected %p new_selected %p old_fib %p new_fib %p", VRF_LOGNAME(vrf), vrf_id, entry ? entry->table : 0, rn, (void *)old_selected, (void *)new_selected, (void *)old_fib, (void *)new_fib); } /* Buffer ROUTE_ENTRY_CHANGED here, because it will get cleared if * fib == selected */ bool selected_changed = new_selected && CHECK_FLAG(new_selected->status, ROUTE_ENTRY_CHANGED); /* Update SELECTED entry */ if (old_selected != new_selected || selected_changed) { if (new_selected && new_selected != new_fib) UNSET_FLAG(new_selected->status, ROUTE_ENTRY_CHANGED); if (new_selected) SET_FLAG(new_selected->flags, ZEBRA_FLAG_SELECTED); if (old_selected) { /* * If we're removing the old entry, we should tell * redist subscribers about that *if* they aren't * going to see a redist for the new entry. */ if (!new_selected || CHECK_FLAG(old_selected->status, ROUTE_ENTRY_REMOVED)) redistribute_delete(rn, old_selected, new_selected); if (old_selected != new_selected) UNSET_FLAG(old_selected->flags, ZEBRA_FLAG_SELECTED); } } /* Update fib according to selection results */ if (new_fib && old_fib) rib_process_update_fib(zvrf, rn, old_fib, new_fib); else if (new_fib) rib_process_add_fib(zvrf, rn, new_fib); else if (old_fib) rib_process_del_fib(zvrf, rn, old_fib); /* Remove all RE entries queued for removal */ RNODE_FOREACH_RE_SAFE (rn, re, next) { if (CHECK_FLAG(re->status, ROUTE_ENTRY_REMOVED)) { if (IS_ZEBRA_DEBUG_RIB) { rnode_debug(rn, vrf_id, "rn %p, removing re %p", (void *)rn, (void *)re); } rib_unlink(rn, re); } } /* * Check if the dest can be deleted now. */ rib_gc_dest(rn); } static void zebra_rib_evaluate_mpls(struct route_node *rn) { rib_dest_t *dest = rib_dest_from_rnode(rn); struct zebra_vrf *zvrf = vrf_info_lookup(VRF_DEFAULT); if (!dest) return; if (CHECK_FLAG(dest->flags, RIB_DEST_UPDATE_LSPS)) { if (IS_ZEBRA_DEBUG_MPLS) zlog_debug( "%s(%u): Scheduling all LSPs upon RIB completion", zvrf_name(zvrf), zvrf_id(zvrf)); zebra_mpls_lsp_schedule(zvrf); mpls_unmark_lsps_for_processing(rn); } } /* * Utility to match route with dplane context data */ static bool rib_route_match_ctx(const struct route_entry *re, const struct zebra_dplane_ctx *ctx, bool is_update) { bool result = false; if (is_update) { /* * In 'update' case, we test info about the 'previous' or * 'old' route */ if ((re->type == dplane_ctx_get_old_type(ctx)) && (re->instance == dplane_ctx_get_old_instance(ctx))) { result = true; /* We use an extra test for statics, and another for * kernel routes. */ if (re->type == ZEBRA_ROUTE_STATIC && (re->distance != dplane_ctx_get_old_distance(ctx) || re->tag != dplane_ctx_get_old_tag(ctx))) { result = false; } else if (re->type == ZEBRA_ROUTE_KERNEL && re->metric != dplane_ctx_get_old_metric(ctx)) { result = false; } } } else { /* * Ordinary, single-route case using primary context info */ if ((dplane_ctx_get_op(ctx) != DPLANE_OP_ROUTE_DELETE) && CHECK_FLAG(re->status, ROUTE_ENTRY_REMOVED)) { /* Skip route that's been deleted */ goto done; } if ((re->type == dplane_ctx_get_type(ctx)) && (re->instance == dplane_ctx_get_instance(ctx))) { result = true; /* We use an extra test for statics, and another for * kernel routes. */ if (re->type == ZEBRA_ROUTE_STATIC && (re->distance != dplane_ctx_get_distance(ctx) || re->tag != dplane_ctx_get_tag(ctx))) { result = false; } else if (re->type == ZEBRA_ROUTE_KERNEL && re->metric != dplane_ctx_get_metric(ctx)) { result = false; } else if (re->type == ZEBRA_ROUTE_CONNECT) { result = nexthop_group_equal_no_recurse( &re->nhe->nhg, dplane_ctx_get_ng(ctx)); } } } done: return (result); } static void zebra_rib_fixup_system(struct route_node *rn) { struct route_entry *re; RNODE_FOREACH_RE(rn, re) { struct nexthop *nhop; if (!RIB_SYSTEM_ROUTE(re)) continue; if (CHECK_FLAG(re->status, ROUTE_ENTRY_REMOVED)) continue; SET_FLAG(re->status, ROUTE_ENTRY_INSTALLED); UNSET_FLAG(re->status, ROUTE_ENTRY_QUEUED); for (ALL_NEXTHOPS(re->nhe->nhg, nhop)) { if (CHECK_FLAG(nhop->flags, NEXTHOP_FLAG_RECURSIVE)) continue; SET_FLAG(nhop->flags, NEXTHOP_FLAG_FIB); } } } /* Route comparison logic, with various special cases. */ static bool rib_compare_routes(const struct route_entry *re1, const struct route_entry *re2) { if (re1->type != re2->type) return false; if (re1->instance != re2->instance) return false; if (re1->type == ZEBRA_ROUTE_KERNEL && re1->metric != re2->metric) return false; if (CHECK_FLAG(re1->flags, ZEBRA_FLAG_RR_USE_DISTANCE) && re1->distance != re2->distance) return false; /* We support multiple connected routes: this supports multiple * v6 link-locals, and we also support multiple addresses in the same * subnet on a single interface. */ if (re1->type != ZEBRA_ROUTE_CONNECT) return true; return false; } /* * Compare nexthop lists from a route and a dplane context; test whether * the list installed in the FIB matches the route's list. * Set 'changed_p' to 'true' if there were changes to the route's * installed nexthops. * * Return 'false' if any ACTIVE route nexthops are not mentioned in the FIB * list. */ static bool rib_update_nhg_from_ctx(struct nexthop_group *re_nhg, const struct nexthop_group *ctx_nhg, bool *changed_p) { bool matched_p = true; struct nexthop *nexthop, *ctx_nexthop; /* Get the first `installed` one to check against. * If the dataplane doesn't set these to be what was actually installed, * it will just be whatever was in re->nhe->nhg? */ ctx_nexthop = ctx_nhg->nexthop; if (CHECK_FLAG(ctx_nexthop->flags, NEXTHOP_FLAG_RECURSIVE) || !CHECK_FLAG(ctx_nexthop->flags, NEXTHOP_FLAG_ACTIVE)) ctx_nexthop = nexthop_next_active_resolved(ctx_nexthop); for (ALL_NEXTHOPS_PTR(re_nhg, nexthop)) { if (CHECK_FLAG(nexthop->flags, NEXTHOP_FLAG_RECURSIVE)) continue; if (!CHECK_FLAG(nexthop->flags, NEXTHOP_FLAG_ACTIVE)) continue; /* Check for a FIB nexthop corresponding to the RIB nexthop */ if (!nexthop_same(ctx_nexthop, nexthop)) { /* If the FIB doesn't know about the nexthop, * it's not installed */ if (IS_ZEBRA_DEBUG_RIB_DETAILED || IS_ZEBRA_DEBUG_NHG_DETAIL) { zlog_debug("%s: no ctx match for rib nh %pNHv %s", __func__, nexthop, (CHECK_FLAG(nexthop->flags, NEXTHOP_FLAG_FIB) ? "(FIB)":"")); } matched_p = false; if (CHECK_FLAG(nexthop->flags, NEXTHOP_FLAG_FIB)) *changed_p = true; UNSET_FLAG(nexthop->flags, NEXTHOP_FLAG_FIB); /* Keep checking nexthops */ continue; } if (CHECK_FLAG(ctx_nexthop->flags, NEXTHOP_FLAG_FIB)) { if (!CHECK_FLAG(nexthop->flags, NEXTHOP_FLAG_FIB)) { if (IS_ZEBRA_DEBUG_NHG_DETAIL) zlog_debug("%s: rib nh %pNHv -> installed", __func__, nexthop); *changed_p = true; } SET_FLAG(nexthop->flags, NEXTHOP_FLAG_FIB); } else { if (CHECK_FLAG(nexthop->flags, NEXTHOP_FLAG_FIB)) { if (IS_ZEBRA_DEBUG_NHG_DETAIL) zlog_debug("%s: rib nh %pNHv -> uninstalled", __func__, nexthop); *changed_p = true; } UNSET_FLAG(nexthop->flags, NEXTHOP_FLAG_FIB); } ctx_nexthop = nexthop_next_active_resolved(ctx_nexthop); } return matched_p; } /* * Update a route from a dplane context. This consolidates common code * that can be used in processing of results from FIB updates, and in * async notification processing. * The return is 'true' if the installed nexthops changed; 'false' otherwise. */ static bool rib_update_re_from_ctx(struct route_entry *re, struct route_node *rn, struct zebra_dplane_ctx *ctx) { struct nexthop *nexthop; bool matched; const struct nexthop_group *ctxnhg; struct nexthop_group *re_nhg; bool is_selected = false; /* Is 're' currently the selected re? */ bool changed_p = false; /* Change to nexthops? */ rib_dest_t *dest; struct vrf *vrf; vrf = vrf_lookup_by_id(re->vrf_id); dest = rib_dest_from_rnode(rn); if (dest) is_selected = (re == dest->selected_fib); if (IS_ZEBRA_DEBUG_RIB_DETAILED) zlog_debug("update_from_ctx: %s(%u:%u):%pRN: %sSELECTED, re %p", VRF_LOGNAME(vrf), re->vrf_id, re->table, rn, (is_selected ? "" : "NOT "), re); /* Update zebra's nexthop FIB flag for each nexthop that was installed. * If the installed set differs from the set requested by the rib/owner, * we use the fib-specific nexthop-group to record the actual FIB * status. */ matched = false; ctxnhg = dplane_ctx_get_ng(ctx); /* Check route's fib group and incoming notif group for equivalence. * * Let's assume the nexthops are ordered here to save time. */ /* TODO -- this isn't testing or comparing the FIB flags; we should * do a more explicit loop, checking the incoming notification's flags. */ if (re->fib_ng.nexthop && ctxnhg->nexthop && nexthop_group_equal(&re->fib_ng, ctxnhg)) matched = true; /* If the new FIB set matches the existing FIB set, we're done. */ if (matched) { if (IS_ZEBRA_DEBUG_RIB) zlog_debug( "%s(%u:%u):%pRN update_from_ctx(): existing fib nhg, no change", VRF_LOGNAME(vrf), re->vrf_id, re->table, rn); goto check_backups; } else if (CHECK_FLAG(re->status, ROUTE_ENTRY_USE_FIB_NHG)) { /* * Free stale fib list and move on to check the rib nhg. */ if (IS_ZEBRA_DEBUG_RIB) zlog_debug( "%s(%u:%u):%pRN update_from_ctx(): replacing fib nhg", VRF_LOGNAME(vrf), re->vrf_id, re->table, rn); nexthops_free(re->fib_ng.nexthop); re->fib_ng.nexthop = NULL; UNSET_FLAG(re->status, ROUTE_ENTRY_USE_FIB_NHG); /* Note that the installed nexthops have changed */ changed_p = true; } else { if (IS_ZEBRA_DEBUG_RIB) zlog_debug( "%s(%u:%u):%pRN update_from_ctx(): no fib nhg", VRF_LOGNAME(vrf), re->vrf_id, re->table, rn); } /* * Compare with the rib nexthop group. The comparison here is different: * the RIB group may be a superset of the list installed in the FIB. We * walk the RIB group, looking for the 'installable' candidate * nexthops, and then check those against the set * that is actually installed. * * Assume nexthops are ordered here as well. */ /* If nothing is installed, we can skip some of the checking/comparison * of nexthops. */ if (ctxnhg->nexthop == NULL) { changed_p = true; goto no_nexthops; } matched = rib_update_nhg_from_ctx(&(re->nhe->nhg), ctxnhg, &changed_p); /* If all nexthops were processed, we're done */ if (matched) { if (IS_ZEBRA_DEBUG_RIB) zlog_debug( "%s(%u:%u):%pRN update_from_ctx(): rib nhg matched, changed '%s'", VRF_LOGNAME(vrf), re->vrf_id, re->table, rn, (changed_p ? "true" : "false")); goto check_backups; } no_nexthops: /* FIB nexthop set differs from the RIB set: * create a fib-specific nexthop-group */ if (IS_ZEBRA_DEBUG_RIB) zlog_debug( "%s(%u:%u):%pRN update_from_ctx(): changed %s, adding new fib nhg%s", VRF_LOGNAME(vrf), re->vrf_id, re->table, rn, (changed_p ? "true" : "false"), ctxnhg->nexthop != NULL ? "" : " (empty)"); /* Set the flag about the dedicated fib list */ SET_FLAG(re->status, ROUTE_ENTRY_USE_FIB_NHG); if (ctxnhg->nexthop) copy_nexthops(&(re->fib_ng.nexthop), ctxnhg->nexthop, NULL); check_backups: /* * Check the status of the route's backup nexthops, if any. * The logic for backups is somewhat different: if any backup is * installed, a new fib nhg will be attached to the route. */ re_nhg = zebra_nhg_get_backup_nhg(re->nhe); if (re_nhg == NULL) goto done; /* No backup nexthops */ /* First check the route's 'fib' list of backups, if it's present * from some previous event. */ re_nhg = &re->fib_backup_ng; ctxnhg = dplane_ctx_get_backup_ng(ctx); matched = false; if (re_nhg->nexthop && ctxnhg && nexthop_group_equal(re_nhg, ctxnhg)) matched = true; /* If the new FIB set matches an existing FIB set, we're done. */ if (matched) { if (IS_ZEBRA_DEBUG_RIB) zlog_debug( "%s(%u):%pRN update_from_ctx(): existing fib backup nhg, no change", VRF_LOGNAME(vrf), re->vrf_id, rn); goto done; } else if (re->fib_backup_ng.nexthop) { /* * Free stale fib backup list and move on to check * the route's backups. */ if (IS_ZEBRA_DEBUG_RIB) zlog_debug( "%s(%u):%pRN update_from_ctx(): replacing fib backup nhg", VRF_LOGNAME(vrf), re->vrf_id, rn); nexthops_free(re->fib_backup_ng.nexthop); re->fib_backup_ng.nexthop = NULL; /* Note that the installed nexthops have changed */ changed_p = true; } else { if (IS_ZEBRA_DEBUG_RIB) zlog_debug( "%s(%u):%pRN update_from_ctx(): no fib backup nhg", VRF_LOGNAME(vrf), re->vrf_id, rn); } /* * If a FIB backup nexthop set exists, attach a copy * to the route if any backup is installed */ if (ctxnhg && ctxnhg->nexthop) { for (ALL_NEXTHOPS_PTR(ctxnhg, nexthop)) { if (CHECK_FLAG(nexthop->flags, NEXTHOP_FLAG_FIB)) break; } /* If no installed backups, we're done */ if (nexthop == NULL) goto done; if (IS_ZEBRA_DEBUG_RIB) zlog_debug( "%s(%u):%pRN update_from_ctx(): changed %s, adding new backup fib nhg", VRF_LOGNAME(vrf), re->vrf_id, rn, (changed_p ? "true" : "false")); copy_nexthops(&(re->fib_backup_ng.nexthop), ctxnhg->nexthop, NULL); } done: return changed_p; } /* * Helper to locate a zebra route-node from a dplane context. This is used * when processing dplane results, e.g. Note well: the route-node is returned * with a ref held - route_unlock_node() must be called eventually. */ struct route_node *rib_find_rn_from_ctx(const struct zebra_dplane_ctx *ctx) { struct route_table *table = NULL; struct route_node *rn = NULL; const struct prefix *dest_pfx, *src_pfx; /* Locate rn and re(s) from ctx */ table = zebra_vrf_lookup_table_with_table_id( dplane_ctx_get_afi(ctx), dplane_ctx_get_safi(ctx), dplane_ctx_get_vrf(ctx), dplane_ctx_get_table(ctx)); if (table == NULL) { if (IS_ZEBRA_DEBUG_DPLANE) { zlog_debug( "Failed to find route for ctx: no table for afi %d, safi %d, vrf %s(%u)", dplane_ctx_get_afi(ctx), dplane_ctx_get_safi(ctx), vrf_id_to_name(dplane_ctx_get_vrf(ctx)), dplane_ctx_get_vrf(ctx)); } goto done; } dest_pfx = dplane_ctx_get_dest(ctx); src_pfx = dplane_ctx_get_src(ctx); rn = srcdest_rnode_get(table, dest_pfx, src_pfx ? (struct prefix_ipv6 *)src_pfx : NULL); done: return rn; } /* * Route-update results processing after async dataplane update. */ static void rib_process_result(struct zebra_dplane_ctx *ctx) { struct zebra_vrf *zvrf = NULL; struct vrf *vrf; struct route_node *rn = NULL; struct route_entry *re = NULL, *old_re = NULL, *rib; bool is_update = false; enum dplane_op_e op; enum zebra_dplane_result status; uint32_t seq; rib_dest_t *dest; bool fib_changed = false; struct rib_table_info *info; bool rt_delete = false; zvrf = vrf_info_lookup(dplane_ctx_get_vrf(ctx)); vrf = vrf_lookup_by_id(dplane_ctx_get_vrf(ctx)); /* Locate rn and re(s) from ctx */ rn = rib_find_rn_from_ctx(ctx); if (rn == NULL) { if (IS_ZEBRA_DEBUG_DPLANE) { zlog_debug( "Failed to process dplane results: no route for %s(%u):%pRN", VRF_LOGNAME(vrf), dplane_ctx_get_vrf(ctx), rn); } goto done; } dest = rib_dest_from_rnode(rn); info = srcdest_rnode_table_info(rn); op = dplane_ctx_get_op(ctx); status = dplane_ctx_get_status(ctx); if (IS_ZEBRA_DEBUG_DPLANE_DETAIL) zlog_debug( "%s(%u:%u):%pRN Processing dplane result ctx %p, op %s result %s", VRF_LOGNAME(vrf), dplane_ctx_get_vrf(ctx), dplane_ctx_get_table(ctx), rn, ctx, dplane_op2str(op), dplane_res2str(status)); /* * Update is a bit of a special case, where we may have both old and new * routes to post-process. */ is_update = dplane_ctx_is_update(ctx); /* * Take a pass through the routes, look for matches with the context * info. */ RNODE_FOREACH_RE(rn, rib) { if (re == NULL) { if (rib_route_match_ctx(rib, ctx, false)) re = rib; } /* Check for old route match */ if (is_update && (old_re == NULL)) { if (rib_route_match_ctx(rib, ctx, true /*is_update*/)) old_re = rib; } /* Have we found the routes we need to work on? */ if (re && ((!is_update || old_re))) break; } seq = dplane_ctx_get_seq(ctx); /* * Check sequence number(s) to detect stale results before continuing */ if (re) { if (re->dplane_sequence != seq) { if (IS_ZEBRA_DEBUG_DPLANE_DETAIL) zlog_debug( "%s(%u):%pRN Stale dplane result for re %p", VRF_LOGNAME(vrf), dplane_ctx_get_vrf(ctx), rn, re); } else { if (!zrouter.asic_offloaded || (CHECK_FLAG(re->flags, ZEBRA_FLAG_OFFLOADED) || CHECK_FLAG(re->flags, ZEBRA_FLAG_OFFLOAD_FAILED))) UNSET_FLAG(re->status, ROUTE_ENTRY_QUEUED); } } if (old_re) { if (old_re->dplane_sequence != dplane_ctx_get_old_seq(ctx)) { if (IS_ZEBRA_DEBUG_DPLANE_DETAIL) zlog_debug( "%s(%u:%u):%pRN Stale dplane result for old_re %p", VRF_LOGNAME(vrf), dplane_ctx_get_vrf(ctx), old_re->table, rn, old_re); } else UNSET_FLAG(old_re->status, ROUTE_ENTRY_QUEUED); } switch (op) { case DPLANE_OP_ROUTE_INSTALL: case DPLANE_OP_ROUTE_UPDATE: if (status == ZEBRA_DPLANE_REQUEST_SUCCESS) { if (re) { UNSET_FLAG(re->status, ROUTE_ENTRY_FAILED); SET_FLAG(re->status, ROUTE_ENTRY_INSTALLED); } /* * On an update operation from the same route type * context retrieval currently has no way to know * which was the old and which was the new. * So don't unset our flags that we just set. * We know redistribution is ok because the * old_re in this case is used for nothing * more than knowing whom to contact if necessary. */ if (old_re && old_re != re) { UNSET_FLAG(old_re->status, ROUTE_ENTRY_FAILED); UNSET_FLAG(old_re->status, ROUTE_ENTRY_INSTALLED); } /* Update zebra route based on the results in * the context struct. */ if (re) { fib_changed = rib_update_re_from_ctx(re, rn, ctx); if (!fib_changed) { if (IS_ZEBRA_DEBUG_DPLANE_DETAIL) zlog_debug( "%s(%u:%u):%pRN no fib change for re", VRF_LOGNAME(vrf), dplane_ctx_get_vrf(ctx), dplane_ctx_get_table( ctx), rn); } /* Redistribute if this is the selected re */ if (dest && re == dest->selected_fib) redistribute_update(rn, re, old_re); } /* * System routes are weird in that they * allow multiple to be installed that match * to the same prefix, so after we get the * result we need to clean them up so that * we can actually use them. */ if ((re && RIB_SYSTEM_ROUTE(re)) || (old_re && RIB_SYSTEM_ROUTE(old_re))) zebra_rib_fixup_system(rn); if (zvrf) zvrf->installs++; /* Notify route owner */ if (zebra_router_notify_on_ack()) zsend_route_notify_owner_ctx(ctx, ZAPI_ROUTE_INSTALLED); else { if (re) { if (CHECK_FLAG(re->flags, ZEBRA_FLAG_OFFLOADED)) zsend_route_notify_owner_ctx( ctx, ZAPI_ROUTE_INSTALLED); if (CHECK_FLAG( re->flags, ZEBRA_FLAG_OFFLOAD_FAILED)) zsend_route_notify_owner_ctx( ctx, ZAPI_ROUTE_FAIL_INSTALL); } } } else { if (re) { SET_FLAG(re->status, ROUTE_ENTRY_FAILED); UNSET_FLAG(re->status, ROUTE_ENTRY_INSTALLED); } if (old_re) SET_FLAG(old_re->status, ROUTE_ENTRY_FAILED); if (re) zsend_route_notify_owner( rn, re, ZAPI_ROUTE_FAIL_INSTALL, info->afi, info->safi); zlog_warn("%s(%u:%u):%pRN: Route install failed", VRF_LOGNAME(vrf), dplane_ctx_get_vrf(ctx), dplane_ctx_get_table(ctx), rn); } break; case DPLANE_OP_ROUTE_DELETE: rt_delete = true; if (re) SET_FLAG(re->status, ROUTE_ENTRY_FAILED); /* * In the delete case, the zebra core datastructs were * updated (or removed) at the time the delete was issued, * so we're just notifying the route owner. */ if (status == ZEBRA_DPLANE_REQUEST_SUCCESS) { if (re) { UNSET_FLAG(re->status, ROUTE_ENTRY_INSTALLED); UNSET_FLAG(re->status, ROUTE_ENTRY_FAILED); } zsend_route_notify_owner_ctx(ctx, ZAPI_ROUTE_REMOVED); if (zvrf) zvrf->removals++; } else { if (re) SET_FLAG(re->status, ROUTE_ENTRY_FAILED); zsend_route_notify_owner_ctx(ctx, ZAPI_ROUTE_REMOVE_FAIL); zlog_warn("%s(%u:%u):%pRN: Route Deletion failure", VRF_LOGNAME(vrf), dplane_ctx_get_vrf(ctx), dplane_ctx_get_table(ctx), rn); } /* * System routes are weird in that they * allow multiple to be installed that match * to the same prefix, so after we get the * result we need to clean them up so that * we can actually use them. */ if ((re && RIB_SYSTEM_ROUTE(re)) || (old_re && RIB_SYSTEM_ROUTE(old_re))) zebra_rib_fixup_system(rn); break; default: break; } zebra_rib_evaluate_rn_nexthops(rn, seq, rt_delete); zebra_rib_evaluate_mpls(rn); done: if (rn) route_unlock_node(rn); } /* * Count installed/FIB nexthops */ static int rib_count_installed_nh(struct route_entry *re) { int count = 0; struct nexthop *nexthop; struct nexthop_group *nhg; nhg = rib_get_fib_nhg(re); for (ALL_NEXTHOPS_PTR(nhg, nexthop)) { /* The meaningful flag depends on where the installed * nexthops reside. */ if (nhg == &(re->fib_ng)) { if (CHECK_FLAG(nexthop->flags, NEXTHOP_FLAG_FIB)) count++; } else { if (CHECK_FLAG(nexthop->flags, NEXTHOP_FLAG_ACTIVE)) count++; } } nhg = rib_get_fib_backup_nhg(re); if (nhg) { for (ALL_NEXTHOPS_PTR(nhg, nexthop)) { if (CHECK_FLAG(nexthop->flags, NEXTHOP_FLAG_FIB)) count++; } } return count; } /* * Handle notification from async dataplane: the dataplane has detected * some change to a route, and notifies zebra so that the control plane * can reflect that change. */ static void rib_process_dplane_notify(struct zebra_dplane_ctx *ctx) { struct route_node *rn = NULL; struct route_entry *re = NULL; struct vrf *vrf; struct nexthop *nexthop; rib_dest_t *dest; bool fib_changed = false; bool debug_p = IS_ZEBRA_DEBUG_DPLANE | IS_ZEBRA_DEBUG_RIB; int start_count, end_count; vrf = vrf_lookup_by_id(dplane_ctx_get_vrf(ctx)); /* Locate rn and re(s) from ctx */ rn = rib_find_rn_from_ctx(ctx); if (rn == NULL) { if (debug_p) { zlog_debug( "Failed to process dplane notification: no routes for %s(%u:%u):%pRN", VRF_LOGNAME(vrf), dplane_ctx_get_vrf(ctx), dplane_ctx_get_table(ctx), rn); } goto done; } dest = rib_dest_from_rnode(rn); if (debug_p) zlog_debug("%s(%u:%u):%pRN Processing dplane notif ctx %p", VRF_LOGNAME(vrf), dplane_ctx_get_vrf(ctx), dplane_ctx_get_table(ctx), rn, ctx); /* * Take a pass through the routes, look for matches with the context * info. */ RNODE_FOREACH_RE(rn, re) { if (rib_route_match_ctx(re, ctx, false /*!update*/)) break; } /* No match? Nothing we can do */ if (re == NULL) { if (debug_p) zlog_debug( "%s(%u:%u):%pRN Unable to process dplane notification: no entry for type %s", VRF_LOGNAME(vrf), dplane_ctx_get_vrf(ctx), dplane_ctx_get_table(ctx), rn, zebra_route_string(dplane_ctx_get_type(ctx))); goto done; } /* Ensure we clear the QUEUED flag */ if (!zrouter.asic_offloaded) UNSET_FLAG(re->status, ROUTE_ENTRY_QUEUED); /* Is this a notification that ... matters? We mostly care about * the route that is currently selected for installation; we may also * get an un-install notification, and handle that too. */ if (re != dest->selected_fib) { /* * If we need to, clean up after a delete that was part of * an update operation. */ end_count = 0; for (ALL_NEXTHOPS_PTR(dplane_ctx_get_ng(ctx), nexthop)) { if (CHECK_FLAG(nexthop->flags, NEXTHOP_FLAG_FIB)) end_count++; } /* If no nexthops or none installed, ensure that this re * gets its 'installed' flag cleared. */ if (end_count == 0) { if (CHECK_FLAG(re->status, ROUTE_ENTRY_INSTALLED)) UNSET_FLAG(re->status, ROUTE_ENTRY_INSTALLED); if (debug_p) zlog_debug( "%s(%u:%u):%pRN dplane notif, uninstalled type %s route", VRF_LOGNAME(vrf), dplane_ctx_get_vrf(ctx), dplane_ctx_get_table(ctx), rn, zebra_route_string( dplane_ctx_get_type(ctx))); } else { /* At least report on the event. */ if (debug_p) zlog_debug( "%s(%u:%u):%pRN dplane notif, but type %s not selected_fib", VRF_LOGNAME(vrf), dplane_ctx_get_vrf(ctx), dplane_ctx_get_table(ctx), rn, zebra_route_string( dplane_ctx_get_type(ctx))); } goto done; } /* We'll want to determine whether the installation status of the * route has changed: we'll check the status before processing, * and then again if there's been a change. */ start_count = 0; if (CHECK_FLAG(re->status, ROUTE_ENTRY_INSTALLED)) start_count = rib_count_installed_nh(re); /* Update zebra's nexthop FIB flags based on the context struct's * nexthops. */ fib_changed = rib_update_re_from_ctx(re, rn, ctx); if (!fib_changed) { if (debug_p) zlog_debug( "%s(%u:%u):%pRN dplane notification: rib_update returns FALSE", VRF_LOGNAME(vrf), dplane_ctx_get_vrf(ctx), dplane_ctx_get_table(ctx), rn); } /* * Perform follow-up work if the actual status of the prefix * changed. */ end_count = rib_count_installed_nh(re); /* Various fib transitions: changed nexthops; from installed to * not-installed; or not-installed to installed. */ if (start_count > 0 && end_count > 0) { if (debug_p) zlog_debug( "%s(%u:%u):%pRN applied nexthop changes from dplane notification", VRF_LOGNAME(vrf), dplane_ctx_get_vrf(ctx), dplane_ctx_get_table(ctx), rn); /* Changed nexthops - update kernel/others */ dplane_route_notif_update(rn, re, DPLANE_OP_ROUTE_UPDATE, ctx); } else if (start_count == 0 && end_count > 0) { if (debug_p) zlog_debug( "%s(%u:%u):%pRN installed transition from dplane notification", VRF_LOGNAME(vrf), dplane_ctx_get_vrf(ctx), dplane_ctx_get_table(ctx), rn); /* We expect this to be the selected route, so we want * to tell others about this transition. */ SET_FLAG(re->status, ROUTE_ENTRY_INSTALLED); /* Changed nexthops - update kernel/others */ dplane_route_notif_update(rn, re, DPLANE_OP_ROUTE_UPDATE, ctx); /* Redistribute, lsp, and nht update */ redistribute_update(rn, re, NULL); } else if (start_count > 0 && end_count == 0) { if (debug_p) zlog_debug( "%s(%u:%u):%pRN un-installed transition from dplane notification", VRF_LOGNAME(vrf), dplane_ctx_get_vrf(ctx), dplane_ctx_get_table(ctx), rn); /* Transition from _something_ installed to _nothing_ * installed. */ /* We expect this to be the selected route, so we want * to tell others about this transistion. */ UNSET_FLAG(re->status, ROUTE_ENTRY_INSTALLED); /* Changed nexthops - update kernel/others */ dplane_route_notif_update(rn, re, DPLANE_OP_ROUTE_DELETE, ctx); /* Redistribute, lsp, and nht update */ redistribute_delete(rn, re, NULL); } /* Make any changes visible for lsp and nexthop-tracking processing */ zebra_rib_evaluate_rn_nexthops(rn, zebra_router_get_next_sequence(), false); zebra_rib_evaluate_mpls(rn); done: if (rn) route_unlock_node(rn); } /* * Process a node from the EVPN/VXLAN subqueue. */ static void process_subq_evpn(struct listnode *lnode) { struct wq_evpn_wrapper *w; /* In general, the list node points to a wrapper object * holding the info necessary to make some update. */ w = listgetdata(lnode); if (!w) return; if (w->type == WQ_EVPN_WRAPPER_TYPE_VRFROUTE) { if (w->add_p) zebra_vxlan_evpn_vrf_route_add(w->vrf_id, &w->macaddr, &w->ip, &w->prefix); else zebra_vxlan_evpn_vrf_route_del(w->vrf_id, &w->ip, &w->prefix); } else if (w->type == WQ_EVPN_WRAPPER_TYPE_REM_ES) { if (w->add_p) zebra_evpn_remote_es_add(&w->esi, w->ip.ipaddr_v4, w->esr_rxed, w->df_alg, w->df_pref); else zebra_evpn_remote_es_del(&w->esi, w->ip.ipaddr_v4); } else if (w->type == WQ_EVPN_WRAPPER_TYPE_REM_MACIP) { uint16_t ipa_len = 0; if (w->ip.ipa_type == IPADDR_V4) ipa_len = IPV4_MAX_BYTELEN; else if (w->ip.ipa_type == IPADDR_V6) ipa_len = IPV6_MAX_BYTELEN; if (w->add_p) zebra_evpn_rem_macip_add(w->vni, &w->macaddr, ipa_len, &w->ip, w->flags, w->seq, w->vtep_ip, &w->esi); else zebra_evpn_rem_macip_del(w->vni, &w->macaddr, ipa_len, &w->ip, w->vtep_ip); } else if (w->type == WQ_EVPN_WRAPPER_TYPE_REM_VTEP) { if (w->add_p) zebra_vxlan_remote_vtep_add(w->vrf_id, w->vni, w->vtep_ip, w->flags); else zebra_vxlan_remote_vtep_del(w->vrf_id, w->vni, w->vtep_ip); } XFREE(MTYPE_WQ_WRAPPER, w); } /* * Process the nexthop-group workqueue subqueue */ static void process_subq_nhg(struct listnode *lnode) { struct nhg_ctx *ctx; struct nhg_hash_entry *nhe, *newnhe; struct wq_nhg_wrapper *w; uint8_t qindex = META_QUEUE_NHG; w = listgetdata(lnode); if (!w) return; /* Two types of object - an update from the local kernel, or * an nhg update from a daemon. */ if (w->type == WQ_NHG_WRAPPER_TYPE_CTX) { ctx = w->u.ctx; if (IS_ZEBRA_DEBUG_RIB_DETAILED) zlog_debug( "NHG Context id=%u dequeued from sub-queue %s", ctx->id, subqueue2str(qindex)); /* Process nexthop group updates coming 'up' from the OS */ nhg_ctx_process(ctx); } else if (w->type == WQ_NHG_WRAPPER_TYPE_NHG) { nhe = w->u.nhe; if (IS_ZEBRA_DEBUG_RIB_DETAILED) zlog_debug("NHG %u dequeued from sub-queue %s", nhe->id, subqueue2str(qindex)); /* Process incoming nhg update, probably from a proto daemon */ newnhe = zebra_nhg_proto_add(nhe->id, nhe->type, nhe->zapi_instance, nhe->zapi_session, &nhe->nhg, 0); /* Report error to daemon via ZAPI */ if (newnhe == NULL) zsend_nhg_notify(nhe->type, nhe->zapi_instance, nhe->zapi_session, nhe->id, ZAPI_NHG_FAIL_INSTALL); /* Free temp nhe - we own that memory. */ zebra_nhg_free(nhe); } XFREE(MTYPE_WQ_WRAPPER, w); } static void process_subq_early_label(struct listnode *lnode) { struct wq_label_wrapper *w = listgetdata(lnode); struct zebra_vrf *zvrf; if (!w) return; zvrf = vrf_info_lookup(w->vrf_id); if (!zvrf) { XFREE(MTYPE_WQ_WRAPPER, w); return; } switch (w->type) { case WQ_LABEL_FTN_UNINSTALL: zebra_mpls_ftn_uninstall(zvrf, w->ltype, &w->p, w->route_type, w->route_instance); break; case WQ_LABEL_LABELS_PROCESS: zebra_mpls_zapi_labels_process(w->add_p, zvrf, &w->zl); break; } XFREE(MTYPE_WQ_WRAPPER, w); } static void process_subq_route(struct listnode *lnode, uint8_t qindex) { struct route_node *rnode = NULL; rib_dest_t *dest = NULL; struct zebra_vrf *zvrf = NULL; rnode = listgetdata(lnode); dest = rib_dest_from_rnode(rnode); assert(dest); zvrf = rib_dest_vrf(dest); rib_process(rnode); if (IS_ZEBRA_DEBUG_RIB_DETAILED) { struct route_entry *re = NULL; /* * rib_process may have freed the dest * as part of the garbage collection. Let's * prevent stupidity from happening. */ dest = rib_dest_from_rnode(rnode); if (dest) re = re_list_first(&dest->routes); zlog_debug("%s(%u:%u):%pRN rn %p dequeued from sub-queue %s", zvrf_name(zvrf), zvrf_id(zvrf), re ? re->table : 0, rnode, rnode, subqueue2str(qindex)); } if (rnode->info) UNSET_FLAG(rib_dest_from_rnode(rnode)->flags, RIB_ROUTE_QUEUED(qindex)); route_unlock_node(rnode); } static void rib_re_nhg_free(struct route_entry *re) { if (re->nhe && re->nhe_id) { assert(re->nhe->id == re->nhe_id); route_entry_update_nhe(re, NULL); } else if (re->nhe && re->nhe->nhg.nexthop) nexthops_free(re->nhe->nhg.nexthop); nexthops_free(re->fib_ng.nexthop); } struct zebra_early_route { afi_t afi; safi_t safi; struct prefix p; struct prefix_ipv6 src_p; bool src_p_provided; struct route_entry *re; struct nhg_hash_entry *re_nhe; bool startup; bool deletion; bool fromkernel; }; static void early_route_memory_free(struct zebra_early_route *ere) { if (ere->re_nhe) zebra_nhg_free(ere->re_nhe); XFREE(MTYPE_RE, ere->re); XFREE(MTYPE_WQ_WRAPPER, ere); } static void process_subq_early_route_add(struct zebra_early_route *ere) { struct route_entry *re = ere->re; struct route_table *table; struct nhg_hash_entry *nhe = NULL; struct route_node *rn; struct route_entry *same = NULL, *first_same = NULL; int same_count = 0; rib_dest_t *dest; /* Lookup table. */ table = zebra_vrf_get_table_with_table_id(ere->afi, ere->safi, re->vrf_id, re->table); if (!table) { early_route_memory_free(ere); return; } if (re->nhe_id > 0) { nhe = zebra_nhg_lookup_id(re->nhe_id); if (!nhe) { /* * We've received from the kernel a nexthop id * that we don't have saved yet. More than likely * it has not been processed and is on the * queue to be processed. Let's stop what we * are doing and cause the meta q to be processed * storing this for later. * * This is being done this way because zebra * runs with the assumption t */ flog_err( EC_ZEBRA_TABLE_LOOKUP_FAILED, "Zebra failed to find the nexthop hash entry for id=%u in a route entry %pFX", re->nhe_id, &ere->p); early_route_memory_free(ere); return; } } else { /* Lookup nhe from route information */ nhe = zebra_nhg_rib_find_nhe(ere->re_nhe, ere->afi); if (!nhe) { char buf2[PREFIX_STRLEN] = ""; flog_err( EC_ZEBRA_TABLE_LOOKUP_FAILED, "Zebra failed to find or create a nexthop hash entry for %pFX%s%s", &ere->p, ere->src_p_provided ? " from " : "", ere->src_p_provided ? prefix2str(&ere->src_p, buf2, sizeof(buf2)) : ""); early_route_memory_free(ere); return; } } /* * Attach the re to the nhe's nexthop group. * * TODO: This will need to change when we start getting IDs from upper * level protocols, as the refcnt might be wrong, since it checks * if old_id != new_id. */ route_entry_update_nhe(re, nhe); /* Make it sure prefixlen is applied to the prefix. */ apply_mask(&ere->p); if (ere->src_p_provided) apply_mask_ipv6(&ere->src_p); /* Set default distance by route type. */ if (re->distance == 0) re->distance = route_distance(re->type); /* Lookup route node.*/ rn = srcdest_rnode_get(table, &ere->p, ere->src_p_provided ? &ere->src_p : NULL); /* * If same type of route are installed, treat it as a implicit * withdraw. If the user has specified the No route replace semantics * for the install don't do a route replace. */ RNODE_FOREACH_RE (rn, same) { if (CHECK_FLAG(same->status, ROUTE_ENTRY_REMOVED)) { same_count++; continue; } /* Compare various route_entry properties */ if (rib_compare_routes(re, same)) { same_count++; if (first_same == NULL) first_same = same; } } same = first_same; if (!ere->startup && (re->flags & ZEBRA_FLAG_SELFROUTE) && zrouter.asic_offloaded) { if (!same) { if (IS_ZEBRA_DEBUG_RIB) zlog_debug( "prefix: %pRN is a self route where we do not have an entry for it. Dropping this update, it's useless", rn); /* * We are not on startup, this is a self route * and we have asic offload. Which means * we are getting a callback for a entry * that was already deleted to the kernel * but an earlier response was just handed * back. Drop it on the floor */ early_route_memory_free(ere); return; } } /* If this route is kernel/connected route, notify the dataplane. */ if (RIB_SYSTEM_ROUTE(re)) { /* Notify dataplane */ dplane_sys_route_add(rn, re); } /* Link new re to node.*/ if (IS_ZEBRA_DEBUG_RIB) { rnode_debug( rn, re->vrf_id, "Inserting route rn %p, re %p (%s) existing %p, same_count %d", rn, re, zebra_route_string(re->type), same, same_count); if (IS_ZEBRA_DEBUG_RIB_DETAILED) route_entry_dump( &ere->p, ere->src_p_provided ? &ere->src_p : NULL, re); } SET_FLAG(re->status, ROUTE_ENTRY_CHANGED); rib_addnode(rn, re, 1); /* Free implicit route.*/ if (same) rib_delnode(rn, same); /* See if we can remove some RE entries that are queued for * removal, but won't be considered in rib processing. */ dest = rib_dest_from_rnode(rn); RNODE_FOREACH_RE_SAFE (rn, re, same) { if (CHECK_FLAG(re->status, ROUTE_ENTRY_REMOVED)) { /* If the route was used earlier, must retain it. */ if (dest && re == dest->selected_fib) continue; if (IS_ZEBRA_DEBUG_RIB) rnode_debug(rn, re->vrf_id, "rn %p, removing unneeded re %p", rn, re); rib_unlink(rn, re); } } route_unlock_node(rn); if (ere->re_nhe) zebra_nhg_free(ere->re_nhe); XFREE(MTYPE_WQ_WRAPPER, ere); } static void process_subq_early_route_delete(struct zebra_early_route *ere) { struct route_table *table; struct route_node *rn; struct route_entry *re; struct route_entry *fib = NULL; struct route_entry *same = NULL; struct nexthop *rtnh; char buf2[INET6_ADDRSTRLEN]; rib_dest_t *dest; if (ere->src_p_provided) assert(!ere->src_p.prefixlen || ere->afi == AFI_IP6); /* Lookup table. */ table = zebra_vrf_lookup_table_with_table_id( ere->afi, ere->safi, ere->re->vrf_id, ere->re->table); if (!table) { early_route_memory_free(ere); return; } /* Apply mask. */ apply_mask(&ere->p); if (ere->src_p_provided) apply_mask_ipv6(&ere->src_p); /* Lookup route node. */ rn = srcdest_rnode_lookup(table, &ere->p, ere->src_p_provided ? &ere->src_p : NULL); if (!rn) { if (IS_ZEBRA_DEBUG_RIB) { char src_buf[PREFIX_STRLEN]; struct vrf *vrf = vrf_lookup_by_id(ere->re->vrf_id); if (ere->src_p_provided && ere->src_p.prefixlen) prefix2str(&ere->src_p, src_buf, sizeof(src_buf)); else src_buf[0] = '\0'; zlog_debug("%s[%d]:%pRN%s%s doesn't exist in rib", vrf->name, ere->re->table, rn, (src_buf[0] != '\0') ? " from " : "", src_buf); } early_route_memory_free(ere); return; } dest = rib_dest_from_rnode(rn); fib = dest->selected_fib; struct nexthop *nh = NULL; if (ere->re->nhe) nh = ere->re->nhe->nhg.nexthop; /* Lookup same type route. */ RNODE_FOREACH_RE (rn, re) { if (CHECK_FLAG(re->status, ROUTE_ENTRY_REMOVED)) continue; if (re->type != ere->re->type) continue; if (re->instance != ere->re->instance) continue; if (CHECK_FLAG(re->flags, ZEBRA_FLAG_RR_USE_DISTANCE) && ere->re->distance != re->distance) continue; if (re->type == ZEBRA_ROUTE_KERNEL && re->metric != ere->re->metric) continue; if (re->type == ZEBRA_ROUTE_CONNECT && (rtnh = nh) && rtnh->type == NEXTHOP_TYPE_IFINDEX && nh) { if (rtnh->ifindex != nh->ifindex) continue; same = re; break; } /* Make sure that the route found has the same gateway. */ if (ere->re->nhe_id && re->nhe_id == ere->re->nhe_id) { same = re; break; } if (nh == NULL) { same = re; break; } for (ALL_NEXTHOPS(re->nhe->nhg, rtnh)) { /* * No guarantee all kernel send nh with labels * on delete. */ if (nexthop_same_no_labels(rtnh, nh)) { same = re; break; } } if (same) break; } /* * If same type of route can't be found and this message is from * kernel. */ if (!same) { /* * In the past(HA!) we could get here because * we were receiving a route delete from the * kernel and we're not marking the proto * as coming from it's appropriate originator. * Now that we are properly noticing the fact * that the kernel has deleted our route we * are not going to get called in this path * I am going to leave this here because * this might still work this way on non-linux * platforms as well as some weird state I have * not properly thought of yet. * If we can show that this code path is * dead then we can remove it. */ if (fib && CHECK_FLAG(ere->re->flags, ZEBRA_FLAG_SELFROUTE)) { if (IS_ZEBRA_DEBUG_RIB) { rnode_debug( rn, ere->re->vrf_id, "rn %p, re %p (%s) was deleted from kernel, adding", rn, fib, zebra_route_string(fib->type)); } if (zrouter.allow_delete || CHECK_FLAG(dest->flags, RIB_ROUTE_ANY_QUEUED)) { UNSET_FLAG(fib->status, ROUTE_ENTRY_INSTALLED); /* Unset flags. */ for (rtnh = fib->nhe->nhg.nexthop; rtnh; rtnh = rtnh->next) UNSET_FLAG(rtnh->flags, NEXTHOP_FLAG_FIB); /* * This is a non FRR route * as such we should mark * it as deleted */ dest->selected_fib = NULL; } else { /* * This means someone else, other than Zebra, * has deleted a Zebra router from the kernel. * We will add it back */ rib_install_kernel(rn, fib, NULL); } } else { if (IS_ZEBRA_DEBUG_RIB) { if (nh) rnode_debug( rn, ere->re->vrf_id, "via %s ifindex %d type %d doesn't exist in rib", inet_ntop(afi2family(ere->afi), &nh->gate, buf2, sizeof(buf2)), nh->ifindex, ere->re->type); else rnode_debug( rn, ere->re->vrf_id, "type %d doesn't exist in rib", ere->re->type); } route_unlock_node(rn); early_route_memory_free(ere); return; } } if (same) { struct nexthop *tmp_nh; if (ere->fromkernel && CHECK_FLAG(ere->re->flags, ZEBRA_FLAG_SELFROUTE) && !zrouter.allow_delete) { rib_install_kernel(rn, same, NULL); route_unlock_node(rn); early_route_memory_free(ere); return; } /* Special handling for IPv4 or IPv6 routes sourced from * EVPN - the nexthop (and associated MAC) need to be * uninstalled if no more refs. */ for (ALL_NEXTHOPS(re->nhe->nhg, tmp_nh)) { struct ipaddr vtep_ip; if (CHECK_FLAG(tmp_nh->flags, NEXTHOP_FLAG_EVPN)) { memset(&vtep_ip, 0, sizeof(struct ipaddr)); if (ere->afi == AFI_IP) { vtep_ip.ipa_type = IPADDR_V4; memcpy(&(vtep_ip.ipaddr_v4), &(tmp_nh->gate.ipv4), sizeof(struct in_addr)); } else { vtep_ip.ipa_type = IPADDR_V6; memcpy(&(vtep_ip.ipaddr_v6), &(tmp_nh->gate.ipv6), sizeof(struct in6_addr)); } zebra_rib_queue_evpn_route_del( re->vrf_id, &vtep_ip, &ere->p); } } /* Notify dplane if system route changes */ if (RIB_SYSTEM_ROUTE(re)) dplane_sys_route_del(rn, same); rib_delnode(rn, same); } route_unlock_node(rn); early_route_memory_free(ere); } /* * When FRR receives a route we need to match the route up to * nexthop groups. That we also may have just received * place the data on this queue so that this work of finding * the nexthop group entries for the route entry is always * done after the nexthop group has had a chance to be processed */ static void process_subq_early_route(struct listnode *lnode) { struct zebra_early_route *ere = listgetdata(lnode); if (ere->deletion) process_subq_early_route_delete(ere); else process_subq_early_route_add(ere); } /* * Examine the specified subqueue; process one entry and return 1 if * there is a node, return 0 otherwise. */ static unsigned int process_subq(struct list *subq, enum meta_queue_indexes qindex) { struct listnode *lnode = listhead(subq); if (!lnode) return 0; switch (qindex) { case META_QUEUE_EVPN: process_subq_evpn(lnode); break; case META_QUEUE_NHG: process_subq_nhg(lnode); break; case META_QUEUE_EARLY_ROUTE: process_subq_early_route(lnode); break; case META_QUEUE_EARLY_LABEL: process_subq_early_label(lnode); break; case META_QUEUE_CONNECTED: case META_QUEUE_KERNEL: case META_QUEUE_STATIC: case META_QUEUE_NOTBGP: case META_QUEUE_BGP: case META_QUEUE_OTHER: process_subq_route(lnode, qindex); break; } list_delete_node(subq, lnode); return 1; } /* Dispatch the meta queue by picking and processing the next node from * a non-empty sub-queue with lowest priority. wq is equal to zebra->ribq and * data is pointed to the meta queue structure. */ static wq_item_status meta_queue_process(struct work_queue *dummy, void *data) { struct meta_queue *mq = data; unsigned i; uint32_t queue_len, queue_limit; /* Ensure there's room for more dataplane updates */ queue_limit = dplane_get_in_queue_limit(); queue_len = dplane_get_in_queue_len(); if (queue_len > queue_limit) { if (IS_ZEBRA_DEBUG_RIB_DETAILED) zlog_debug( "rib queue: dplane queue len %u, limit %u, retrying", queue_len, queue_limit); /* Ensure that the meta-queue is actually enqueued */ if (work_queue_empty(zrouter.ribq)) work_queue_add(zrouter.ribq, zrouter.mq); return WQ_QUEUE_BLOCKED; } for (i = 0; i < MQ_SIZE; i++) if (process_subq(mq->subq[i], i)) { mq->size--; break; } return mq->size ? WQ_REQUEUE : WQ_SUCCESS; } /* * Look into the RN and queue it into the highest priority queue * at this point in time for processing. * * We will enqueue a route node only once per invocation. * * There are two possibilities here that should be kept in mind. * If the original invocation has not been pulled off for processing * yet, A subsuquent invocation can have a route entry with a better * meta queue index value and we can have a situation where * we might have the same node enqueued 2 times. Not necessarily * an optimal situation but it should be ok. * * The other possibility is that the original invocation has not * been pulled off for processing yet, A subsusquent invocation * doesn't have a route_entry with a better meta-queue and the * original metaqueue index value will win and we'll end up with * the route node enqueued once. */ static int rib_meta_queue_add(struct meta_queue *mq, void *data) { struct route_node *rn = NULL; struct route_entry *re = NULL, *curr_re = NULL; uint8_t qindex = MQ_SIZE, curr_qindex = MQ_SIZE; rn = (struct route_node *)data; RNODE_FOREACH_RE (rn, curr_re) { curr_qindex = route_info[curr_re->type].meta_q_map; if (curr_qindex <= qindex) { re = curr_re; qindex = curr_qindex; } } if (!re) return -1; /* Invariant: at this point we always have rn->info set. */ if (CHECK_FLAG(rib_dest_from_rnode(rn)->flags, RIB_ROUTE_QUEUED(qindex))) { if (IS_ZEBRA_DEBUG_RIB_DETAILED) rnode_debug(rn, re->vrf_id, "rn %p is already queued in sub-queue %s", (void *)rn, subqueue2str(qindex)); return -1; } SET_FLAG(rib_dest_from_rnode(rn)->flags, RIB_ROUTE_QUEUED(qindex)); listnode_add(mq->subq[qindex], rn); route_lock_node(rn); mq->size++; if (IS_ZEBRA_DEBUG_RIB_DETAILED) rnode_debug(rn, re->vrf_id, "queued rn %p into sub-queue %s", (void *)rn, subqueue2str(qindex)); return 0; } static int early_label_meta_queue_add(struct meta_queue *mq, void *data) { listnode_add(mq->subq[META_QUEUE_EARLY_LABEL], data); mq->size++; return 0; } static int rib_meta_queue_nhg_ctx_add(struct meta_queue *mq, void *data) { struct nhg_ctx *ctx = NULL; uint8_t qindex = META_QUEUE_NHG; struct wq_nhg_wrapper *w; ctx = (struct nhg_ctx *)data; if (!ctx) return -1; w = XCALLOC(MTYPE_WQ_WRAPPER, sizeof(struct wq_nhg_wrapper)); w->type = WQ_NHG_WRAPPER_TYPE_CTX; w->u.ctx = ctx; listnode_add(mq->subq[qindex], w); mq->size++; if (IS_ZEBRA_DEBUG_RIB_DETAILED) zlog_debug("NHG Context id=%u queued into sub-queue %s", ctx->id, subqueue2str(qindex)); return 0; } static int rib_meta_queue_nhg_add(struct meta_queue *mq, void *data) { struct nhg_hash_entry *nhe = NULL; uint8_t qindex = META_QUEUE_NHG; struct wq_nhg_wrapper *w; nhe = (struct nhg_hash_entry *)data; if (!nhe) return -1; w = XCALLOC(MTYPE_WQ_WRAPPER, sizeof(struct wq_nhg_wrapper)); w->type = WQ_NHG_WRAPPER_TYPE_NHG; w->u.nhe = nhe; listnode_add(mq->subq[qindex], w); mq->size++; if (IS_ZEBRA_DEBUG_RIB_DETAILED) zlog_debug("NHG id=%u queued into sub-queue %s", nhe->id, subqueue2str(qindex)); return 0; } static int rib_meta_queue_evpn_add(struct meta_queue *mq, void *data) { listnode_add(mq->subq[META_QUEUE_EVPN], data); mq->size++; return 0; } static int mq_add_handler(void *data, int (*mq_add_func)(struct meta_queue *mq, void *data)) { if (zrouter.ribq == NULL) { flog_err(EC_ZEBRA_WQ_NONEXISTENT, "%s: work_queue does not exist!", __func__); return -1; } /* * The RIB queue should normally be either empty or holding the only * work_queue_item element. In the latter case this element would * hold a pointer to the meta queue structure, which must be used to * actually queue the route nodes to process. So create the MQ * holder, if necessary, then push the work into it in any case. * This semantics was introduced after 0.99.9 release. */ if (work_queue_empty(zrouter.ribq)) work_queue_add(zrouter.ribq, zrouter.mq); return mq_add_func(zrouter.mq, data); } void mpls_ftn_uninstall(struct zebra_vrf *zvrf, enum lsp_types_t type, struct prefix *prefix, uint8_t route_type, uint8_t route_instance) { struct wq_label_wrapper *w; w = XCALLOC(MTYPE_WQ_WRAPPER, sizeof(struct wq_label_wrapper)); w->type = WQ_LABEL_FTN_UNINSTALL; w->vrf_id = zvrf->vrf->vrf_id; w->p = *prefix; w->ltype = type; w->route_type = route_type; w->route_instance = route_instance; if (IS_ZEBRA_DEBUG_RIB_DETAILED) zlog_debug("Early Label Handling for %pFX", prefix); mq_add_handler(w, early_label_meta_queue_add); } void mpls_zapi_labels_process(bool add_p, struct zebra_vrf *zvrf, const struct zapi_labels *zl) { struct wq_label_wrapper *w; w = XCALLOC(MTYPE_WQ_WRAPPER, sizeof(struct wq_label_wrapper)); w->type = WQ_LABEL_LABELS_PROCESS; w->vrf_id = zvrf->vrf->vrf_id; w->add_p = add_p; w->zl = *zl; if (IS_ZEBRA_DEBUG_RIB_DETAILED) zlog_debug("Early Label Handling: Labels Process"); mq_add_handler(w, early_label_meta_queue_add); } /* Add route_node to work queue and schedule processing */ int rib_queue_add(struct route_node *rn) { assert(rn); /* Pointless to queue a route_node with no RIB entries to add or remove */ if (!rnode_to_ribs(rn)) { zlog_debug("%s: called for route_node (%p, %u) with no ribs", __func__, (void *)rn, route_node_get_lock_count(rn)); zlog_backtrace(LOG_DEBUG); return -1; } return mq_add_handler(rn, rib_meta_queue_add); } /* * Enqueue incoming nhg info from OS for processing */ int rib_queue_nhg_ctx_add(struct nhg_ctx *ctx) { assert(ctx); return mq_add_handler(ctx, rib_meta_queue_nhg_ctx_add); } /* * Enqueue incoming nhg from proto daemon for processing */ int rib_queue_nhe_add(struct nhg_hash_entry *nhe) { if (nhe == NULL) return -1; return mq_add_handler(nhe, rib_meta_queue_nhg_add); } /* * Enqueue evpn route for processing */ int zebra_rib_queue_evpn_route_add(vrf_id_t vrf_id, const struct ethaddr *rmac, const struct ipaddr *vtep_ip, const struct prefix *host_prefix) { struct wq_evpn_wrapper *w; w = XCALLOC(MTYPE_WQ_WRAPPER, sizeof(struct wq_evpn_wrapper)); w->type = WQ_EVPN_WRAPPER_TYPE_VRFROUTE; w->add_p = true; w->vrf_id = vrf_id; w->macaddr = *rmac; w->ip = *vtep_ip; w->prefix = *host_prefix; if (IS_ZEBRA_DEBUG_RIB_DETAILED) zlog_debug("%s: (%u)%pIA, host prefix %pFX enqueued", __func__, vrf_id, vtep_ip, host_prefix); return mq_add_handler(w, rib_meta_queue_evpn_add); } int zebra_rib_queue_evpn_route_del(vrf_id_t vrf_id, const struct ipaddr *vtep_ip, const struct prefix *host_prefix) { struct wq_evpn_wrapper *w; w = XCALLOC(MTYPE_WQ_WRAPPER, sizeof(struct wq_evpn_wrapper)); w->type = WQ_EVPN_WRAPPER_TYPE_VRFROUTE; w->add_p = false; w->vrf_id = vrf_id; w->ip = *vtep_ip; w->prefix = *host_prefix; if (IS_ZEBRA_DEBUG_RIB_DETAILED) zlog_debug("%s: (%u)%pIA, host prefix %pFX enqueued", __func__, vrf_id, vtep_ip, host_prefix); return mq_add_handler(w, rib_meta_queue_evpn_add); } /* Enqueue EVPN remote ES for processing */ int zebra_rib_queue_evpn_rem_es_add(const esi_t *esi, const struct in_addr *vtep_ip, bool esr_rxed, uint8_t df_alg, uint16_t df_pref) { struct wq_evpn_wrapper *w; char buf[ESI_STR_LEN]; w = XCALLOC(MTYPE_WQ_WRAPPER, sizeof(struct wq_evpn_wrapper)); w->type = WQ_EVPN_WRAPPER_TYPE_REM_ES; w->add_p = true; w->esi = *esi; w->ip.ipa_type = IPADDR_V4; w->ip.ipaddr_v4 = *vtep_ip; w->esr_rxed = esr_rxed; w->df_alg = df_alg; w->df_pref = df_pref; if (IS_ZEBRA_DEBUG_RIB_DETAILED) zlog_debug("%s: vtep %pI4, esi %s enqueued", __func__, vtep_ip, esi_to_str(esi, buf, sizeof(buf))); return mq_add_handler(w, rib_meta_queue_evpn_add); } int zebra_rib_queue_evpn_rem_es_del(const esi_t *esi, const struct in_addr *vtep_ip) { struct wq_evpn_wrapper *w; char buf[ESI_STR_LEN]; w = XCALLOC(MTYPE_WQ_WRAPPER, sizeof(struct wq_evpn_wrapper)); w->type = WQ_EVPN_WRAPPER_TYPE_REM_ES; w->add_p = false; w->esi = *esi; w->ip.ipa_type = IPADDR_V4; w->ip.ipaddr_v4 = *vtep_ip; if (IS_ZEBRA_DEBUG_RIB_DETAILED) { if (memcmp(esi, zero_esi, sizeof(esi_t)) != 0) esi_to_str(esi, buf, sizeof(buf)); else strlcpy(buf, "-", sizeof(buf)); zlog_debug("%s: vtep %pI4, esi %s enqueued", __func__, vtep_ip, buf); } return mq_add_handler(w, rib_meta_queue_evpn_add); } /* * Enqueue EVPN remote macip update for processing */ int zebra_rib_queue_evpn_rem_macip_add(vni_t vni, const struct ethaddr *macaddr, const struct ipaddr *ipaddr, uint8_t flags, uint32_t seq, struct in_addr vtep_ip, const esi_t *esi) { struct wq_evpn_wrapper *w; char buf[ESI_STR_LEN]; w = XCALLOC(MTYPE_WQ_WRAPPER, sizeof(struct wq_evpn_wrapper)); w->type = WQ_EVPN_WRAPPER_TYPE_REM_MACIP; w->add_p = true; w->vni = vni; w->macaddr = *macaddr; w->ip = *ipaddr; w->flags = flags; w->seq = seq; w->vtep_ip = vtep_ip; w->esi = *esi; if (IS_ZEBRA_DEBUG_RIB_DETAILED) { if (memcmp(esi, zero_esi, sizeof(esi_t)) != 0) esi_to_str(esi, buf, sizeof(buf)); else strlcpy(buf, "-", sizeof(buf)); zlog_debug("%s: mac %pEA, vtep %pI4, esi %s enqueued", __func__, macaddr, &vtep_ip, buf); } return mq_add_handler(w, rib_meta_queue_evpn_add); } int zebra_rib_queue_evpn_rem_macip_del(vni_t vni, const struct ethaddr *macaddr, const struct ipaddr *ip, struct in_addr vtep_ip) { struct wq_evpn_wrapper *w; w = XCALLOC(MTYPE_WQ_WRAPPER, sizeof(struct wq_evpn_wrapper)); w->type = WQ_EVPN_WRAPPER_TYPE_REM_MACIP; w->add_p = false; w->vni = vni; w->macaddr = *macaddr; w->ip = *ip; w->vtep_ip = vtep_ip; if (IS_ZEBRA_DEBUG_RIB_DETAILED) zlog_debug("%s: mac %pEA, vtep %pI4 enqueued", __func__, macaddr, &vtep_ip); return mq_add_handler(w, rib_meta_queue_evpn_add); } /* * Enqueue remote VTEP address for processing */ int zebra_rib_queue_evpn_rem_vtep_add(vrf_id_t vrf_id, vni_t vni, struct in_addr vtep_ip, int flood_control) { struct wq_evpn_wrapper *w; w = XCALLOC(MTYPE_WQ_WRAPPER, sizeof(struct wq_evpn_wrapper)); w->type = WQ_EVPN_WRAPPER_TYPE_REM_VTEP; w->add_p = true; w->vrf_id = vrf_id; w->vni = vni; w->vtep_ip = vtep_ip; w->flags = flood_control; if (IS_ZEBRA_DEBUG_RIB_DETAILED) zlog_debug("%s: vrf %u, vtep %pI4 enqueued", __func__, vrf_id, &vtep_ip); return mq_add_handler(w, rib_meta_queue_evpn_add); } int zebra_rib_queue_evpn_rem_vtep_del(vrf_id_t vrf_id, vni_t vni, struct in_addr vtep_ip) { struct wq_evpn_wrapper *w; w = XCALLOC(MTYPE_WQ_WRAPPER, sizeof(struct wq_evpn_wrapper)); w->type = WQ_EVPN_WRAPPER_TYPE_REM_VTEP; w->add_p = false; w->vrf_id = vrf_id; w->vni = vni; w->vtep_ip = vtep_ip; if (IS_ZEBRA_DEBUG_RIB_DETAILED) zlog_debug("%s: vrf %u, vtep %pI4 enqueued", __func__, vrf_id, &vtep_ip); return mq_add_handler(w, rib_meta_queue_evpn_add); } /* Create new meta queue. A destructor function doesn't seem to be necessary here. */ static struct meta_queue *meta_queue_new(void) { struct meta_queue *new; unsigned i; new = XCALLOC(MTYPE_WORK_QUEUE, sizeof(struct meta_queue)); for (i = 0; i < MQ_SIZE; i++) { new->subq[i] = list_new(); assert(new->subq[i]); } return new; } /* Clean up the EVPN meta-queue list */ static void evpn_meta_queue_free(struct meta_queue *mq, struct list *l, struct zebra_vrf *zvrf) { struct listnode *node, *nnode; struct wq_evpn_wrapper *w; /* Free the node wrapper object, and the struct it wraps */ for (ALL_LIST_ELEMENTS(l, node, nnode, w)) { if (zvrf) { vrf_id_t vrf_id = zvrf->vrf->vrf_id; if (w->vrf_id != vrf_id) continue; } node->data = NULL; XFREE(MTYPE_WQ_WRAPPER, w); list_delete_node(l, node); mq->size--; } } /* Clean up the nhg meta-queue list */ static void nhg_meta_queue_free(struct meta_queue *mq, struct list *l, struct zebra_vrf *zvrf) { struct wq_nhg_wrapper *w; struct listnode *node, *nnode; /* Free the node wrapper object, and the struct it wraps */ for (ALL_LIST_ELEMENTS(l, node, nnode, w)) { if (zvrf) { vrf_id_t vrf_id = zvrf->vrf->vrf_id; if (w->type == WQ_NHG_WRAPPER_TYPE_CTX && w->u.ctx->vrf_id != vrf_id) continue; else if (w->type == WQ_NHG_WRAPPER_TYPE_NHG && w->u.nhe->vrf_id != vrf_id) continue; } if (w->type == WQ_NHG_WRAPPER_TYPE_CTX) nhg_ctx_free(&w->u.ctx); else if (w->type == WQ_NHG_WRAPPER_TYPE_NHG) zebra_nhg_free(w->u.nhe); node->data = NULL; XFREE(MTYPE_WQ_WRAPPER, w); list_delete_node(l, node); mq->size--; } } static void early_label_meta_queue_free(struct meta_queue *mq, struct list *l, struct zebra_vrf *zvrf) { struct wq_label_wrapper *w; struct listnode *node, *nnode; for (ALL_LIST_ELEMENTS(l, node, nnode, w)) { if (zvrf && zvrf->vrf->vrf_id != w->vrf_id) continue; switch (w->type) { case WQ_LABEL_FTN_UNINSTALL: case WQ_LABEL_LABELS_PROCESS: break; } node->data = NULL; XFREE(MTYPE_WQ_WRAPPER, w); list_delete_node(l, node); mq->size--; } } static void rib_meta_queue_free(struct meta_queue *mq, struct list *l, struct zebra_vrf *zvrf) { struct route_node *rnode; struct listnode *node, *nnode; for (ALL_LIST_ELEMENTS(l, node, nnode, rnode)) { rib_dest_t *dest = rib_dest_from_rnode(rnode); if (dest && rib_dest_vrf(dest) != zvrf) continue; route_unlock_node(rnode); node->data = NULL; list_delete_node(l, node); mq->size--; } } static void early_route_meta_queue_free(struct meta_queue *mq, struct list *l, struct zebra_vrf *zvrf) { struct zebra_early_route *zer; struct listnode *node, *nnode; for (ALL_LIST_ELEMENTS(l, node, nnode, zer)) { if (zvrf && zer->re->vrf_id != zvrf->vrf->vrf_id) continue; XFREE(MTYPE_RE, zer); node->data = NULL; list_delete_node(l, node); mq->size--; } } void meta_queue_free(struct meta_queue *mq, struct zebra_vrf *zvrf) { enum meta_queue_indexes i; for (i = 0; i < MQ_SIZE; i++) { /* Some subqueues may need cleanup - nhgs for example */ switch (i) { case META_QUEUE_NHG: nhg_meta_queue_free(mq, mq->subq[i], zvrf); break; case META_QUEUE_EVPN: evpn_meta_queue_free(mq, mq->subq[i], zvrf); break; case META_QUEUE_EARLY_ROUTE: early_route_meta_queue_free(mq, mq->subq[i], zvrf); break; case META_QUEUE_EARLY_LABEL: early_label_meta_queue_free(mq, mq->subq[i], zvrf); break; case META_QUEUE_CONNECTED: case META_QUEUE_KERNEL: case META_QUEUE_STATIC: case META_QUEUE_NOTBGP: case META_QUEUE_BGP: case META_QUEUE_OTHER: rib_meta_queue_free(mq, mq->subq[i], zvrf); break; } if (!zvrf) list_delete(&mq->subq[i]); } if (!zvrf) XFREE(MTYPE_WORK_QUEUE, mq); } /* initialise zebra rib work queue */ static void rib_queue_init(void) { if (!(zrouter.ribq = work_queue_new(zrouter.master, "route_node processing"))) { flog_err(EC_ZEBRA_WQ_NONEXISTENT, "%s: could not initialise work queue!", __func__); return; } /* fill in the work queue spec */ zrouter.ribq->spec.workfunc = &meta_queue_process; zrouter.ribq->spec.errorfunc = NULL; zrouter.ribq->spec.completion_func = NULL; /* XXX: TODO: These should be runtime configurable via vty */ zrouter.ribq->spec.max_retries = 3; zrouter.ribq->spec.hold = ZEBRA_RIB_PROCESS_HOLD_TIME; zrouter.ribq->spec.retry = ZEBRA_RIB_PROCESS_RETRY_TIME; if (!(zrouter.mq = meta_queue_new())) { flog_err(EC_ZEBRA_WQ_NONEXISTENT, "%s: could not initialise meta queue!", __func__); return; } return; } rib_dest_t *zebra_rib_create_dest(struct route_node *rn) { rib_dest_t *dest; dest = XCALLOC(MTYPE_RIB_DEST, sizeof(rib_dest_t)); rnh_list_init(&dest->nht); re_list_init(&dest->routes); route_lock_node(rn); /* rn route table reference */ rn->info = dest; dest->rnode = rn; return dest; } /* RIB updates are processed via a queue of pointers to route_nodes. * * The queue length is bounded by the maximal size of the routing table, * as a route_node will not be requeued, if already queued. * * REs are submitted via rib_addnode or rib_delnode which set minimal * state, or static_install_route (when an existing RE is updated) * and then submit route_node to queue for best-path selection later. * Order of add/delete state changes are preserved for any given RE. * * Deleted REs are reaped during best-path selection. * * rib_addnode * |-> rib_link or unset ROUTE_ENTRY_REMOVE |->Update kernel with * |-------->| | best RE, if required * | | * static_install->|->rib_addqueue...... -> rib_process * | | * |-------->| |-> rib_unlink * |-> set ROUTE_ENTRY_REMOVE | * rib_delnode (RE freed) * * The 'info' pointer of a route_node points to a rib_dest_t * ('dest'). Queueing state for a route_node is kept on the dest. The * dest is created on-demand by rib_link() and is kept around at least * as long as there are ribs hanging off it (@see rib_gc_dest()). * * Refcounting (aka "locking" throughout the Zebra and FRR code): * * - route_nodes: refcounted by: * - dest attached to route_node: * - managed by: rib_link/rib_gc_dest * - route_node processing queue * - managed by: rib_addqueue, rib_process. * */ /* Add RE to head of the route node. */ static void rib_link(struct route_node *rn, struct route_entry *re, int process) { rib_dest_t *dest; afi_t afi; const char *rmap_name; assert(re && rn); dest = rib_dest_from_rnode(rn); if (!dest) { if (IS_ZEBRA_DEBUG_RIB_DETAILED) rnode_debug(rn, re->vrf_id, "rn %p adding dest", rn); dest = zebra_rib_create_dest(rn); } re_list_add_head(&dest->routes, re); afi = (rn->p.family == AF_INET) ? AFI_IP : (rn->p.family == AF_INET6) ? AFI_IP6 : AFI_MAX; if (is_zebra_import_table_enabled(afi, re->vrf_id, re->table)) { struct zebra_vrf *zvrf = zebra_vrf_lookup_by_id(re->vrf_id); rmap_name = zebra_get_import_table_route_map(afi, re->table); zebra_add_import_table_entry(zvrf, rn, re, rmap_name); } if (process) rib_queue_add(rn); } static void rib_addnode(struct route_node *rn, struct route_entry *re, int process) { /* RE node has been un-removed before route-node is processed. * route_node must hence already be on the queue for processing.. */ if (CHECK_FLAG(re->status, ROUTE_ENTRY_REMOVED)) { if (IS_ZEBRA_DEBUG_RIB) rnode_debug(rn, re->vrf_id, "rn %p, un-removed re %p", (void *)rn, (void *)re); UNSET_FLAG(re->status, ROUTE_ENTRY_REMOVED); return; } rib_link(rn, re, process); } /* * rib_unlink * * Detach a rib structure from a route_node. * * Note that a call to rib_unlink() should be followed by a call to * rib_gc_dest() at some point. This allows a rib_dest_t that is no * longer required to be deleted. */ void rib_unlink(struct route_node *rn, struct route_entry *re) { rib_dest_t *dest; assert(rn && re); if (IS_ZEBRA_DEBUG_RIB) rnode_debug(rn, re->vrf_id, "rn %p, re %p", (void *)rn, (void *)re); dest = rib_dest_from_rnode(rn); re_list_del(&dest->routes, re); if (dest->selected_fib == re) dest->selected_fib = NULL; rib_re_nhg_free(re); zapi_re_opaque_free(re->opaque); XFREE(MTYPE_RE, re); } void rib_delnode(struct route_node *rn, struct route_entry *re) { afi_t afi; if (IS_ZEBRA_DEBUG_RIB) rnode_debug(rn, re->vrf_id, "rn %p, re %p, removing", (void *)rn, (void *)re); SET_FLAG(re->status, ROUTE_ENTRY_REMOVED); afi = (rn->p.family == AF_INET) ? AFI_IP : (rn->p.family == AF_INET6) ? AFI_IP6 : AFI_MAX; if (is_zebra_import_table_enabled(afi, re->vrf_id, re->table)) { struct zebra_vrf *zvrf = zebra_vrf_lookup_by_id(re->vrf_id); zebra_del_import_table_entry(zvrf, rn, re); /* Just clean up if non main table */ if (IS_ZEBRA_DEBUG_RIB) zlog_debug("%s(%u):%pRN: Freeing route rn %p, re %p (%s)", vrf_id_to_name(re->vrf_id), re->vrf_id, rn, rn, re, zebra_route_string(re->type)); } rib_queue_add(rn); } /* * Helper that debugs a single nexthop within a route-entry */ static void _route_entry_dump_nh(const struct route_entry *re, const char *straddr, const struct nexthop *nexthop) { char nhname[PREFIX_STRLEN]; char backup_str[50]; char wgt_str[50]; char temp_str[10]; char label_str[MPLS_LABEL_STRLEN]; int i; struct interface *ifp; struct vrf *vrf = vrf_lookup_by_id(nexthop->vrf_id); switch (nexthop->type) { case NEXTHOP_TYPE_BLACKHOLE: snprintf(nhname, sizeof(nhname), "Blackhole"); break; case NEXTHOP_TYPE_IFINDEX: ifp = if_lookup_by_index(nexthop->ifindex, nexthop->vrf_id); snprintf(nhname, sizeof(nhname), "%s", ifp ? ifp->name : "Unknown"); break; case NEXTHOP_TYPE_IPV4: /* fallthrough */ case NEXTHOP_TYPE_IPV4_IFINDEX: inet_ntop(AF_INET, &nexthop->gate, nhname, INET6_ADDRSTRLEN); break; case NEXTHOP_TYPE_IPV6: case NEXTHOP_TYPE_IPV6_IFINDEX: inet_ntop(AF_INET6, &nexthop->gate, nhname, INET6_ADDRSTRLEN); break; } /* Label stack */ label_str[0] = '\0'; if (nexthop->nh_label && nexthop->nh_label->num_labels > 0) { mpls_label2str(nexthop->nh_label->num_labels, nexthop->nh_label->label, label_str, sizeof(label_str), 0 /*pretty*/); strlcat(label_str, ", ", sizeof(label_str)); } backup_str[0] = '\0'; if (CHECK_FLAG(nexthop->flags, NEXTHOP_FLAG_HAS_BACKUP)) { snprintf(backup_str, sizeof(backup_str), "backup "); for (i = 0; i < nexthop->backup_num; i++) { snprintf(temp_str, sizeof(temp_str), "%d, ", nexthop->backup_idx[i]); strlcat(backup_str, temp_str, sizeof(backup_str)); } } wgt_str[0] = '\0'; if (nexthop->weight) snprintf(wgt_str, sizeof(wgt_str), "wgt %d,", nexthop->weight); zlog_debug("%s: %s %s[%u] %svrf %s(%u) %s%s with flags %s%s%s%s%s%s%s%s%s", straddr, (nexthop->rparent ? " NH" : "NH"), nhname, nexthop->ifindex, label_str, vrf ? vrf->name : "Unknown", nexthop->vrf_id, wgt_str, backup_str, (CHECK_FLAG(nexthop->flags, NEXTHOP_FLAG_ACTIVE) ? "ACTIVE " : ""), (CHECK_FLAG(re->status, ROUTE_ENTRY_INSTALLED) ? "FIB " : ""), (CHECK_FLAG(nexthop->flags, NEXTHOP_FLAG_RECURSIVE) ? "RECURSIVE " : ""), (CHECK_FLAG(nexthop->flags, NEXTHOP_FLAG_ONLINK) ? "ONLINK " : ""), (CHECK_FLAG(nexthop->flags, NEXTHOP_FLAG_DUPLICATE) ? "DUPLICATE " : ""), (CHECK_FLAG(nexthop->flags, NEXTHOP_FLAG_RNH_FILTERED) ? "FILTERED " : ""), (CHECK_FLAG(nexthop->flags, NEXTHOP_FLAG_HAS_BACKUP) ? "BACKUP " : ""), (CHECK_FLAG(nexthop->flags, NEXTHOP_FLAG_SRTE) ? "SRTE " : ""), (CHECK_FLAG(nexthop->flags, NEXTHOP_FLAG_EVPN) ? "EVPN " : "")); } /* This function dumps the contents of a given RE entry into * standard debug log. Calling function name and IP prefix in * question are passed as 1st and 2nd arguments. */ void _route_entry_dump(const char *func, union prefixconstptr pp, union prefixconstptr src_pp, const struct route_entry *re) { const struct prefix *src_p = src_pp.p; bool is_srcdst = src_p && src_p->prefixlen; char straddr[PREFIX_STRLEN]; char srcaddr[PREFIX_STRLEN]; char flags_buf[128]; char status_buf[128]; struct nexthop *nexthop; struct vrf *vrf = vrf_lookup_by_id(re->vrf_id); struct nexthop_group *nhg; prefix2str(pp, straddr, sizeof(straddr)); zlog_debug("%s: dumping RE entry %p for %s%s%s vrf %s(%u)", func, (const void *)re, straddr, is_srcdst ? " from " : "", is_srcdst ? prefix2str(src_pp, srcaddr, sizeof(srcaddr)) : "", VRF_LOGNAME(vrf), re->vrf_id); zlog_debug("%s: uptime == %lu, type == %u, instance == %d, table == %d", straddr, (unsigned long)re->uptime, re->type, re->instance, re->table); zlog_debug( "%s: metric == %u, mtu == %u, distance == %u, flags == %sstatus == %s", straddr, re->metric, re->mtu, re->distance, zclient_dump_route_flags(re->flags, flags_buf, sizeof(flags_buf)), _dump_re_status(re, status_buf, sizeof(status_buf))); zlog_debug("%s: nexthop_num == %u, nexthop_active_num == %u", straddr, nexthop_group_nexthop_num(&(re->nhe->nhg)), nexthop_group_active_nexthop_num(&(re->nhe->nhg))); /* Dump nexthops */ for (ALL_NEXTHOPS(re->nhe->nhg, nexthop)) _route_entry_dump_nh(re, straddr, nexthop); if (zebra_nhg_get_backup_nhg(re->nhe)) { zlog_debug("%s: backup nexthops:", straddr); nhg = zebra_nhg_get_backup_nhg(re->nhe); for (ALL_NEXTHOPS_PTR(nhg, nexthop)) _route_entry_dump_nh(re, straddr, nexthop); } zlog_debug("%s: dump complete", straddr); } static int rib_meta_queue_early_route_add(struct meta_queue *mq, void *data) { struct zebra_early_route *ere = data; listnode_add(mq->subq[META_QUEUE_EARLY_ROUTE], data); mq->size++; if (IS_ZEBRA_DEBUG_RIB_DETAILED) zlog_debug( "Route %pFX(%u) queued for processing into sub-queue %s", &ere->p, ere->re->vrf_id, subqueue2str(META_QUEUE_EARLY_ROUTE)); return 0; } struct route_entry *zebra_rib_route_entry_new(vrf_id_t vrf_id, int type, uint8_t instance, uint32_t flags, uint32_t nhe_id, uint32_t table_id, uint32_t metric, uint32_t mtu, uint8_t distance, route_tag_t tag) { struct route_entry *re; re = XCALLOC(MTYPE_RE, sizeof(struct route_entry)); re->type = type; re->instance = instance; re->distance = distance; re->flags = flags; re->metric = metric; re->mtu = mtu; re->table = table_id; re->vrf_id = vrf_id; re->uptime = monotime(NULL); re->tag = tag; re->nhe_id = nhe_id; return re; } /* * Internal route-add implementation; there are a couple of different public * signatures. Callers in this path are responsible for the memory they * allocate: if they allocate a nexthop_group or backup nexthop info, they * must free those objects. If this returns < 0, an error has occurred and the * route_entry 're' has not been captured; the caller should free that also. * * -1 -> error * 0 -> Add * 1 -> update */ int rib_add_multipath_nhe(afi_t afi, safi_t safi, struct prefix *p, struct prefix_ipv6 *src_p, struct route_entry *re, struct nhg_hash_entry *re_nhe, bool startup) { struct zebra_early_route *ere; if (!re) return -1; assert(!src_p || !src_p->prefixlen || afi == AFI_IP6); ere = XCALLOC(MTYPE_WQ_WRAPPER, sizeof(*ere)); ere->afi = afi; ere->safi = safi; ere->p = *p; if (src_p) ere->src_p = *src_p; ere->src_p_provided = !!src_p; ere->re = re; ere->re_nhe = re_nhe; ere->startup = startup; return mq_add_handler(ere, rib_meta_queue_early_route_add); } /* * Add a single route. */ int rib_add_multipath(afi_t afi, safi_t safi, struct prefix *p, struct prefix_ipv6 *src_p, struct route_entry *re, struct nexthop_group *ng, bool startup) { int ret; struct nhg_hash_entry nhe, *n; if (!re) return -1; /* We either need nexthop(s) or an existing nexthop id */ if (ng == NULL && re->nhe_id == 0) return -1; /* * Use a temporary nhe to convey info to the common/main api. */ zebra_nhe_init(&nhe, afi, (ng ? ng->nexthop : NULL)); if (ng) nhe.nhg.nexthop = ng->nexthop; else if (re->nhe_id > 0) nhe.id = re->nhe_id; n = zebra_nhe_copy(&nhe, 0); ret = rib_add_multipath_nhe(afi, safi, p, src_p, re, n, startup); /* In error cases, free the route also */ if (ret < 0) XFREE(MTYPE_RE, re); return ret; } void rib_delete(afi_t afi, safi_t safi, vrf_id_t vrf_id, int type, unsigned short instance, uint32_t flags, struct prefix *p, struct prefix_ipv6 *src_p, const struct nexthop *nh, uint32_t nhe_id, uint32_t table_id, uint32_t metric, uint8_t distance, bool fromkernel) { struct zebra_early_route *ere; struct route_entry *re = NULL; struct nhg_hash_entry *nhe = NULL; re = zebra_rib_route_entry_new(vrf_id, type, instance, flags, nhe_id, table_id, metric, 0, distance, 0); if (nh) { nhe = zebra_nhg_alloc(); nhe->nhg.nexthop = nexthop_dup(nh, NULL); } ere = XCALLOC(MTYPE_WQ_WRAPPER, sizeof(*ere)); ere->afi = afi; ere->safi = safi; ere->p = *p; if (src_p) ere->src_p = *src_p; ere->src_p_provided = !!src_p; ere->re = re; ere->re_nhe = nhe; ere->startup = false; ere->deletion = true; ere->fromkernel = fromkernel; mq_add_handler(ere, rib_meta_queue_early_route_add); } int rib_add(afi_t afi, safi_t safi, vrf_id_t vrf_id, int type, unsigned short instance, uint32_t flags, struct prefix *p, struct prefix_ipv6 *src_p, const struct nexthop *nh, uint32_t nhe_id, uint32_t table_id, uint32_t metric, uint32_t mtu, uint8_t distance, route_tag_t tag, bool startup) { struct route_entry *re = NULL; struct nexthop nexthop = {}; struct nexthop_group ng = {}; /* Allocate new route_entry structure. */ re = zebra_rib_route_entry_new(vrf_id, type, instance, flags, nhe_id, table_id, metric, mtu, distance, tag); /* If the owner of the route supplies a shared nexthop-group id, * we'll use that. Otherwise, pass the nexthop along directly. */ if (!nhe_id) { /* Add nexthop. */ nexthop = *nh; nexthop_group_add_sorted(&ng, &nexthop); } return rib_add_multipath(afi, safi, p, src_p, re, &ng, startup); } static const char *rib_update_event2str(enum rib_update_event event) { const char *ret = "UNKNOWN"; switch (event) { case RIB_UPDATE_KERNEL: ret = "RIB_UPDATE_KERNEL"; break; case RIB_UPDATE_RMAP_CHANGE: ret = "RIB_UPDATE_RMAP_CHANGE"; break; case RIB_UPDATE_OTHER: ret = "RIB_UPDATE_OTHER"; break; case RIB_UPDATE_MAX: break; } return ret; } /* Schedule route nodes to be processed if they match the type */ static void rib_update_route_node(struct route_node *rn, int type) { struct route_entry *re, *next; bool re_changed = false; RNODE_FOREACH_RE_SAFE (rn, re, next) { if (type == ZEBRA_ROUTE_ALL || type == re->type) { SET_FLAG(re->status, ROUTE_ENTRY_CHANGED); re_changed = true; } } if (re_changed) rib_queue_add(rn); } /* Schedule routes of a particular table (address-family) based on event. */ void rib_update_table(struct route_table *table, enum rib_update_event event, int rtype) { struct route_node *rn; if (IS_ZEBRA_DEBUG_EVENT) { struct zebra_vrf *zvrf; struct vrf *vrf; zvrf = table->info ? ((struct rib_table_info *)table->info)->zvrf : NULL; vrf = zvrf ? zvrf->vrf : NULL; zlog_debug("%s: %s VRF %s Table %u event %s Route type: %s", __func__, table->info ? afi2str( ((struct rib_table_info *)table->info)->afi) : "Unknown", VRF_LOGNAME(vrf), zvrf ? zvrf->table_id : 0, rib_update_event2str(event), zebra_route_string(rtype)); } /* Walk all routes and queue for processing, if appropriate for * the trigger event. */ for (rn = route_top(table); rn; rn = srcdest_route_next(rn)) { /* * If we are looking at a route node and the node * has already been queued we don't * need to queue it up again */ if (rn->info && CHECK_FLAG(rib_dest_from_rnode(rn)->flags, RIB_ROUTE_ANY_QUEUED)) continue; switch (event) { case RIB_UPDATE_KERNEL: rib_update_route_node(rn, ZEBRA_ROUTE_KERNEL); break; case RIB_UPDATE_RMAP_CHANGE: case RIB_UPDATE_OTHER: rib_update_route_node(rn, rtype); break; default: break; } } } static void rib_update_handle_vrf(vrf_id_t vrf_id, enum rib_update_event event, int rtype) { struct route_table *table; if (IS_ZEBRA_DEBUG_EVENT) zlog_debug("%s: Handling VRF %s event %s", __func__, vrf_id_to_name(vrf_id), rib_update_event2str(event)); /* Process routes of interested address-families. */ table = zebra_vrf_table(AFI_IP, SAFI_UNICAST, vrf_id); if (table) rib_update_table(table, event, rtype); table = zebra_vrf_table(AFI_IP6, SAFI_UNICAST, vrf_id); if (table) rib_update_table(table, event, rtype); } static void rib_update_handle_vrf_all(enum rib_update_event event, int rtype) { struct zebra_router_table *zrt; if (IS_ZEBRA_DEBUG_EVENT) zlog_debug("%s: Handling VRF (ALL) event %s", __func__, rib_update_event2str(event)); /* Just iterate over all the route tables, rather than vrf lookups */ RB_FOREACH (zrt, zebra_router_table_head, &zrouter.tables) rib_update_table(zrt->table, event, rtype); } struct rib_update_ctx { enum rib_update_event event; bool vrf_all; vrf_id_t vrf_id; }; static struct rib_update_ctx *rib_update_ctx_init(vrf_id_t vrf_id, enum rib_update_event event) { struct rib_update_ctx *ctx; ctx = XCALLOC(MTYPE_RIB_UPDATE_CTX, sizeof(struct rib_update_ctx)); ctx->event = event; ctx->vrf_id = vrf_id; return ctx; } static void rib_update_ctx_fini(struct rib_update_ctx **ctx) { XFREE(MTYPE_RIB_UPDATE_CTX, *ctx); } static void rib_update_handler(struct thread *thread) { struct rib_update_ctx *ctx; ctx = THREAD_ARG(thread); if (ctx->vrf_all) rib_update_handle_vrf_all(ctx->event, ZEBRA_ROUTE_ALL); else rib_update_handle_vrf(ctx->vrf_id, ctx->event, ZEBRA_ROUTE_ALL); rib_update_ctx_fini(&ctx); } /* * Thread list to ensure we don't schedule a ton of events * if interfaces are flapping for instance. */ static struct thread *t_rib_update_threads[RIB_UPDATE_MAX]; void rib_update_finish(void) { int i; for (i = RIB_UPDATE_KERNEL; i < RIB_UPDATE_MAX; i++) { if (thread_is_scheduled(t_rib_update_threads[i])) { struct rib_update_ctx *ctx; ctx = THREAD_ARG(t_rib_update_threads[i]); rib_update_ctx_fini(&ctx); THREAD_OFF(t_rib_update_threads[i]); } } } /* Schedule a RIB update event for all vrfs */ void rib_update(enum rib_update_event event) { struct rib_update_ctx *ctx; if (thread_is_scheduled(t_rib_update_threads[event])) return; if (zebra_router_in_shutdown()) return; ctx = rib_update_ctx_init(0, event); ctx->vrf_all = true; thread_add_event(zrouter.master, rib_update_handler, ctx, 0, &t_rib_update_threads[event]); if (IS_ZEBRA_DEBUG_EVENT) zlog_debug("%s: Scheduled VRF (ALL), event %s", __func__, rib_update_event2str(event)); } /* Delete self installed routes after zebra is relaunched. */ void rib_sweep_table(struct route_table *table) { struct route_node *rn; struct route_entry *re; struct route_entry *next; struct nexthop *nexthop; if (!table) return; if (IS_ZEBRA_DEBUG_RIB) zlog_debug("%s: starting", __func__); for (rn = route_top(table); rn; rn = srcdest_route_next(rn)) { RNODE_FOREACH_RE_SAFE (rn, re, next) { if (IS_ZEBRA_DEBUG_RIB) route_entry_dump(&rn->p, NULL, re); if (CHECK_FLAG(re->status, ROUTE_ENTRY_REMOVED)) continue; if (!CHECK_FLAG(re->flags, ZEBRA_FLAG_SELFROUTE)) continue; /* * If routes are older than startup_time then * we know we read them in from the kernel. * As such we can safely remove them. */ if (zrouter.startup_time < re->uptime) continue; /* * So we are starting up and have received * routes from the kernel that we have installed * from a previous run of zebra but not cleaned * up ( say a kill -9 ) * But since we haven't actually installed * them yet( we received them from the kernel ) * we don't think they are active. * So let's pretend they are active to actually * remove them. * In all honesty I'm not sure if we should * mark them as active when we receive them * This is startup only so probably ok. * * If we ever decide to move rib_sweep_table * to a different spot (ie startup ) * this decision needs to be revisited */ SET_FLAG(re->status, ROUTE_ENTRY_INSTALLED); for (ALL_NEXTHOPS(re->nhe->nhg, nexthop)) SET_FLAG(nexthop->flags, NEXTHOP_FLAG_FIB); rib_uninstall_kernel(rn, re); rib_delnode(rn, re); } } if (IS_ZEBRA_DEBUG_RIB) zlog_debug("%s: ends", __func__); } /* Sweep all RIB tables. */ void rib_sweep_route(struct thread *t) { struct vrf *vrf; struct zebra_vrf *zvrf; RB_FOREACH (vrf, vrf_id_head, &vrfs_by_id) { if ((zvrf = vrf->info) == NULL) continue; rib_sweep_table(zvrf->table[AFI_IP][SAFI_UNICAST]); rib_sweep_table(zvrf->table[AFI_IP6][SAFI_UNICAST]); } zebra_router_sweep_route(); zebra_router_sweep_nhgs(); } /* Remove specific by protocol routes from 'table'. */ unsigned long rib_score_proto_table(uint8_t proto, unsigned short instance, struct route_table *table) { struct route_node *rn; struct route_entry *re; struct route_entry *next; unsigned long n = 0; if (table) for (rn = route_top(table); rn; rn = srcdest_route_next(rn)) RNODE_FOREACH_RE_SAFE (rn, re, next) { if (CHECK_FLAG(re->status, ROUTE_ENTRY_REMOVED)) continue; if (re->type == proto && re->instance == instance) { rib_delnode(rn, re); n++; } } return n; } /* Remove specific by protocol routes. */ unsigned long rib_score_proto(uint8_t proto, unsigned short instance) { struct vrf *vrf; struct zebra_vrf *zvrf; struct other_route_table *ort; unsigned long cnt = 0; RB_FOREACH (vrf, vrf_id_head, &vrfs_by_id) { zvrf = vrf->info; if (!zvrf) continue; cnt += rib_score_proto_table(proto, instance, zvrf->table[AFI_IP][SAFI_UNICAST]) + rib_score_proto_table( proto, instance, zvrf->table[AFI_IP6][SAFI_UNICAST]); frr_each(otable, &zvrf->other_tables, ort) cnt += rib_score_proto_table(proto, instance, ort->table); } return cnt; } /* Close RIB and clean up kernel routes. */ void rib_close_table(struct route_table *table) { struct route_node *rn; rib_dest_t *dest; if (!table) return; for (rn = route_top(table); rn; rn = srcdest_route_next(rn)) { dest = rib_dest_from_rnode(rn); if (dest && dest->selected_fib) { rib_uninstall_kernel(rn, dest->selected_fib); dest->selected_fib = NULL; } } } /* * Handler for async dataplane results after a pseudowire installation */ static void handle_pw_result(struct zebra_dplane_ctx *ctx) { struct zebra_pw *pw; struct zebra_vrf *vrf; /* The pseudowire code assumes success - we act on an error * result for installation attempts here. */ if (dplane_ctx_get_op(ctx) != DPLANE_OP_PW_INSTALL) return; if (dplane_ctx_get_status(ctx) != ZEBRA_DPLANE_REQUEST_SUCCESS) { vrf = zebra_vrf_lookup_by_id(dplane_ctx_get_vrf(ctx)); pw = zebra_pw_find(vrf, dplane_ctx_get_ifname(ctx)); if (pw) zebra_pw_install_failure(pw, dplane_ctx_get_pw_status(ctx)); } } /* * Handle results from the dataplane system. Dequeue update context * structs, dispatch to appropriate internal handlers. */ static void rib_process_dplane_results(struct thread *thread) { struct zebra_dplane_ctx *ctx; struct dplane_ctx_q ctxlist; bool shut_p = false; #ifdef HAVE_SCRIPTING char *script_name = frrscript_names_get_script_name(ZEBRA_ON_RIB_PROCESS_HOOK_CALL); int ret = 1; struct frrscript *fs = NULL; if (script_name) { fs = frrscript_new(script_name); if (fs) ret = frrscript_load(fs, ZEBRA_ON_RIB_PROCESS_HOOK_CALL, NULL); } #endif /* HAVE_SCRIPTING */ /* Dequeue a list of completed updates with one lock/unlock cycle */ do { TAILQ_INIT(&ctxlist); /* Take lock controlling queue of results */ frr_with_mutex (&dplane_mutex) { /* Dequeue list of context structs */ dplane_ctx_list_append(&ctxlist, &rib_dplane_q); } /* Dequeue context block */ ctx = dplane_ctx_dequeue(&ctxlist); /* If we've emptied the results queue, we're done */ if (ctx == NULL) break; /* If zebra is shutting down, avoid processing results, * just drain the results queue. */ shut_p = atomic_load_explicit(&zrouter.in_shutdown, memory_order_relaxed); if (shut_p) { while (ctx) { dplane_ctx_fini(&ctx); ctx = dplane_ctx_dequeue(&ctxlist); } continue; } while (ctx) { #ifdef HAVE_SCRIPTING if (ret == 0) frrscript_call(fs, ZEBRA_ON_RIB_PROCESS_HOOK_CALL, ("ctx", ctx)); #endif /* HAVE_SCRIPTING */ switch (dplane_ctx_get_op(ctx)) { case DPLANE_OP_ROUTE_INSTALL: case DPLANE_OP_ROUTE_UPDATE: case DPLANE_OP_ROUTE_DELETE: /* Bit of special case for route updates * that were generated by async notifications: * we don't want to continue processing these * in the rib. */ if (dplane_ctx_get_notif_provider(ctx) == 0) rib_process_result(ctx); break; case DPLANE_OP_ROUTE_NOTIFY: rib_process_dplane_notify(ctx); break; case DPLANE_OP_NH_INSTALL: case DPLANE_OP_NH_UPDATE: case DPLANE_OP_NH_DELETE: zebra_nhg_dplane_result(ctx); break; case DPLANE_OP_LSP_INSTALL: case DPLANE_OP_LSP_UPDATE: case DPLANE_OP_LSP_DELETE: /* Bit of special case for LSP updates * that were generated by async notifications: * we don't want to continue processing these. */ if (dplane_ctx_get_notif_provider(ctx) == 0) zebra_mpls_lsp_dplane_result(ctx); break; case DPLANE_OP_LSP_NOTIFY: zebra_mpls_process_dplane_notify(ctx); break; case DPLANE_OP_PW_INSTALL: case DPLANE_OP_PW_UNINSTALL: handle_pw_result(ctx); break; case DPLANE_OP_SYS_ROUTE_ADD: case DPLANE_OP_SYS_ROUTE_DELETE: break; case DPLANE_OP_MAC_INSTALL: case DPLANE_OP_MAC_DELETE: zebra_vxlan_handle_result(ctx); break; case DPLANE_OP_RULE_ADD: case DPLANE_OP_RULE_DELETE: case DPLANE_OP_RULE_UPDATE: case DPLANE_OP_IPTABLE_ADD: case DPLANE_OP_IPTABLE_DELETE: case DPLANE_OP_IPSET_ADD: case DPLANE_OP_IPSET_DELETE: case DPLANE_OP_IPSET_ENTRY_ADD: case DPLANE_OP_IPSET_ENTRY_DELETE: zebra_pbr_dplane_result(ctx); break; case DPLANE_OP_INTF_ADDR_ADD: case DPLANE_OP_INTF_ADDR_DEL: case DPLANE_OP_INTF_INSTALL: case DPLANE_OP_INTF_UPDATE: case DPLANE_OP_INTF_DELETE: case DPLANE_OP_INTF_NETCONFIG: zebra_if_dplane_result(ctx); break; case DPLANE_OP_TC_INSTALL: case DPLANE_OP_TC_UPDATE: case DPLANE_OP_TC_DELETE: break; /* Some op codes not handled here */ case DPLANE_OP_ADDR_INSTALL: case DPLANE_OP_ADDR_UNINSTALL: case DPLANE_OP_NEIGH_INSTALL: case DPLANE_OP_NEIGH_UPDATE: case DPLANE_OP_NEIGH_DELETE: case DPLANE_OP_NEIGH_IP_INSTALL: case DPLANE_OP_NEIGH_IP_DELETE: case DPLANE_OP_VTEP_ADD: case DPLANE_OP_VTEP_DELETE: case DPLANE_OP_NEIGH_DISCOVER: case DPLANE_OP_BR_PORT_UPDATE: case DPLANE_OP_NEIGH_TABLE_UPDATE: case DPLANE_OP_GRE_SET: case DPLANE_OP_NONE: break; } /* Dispatch by op code */ dplane_ctx_fini(&ctx); ctx = dplane_ctx_dequeue(&ctxlist); } } while (1); #ifdef HAVE_SCRIPTING if (fs) frrscript_delete(fs); #endif } /* * Results are returned from the dataplane subsystem, in the context of * the dataplane pthread. We enqueue the results here for processing by * the main thread later. */ static int rib_dplane_results(struct dplane_ctx_q *ctxlist) { /* Take lock controlling queue of results */ frr_with_mutex (&dplane_mutex) { /* Enqueue context blocks */ dplane_ctx_list_append(&rib_dplane_q, ctxlist); } /* Ensure event is signalled to zebra main pthread */ thread_add_event(zrouter.master, rib_process_dplane_results, NULL, 0, &t_dplane); return 0; } /* * Ensure there are no empty slots in the route_info array. * Every route type in zebra should be present there. */ static void check_route_info(void) { int len = array_size(route_info); /* * ZEBRA_ROUTE_SYSTEM is special cased since * its key is 0 anyway. * * ZEBRA_ROUTE_ALL is also ignored. */ for (int i = 0; i < len; i++) { assert(route_info[i].key >= ZEBRA_ROUTE_SYSTEM && route_info[i].key < ZEBRA_ROUTE_MAX); assert(route_info[i].meta_q_map < MQ_SIZE); } } /* Routing information base initialize. */ void rib_init(void) { check_route_info(); rib_queue_init(); /* Init dataplane, and register for results */ pthread_mutex_init(&dplane_mutex, NULL); TAILQ_INIT(&rib_dplane_q); zebra_dplane_init(rib_dplane_results); } /* * vrf_id_get_next * * Get the first vrf id that is greater than the given vrf id if any. * * Returns true if a vrf id was found, false otherwise. */ static inline int vrf_id_get_next(vrf_id_t vrf_id, vrf_id_t *next_id_p) { struct vrf *vrf; vrf = vrf_lookup_by_id(vrf_id); if (vrf) { vrf = RB_NEXT(vrf_id_head, vrf); if (vrf) { *next_id_p = vrf->vrf_id; return 1; } } return 0; } /* * rib_tables_iter_next * * Returns the next table in the iteration. */ struct route_table *rib_tables_iter_next(rib_tables_iter_t *iter) { struct route_table *table; /* * Array that helps us go over all AFI/SAFI combinations via one * index. */ static const struct { afi_t afi; safi_t safi; } afi_safis[] = { {AFI_IP, SAFI_UNICAST}, {AFI_IP, SAFI_MULTICAST}, {AFI_IP, SAFI_LABELED_UNICAST}, {AFI_IP6, SAFI_UNICAST}, {AFI_IP6, SAFI_MULTICAST}, {AFI_IP6, SAFI_LABELED_UNICAST}, }; table = NULL; switch (iter->state) { case RIB_TABLES_ITER_S_INIT: iter->vrf_id = VRF_DEFAULT; iter->afi_safi_ix = -1; /* Fall through */ case RIB_TABLES_ITER_S_ITERATING: iter->afi_safi_ix++; while (1) { while (iter->afi_safi_ix < (int)array_size(afi_safis)) { table = zebra_vrf_table( afi_safis[iter->afi_safi_ix].afi, afi_safis[iter->afi_safi_ix].safi, iter->vrf_id); if (table) break; iter->afi_safi_ix++; } /* * Found another table in this vrf. */ if (table) break; /* * Done with all tables in the current vrf, go to the * next * one. */ if (!vrf_id_get_next(iter->vrf_id, &iter->vrf_id)) break; iter->afi_safi_ix = 0; } break; case RIB_TABLES_ITER_S_DONE: return NULL; } if (table) iter->state = RIB_TABLES_ITER_S_ITERATING; else iter->state = RIB_TABLES_ITER_S_DONE; return table; }