diff options
Diffstat (limited to 'zebra/zebra_gr.c')
-rw-r--r-- | zebra/zebra_gr.c | 672 |
1 files changed, 672 insertions, 0 deletions
diff --git a/zebra/zebra_gr.c b/zebra/zebra_gr.c new file mode 100644 index 0000000..39fd864 --- /dev/null +++ b/zebra/zebra_gr.c @@ -0,0 +1,672 @@ +// SPDX-License-Identifier: GPL-2.0-or-later +/* + * Zebra GR related helper functions. + * + * Portions: + * Copyright (C) 2019 VMware, Inc. + * et al. + */ + +#include <zebra.h> +#include <libgen.h> + +#include "lib/prefix.h" +#include "lib/command.h" +#include "lib/if.h" +#include "frrevent.h" +#include "lib/stream.h" +#include "lib/memory.h" +#include "lib/table.h" +#include "lib/network.h" +#include "lib/sockunion.h" +#include "lib/log.h" +#include "lib/zclient.h" +#include "lib/privs.h" +#include "lib/network.h" +#include "lib/buffer.h" +#include "lib/nexthop.h" +#include "lib/vrf.h" +#include "lib/libfrr.h" +#include "lib/sockopt.h" + +#include "zebra/zebra_router.h" +#include "zebra/debug.h" +#include "zebra/zapi_msg.h" + +DEFINE_MTYPE_STATIC(ZEBRA, ZEBRA_GR, "GR"); + +/* + * Forward declaration. + */ +static struct zserv *zebra_gr_find_stale_client(struct zserv *client); +static void zebra_gr_route_stale_delete_timer_expiry(struct event *thread); +static int32_t zebra_gr_delete_stale_routes(struct client_gr_info *info); +static void zebra_gr_process_client_stale_routes(struct zserv *client, + struct client_gr_info *info); +static void zebra_gr_delete_stale_route_table_afi(struct event *event); +/* + * Debug macros. + */ +#define LOG_GR(msg, ...) \ + do { \ + if (IS_ZEBRA_DEBUG_EVENT) \ + zlog_debug(msg, ##__VA_ARGS__); \ + } while (0) + +/* + * Client connection functions + */ + +/* + * Function to clean all the stale clients, + * function will also clean up all per instance + * capabilities that are exchanged. + */ +void zebra_gr_stale_client_cleanup(struct list *client_list) +{ + struct listnode *node, *nnode; + struct zserv *s_client = NULL; + struct client_gr_info *info, *ninfo; + + /* Find the stale client */ + for (ALL_LIST_ELEMENTS(client_list, node, nnode, s_client)) { + + LOG_GR("%s: Stale client %s is being deleted", __func__, + zebra_route_string(s_client->proto)); + + TAILQ_FOREACH_SAFE (info, &s_client->gr_info_queue, gr_info, + ninfo) { + + /* Cancel the stale timer */ + if (info->t_stale_removal != NULL) { + EVENT_OFF(info->t_stale_removal); + info->do_delete = true; + /* Process the stale routes */ + event_execute( + zrouter.master, + zebra_gr_route_stale_delete_timer_expiry, + info, 0, NULL); + } + } + } +} + +/* + * A helper function to create client info. + */ +static struct client_gr_info *zebra_gr_client_info_create(struct zserv *client) +{ + struct client_gr_info *info; + + info = XCALLOC(MTYPE_ZEBRA_GR, sizeof(struct client_gr_info)); + + info->stale_client_ptr = client; + + TAILQ_INSERT_TAIL(&(client->gr_info_queue), info, gr_info); + return info; +} + +/* + * A helper function to delete and destroy client info. + */ +static void zebra_gr_client_info_delete(struct zserv *client, + struct client_gr_info *info) +{ + struct vrf *vrf = vrf_lookup_by_id(info->vrf_id); + + TAILQ_REMOVE(&(client->gr_info_queue), info, gr_info); + + EVENT_OFF(info->t_stale_removal); + + LOG_GR("%s: Instance info is being deleted for client %s vrf %s(%u)", + __func__, zebra_route_string(client->proto), VRF_LOGNAME(vrf), + info->vrf_id); + + /* Delete all the stale routes. */ + info->do_delete = true; + zebra_gr_delete_stale_routes(info); + + XFREE(MTYPE_ZEBRA_GR, info); +} + +/* + * Function to handle client when it disconnect. + */ +int32_t zebra_gr_client_disconnect(struct zserv *client) +{ + struct zserv *stale_client; + struct timeval tv; + struct client_gr_info *info = NULL; + + /* Find the stale client */ + stale_client = zebra_gr_find_stale_client(client); + + /* + * We should never be here. + */ + if (stale_client) { + LOG_GR("%s: Stale client %s exist, we should not be here!", + __func__, zebra_route_string(client->proto)); + assert(0); + } + + client->restart_time = monotime(&tv); + + /* For all the GR instance start the stale removal timer. */ + TAILQ_FOREACH (info, &client->gr_info_queue, gr_info) { + if (ZEBRA_CLIENT_GR_ENABLED(info->capabilities) + && (info->t_stale_removal == NULL)) { + struct vrf *vrf = vrf_lookup_by_id(info->vrf_id); + + event_add_timer( + zrouter.master, + zebra_gr_route_stale_delete_timer_expiry, info, + info->stale_removal_time, + &info->t_stale_removal); + info->stale_client_ptr = client; + info->stale_client = true; + LOG_GR("%s: Client %s vrf %s(%u) Stale timer update to %d", + __func__, zebra_route_string(client->proto), + VRF_LOGNAME(vrf), info->vrf_id, + info->stale_removal_time); + } + } + + listnode_add(zrouter.stale_client_list, client); + + return 0; +} + +/* + * Function to delete stale client + */ +static void zebra_gr_delete_stale_client(struct client_gr_info *info) +{ + struct client_gr_info *bgp_info; + struct zserv *s_client = NULL; + struct vrf *vrf = vrf_lookup_by_id(info->vrf_id); + + s_client = info->stale_client_ptr; + + if (!s_client || !info->stale_client) + return; + + /* + * If there are bgp instances with the stale delete timer pending + * then stale client is not deleted + */ + if ((s_client->gr_instance_count > 0) && info->gr_enable) + s_client->gr_instance_count--; + + TAILQ_REMOVE(&(s_client->gr_info_queue), info, gr_info); + + LOG_GR("%s: Client %s gr count %d", __func__, + zebra_route_string(s_client->proto), + s_client->gr_instance_count); + + TAILQ_FOREACH (bgp_info, &s_client->gr_info_queue, gr_info) { + if (bgp_info->t_stale_removal != NULL) + return; + } + + LOG_GR("%s: Client %s vrf %s(%u) is being deleted", __func__, + zebra_route_string(s_client->proto), VRF_LOGNAME(vrf), + info->vrf_id); + + TAILQ_INIT(&(s_client->gr_info_queue)); + listnode_delete(zrouter.stale_client_list, s_client); + if (info->stale_client) + zserv_client_delete(s_client); + XFREE(MTYPE_ZEBRA_GR, info); +} + +/* + * Function to find stale client. + */ +static struct zserv *zebra_gr_find_stale_client(struct zserv *client) +{ + struct listnode *node, *nnode; + struct zserv *stale_client; + + /* Find the stale client */ + for (ALL_LIST_ELEMENTS(zrouter.stale_client_list, node, nnode, + stale_client)) { + if (client->proto == stale_client->proto + && client->instance == stale_client->instance) { + return stale_client; + } + } + + return NULL; +} + +/* + * Function to handle reconnect of client post restart. + */ +void zebra_gr_client_reconnect(struct zserv *client) +{ + struct listnode *node, *nnode; + struct zserv *old_client = NULL; + struct client_gr_info *info = NULL; + + /* Find the stale client */ + for (ALL_LIST_ELEMENTS(zrouter.stale_client_list, node, nnode, + old_client)) { + if (client->proto == old_client->proto + && client->instance == old_client->instance) + break; + } + + /* Copy the timers */ + if (!old_client) + return; + + client->gr_instance_count = old_client->gr_instance_count; + client->restart_time = old_client->restart_time; + + LOG_GR("%s : old client %s, gr_instance_count %d", __func__, + zebra_route_string(old_client->proto), + old_client->gr_instance_count); + + if (TAILQ_FIRST(&old_client->gr_info_queue)) { + TAILQ_CONCAT(&client->gr_info_queue, &old_client->gr_info_queue, + gr_info); + TAILQ_INIT(&old_client->gr_info_queue); + } + + TAILQ_FOREACH (info, &client->gr_info_queue, gr_info) { + info->stale_client_ptr = client; + info->stale_client = false; + } + + /* Delete the stale client */ + listnode_delete(zrouter.stale_client_list, old_client); + /* Delete old client */ + zserv_client_delete(old_client); +} + +struct zebra_gr_afi_clean { + struct client_gr_info *info; + afi_t afi; + uint8_t proto; + uint8_t instance; + + struct event *t_gac; +}; + +/* + * Functions to deal with capabilities + */ + +/* + * Function to decode and call appropriate functions + * to handle client capabilities. + */ +void zread_client_capabilities(ZAPI_HANDLER_ARGS) +{ + struct zapi_cap api; + struct client_gr_info *info = NULL; + struct stream *s; + struct vrf *vrf; + + s = msg; + + if (zapi_capabilities_decode(s, &api)) { + LOG_GR("%s: Error in reading capabilities for client %s", + __func__, zebra_route_string(client->proto)); + return; + } + + vrf = vrf_lookup_by_id(api.vrf_id); + + /* + * If this ever matters uncomment and add safi to the + * arrays as needed to track + */ + if (api.safi != SAFI_UNICAST) + return; + + /* GR only for dynamic clients */ + if (client->proto <= ZEBRA_ROUTE_CONNECT) { + LOG_GR("%s: GR capabilities for client %s not supported", + __func__, zebra_route_string(client->proto)); + return; + } + + /* Find the bgp information for the specified vrf id */ + TAILQ_FOREACH (info, &client->gr_info_queue, gr_info) { + if (info->vrf_id == api.vrf_id) + break; + } + + /* + * If the command is delete, then cancel the stale timer and + * delete the bgp info + */ + switch (api.cap) { + case ZEBRA_CLIENT_GR_DISABLE: + if (!info) + return; + + LOG_GR("%s: Client %s instance GR disabled count %d", __func__, + zebra_route_string(client->proto), + client->gr_instance_count); + + if ((info->gr_enable) && (client->gr_instance_count > 0)) + client->gr_instance_count--; + + zebra_gr_client_info_delete(client, info); + break; + case ZEBRA_CLIENT_GR_CAPABILITIES: + /* Allocate bgp info */ + if (!info) + info = zebra_gr_client_info_create(client); + + /* Update other parameters */ + if (!info->gr_enable) { + client->gr_instance_count++; + + LOG_GR("%s: Cient %s vrf %s(%u) GR enabled count %d", + __func__, zebra_route_string(client->proto), + VRF_LOGNAME(vrf), api.vrf_id, + client->gr_instance_count); + + info->capabilities = api.cap; + info->stale_removal_time = api.stale_removal_time; + info->vrf_id = api.vrf_id; + info->gr_enable = true; + } + break; + case ZEBRA_CLIENT_RIB_STALE_TIME: + LOG_GR("%s: Client %s stale time update event", __func__, + zebra_route_string(client->proto)); + + /* Update the stale removal timer */ + if (info && info->t_stale_removal == NULL) { + + LOG_GR("%s: vrf %s(%u) Stale time: %d is now update to: %d", + __func__, VRF_LOGNAME(vrf), info->vrf_id, + info->stale_removal_time, + api.stale_removal_time); + + info->stale_removal_time = api.stale_removal_time; + } + + break; + case ZEBRA_CLIENT_ROUTE_UPDATE_COMPLETE: + if (!info) { + LOG_GR("%s: Client %s route update complete for AFI %d, SAFI %d, no Graceful Restart communication, returning", + __func__, zebra_route_string(client->proto), + api.afi, api.safi); + return; + } + + LOG_GR("%s: Client %s vrf %s(%u) route update complete for AFI %d, SAFI %d", + __func__, zebra_route_string(client->proto), + VRF_LOGNAME(vrf), info->vrf_id, api.afi, api.safi); + info->route_sync[api.afi] = true; + + /* + * Schedule for after anything already in the meta Q + */ + rib_add_gr_run(api.afi, api.vrf_id, client->proto, + client->instance); + zebra_gr_process_client_stale_routes(client, info); + break; + case ZEBRA_CLIENT_ROUTE_UPDATE_PENDING: + if (!info) { + LOG_GR("%s: Client %s route update pending for AFI %d, SAFI %d", + __func__, zebra_route_string(client->proto), + api.afi, api.safi); + } else { + LOG_GR("%s: Client %s vrf %s(%u) route update pending for AFI %d, SAFI %d", + __func__, zebra_route_string(client->proto), + VRF_LOGNAME(vrf), info->vrf_id, api.afi, + api.safi); + + info->af_enabled[api.afi] = true; + } + break; + } +} + +/* + * Stale route handling + */ + +/* + * Delete all the stale routes that have not been refreshed + * post restart. + */ +static void zebra_gr_route_stale_delete_timer_expiry(struct event *thread) +{ + struct client_gr_info *info = EVENT_ARG(thread); + int32_t cnt = 0; + struct zserv *client; + struct vrf *vrf = vrf_lookup_by_id(info->vrf_id); + + client = (struct zserv *)info->stale_client_ptr; + + cnt = zebra_gr_delete_stale_routes(info); + + /* Restart the timer */ + if (cnt > 0) { + LOG_GR("%s: Client %s vrf %s(%u) processed %d routes. Start timer again", + __func__, zebra_route_string(client->proto), + VRF_LOGNAME(vrf), info->vrf_id, cnt); + + event_add_timer(zrouter.master, + zebra_gr_route_stale_delete_timer_expiry, info, + ZEBRA_DEFAULT_STALE_UPDATE_DELAY, + &info->t_stale_removal); + } else { + /* No routes to delete for the VRF */ + LOG_GR("%s: Client %s vrf %s(%u) all stale routes processed", + __func__, zebra_route_string(client->proto), + VRF_LOGNAME(vrf), info->vrf_id); + + zebra_gr_delete_stale_client(info); + } +} + + +/* + * Function to process to check if route entry is stale + * or has been updated. + * + * Returns true when a node is deleted else false + */ +static bool zebra_gr_process_route_entry(struct zserv *client, + struct route_node *rn, + struct route_entry *re) +{ + /* If the route is not refreshed after restart, delete the entry */ + if (re->uptime < client->restart_time) { + if (IS_ZEBRA_DEBUG_RIB) + zlog_debug("%s: Client %s stale route %pFX is deleted", + __func__, zebra_route_string(client->proto), + &rn->p); + rib_delnode(rn, re); + + return true; + } + + return false; +} + +static void zebra_gr_delete_stale_route_table_afi(struct event *event) +{ + struct zebra_gr_afi_clean *gac = EVENT_ARG(event); + struct route_table *table; + struct route_node *rn; + struct route_entry *re, *next; + struct zebra_vrf *zvrf = zebra_vrf_lookup_by_id(gac->info->vrf_id); + int32_t n = 0; + + if (!zvrf) + goto done; + + table = zvrf->table[gac->afi][SAFI_UNICAST]; + if (!table) + goto done; + + for (rn = route_top(table); rn; rn = srcdest_route_next(rn)) { + RNODE_FOREACH_RE_SAFE (rn, re, next) { + if (CHECK_FLAG(re->status, ROUTE_ENTRY_REMOVED)) + continue; + + /* If the route refresh is received + * after restart then do not delete + * the route + */ + + if (re->type == gac->proto && + re->instance == gac->instance && + zebra_gr_process_route_entry( + gac->info->stale_client_ptr, rn, re)) + n++; + + /* If the max route count is reached + * then timer thread will be restarted + * Store the current prefix and afi + */ + if ((n >= ZEBRA_MAX_STALE_ROUTE_COUNT) && + (gac->info->do_delete == false)) { + event_add_timer( + zrouter.master, + zebra_gr_delete_stale_route_table_afi, + gac, ZEBRA_DEFAULT_STALE_UPDATE_DELAY, + &gac->t_gac); + } + } + } + +done: + XFREE(MTYPE_ZEBRA_GR, gac); +} + +/* + * This function walks through the route table for all vrf and deletes + * the stale routes for the restarted client specified by the protocol + * type + */ +static int32_t zebra_gr_delete_stale_route(struct client_gr_info *info, + struct zebra_vrf *zvrf) +{ + afi_t afi; + uint8_t proto; + uint16_t instance; + struct zserv *s_client; + + s_client = info->stale_client_ptr; + if (s_client == NULL) { + LOG_GR("%s: Stale client %s(%u) not present", __func__, + zvrf->vrf->name, zvrf->vrf->vrf_id); + return -1; + } + + proto = s_client->proto; + instance = s_client->instance; + + LOG_GR("%s: Client %s %s(%u) stale routes are being deleted", __func__, + zebra_route_string(proto), zvrf->vrf->name, zvrf->vrf->vrf_id); + + /* Process routes for all AFI */ + for (afi = AFI_IP; afi < AFI_MAX; afi++) { + + /* + * Schedule for immediately after anything in the + * meta-Q + */ + rib_add_gr_run(afi, info->vrf_id, proto, instance); + } + return 0; +} + +/* + * Delete the stale routes when client is restarted and routes are not + * refreshed within the stale timeout + */ +static int32_t zebra_gr_delete_stale_routes(struct client_gr_info *info) +{ + struct zebra_vrf *zvrf; + uint64_t cnt = 0; + + if (info == NULL) + return -1; + + zvrf = zebra_vrf_lookup_by_id(info->vrf_id); + if (zvrf == NULL) { + LOG_GR("%s: Invalid VRF entry %u", __func__, info->vrf_id); + return -1; + } + + cnt = zebra_gr_delete_stale_route(info, zvrf); + return cnt; +} + +/* + * This function checks if route update for all AFI, SAFI is completed + * and cancels the stale timer + */ +static void zebra_gr_process_client_stale_routes(struct zserv *client, + struct client_gr_info *info) +{ + afi_t afi; + + if (info == NULL) + return; + + /* Check if route update completed for all AFI, SAFI */ + for (afi = AFI_IP; afi < AFI_MAX; afi++) { + if (info->af_enabled[afi] && !info->route_sync[afi]) { + struct vrf *vrf = vrf_lookup_by_id(info->vrf_id); + + LOG_GR("%s: Client %s vrf: %s(%u) route update not completed for AFI %d", + __func__, zebra_route_string(client->proto), + VRF_LOGNAME(vrf), info->vrf_id, afi); + return; + } + } + + /* + * Route update completed for all AFI, SAFI + * Cancel the stale timer, routes are already being processed + */ + if (info->t_stale_removal) { + struct vrf *vrf = vrf_lookup_by_id(info->vrf_id); + + LOG_GR("%s: Client %s canceled stale delete timer vrf %s(%d)", + __func__, zebra_route_string(client->proto), + VRF_LOGNAME(vrf), info->vrf_id); + EVENT_OFF(info->t_stale_removal); + } +} + +void zebra_gr_process_client(afi_t afi, vrf_id_t vrf_id, uint8_t proto, + uint8_t instance) +{ + struct zserv *client = zserv_find_client(proto, instance); + struct client_gr_info *info = NULL; + struct zebra_gr_afi_clean *gac; + + if (client == NULL) + return; + + TAILQ_FOREACH (info, &client->gr_info_queue, gr_info) { + if (info->vrf_id == vrf_id) + break; + } + + if (info == NULL) + return; + + gac = XCALLOC(MTYPE_ZEBRA_GR, sizeof(*gac)); + gac->info = info; + gac->afi = afi; + gac->proto = proto; + gac->instance = instance; + + event_add_event(zrouter.master, zebra_gr_delete_stale_route_table_afi, + gac, 0, &gac->t_gac); +} |