/* * This is an implementation of PIM MLAG Functionality * * Module name: PIM MLAG * * Author: sathesh Kumar karra * * Copyright (C) 2019 Cumulus Networks http://www.cumulusnetworks.com * * This program is free software; you can redistribute it and/or modify it * under the terms of the GNU General Public License as published by the Free * Software Foundation; either version 2 of the License, or (at your option) * any later version. * * This program is distributed in the hope that it will be useful, but WITHOUT * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for * more details. * * You should have received a copy of the GNU General Public License along * with this program; see the file COPYING; if not, write to the Free Software * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA */ #include #include "pimd.h" #include "pim_mlag.h" #include "pim_upstream.h" #include "pim_vxlan.h" extern struct zclient *zclient; #define PIM_MLAG_METADATA_LEN 4 /*********************ACtual Data processing *****************************/ /* TBD: There can be duplicate updates to FIB***/ #define PIM_MLAG_ADD_OIF_TO_OIL(ch, ch_oil) \ do { \ if (PIM_DEBUG_MLAG) \ zlog_debug( \ "%s: add Dual-active Interface to %s " \ "to oil:%s", \ __func__, ch->interface->name, ch->sg_str); \ pim_channel_update_oif_mute(ch_oil, ch->interface->info); \ } while (0) #define PIM_MLAG_DEL_OIF_TO_OIL(ch, ch_oil) \ do { \ if (PIM_DEBUG_MLAG) \ zlog_debug( \ "%s: del Dual-active Interface to %s " \ "to oil:%s", \ __func__, ch->interface->name, ch->sg_str); \ pim_channel_update_oif_mute(ch_oil, ch->interface->info); \ } while (0) static void pim_mlag_calculate_df_for_ifchannels(struct pim_upstream *up, bool is_df) { struct listnode *chnode; struct listnode *chnextnode; struct pim_ifchannel *ch; struct pim_interface *pim_ifp = NULL; struct channel_oil *ch_oil = NULL; ch_oil = (up) ? up->channel_oil : NULL; if (!ch_oil) return; if (PIM_DEBUG_MLAG) zlog_debug("%s: Calculating DF for Dual active if-channel%s", __func__, up->sg_str); for (ALL_LIST_ELEMENTS(up->ifchannels, chnode, chnextnode, ch)) { pim_ifp = (ch->interface) ? ch->interface->info : NULL; if (!pim_ifp || !PIM_I_am_DualActive(pim_ifp)) continue; if (is_df) PIM_MLAG_ADD_OIF_TO_OIL(ch, ch_oil); else PIM_MLAG_DEL_OIF_TO_OIL(ch, ch_oil); } } static void pim_mlag_inherit_mlag_flags(struct pim_upstream *up, bool is_df) { struct listnode *listnode; struct pim_upstream *child; struct listnode *chnode; struct listnode *chnextnode; struct pim_ifchannel *ch; struct pim_interface *pim_ifp = NULL; struct channel_oil *ch_oil = NULL; if (PIM_DEBUG_MLAG) zlog_debug("%s: Updating DF for uptream:%s children", __func__, up->sg_str); for (ALL_LIST_ELEMENTS(up->ifchannels, chnode, chnextnode, ch)) { pim_ifp = (ch->interface) ? ch->interface->info : NULL; if (!pim_ifp || !PIM_I_am_DualActive(pim_ifp)) continue; for (ALL_LIST_ELEMENTS_RO(up->sources, listnode, child)) { if (PIM_DEBUG_MLAG) zlog_debug("%s: Updating DF for child:%s", __func__, child->sg_str); ch_oil = (child) ? child->channel_oil : NULL; if (!ch_oil) continue; if (is_df) PIM_MLAG_ADD_OIF_TO_OIL(ch, ch_oil); else PIM_MLAG_DEL_OIF_TO_OIL(ch, ch_oil); } } } /******************************* pim upstream sync **************************/ /* Update DF role for the upstream entry and return true on role change */ bool pim_mlag_up_df_role_update(struct pim_instance *pim, struct pim_upstream *up, bool is_df, const char *reason) { struct channel_oil *c_oil = up->channel_oil; bool old_is_df = !PIM_UPSTREAM_FLAG_TEST_MLAG_NON_DF(up->flags); struct pim_interface *vxlan_ifp; if (is_df == old_is_df) { if (PIM_DEBUG_MLAG) zlog_debug( "%s: Ignoring Role update for %s, since no change", __func__, up->sg_str); return false; } if (PIM_DEBUG_MLAG) zlog_debug("local MLAG mroute %s role changed to %s based on %s", up->sg_str, is_df ? "df" : "non-df", reason); if (is_df) PIM_UPSTREAM_FLAG_UNSET_MLAG_NON_DF(up->flags); else PIM_UPSTREAM_FLAG_SET_MLAG_NON_DF(up->flags); /* * This Upstream entry synced to peer Because of Dual-active * Interface configuration */ if (PIM_UPSTREAM_FLAG_TEST_MLAG_INTERFACE(up->flags)) { pim_mlag_inherit_mlag_flags(up, is_df); pim_mlag_calculate_df_for_ifchannels(up, is_df); } /* If the DF role has changed check if ipmr-lo needs to be * muted/un-muted. Active-Active devices and vxlan termination * devices (ipmr-lo) are suppressed on the non-DF. * This may leave the mroute with the empty OIL in which case the * the forwarding entry's sole purpose is to just blackhole the flow * headed to the switch. */ if (c_oil) { vxlan_ifp = pim_vxlan_get_term_ifp(pim); if (vxlan_ifp) pim_channel_update_oif_mute(c_oil, vxlan_ifp); } /* If DF role changed on a (*,G) termination mroute update the * associated DF role on the inherited (S,G) entries */ if (pim_addr_is_any(up->sg.src) && PIM_UPSTREAM_FLAG_TEST_MLAG_VXLAN(up->flags)) pim_vxlan_inherit_mlag_flags(pim, up, true /* inherit */); return true; } /* Run per-upstream entry DF election and return true on role change */ static bool pim_mlag_up_df_role_elect(struct pim_instance *pim, struct pim_upstream *up) { bool is_df; uint32_t peer_cost; uint32_t local_cost; bool rv; if (!pim_up_mlag_is_local(up)) return false; /* We are yet to rx a status update from the local MLAG daemon so * we will assume DF status. */ if (!(router->mlag_flags & PIM_MLAGF_STATUS_RXED)) return pim_mlag_up_df_role_update(pim, up, true /*is_df*/, "mlagd-down"); /* If not connected to peer assume DF role on the MLAG primary * switch (and non-DF on the secondary switch. */ if (!(router->mlag_flags & PIM_MLAGF_PEER_CONN_UP)) { is_df = (router->mlag_role == MLAG_ROLE_PRIMARY) ? true : false; return pim_mlag_up_df_role_update(pim, up, is_df, "peer-down"); } /* If MLAG peer session is up but zebra is down on the peer * assume DF role. */ if (!(router->mlag_flags & PIM_MLAGF_PEER_ZEBRA_UP)) return pim_mlag_up_df_role_update(pim, up, true /*is_df*/, "zebra-down"); /* If we are connected to peer switch but don't have a mroute * from it we have to assume non-DF role to avoid duplicates. * Note: When the peer connection comes up we wait for initial * replay to complete before moving "strays" i.e. local-mlag-mroutes * without a peer reference to non-df role. */ if (!PIM_UPSTREAM_FLAG_TEST_MLAG_PEER(up->flags)) return pim_mlag_up_df_role_update(pim, up, false /*is_df*/, "no-peer-mroute"); /* switch with the lowest RPF cost wins. if both switches have the same * cost MLAG role is used as a tie breaker (MLAG primary wins). */ peer_cost = up->mlag.peer_mrib_metric; local_cost = pim_up_mlag_local_cost(up); if (local_cost == peer_cost) { is_df = (router->mlag_role == MLAG_ROLE_PRIMARY) ? true : false; rv = pim_mlag_up_df_role_update(pim, up, is_df, "equal-cost"); } else { is_df = (local_cost < peer_cost) ? true : false; rv = pim_mlag_up_df_role_update(pim, up, is_df, "cost"); } return rv; } /* Handle upstream entry add from the peer MLAG switch - * - if a local entry doesn't exist one is created with reference * _MLAG_PEER * - if a local entry exists and has a MLAG OIF DF election is run. * the non-DF switch stop forwarding traffic to MLAG devices. */ static void pim_mlag_up_peer_add(struct mlag_mroute_add *msg) { struct pim_upstream *up; struct pim_instance *pim; int flags = 0; pim_sgaddr sg; struct vrf *vrf; memset(&sg, 0, sizeof(sg)); sg.src.s_addr = htonl(msg->source_ip); sg.grp.s_addr = htonl(msg->group_ip); if (PIM_DEBUG_MLAG) zlog_debug("peer MLAG mroute add %s:%pSG cost %d", msg->vrf_name, &sg, msg->cost_to_rp); /* XXX - this is not correct. we MUST cache updates to avoid losing * an entry because of race conditions with the peer switch. */ vrf = vrf_lookup_by_name(msg->vrf_name); if (!vrf) { if (PIM_DEBUG_MLAG) zlog_debug( "peer MLAG mroute add failed %s:%pSG; no vrf", msg->vrf_name, &sg); return; } pim = vrf->info; up = pim_upstream_find(pim, &sg); if (up) { /* upstream already exists; create peer reference if it * doesn't already exist. */ if (!PIM_UPSTREAM_FLAG_TEST_MLAG_PEER(up->flags)) pim_upstream_ref(up, PIM_UPSTREAM_FLAG_MASK_MLAG_PEER, __func__); } else { PIM_UPSTREAM_FLAG_SET_MLAG_PEER(flags); up = pim_upstream_add(pim, &sg, NULL /*iif*/, flags, __func__, NULL /*if_ch*/); if (!up) { if (PIM_DEBUG_MLAG) zlog_debug( "peer MLAG mroute add failed %s:%pSG", vrf->name, &sg); return; } } up->mlag.peer_mrib_metric = msg->cost_to_rp; pim_mlag_up_df_role_elect(pim, up); } /* Handle upstream entry del from the peer MLAG switch - * - peer reference is removed. this can result in the upstream * being deleted altogether. * - if a local entry continues to exisy and has a MLAG OIF DF election * is re-run (at the end of which the local entry will be the DF). */ static struct pim_upstream *pim_mlag_up_peer_deref(struct pim_instance *pim, struct pim_upstream *up) { if (!PIM_UPSTREAM_FLAG_TEST_MLAG_PEER(up->flags)) return up; PIM_UPSTREAM_FLAG_UNSET_MLAG_PEER(up->flags); up = pim_upstream_del(pim, up, __func__); if (up) pim_mlag_up_df_role_elect(pim, up); return up; } static void pim_mlag_up_peer_del(struct mlag_mroute_del *msg) { struct pim_upstream *up; struct pim_instance *pim; pim_sgaddr sg; struct vrf *vrf; memset(&sg, 0, sizeof(sg)); sg.src.s_addr = htonl(msg->source_ip); sg.grp.s_addr = htonl(msg->group_ip); if (PIM_DEBUG_MLAG) zlog_debug("peer MLAG mroute del %s:%pSG", msg->vrf_name, &sg); vrf = vrf_lookup_by_name(msg->vrf_name); if (!vrf) { if (PIM_DEBUG_MLAG) zlog_debug( "peer MLAG mroute del skipped %s:%pSG; no vrf", msg->vrf_name, &sg); return; } pim = vrf->info; up = pim_upstream_find(pim, &sg); if (!up) { if (PIM_DEBUG_MLAG) zlog_debug( "peer MLAG mroute del skipped %s:%pSG; no up", vrf->name, &sg); return; } (void)pim_mlag_up_peer_deref(pim, up); } /* When we lose connection to the local MLAG daemon we can drop all peer * references. */ static void pim_mlag_up_peer_del_all(void) { struct list *temp = list_new(); struct pim_upstream *up; struct vrf *vrf; struct pim_instance *pim; /* * So why these gyrations? * pim->upstream_head has the list of *,G and S,G * that are in the system. The problem of course * is that it is an ordered list: * (*,G1) -> (S1,G1) -> (S2,G2) -> (S3, G2) -> (*,G2) -> (S1,G2) * And the *,G1 has pointers to S1,G1 and S2,G1 * if we delete *,G1 then we have a situation where * S1,G1 and S2,G2 can be deleted as well. Then a * simple ALL_LIST_ELEMENTS will have the next listnode * pointer become invalid and we crash. * So let's grab the list of MLAG_PEER upstreams * add a refcount put on another list and delete safely */ RB_FOREACH(vrf, vrf_name_head, &vrfs_by_name) { pim = vrf->info; frr_each (rb_pim_upstream, &pim->upstream_head, up) { if (!PIM_UPSTREAM_FLAG_TEST_MLAG_PEER(up->flags)) continue; listnode_add(temp, up); /* * Add a reference since we are adding to this * list for deletion */ up->ref_count++; } while (temp->count) { up = listnode_head(temp); listnode_delete(temp, up); up = pim_mlag_up_peer_deref(pim, up); /* * This is the deletion of the reference added * above */ if (up) pim_upstream_del(pim, up, __func__); } } list_delete(&temp); } /* Send upstream entry to the local MLAG daemon (which will subsequently * send it to the peer MLAG switch). */ static void pim_mlag_up_local_add_send(struct pim_instance *pim, struct pim_upstream *up) { struct stream *s = NULL; struct vrf *vrf = pim->vrf; if (!(router->mlag_flags & PIM_MLAGF_LOCAL_CONN_UP)) return; s = stream_new(sizeof(struct mlag_mroute_add) + PIM_MLAG_METADATA_LEN); if (!s) return; if (PIM_DEBUG_MLAG) zlog_debug("local MLAG mroute add %s:%s", vrf->name, up->sg_str); ++router->mlag_stats.msg.mroute_add_tx; stream_putl(s, MLAG_MROUTE_ADD); stream_put(s, vrf->name, VRF_NAMSIZ); stream_putl(s, ntohl(up->sg.src.s_addr)); stream_putl(s, ntohl(up->sg.grp.s_addr)); stream_putl(s, pim_up_mlag_local_cost(up)); /* XXX - who is addding*/ stream_putl(s, MLAG_OWNER_VXLAN); /* XXX - am_i_DR field should be removed */ stream_putc(s, false); stream_putc(s, !(PIM_UPSTREAM_FLAG_TEST_MLAG_NON_DF(up->flags))); stream_putl(s, vrf->vrf_id); /* XXX - this field is a No-op for VXLAN*/ stream_put(s, NULL, INTERFACE_NAMSIZ); stream_fifo_push_safe(router->mlag_fifo, s); pim_mlag_signal_zpthread(); } static void pim_mlag_up_local_del_send(struct pim_instance *pim, struct pim_upstream *up) { struct stream *s = NULL; struct vrf *vrf = pim->vrf; if (!(router->mlag_flags & PIM_MLAGF_LOCAL_CONN_UP)) return; s = stream_new(sizeof(struct mlag_mroute_del) + PIM_MLAG_METADATA_LEN); if (!s) return; if (PIM_DEBUG_MLAG) zlog_debug("local MLAG mroute del %s:%s", vrf->name, up->sg_str); ++router->mlag_stats.msg.mroute_del_tx; stream_putl(s, MLAG_MROUTE_DEL); stream_put(s, vrf->name, VRF_NAMSIZ); stream_putl(s, ntohl(up->sg.src.s_addr)); stream_putl(s, ntohl(up->sg.grp.s_addr)); /* XXX - who is adding */ stream_putl(s, MLAG_OWNER_VXLAN); stream_putl(s, vrf->vrf_id); /* XXX - this field is a No-op for VXLAN */ stream_put(s, NULL, INTERFACE_NAMSIZ); /* XXX - is this the the most optimal way to do things */ stream_fifo_push_safe(router->mlag_fifo, s); pim_mlag_signal_zpthread(); } /* Called when a local upstream entry is created or if it's cost changes */ void pim_mlag_up_local_add(struct pim_instance *pim, struct pim_upstream *up) { pim_mlag_up_df_role_elect(pim, up); /* XXX - need to add some dup checks here */ pim_mlag_up_local_add_send(pim, up); } /* Called when local MLAG reference is removed from an upstream entry */ void pim_mlag_up_local_del(struct pim_instance *pim, struct pim_upstream *up) { pim_mlag_up_df_role_elect(pim, up); pim_mlag_up_local_del_send(pim, up); } /* When connection to local MLAG daemon is established all the local * MLAG upstream entries are replayed to it. */ static void pim_mlag_up_local_replay(void) { struct pim_upstream *up; struct vrf *vrf; struct pim_instance *pim; RB_FOREACH(vrf, vrf_name_head, &vrfs_by_name) { pim = vrf->info; frr_each (rb_pim_upstream, &pim->upstream_head, up) { if (pim_up_mlag_is_local(up)) pim_mlag_up_local_add_send(pim, up); } } } /* on local/peer mlag connection and role changes the DF status needs * to be re-evaluated */ static void pim_mlag_up_local_reeval(bool mlagd_send, const char *reason_code) { struct pim_upstream *up; struct vrf *vrf; struct pim_instance *pim; if (PIM_DEBUG_MLAG) zlog_debug("%s re-run DF election because of %s", __func__, reason_code); RB_FOREACH(vrf, vrf_name_head, &vrfs_by_name) { pim = vrf->info; frr_each (rb_pim_upstream, &pim->upstream_head, up) { if (!pim_up_mlag_is_local(up)) continue; /* if role changes re-send to peer */ if (pim_mlag_up_df_role_elect(pim, up) && mlagd_send) pim_mlag_up_local_add_send(pim, up); } } } /*****************PIM Actions for MLAG state changes**********************/ /* notify the anycast VTEP component about state changes */ static inline void pim_mlag_vxlan_state_update(void) { bool enable = !!(router->mlag_flags & PIM_MLAGF_STATUS_RXED); bool peer_state = !!(router->mlag_flags & PIM_MLAGF_PEER_CONN_UP); pim_vxlan_mlag_update(enable, peer_state, router->mlag_role, router->peerlink_rif_p, &router->local_vtep_ip); } /**************End of PIM Actions for MLAG State changes******************/ /********************API to process PIM MLAG Data ************************/ static void pim_mlag_process_mlagd_state_change(struct mlag_status msg) { bool role_chg = false; bool state_chg = false; bool notify_vxlan = false; struct interface *peerlink_rif_p; char buf[MLAG_ROLE_STRSIZE]; if (PIM_DEBUG_MLAG) zlog_debug("%s: msg dump: my_role: %s, peer_state: %s", __func__, mlag_role2str(msg.my_role, buf, sizeof(buf)), (msg.peer_state == MLAG_STATE_RUNNING ? "RUNNING" : "DOWN")); if (!(router->mlag_flags & PIM_MLAGF_LOCAL_CONN_UP)) { if (PIM_DEBUG_MLAG) zlog_debug("%s: msg ignored mlagd process state down", __func__); return; } ++router->mlag_stats.msg.mlag_status_updates; /* evaluate the changes first */ if (router->mlag_role != msg.my_role) { role_chg = true; notify_vxlan = true; router->mlag_role = msg.my_role; } strlcpy(router->peerlink_rif, msg.peerlink_rif, sizeof(router->peerlink_rif)); /* XXX - handle the case where we may rx the interface name from the * MLAG daemon before we get the interface from zebra. */ peerlink_rif_p = if_lookup_by_name(router->peerlink_rif, VRF_DEFAULT); if (router->peerlink_rif_p != peerlink_rif_p) { router->peerlink_rif_p = peerlink_rif_p; notify_vxlan = true; } if (msg.peer_state == MLAG_STATE_RUNNING) { if (!(router->mlag_flags & PIM_MLAGF_PEER_CONN_UP)) { state_chg = true; notify_vxlan = true; router->mlag_flags |= PIM_MLAGF_PEER_CONN_UP; } router->connected_to_mlag = true; } else { if (router->mlag_flags & PIM_MLAGF_PEER_CONN_UP) { ++router->mlag_stats.peer_session_downs; state_chg = true; notify_vxlan = true; router->mlag_flags &= ~PIM_MLAGF_PEER_CONN_UP; } router->connected_to_mlag = false; } /* apply the changes */ /* when connection to mlagd comes up we hold send mroutes till we have * rxed the status and had a chance to re-valuate DF state */ if (!(router->mlag_flags & PIM_MLAGF_STATUS_RXED)) { router->mlag_flags |= PIM_MLAGF_STATUS_RXED; pim_mlag_vxlan_state_update(); /* on session up re-eval DF status */ pim_mlag_up_local_reeval(false /*mlagd_send*/, "mlagd_up"); /* replay all the upstream entries to the local MLAG daemon */ pim_mlag_up_local_replay(); return; } if (notify_vxlan) pim_mlag_vxlan_state_update(); if (state_chg) { if (!(router->mlag_flags & PIM_MLAGF_PEER_CONN_UP)) /* when a connection goes down the primary takes over * DF role for all entries */ pim_mlag_up_local_reeval(true /*mlagd_send*/, "peer_down"); else /* XXX - when session comes up we need to wait for * PEER_REPLAY_DONE before running re-election on * local-mlag entries that are missing peer reference */ pim_mlag_up_local_reeval(true /*mlagd_send*/, "peer_up"); } else if (role_chg) { /* MLAG role changed without a state change */ pim_mlag_up_local_reeval(true /*mlagd_send*/, "role_chg"); } } static void pim_mlag_process_peer_frr_state_change(struct mlag_frr_status msg) { if (PIM_DEBUG_MLAG) zlog_debug( "%s: msg dump: peer_frr_state: %s", __func__, (msg.frr_state == MLAG_FRR_STATE_UP ? "UP" : "DOWN")); if (!(router->mlag_flags & PIM_MLAGF_LOCAL_CONN_UP)) { if (PIM_DEBUG_MLAG) zlog_debug("%s: msg ignored mlagd process state down", __func__); return; } ++router->mlag_stats.msg.peer_zebra_status_updates; /* evaluate the changes first */ if (msg.frr_state == MLAG_FRR_STATE_UP) { if (!(router->mlag_flags & PIM_MLAGF_PEER_ZEBRA_UP)) { router->mlag_flags |= PIM_MLAGF_PEER_ZEBRA_UP; /* XXX - when peer zebra comes up we need to wait for * for some time to let the peer setup MDTs before * before relinquishing DF status */ pim_mlag_up_local_reeval(true /*mlagd_send*/, "zebra_up"); } } else { if (router->mlag_flags & PIM_MLAGF_PEER_ZEBRA_UP) { ++router->mlag_stats.peer_zebra_downs; router->mlag_flags &= ~PIM_MLAGF_PEER_ZEBRA_UP; /* when a peer zebra goes down we assume DF role */ pim_mlag_up_local_reeval(true /*mlagd_send*/, "zebra_down"); } } } static void pim_mlag_process_vxlan_update(struct mlag_vxlan *msg) { char addr_buf1[INET_ADDRSTRLEN]; char addr_buf2[INET_ADDRSTRLEN]; uint32_t local_ip; if (!(router->mlag_flags & PIM_MLAGF_LOCAL_CONN_UP)) { if (PIM_DEBUG_MLAG) zlog_debug("%s: msg ignored mlagd process state down", __func__); return; } ++router->mlag_stats.msg.vxlan_updates; router->anycast_vtep_ip.s_addr = htonl(msg->anycast_ip); local_ip = htonl(msg->local_ip); if (router->local_vtep_ip.s_addr != local_ip) { router->local_vtep_ip.s_addr = local_ip; pim_mlag_vxlan_state_update(); } if (PIM_DEBUG_MLAG) { inet_ntop(AF_INET, &router->local_vtep_ip, addr_buf1, INET_ADDRSTRLEN); inet_ntop(AF_INET, &router->anycast_vtep_ip, addr_buf2, INET_ADDRSTRLEN); zlog_debug("%s: msg dump: local-ip:%s, anycast-ip:%s", __func__, addr_buf1, addr_buf2); } } static void pim_mlag_process_mroute_add(struct mlag_mroute_add msg) { if (PIM_DEBUG_MLAG) { pim_sgaddr sg; sg.grp.s_addr = ntohl(msg.group_ip); sg.src.s_addr = ntohl(msg.source_ip); zlog_debug( "%s: msg dump: vrf_name: %s, s.ip: 0x%x, g.ip: 0x%x (%pSG) cost: %u", __func__, msg.vrf_name, msg.source_ip, msg.group_ip, &sg, msg.cost_to_rp); zlog_debug( "(%pSG)owner_id: %d, DR: %d, Dual active: %d, vrf_id: 0x%x intf_name: %s", &sg, msg.owner_id, msg.am_i_dr, msg.am_i_dual_active, msg.vrf_id, msg.intf_name); } if (!(router->mlag_flags & PIM_MLAGF_LOCAL_CONN_UP)) { if (PIM_DEBUG_MLAG) zlog_debug("%s: msg ignored mlagd process state down", __func__); return; } ++router->mlag_stats.msg.mroute_add_rx; pim_mlag_up_peer_add(&msg); } static void pim_mlag_process_mroute_del(struct mlag_mroute_del msg) { if (PIM_DEBUG_MLAG) { pim_sgaddr sg; sg.grp.s_addr = ntohl(msg.group_ip); sg.src.s_addr = ntohl(msg.source_ip); zlog_debug( "%s: msg dump: vrf_name: %s, s.ip: 0x%x, g.ip: 0x%x(%pSG)", __func__, msg.vrf_name, msg.source_ip, msg.group_ip, &sg); zlog_debug("(%pSG)owner_id: %d, vrf_id: 0x%x intf_name: %s", &sg, msg.owner_id, msg.vrf_id, msg.intf_name); } if (!(router->mlag_flags & PIM_MLAGF_LOCAL_CONN_UP)) { if (PIM_DEBUG_MLAG) zlog_debug("%s: msg ignored mlagd process state down", __func__); return; } ++router->mlag_stats.msg.mroute_del_rx; pim_mlag_up_peer_del(&msg); } int pim_zebra_mlag_handle_msg(int cmd, struct zclient *zclient, uint16_t zapi_length, vrf_id_t vrf_id) { struct stream *s = zclient->ibuf; struct mlag_msg mlag_msg; char buf[80]; int rc = 0; size_t length; rc = mlag_lib_decode_mlag_hdr(s, &mlag_msg, &length); if (rc) return (rc); if (PIM_DEBUG_MLAG) zlog_debug("%s: Received msg type: %s length: %d, bulk_cnt: %d", __func__, mlag_lib_msgid_to_str(mlag_msg.msg_type, buf, sizeof(buf)), mlag_msg.data_len, mlag_msg.msg_cnt); switch (mlag_msg.msg_type) { case MLAG_STATUS_UPDATE: { struct mlag_status msg; rc = mlag_lib_decode_mlag_status(s, &msg); if (rc) return (rc); pim_mlag_process_mlagd_state_change(msg); } break; case MLAG_PEER_FRR_STATUS: { struct mlag_frr_status msg; rc = mlag_lib_decode_frr_status(s, &msg); if (rc) return (rc); pim_mlag_process_peer_frr_state_change(msg); } break; case MLAG_VXLAN_UPDATE: { struct mlag_vxlan msg; rc = mlag_lib_decode_vxlan_update(s, &msg); if (rc) return rc; pim_mlag_process_vxlan_update(&msg); } break; case MLAG_MROUTE_ADD: { struct mlag_mroute_add msg; rc = mlag_lib_decode_mroute_add(s, &msg, &length); if (rc) return (rc); pim_mlag_process_mroute_add(msg); } break; case MLAG_MROUTE_DEL: { struct mlag_mroute_del msg; rc = mlag_lib_decode_mroute_del(s, &msg, &length); if (rc) return (rc); pim_mlag_process_mroute_del(msg); } break; case MLAG_MROUTE_ADD_BULK: { struct mlag_mroute_add msg; int i; for (i = 0; i < mlag_msg.msg_cnt; i++) { rc = mlag_lib_decode_mroute_add(s, &msg, &length); if (rc) return (rc); pim_mlag_process_mroute_add(msg); } } break; case MLAG_MROUTE_DEL_BULK: { struct mlag_mroute_del msg; int i; for (i = 0; i < mlag_msg.msg_cnt; i++) { rc = mlag_lib_decode_mroute_del(s, &msg, &length); if (rc) return (rc); pim_mlag_process_mroute_del(msg); } } break; default: break; } return 0; } /****************End of PIM Mesasge processing handler********************/ int pim_zebra_mlag_process_up(ZAPI_CALLBACK_ARGS) { if (PIM_DEBUG_MLAG) zlog_debug("%s: Received Process-Up from Mlag", __func__); /* * Incase of local MLAG restart, PIM needs to replay all the data * since MLAG is empty. */ router->connected_to_mlag = true; router->mlag_flags |= PIM_MLAGF_LOCAL_CONN_UP; return 0; } static void pim_mlag_param_reset(void) { /* reset the cached params and stats */ router->mlag_flags &= ~(PIM_MLAGF_STATUS_RXED | PIM_MLAGF_LOCAL_CONN_UP | PIM_MLAGF_PEER_CONN_UP | PIM_MLAGF_PEER_ZEBRA_UP); router->local_vtep_ip.s_addr = INADDR_ANY; router->anycast_vtep_ip.s_addr = INADDR_ANY; router->mlag_role = MLAG_ROLE_NONE; memset(&router->mlag_stats.msg, 0, sizeof(router->mlag_stats.msg)); router->peerlink_rif[0] = '\0'; } int pim_zebra_mlag_process_down(ZAPI_CALLBACK_ARGS) { if (PIM_DEBUG_MLAG) zlog_debug("%s: Received Process-Down from Mlag", __func__); /* Local CLAG is down, reset peer data and forward the traffic if * we are DR */ if (router->mlag_flags & PIM_MLAGF_PEER_CONN_UP) ++router->mlag_stats.peer_session_downs; if (router->mlag_flags & PIM_MLAGF_PEER_ZEBRA_UP) ++router->mlag_stats.peer_zebra_downs; router->connected_to_mlag = false; pim_mlag_param_reset(); /* on mlagd session down re-eval DF status */ pim_mlag_up_local_reeval(false /*mlagd_send*/, "mlagd_down"); /* flush all peer references */ pim_mlag_up_peer_del_all(); /* notify the vxlan component */ pim_mlag_vxlan_state_update(); return 0; } static void pim_mlag_register_handler(struct thread *thread) { uint32_t bit_mask = 0; if (!zclient) return; SET_FLAG(bit_mask, (1 << MLAG_STATUS_UPDATE)); SET_FLAG(bit_mask, (1 << MLAG_MROUTE_ADD)); SET_FLAG(bit_mask, (1 << MLAG_MROUTE_DEL)); SET_FLAG(bit_mask, (1 << MLAG_DUMP)); SET_FLAG(bit_mask, (1 << MLAG_MROUTE_ADD_BULK)); SET_FLAG(bit_mask, (1 << MLAG_MROUTE_DEL_BULK)); SET_FLAG(bit_mask, (1 << MLAG_PIM_CFG_DUMP)); SET_FLAG(bit_mask, (1 << MLAG_VXLAN_UPDATE)); SET_FLAG(bit_mask, (1 << MLAG_PEER_FRR_STATUS)); if (PIM_DEBUG_MLAG) zlog_debug("%s: Posting Client Register to MLAG mask: 0x%x", __func__, bit_mask); zclient_send_mlag_register(zclient, bit_mask); } void pim_mlag_register(void) { if (router->mlag_process_register) return; router->mlag_process_register = true; thread_add_event(router->master, pim_mlag_register_handler, NULL, 0, NULL); } static void pim_mlag_deregister_handler(struct thread *thread) { if (!zclient) return; if (PIM_DEBUG_MLAG) zlog_debug("%s: Posting Client De-Register to MLAG from PIM", __func__); router->connected_to_mlag = false; zclient_send_mlag_deregister(zclient); } void pim_mlag_deregister(void) { /* if somebody still interested in the MLAG channel skip de-reg */ if (router->pim_mlag_intf_cnt || pim_vxlan_do_mlag_reg()) return; /* not registered; nothing do */ if (!router->mlag_process_register) return; router->mlag_process_register = false; thread_add_event(router->master, pim_mlag_deregister_handler, NULL, 0, NULL); } void pim_if_configure_mlag_dualactive(struct pim_interface *pim_ifp) { if (!pim_ifp || !pim_ifp->pim || pim_ifp->activeactive == true) return; if (PIM_DEBUG_MLAG) zlog_debug("%s: Configuring active-active on Interface: %s", __func__, "NULL"); pim_ifp->activeactive = true; if (pim_ifp->pim) pim_ifp->pim->inst_mlag_intf_cnt++; router->pim_mlag_intf_cnt++; if (PIM_DEBUG_MLAG) zlog_debug( "%s: Total MLAG configured Interfaces on router: %d, Inst: %d", __func__, router->pim_mlag_intf_cnt, pim_ifp->pim->inst_mlag_intf_cnt); if (router->pim_mlag_intf_cnt == 1) { /* * at least one Interface is configured for MLAG, send register * to Zebra for receiving MLAG Updates */ pim_mlag_register(); } } void pim_if_unconfigure_mlag_dualactive(struct pim_interface *pim_ifp) { if (!pim_ifp || !pim_ifp->pim || pim_ifp->activeactive == false) return; if (PIM_DEBUG_MLAG) zlog_debug("%s: UnConfiguring active-active on Interface: %s", __func__, "NULL"); pim_ifp->activeactive = false; pim_ifp->pim->inst_mlag_intf_cnt--; router->pim_mlag_intf_cnt--; if (PIM_DEBUG_MLAG) zlog_debug( "%s: Total MLAG configured Interfaces on router: %d, Inst: %d", __func__, router->pim_mlag_intf_cnt, pim_ifp->pim->inst_mlag_intf_cnt); if (router->pim_mlag_intf_cnt == 0) { /* * all the Interfaces are MLAG un-configured, post MLAG * De-register to Zebra */ pim_mlag_deregister(); pim_mlag_param_reset(); } } void pim_instance_mlag_init(struct pim_instance *pim) { if (!pim) return; pim->inst_mlag_intf_cnt = 0; } void pim_instance_mlag_terminate(struct pim_instance *pim) { struct interface *ifp; if (!pim) return; FOR_ALL_INTERFACES (pim->vrf, ifp) { struct pim_interface *pim_ifp = ifp->info; if (!pim_ifp || pim_ifp->activeactive == false) continue; pim_if_unconfigure_mlag_dualactive(pim_ifp); } pim->inst_mlag_intf_cnt = 0; } void pim_mlag_terminate(void) { stream_free(router->mlag_stream); router->mlag_stream = NULL; stream_fifo_free(router->mlag_fifo); router->mlag_fifo = NULL; } void pim_mlag_init(void) { pim_mlag_param_reset(); router->pim_mlag_intf_cnt = 0; router->connected_to_mlag = false; router->mlag_fifo = stream_fifo_new(); router->zpthread_mlag_write = NULL; router->mlag_stream = stream_new(MLAG_BUF_LIMIT); }