diff options
Diffstat (limited to 'bgpd/bgp_packet.c')
-rw-r--r-- | bgpd/bgp_packet.c | 3540 |
1 files changed, 3540 insertions, 0 deletions
diff --git a/bgpd/bgp_packet.c b/bgpd/bgp_packet.c new file mode 100644 index 0000000..cae82cb --- /dev/null +++ b/bgpd/bgp_packet.c @@ -0,0 +1,3540 @@ +// SPDX-License-Identifier: GPL-2.0-or-later +/* BGP packet management routine. + * Contains utility functions for constructing and consuming BGP messages. + * Copyright (C) 2017 Cumulus Networks + * Copyright (C) 1999 Kunihiro Ishiguro + */ + +#include <zebra.h> +#include <sys/time.h> + +#include "frrevent.h" +#include "stream.h" +#include "network.h" +#include "prefix.h" +#include "command.h" +#include "log.h" +#include "memory.h" +#include "sockunion.h" /* for inet_ntop () */ +#include "sockopt.h" +#include "linklist.h" +#include "plist.h" +#include "queue.h" +#include "filter.h" +#include "lib_errors.h" + +#include "bgpd/bgpd.h" +#include "bgpd/bgp_table.h" +#include "bgpd/bgp_dump.h" +#include "bgpd/bgp_bmp.h" +#include "bgpd/bgp_attr.h" +#include "bgpd/bgp_debug.h" +#include "bgpd/bgp_errors.h" +#include "bgpd/bgp_fsm.h" +#include "bgpd/bgp_route.h" +#include "bgpd/bgp_packet.h" +#include "bgpd/bgp_open.h" +#include "bgpd/bgp_aspath.h" +#include "bgpd/bgp_community.h" +#include "bgpd/bgp_ecommunity.h" +#include "bgpd/bgp_lcommunity.h" +#include "bgpd/bgp_network.h" +#include "bgpd/bgp_mplsvpn.h" +#include "bgpd/bgp_evpn.h" +#include "bgpd/bgp_advertise.h" +#include "bgpd/bgp_vty.h" +#include "bgpd/bgp_updgrp.h" +#include "bgpd/bgp_label.h" +#include "bgpd/bgp_io.h" +#include "bgpd/bgp_keepalives.h" +#include "bgpd/bgp_flowspec.h" +#include "bgpd/bgp_trace.h" + +DEFINE_HOOK(bgp_packet_dump, + (struct peer *peer, uint8_t type, bgp_size_t size, + struct stream *s), + (peer, type, size, s)); + +DEFINE_HOOK(bgp_packet_send, + (struct peer *peer, uint8_t type, bgp_size_t size, + struct stream *s), + (peer, type, size, s)); + +/** + * Sets marker and type fields for a BGP message. 
+ * + * @param s the stream containing the packet + * @param type the packet type + * @return the size of the stream + */ +int bgp_packet_set_marker(struct stream *s, uint8_t type) +{ + int i; + + /* Fill in marker. */ + for (i = 0; i < BGP_MARKER_SIZE; i++) + stream_putc(s, 0xff); + + /* Dummy total length. This field is should be filled in later on. */ + stream_putw(s, 0); + + /* BGP packet type. */ + stream_putc(s, type); + + /* Return current stream size. */ + return stream_get_endp(s); +} + +/** + * Sets size field for a BGP message. + * + * Size field is set to the size of the stream passed. + * + * @param s the stream containing the packet + */ +void bgp_packet_set_size(struct stream *s) +{ + int cp; + + /* Preserve current pointer. */ + cp = stream_get_endp(s); + stream_putw_at(s, BGP_MARKER_SIZE, cp); +} + +/* + * Push a packet onto the beginning of the peer's output queue. + * This function acquires the peer's write mutex before proceeding. + */ +static void bgp_packet_add(struct peer_connection *connection, + struct peer *peer, struct stream *s) +{ + intmax_t delta; + uint32_t holdtime; + intmax_t sendholdtime; + + frr_with_mutex (&connection->io_mtx) { + /* if the queue is empty, reset the "last OK" timestamp to + * now, otherwise if we write another packet immediately + * after it'll get confused + */ + if (!stream_fifo_count_safe(connection->obuf)) + peer->last_sendq_ok = monotime(NULL); + + stream_fifo_push(connection->obuf, s); + + delta = monotime(NULL) - peer->last_sendq_ok; + + if (CHECK_FLAG(peer->flags, PEER_FLAG_TIMER)) + holdtime = atomic_load_explicit(&peer->holdtime, + memory_order_relaxed); + else + holdtime = peer->bgp->default_holdtime; + + sendholdtime = holdtime * 2; + + /* Note that when we're here, we're adding some packet to the + * OutQ. That includes keepalives when there is nothing to + * do, so there's a guarantee we pass by here once in a while. 
+ * + * That implies there is no need to go set up another separate + * timer that ticks down SendHoldTime, as we'll be here sooner + * or later anyway and will see the checks below failing. + */ + if (!holdtime) { + /* no holdtime, do nothing. */ + } else if (delta > sendholdtime) { + flog_err( + EC_BGP_SENDQ_STUCK_PROPER, + "%pBP has not made any SendQ progress for 2 holdtimes (%jds), terminating session", + peer, sendholdtime); + BGP_EVENT_ADD(connection, TCP_fatal_error); + } else if (delta > (intmax_t)holdtime && + monotime(NULL) - peer->last_sendq_warn > 5) { + flog_warn( + EC_BGP_SENDQ_STUCK_WARN, + "%pBP has not made any SendQ progress for 1 holdtime (%us), peer overloaded?", + peer, holdtime); + peer->last_sendq_warn = monotime(NULL); + } + } +} + +static struct stream *bgp_update_packet_eor(struct peer *peer, afi_t afi, + safi_t safi) +{ + struct stream *s; + iana_afi_t pkt_afi = IANA_AFI_IPV4; + iana_safi_t pkt_safi = IANA_SAFI_UNICAST; + + if (DISABLE_BGP_ANNOUNCE) + return NULL; + + if (bgp_debug_neighbor_events(peer)) + zlog_debug("send End-of-RIB for %s to %s", + get_afi_safi_str(afi, safi, false), peer->host); + + s = stream_new(peer->max_packet_size); + + /* Make BGP update packet. */ + bgp_packet_set_marker(s, BGP_MSG_UPDATE); + + /* Unfeasible Routes Length */ + stream_putw(s, 0); + + if (afi == AFI_IP && safi == SAFI_UNICAST) { + /* Total Path Attribute Length */ + stream_putw(s, 0); + } else { + /* Convert AFI, SAFI to values for packet. */ + bgp_map_afi_safi_int2iana(afi, safi, &pkt_afi, &pkt_safi); + + /* Total Path Attribute Length */ + stream_putw(s, 6); + stream_putc(s, BGP_ATTR_FLAG_OPTIONAL); + stream_putc(s, BGP_ATTR_MP_UNREACH_NLRI); + stream_putc(s, 3); + stream_putw(s, pkt_afi); + stream_putc(s, pkt_safi); + } + + bgp_packet_set_size(s); + return s; +} + +/* Called when there is a change in the EOR(implicit or explicit) status of a + * peer. Ends the update-delay if all expected peers are done with EORs. 
*/ +void bgp_check_update_delay(struct bgp *bgp) +{ + struct listnode *node, *nnode; + struct peer *peer = NULL; + + if (bgp_debug_neighbor_events(peer)) + zlog_debug("Checking update delay, T: %d R: %d I:%d E: %d", + bgp->established, bgp->restarted_peers, + bgp->implicit_eors, bgp->explicit_eors); + + if (bgp->established + <= bgp->restarted_peers + bgp->implicit_eors + bgp->explicit_eors) { + /* + * This is an extra sanity check to make sure we wait for all + * the eligible configured peers. This check is performed if + * establish wait timer is on, or establish wait option is not + * given with the update-delay command + */ + if (bgp->t_establish_wait + || (bgp->v_establish_wait == bgp->v_update_delay)) + for (ALL_LIST_ELEMENTS(bgp->peer, node, nnode, peer)) { + if (CHECK_FLAG(peer->flags, + PEER_FLAG_CONFIG_NODE) + && !CHECK_FLAG(peer->flags, + PEER_FLAG_SHUTDOWN) + && !CHECK_FLAG(peer->bgp->flags, + BGP_FLAG_SHUTDOWN) + && !peer->update_delay_over) { + if (bgp_debug_neighbor_events(peer)) + zlog_debug( + " Peer %s pending, continuing read-only mode", + peer->host); + return; + } + } + + zlog_info( + "Update delay ended, restarted: %d, EORs implicit: %d, explicit: %d", + bgp->restarted_peers, bgp->implicit_eors, + bgp->explicit_eors); + bgp_update_delay_end(bgp); + } +} + +/* + * Called if peer is known to have restarted. The restart-state bit in + * Graceful-Restart capability is used for that + */ +void bgp_update_restarted_peers(struct peer *peer) +{ + if (!bgp_update_delay_active(peer->bgp)) + return; /* BGP update delay has ended */ + if (peer->update_delay_over) + return; /* This peer has already been considered */ + + if (bgp_debug_neighbor_events(peer)) + zlog_debug("Peer %s: Checking restarted", peer->host); + + if (peer_established(peer->connection)) { + peer->update_delay_over = 1; + peer->bgp->restarted_peers++; + bgp_check_update_delay(peer->bgp); + } +} + +/* + * Called as peer receives a keep-alive. 
Determines if this occurence can be + * taken as an implicit EOR for this peer. + * NOTE: The very first keep-alive after the Established state of a peer is + * considered implicit EOR for the update-delay purposes + */ +void bgp_update_implicit_eors(struct peer *peer) +{ + if (!bgp_update_delay_active(peer->bgp)) + return; /* BGP update delay has ended */ + if (peer->update_delay_over) + return; /* This peer has already been considered */ + + if (bgp_debug_neighbor_events(peer)) + zlog_debug("Peer %s: Checking implicit EORs", peer->host); + + if (peer_established(peer->connection)) { + peer->update_delay_over = 1; + peer->bgp->implicit_eors++; + bgp_check_update_delay(peer->bgp); + } +} + +/* + * Should be called only when there is a change in the EOR_RECEIVED status + * for any afi/safi on a peer. + */ +static void bgp_update_explicit_eors(struct peer *peer) +{ + afi_t afi; + safi_t safi; + + if (!bgp_update_delay_active(peer->bgp)) + return; /* BGP update delay has ended */ + if (peer->update_delay_over) + return; /* This peer has already been considered */ + + if (bgp_debug_neighbor_events(peer)) + zlog_debug("Peer %s: Checking explicit EORs", peer->host); + + FOREACH_AFI_SAFI (afi, safi) { + if (peer->afc_nego[afi][safi] + && !CHECK_FLAG(peer->af_sflags[afi][safi], + PEER_STATUS_EOR_RECEIVED)) { + if (bgp_debug_neighbor_events(peer)) + zlog_debug( + " afi %d safi %d didn't receive EOR", + afi, safi); + return; + } + } + + peer->update_delay_over = 1; + peer->bgp->explicit_eors++; + bgp_check_update_delay(peer->bgp); +} + +/** + * Frontend for NLRI parsing, to fan-out to AFI/SAFI specific parsers. + * + * mp_withdraw, if set, is used to nullify attr structure on most of the + * calling safi function and for evpn, passed as parameter + */ +int bgp_nlri_parse(struct peer *peer, struct attr *attr, + struct bgp_nlri *packet, bool mp_withdraw) +{ + switch (packet->safi) { + case SAFI_UNICAST: + case SAFI_MULTICAST: + return bgp_nlri_parse_ip(peer, mp_withdraw ? 
NULL : attr, + packet); + case SAFI_LABELED_UNICAST: + return bgp_nlri_parse_label(peer, mp_withdraw ? NULL : attr, + packet); + case SAFI_MPLS_VPN: + return bgp_nlri_parse_vpn(peer, mp_withdraw ? NULL : attr, + packet); + case SAFI_EVPN: + return bgp_nlri_parse_evpn(peer, attr, packet, mp_withdraw); + case SAFI_FLOWSPEC: + return bgp_nlri_parse_flowspec(peer, attr, packet, mp_withdraw); + } + return BGP_NLRI_PARSE_ERROR; +} + + +/* + * Check if route-refresh request from peer is pending (received before EoR), + * and process it now. + */ +static void bgp_process_pending_refresh(struct peer *peer, afi_t afi, + safi_t safi) +{ + if (CHECK_FLAG(peer->af_sflags[afi][safi], + PEER_STATUS_REFRESH_PENDING)) { + UNSET_FLAG(peer->af_sflags[afi][safi], + PEER_STATUS_REFRESH_PENDING); + bgp_route_refresh_send(peer, afi, safi, 0, 0, 0, + BGP_ROUTE_REFRESH_BORR); + if (bgp_debug_neighbor_events(peer)) + zlog_debug( + "%pBP sending route-refresh (BoRR) for %s/%s (for pending REQUEST)", + peer, afi2str(afi), safi2str(safi)); + + SET_FLAG(peer->af_sflags[afi][safi], PEER_STATUS_BORR_SEND); + UNSET_FLAG(peer->af_sflags[afi][safi], PEER_STATUS_EORR_SEND); + bgp_announce_route(peer, afi, safi, true); + } +} + +/* + * Checks a variety of conditions to determine whether the peer needs to be + * rescheduled for packet generation again, and does so if necessary. 
+ * + * @param peer to check for rescheduling + */ +static void bgp_write_proceed_actions(struct peer *peer) +{ + afi_t afi; + safi_t safi; + struct peer_af *paf; + struct bpacket *next_pkt; + struct update_subgroup *subgrp; + enum bgp_af_index index; + struct peer_connection *connection = peer->connection; + + for (index = BGP_AF_START; index < BGP_AF_MAX; index++) { + paf = peer->peer_af_array[index]; + if (!paf) + continue; + + subgrp = paf->subgroup; + if (!subgrp) + continue; + + next_pkt = paf->next_pkt_to_send; + if (next_pkt && next_pkt->buffer) { + BGP_TIMER_ON(connection->t_generate_updgrp_packets, + bgp_generate_updgrp_packets, 0); + return; + } + + /* No packets readily available for AFI/SAFI, are there + * subgroup packets + * that need to be generated? */ + if (bpacket_queue_is_full(SUBGRP_INST(subgrp), + SUBGRP_PKTQ(subgrp)) + || subgroup_packets_to_build(subgrp)) { + BGP_TIMER_ON(connection->t_generate_updgrp_packets, + bgp_generate_updgrp_packets, 0); + return; + } + + afi = paf->afi; + safi = paf->safi; + + /* No packets to send, see if EOR is pending */ + if (CHECK_FLAG(peer->cap, PEER_CAP_RESTART_RCV)) { + if (!subgrp->t_coalesce && peer->afc_nego[afi][safi] + && peer->synctime + && !CHECK_FLAG(peer->af_sflags[afi][safi], + PEER_STATUS_EOR_SEND) + && safi != SAFI_MPLS_VPN) { + BGP_TIMER_ON(connection->t_generate_updgrp_packets, + bgp_generate_updgrp_packets, 0); + return; + } + } + } +} + +/* + * Generate advertisement information (withdraws, updates, EOR) from each + * update group a peer belongs to, encode this information into packets, and + * enqueue the packets onto the peer's output buffer. 
 */
void bgp_generate_updgrp_packets(struct event *thread)
{
	struct peer_connection *connection = EVENT_ARG(thread);
	struct peer *peer = connection->peer;
	struct stream *s;
	struct peer_af *paf;
	struct bpacket *next_pkt;
	uint32_t wpq;
	uint32_t generated = 0;
	afi_t afi;
	safi_t safi;

	/* Upper bound on the number of outer loop passes in one run. */
	wpq = atomic_load_explicit(&peer->bgp->wpkt_quanta,
				   memory_order_relaxed);

	/*
	 * The code beyond this part deals with update packets, proceed only
	 * if peer is Established and updates are not on hold (as part of
	 * update-delay processing).
	 */
	if (!peer_established(peer->connection))
		return;

	if ((peer->bgp->main_peers_update_hold)
	    || bgp_update_delay_active(peer->bgp))
		return;

	/* Nothing is generated while the routeadv timer is set. */
	if (peer->connection->t_routeadv)
		return;

	/*
	 * Since the following is a do while loop
	 * let's stop adding to the outq if we are
	 * already at the limit.
	 */
	if (connection->obuf->count >= bm->outq_limit) {
		bgp_write_proceed_actions(peer);
		return;
	}

	do {
		enum bgp_af_index index;

		/* s stays NULL if this pass queues no packet for any
		 * address family, which ends the outer loop.
		 */
		s = NULL;
		for (index = BGP_AF_START; index < BGP_AF_MAX; index++) {
			paf = peer->peer_af_array[index];
			if (!paf || !PAF_SUBGRP(paf))
				continue;

			afi = paf->afi;
			safi = paf->safi;
			next_pkt = paf->next_pkt_to_send;

			/*
			 * Try to generate a packet for the peer if we are at
			 * the end of the list. Always try to push out
			 * WITHDRAWs first.
			 */
			if (!next_pkt || !next_pkt->buffer) {
				next_pkt = subgroup_withdraw_packet(
					PAF_SUBGRP(paf));
				if (!next_pkt || !next_pkt->buffer)
					subgroup_update_packet(PAF_SUBGRP(paf));
				next_pkt = paf->next_pkt_to_send;
			}

			/*
			 * If we still don't have a packet to send to the peer,
			 * then try to find out if we have to send eor or
			 * if not, skip to the next AFI, SAFI. Don't send the
			 * EOR prematurely; if the subgroup's coalesce timer is
			 * running, the adjacency-out structure is not created
			 * yet.
			 */
			if (!next_pkt || !next_pkt->buffer) {
				if (!paf->t_announce_route) {
					/* Make sure we suppress BGP UPDATES
					 * for normal processing later again.
					 */
					UNSET_FLAG(paf->subgroup->sflags,
						   SUBGRP_STATUS_FORCE_UPDATES);

					/* If route-refresh BoRR message was
					 * already sent and we are done with
					 * re-announcing tables for a given
					 * afi/safi, we are ready to send
					 * EoRR request.
					 */
					if (CHECK_FLAG(
						    peer->af_sflags[afi][safi],
						    PEER_STATUS_BORR_SEND)) {
						bgp_route_refresh_send(
							peer, afi, safi, 0, 0,
							0,
							BGP_ROUTE_REFRESH_EORR);

						SET_FLAG(peer->af_sflags[afi]
									[safi],
							 PEER_STATUS_EORR_SEND);
						UNSET_FLAG(
							peer->af_sflags[afi]
								       [safi],
							PEER_STATUS_BORR_SEND);

						if (bgp_debug_neighbor_events(
							    peer))
							zlog_debug(
								"%pBP sending route-refresh (EoRR) for %s/%s",
								peer,
								afi2str(afi),
								safi2str(safi));
					}
				}

				if (CHECK_FLAG(peer->cap,
					       PEER_CAP_RESTART_RCV)) {
					if (!(PAF_SUBGRP(paf))->t_coalesce
					    && peer->afc_nego[afi][safi]
					    && peer->synctime
					    && !CHECK_FLAG(
						    peer->af_sflags[afi][safi],
						    PEER_STATUS_EOR_SEND)) {
						/* If EOR is disabled,
						 * the message is not sent
						 */
						if (BGP_SEND_EOR(peer->bgp, afi,
								 safi)) {
							SET_FLAG(
								peer->af_sflags
									[afi]
									[safi],
								PEER_STATUS_EOR_SEND);

							/* Update EOR
							 * send time
							 */
							peer->eor_stime[afi]
								       [safi] =
								monotime(NULL);

							BGP_UPDATE_EOR_PKT(
								peer, afi, safi,
								s);
							bgp_process_pending_refresh(
								peer, afi,
								safi);
						}
					}
				}
				continue;
			}

			/* Update packet send time */
			peer->pkt_stime[afi][safi] = monotime(NULL);

			/* Found a packet template to send, overwrite
			 * packet with appropriate attributes from peer
			 * and advance peer */
			s = bpacket_reformat_for_peer(next_pkt, paf);
			bgp_packet_add(connection, peer, s);
			bpacket_queue_advance_peer(paf);
		}
	} while (s && (++generated < wpq) &&
		 (connection->obuf->count <= bm->outq_limit));

	/* Only turn writes on if at least one full pass queued something. */
	if (generated)
		bgp_writes_on(connection);

	/* Reschedule ourselves if there is more work pending. */
	bgp_write_proceed_actions(peer);
}

/*
 * Creates a
BGP Keepalive packet and appends it to the peer's output queue. + */ +void bgp_keepalive_send(struct peer *peer) +{ + struct stream *s; + + s = stream_new(BGP_STANDARD_MESSAGE_MAX_PACKET_SIZE); + + /* Make keepalive packet. */ + bgp_packet_set_marker(s, BGP_MSG_KEEPALIVE); + + /* Set packet size. */ + bgp_packet_set_size(s); + + /* Dump packet if debug option is set. */ + /* bgp_packet_dump (s); */ + + if (bgp_debug_keepalive(peer)) + zlog_debug("%s sending KEEPALIVE", peer->host); + + /* Add packet to the peer. */ + bgp_packet_add(peer->connection, peer, s); + + bgp_writes_on(peer->connection); +} + +/* + * Creates a BGP Open packet and appends it to the peer's output queue. + * Sets capabilities as necessary. + */ +void bgp_open_send(struct peer_connection *connection) +{ + struct stream *s; + uint16_t send_holdtime; + as_t local_as; + struct peer *peer = connection->peer; + + if (CHECK_FLAG(peer->flags, PEER_FLAG_TIMER)) + send_holdtime = peer->holdtime; + else + send_holdtime = peer->bgp->default_holdtime; + + /* local-as Change */ + if (peer->change_local_as) + local_as = peer->change_local_as; + else + local_as = peer->local_as; + + s = stream_new(BGP_STANDARD_MESSAGE_MAX_PACKET_SIZE); + + /* Make open packet. */ + bgp_packet_set_marker(s, BGP_MSG_OPEN); + + /* Set open packet values. */ + stream_putc(s, BGP_VERSION_4); /* BGP version */ + stream_putw(s, (local_as <= BGP_AS_MAX) ? 
(uint16_t)local_as + : BGP_AS_TRANS); + stream_putw(s, send_holdtime); /* Hold Time */ + stream_put_in_addr(s, &peer->local_id); /* BGP Identifier */ + + /* Set capabilities */ + if (CHECK_FLAG(peer->flags, PEER_FLAG_EXTENDED_OPT_PARAMS)) { + (void)bgp_open_capability(s, peer, true); + } else { + struct stream *tmp = stream_new(STREAM_SIZE(s)); + + stream_copy(tmp, s); + if (bgp_open_capability(tmp, peer, false) + > BGP_OPEN_NON_EXT_OPT_LEN) { + stream_free(tmp); + (void)bgp_open_capability(s, peer, true); + } else { + stream_copy(s, tmp); + stream_free(tmp); + } + } + + /* Set BGP packet length. */ + bgp_packet_set_size(s); + + if (bgp_debug_neighbor_events(peer)) + zlog_debug( + "%s sending OPEN, version %d, my as %u, holdtime %d, id %pI4", + peer->host, BGP_VERSION_4, local_as, send_holdtime, + &peer->local_id); + + /* Dump packet if debug option is set. */ + /* bgp_packet_dump (s); */ + hook_call(bgp_packet_send, peer, BGP_MSG_OPEN, stream_get_endp(s), s); + + /* Add packet to the peer. */ + bgp_packet_add(connection, peer, s); + + bgp_writes_on(connection); +} + +/* + * Writes NOTIFICATION message directly to a peer socket without waiting for + * the I/O thread. + * + * There must be exactly one stream on the peer->connection->obuf FIFO, and the + * data within this stream must match the format of a BGP NOTIFICATION message. + * Transmission is best-effort. + * + * @requires peer->connection->io_mtx + * @param peer + * @return 0 + */ +static void bgp_write_notify(struct peer_connection *connection, + struct peer *peer) +{ + int ret, val; + uint8_t type; + struct stream *s; + + /* There should be at least one packet. */ + s = stream_fifo_pop(connection->obuf); + + if (!s) + return; + + assert(stream_get_endp(s) >= BGP_HEADER_SIZE); + + /* + * socket is in nonblocking mode, if we can't deliver the NOTIFY, well, + * we only care about getting a clean shutdown at this point. 
+ */ + ret = write(connection->fd, STREAM_DATA(s), stream_get_endp(s)); + + /* + * only connection reset/close gets counted as TCP_fatal_error, failure + * to write the entire NOTIFY doesn't get different FSM treatment + */ + if (ret <= 0) { + stream_free(s); + BGP_EVENT_ADD(connection, TCP_fatal_error); + return; + } + + /* Disable Nagle, make NOTIFY packet go out right away */ + val = 1; + (void)setsockopt(connection->fd, IPPROTO_TCP, TCP_NODELAY, (char *)&val, + sizeof(val)); + + /* Retrieve BGP packet type. */ + stream_set_getp(s, BGP_MARKER_SIZE + 2); + type = stream_getc(s); + + assert(type == BGP_MSG_NOTIFY); + + /* Type should be notify. */ + atomic_fetch_add_explicit(&peer->notify_out, 1, memory_order_relaxed); + + /* Double start timer. */ + peer->v_start *= 2; + + /* Overflow check. */ + if (peer->v_start >= (60 * 2)) + peer->v_start = (60 * 2); + + /* + * Handle Graceful Restart case where the state changes to + * Connect instead of Idle + */ + BGP_EVENT_ADD(connection, BGP_Stop); + + stream_free(s); +} + +/* + * Encapsulate an original BGP CEASE Notification into Hard Reset + */ +static uint8_t *bgp_notify_encapsulate_hard_reset(uint8_t code, uint8_t subcode, + uint8_t *data, size_t datalen) +{ + uint8_t *message = XCALLOC(MTYPE_BGP_NOTIFICATION, datalen + 2); + + /* ErrCode */ + message[0] = code; + /* Subcode */ + message[1] = subcode; + /* Data */ + if (datalen) + memcpy(message + 2, data, datalen); + + return message; +} + +/* + * Decapsulate an original BGP CEASE Notification from Hard Reset + */ +struct bgp_notify bgp_notify_decapsulate_hard_reset(struct bgp_notify *notify) +{ + struct bgp_notify bn = {}; + + bn.code = notify->raw_data[0]; + bn.subcode = notify->raw_data[1]; + bn.length = notify->length - 2; + + bn.raw_data = XMALLOC(MTYPE_BGP_NOTIFICATION, bn.length); + memcpy(bn.raw_data, notify->raw_data + 2, bn.length); + + return bn; +} + +/* Check if Graceful-Restart N-bit is exchanged */ +bool bgp_has_graceful_restart_notification(struct 
peer *peer) +{ + return CHECK_FLAG(peer->cap, PEER_CAP_GRACEFUL_RESTART_N_BIT_RCV) && + CHECK_FLAG(peer->cap, PEER_CAP_GRACEFUL_RESTART_N_BIT_ADV); +} + +/* + * Check if to send BGP CEASE Notification/Hard Reset? + */ +bool bgp_notify_send_hard_reset(struct peer *peer, uint8_t code, + uint8_t subcode) +{ + /* When the "N" bit has been exchanged, a Hard Reset message is used to + * indicate to the peer that the session is to be fully terminated. + */ + if (!bgp_has_graceful_restart_notification(peer)) + return false; + + /* + * https://datatracker.ietf.org/doc/html/rfc8538#section-5.1 + */ + if (code == BGP_NOTIFY_CEASE) { + switch (subcode) { + case BGP_NOTIFY_CEASE_MAX_PREFIX: + case BGP_NOTIFY_CEASE_ADMIN_SHUTDOWN: + case BGP_NOTIFY_CEASE_PEER_UNCONFIG: + case BGP_NOTIFY_CEASE_HARD_RESET: + case BGP_NOTIFY_CEASE_BFD_DOWN: + return true; + case BGP_NOTIFY_CEASE_ADMIN_RESET: + /* Provide user control: + * `bgp hard-adminstrative-reset` + */ + if (CHECK_FLAG(peer->bgp->flags, + BGP_FLAG_HARD_ADMIN_RESET)) + return true; + else + return false; + default: + break; + } + } + + return false; +} + +/* + * Check if received BGP CEASE Notification/Hard Reset? + */ +bool bgp_notify_received_hard_reset(struct peer *peer, uint8_t code, + uint8_t subcode) +{ + /* When the "N" bit has been exchanged, a Hard Reset message is used to + * indicate to the peer that the session is to be fully terminated. + */ + if (!bgp_has_graceful_restart_notification(peer)) + return false; + + if (code == BGP_NOTIFY_CEASE && subcode == BGP_NOTIFY_CEASE_HARD_RESET) + return true; + + return false; +} + +/* + * Creates a BGP Notify and appends it to the peer's output queue. + * + * This function attempts to write the packet from the thread it is called + * from, to ensure the packet gets out ASAP. + * + * This function may be called from multiple threads. 
Since the function + * modifies I/O buffer(s) in the peer, these are locked for the duration of the + * call to prevent tampering from other threads. + * + * Delivery of the NOTIFICATION is attempted once and is best-effort. After + * return, the peer structure *must* be reset; no assumptions about session + * state are valid. + * + * @param peer + * @param code BGP error code + * @param sub_code BGP error subcode + * @param data Data portion + * @param datalen length of data portion + */ +static void bgp_notify_send_internal(struct peer_connection *connection, + uint8_t code, uint8_t sub_code, + uint8_t *data, size_t datalen, + bool use_curr) +{ + struct stream *s; + struct peer *peer = connection->peer; + bool hard_reset = bgp_notify_send_hard_reset(peer, code, sub_code); + + /* Lock I/O mutex to prevent other threads from pushing packets */ + frr_mutex_lock_autounlock(&connection->io_mtx); + /* ============================================== */ + + /* Allocate new stream. */ + s = stream_new(peer->max_packet_size); + + /* Make notify packet. */ + bgp_packet_set_marker(s, BGP_MSG_NOTIFY); + + /* Check if we should send Hard Reset Notification or not */ + if (hard_reset) { + uint8_t *hard_reset_message = bgp_notify_encapsulate_hard_reset( + code, sub_code, data, datalen); + + /* Hard Reset encapsulates another NOTIFICATION message + * in its data portion. + */ + stream_putc(s, BGP_NOTIFY_CEASE); + stream_putc(s, BGP_NOTIFY_CEASE_HARD_RESET); + stream_write(s, hard_reset_message, datalen + 2); + + XFREE(MTYPE_BGP_NOTIFICATION, hard_reset_message); + } else { + stream_putc(s, code); + stream_putc(s, sub_code); + if (data) + stream_write(s, data, datalen); + } + + /* Set BGP packet length. */ + bgp_packet_set_size(s); + + /* wipe output buffer */ + stream_fifo_clean(connection->obuf); + + /* + * If possible, store last packet for debugging purposes. 
This check is + * in place because we are sometimes called with a doppelganger peer, + * who tends to have a plethora of fields nulled out. + * + * Some callers should not attempt this - the io pthread for example + * should not touch internals of the peer struct. + */ + if (use_curr && peer->curr) { + size_t packetsize = stream_get_endp(peer->curr); + assert(packetsize <= peer->max_packet_size); + if (peer->last_reset_cause) + stream_free(peer->last_reset_cause); + peer->last_reset_cause = stream_dup(peer->curr); + } + + /* For debug */ + { + struct bgp_notify bgp_notify; + int first = 0; + int i; + char c[4]; + + bgp_notify.code = code; + bgp_notify.subcode = sub_code; + bgp_notify.data = NULL; + bgp_notify.length = datalen; + bgp_notify.raw_data = data; + + peer->notify.code = bgp_notify.code; + peer->notify.subcode = bgp_notify.subcode; + peer->notify.length = bgp_notify.length; + + if (bgp_notify.length && data) { + bgp_notify.data = XMALLOC(MTYPE_BGP_NOTIFICATION, + bgp_notify.length * 3); + for (i = 0; i < bgp_notify.length; i++) + if (first) { + snprintf(c, sizeof(c), " %02x", + data[i]); + + strlcat(bgp_notify.data, c, + bgp_notify.length); + + } else { + first = 1; + snprintf(c, sizeof(c), "%02x", data[i]); + + strlcpy(bgp_notify.data, c, + bgp_notify.length); + } + } + bgp_notify_print(peer, &bgp_notify, "sending", hard_reset); + + if (bgp_notify.data) { + if (data) { + XFREE(MTYPE_BGP_NOTIFICATION, + peer->notify.data); + peer->notify.data = XCALLOC( + MTYPE_BGP_NOTIFICATION, datalen); + memcpy(peer->notify.data, data, datalen); + } + + XFREE(MTYPE_BGP_NOTIFICATION, bgp_notify.data); + bgp_notify.length = 0; + } + } + + /* peer reset cause */ + if (code == BGP_NOTIFY_CEASE) { + if (sub_code == BGP_NOTIFY_CEASE_ADMIN_RESET) + peer->last_reset = PEER_DOWN_USER_RESET; + else if (sub_code == BGP_NOTIFY_CEASE_ADMIN_SHUTDOWN) { + if (CHECK_FLAG(peer->sflags, PEER_STATUS_RTT_SHUTDOWN)) + peer->last_reset = PEER_DOWN_RTT_SHUTDOWN; + else + peer->last_reset = 
PEER_DOWN_USER_SHUTDOWN; + } else + peer->last_reset = PEER_DOWN_NOTIFY_SEND; + } else + peer->last_reset = PEER_DOWN_NOTIFY_SEND; + + /* Add packet to peer's output queue */ + stream_fifo_push(connection->obuf, s); + + bgp_peer_gr_flags_update(peer); + BGP_GR_ROUTER_DETECT_AND_SEND_CAPABILITY_TO_ZEBRA(peer->bgp, + peer->bgp->peer); + + bgp_write_notify(connection, peer); +} + +/* + * Creates a BGP Notify and appends it to the peer's output queue. + * + * This function attempts to write the packet from the thread it is called + * from, to ensure the packet gets out ASAP. + * + * @param peer + * @param code BGP error code + * @param sub_code BGP error subcode + */ +void bgp_notify_send(struct peer_connection *connection, uint8_t code, + uint8_t sub_code) +{ + bgp_notify_send_internal(connection, code, sub_code, NULL, 0, true); +} + +/* + * Enqueue notification; called from the main pthread, peer object access is ok. + */ +void bgp_notify_send_with_data(struct peer_connection *connection, uint8_t code, + uint8_t sub_code, uint8_t *data, size_t datalen) +{ + bgp_notify_send_internal(connection, code, sub_code, data, datalen, + true); +} + +/* + * For use by the io pthread, queueing a notification but avoiding access to + * the peer object. + */ +void bgp_notify_io_invalid(struct peer *peer, uint8_t code, uint8_t sub_code, + uint8_t *data, size_t datalen) +{ + /* Avoid touching the peer object */ + bgp_notify_send_internal(peer->connection, code, sub_code, data, + datalen, false); +} + +/* + * Creates BGP Route Refresh packet and appends it to the peer's output queue. 
+ * + * @param peer + * @param afi Address Family Identifier + * @param safi Subsequent Address Family Identifier + * @param orf_type Outbound Route Filtering type + * @param when_to_refresh Whether to refresh immediately or defer + * @param remove Whether to remove ORF for specified AFI/SAFI + * @param subtype BGP enhanced route refresh optional subtypes + */ +void bgp_route_refresh_send(struct peer *peer, afi_t afi, safi_t safi, + uint8_t orf_type, uint8_t when_to_refresh, + int remove, uint8_t subtype) +{ + struct stream *s; + struct bgp_filter *filter; + int orf_refresh = 0; + iana_afi_t pkt_afi = IANA_AFI_IPV4; + iana_safi_t pkt_safi = IANA_SAFI_UNICAST; + + if (DISABLE_BGP_ANNOUNCE) + return; + + filter = &peer->filter[afi][safi]; + + /* Convert AFI, SAFI to values for packet. */ + bgp_map_afi_safi_int2iana(afi, safi, &pkt_afi, &pkt_safi); + + s = stream_new(peer->max_packet_size); + + /* Make BGP update packet. */ + if (CHECK_FLAG(peer->cap, PEER_CAP_REFRESH_RCV)) + bgp_packet_set_marker(s, BGP_MSG_ROUTE_REFRESH_NEW); + else + bgp_packet_set_marker(s, BGP_MSG_ROUTE_REFRESH_OLD); + + /* Encode Route Refresh message. */ + stream_putw(s, pkt_afi); + if (subtype) + stream_putc(s, subtype); + else + stream_putc(s, 0); + stream_putc(s, pkt_safi); + + if (orf_type == ORF_TYPE_PREFIX) + if (remove || filter->plist[FILTER_IN].plist) { + uint16_t orf_len; + unsigned long orfp; + + orf_refresh = 1; + stream_putc(s, when_to_refresh); + stream_putc(s, orf_type); + orfp = stream_get_endp(s); + stream_putw(s, 0); + + if (remove) { + UNSET_FLAG(peer->af_sflags[afi][safi], + PEER_STATUS_ORF_PREFIX_SEND); + stream_putc(s, ORF_COMMON_PART_REMOVE_ALL); + if (bgp_debug_neighbor_events(peer)) + zlog_debug( + "%pBP sending REFRESH_REQ to remove ORF(%d) (%s) for afi/safi: %s/%s", + peer, orf_type, + (when_to_refresh == + REFRESH_DEFER + ? 
"defer" + : "immediate"), + iana_afi2str(pkt_afi), + iana_safi2str(pkt_safi)); + } else { + SET_FLAG(peer->af_sflags[afi][safi], + PEER_STATUS_ORF_PREFIX_SEND); + prefix_bgp_orf_entry( + s, filter->plist[FILTER_IN].plist, + ORF_COMMON_PART_ADD, + ORF_COMMON_PART_PERMIT, + ORF_COMMON_PART_DENY); + if (bgp_debug_neighbor_events(peer)) + zlog_debug( + "%pBP sending REFRESH_REQ with pfxlist ORF(%d) (%s) for afi/safi: %s/%s", + peer, orf_type, + (when_to_refresh == + REFRESH_DEFER + ? "defer" + : "immediate"), + iana_afi2str(pkt_afi), + iana_safi2str(pkt_safi)); + } + + /* Total ORF Entry Len. */ + orf_len = stream_get_endp(s) - orfp - 2; + stream_putw_at(s, orfp, orf_len); + } + + /* Set packet size. */ + bgp_packet_set_size(s); + + if (bgp_debug_neighbor_events(peer)) { + if (!orf_refresh) + zlog_debug( + "%pBP sending REFRESH_REQ for afi/safi: %s/%s", + peer, iana_afi2str(pkt_afi), + iana_safi2str(pkt_safi)); + } + + /* Add packet to the peer. */ + bgp_packet_add(peer->connection, peer, s); + + bgp_writes_on(peer->connection); +} + +/* + * Create a BGP Capability packet and append it to the peer's output queue. + * + * @param peer + * @param afi Address Family Identifier + * @param safi Subsequent Address Family Identifier + * @param capability_code BGP Capability Code + * @param action Set or Remove capability + */ +void bgp_capability_send(struct peer *peer, afi_t afi, safi_t safi, + int capability_code, int action) +{ + struct stream *s; + iana_afi_t pkt_afi = IANA_AFI_IPV4; + iana_safi_t pkt_safi = IANA_SAFI_UNICAST; + unsigned long cap_len; + uint16_t len; + uint32_t gr_restart_time; + const char *capability = lookup_msg(capcode_str, capability_code, + "Unknown"); + + if (!peer_established(peer->connection)) + return; + + if (!CHECK_FLAG(peer->cap, PEER_CAP_DYNAMIC_RCV) && + !CHECK_FLAG(peer->cap, PEER_CAP_DYNAMIC_ADV)) + return; + + /* Convert AFI, SAFI to values for packet. 
*/ + bgp_map_afi_safi_int2iana(afi, safi, &pkt_afi, &pkt_safi); + + s = stream_new(peer->max_packet_size); + + /* Make BGP update packet. */ + bgp_packet_set_marker(s, BGP_MSG_CAPABILITY); + + /* Encode MP_EXT capability. */ + switch (capability_code) { + case CAPABILITY_CODE_SOFT_VERSION: + SET_FLAG(peer->cap, PEER_CAP_SOFT_VERSION_ADV); + stream_putc(s, action); + stream_putc(s, CAPABILITY_CODE_SOFT_VERSION); + cap_len = stream_get_endp(s); + stream_putc(s, 0); /* Capability Length */ + + /* The Capability Length SHOULD be no greater than 64. + * This is the limit to allow other capabilities as much + * space as they require. + */ + const char *soft_version = cmd_software_version_get(); + + len = strlen(soft_version); + if (len > BGP_MAX_SOFT_VERSION) + len = BGP_MAX_SOFT_VERSION; + + stream_putc(s, len); + stream_put(s, soft_version, len); + + /* Software Version capability Len. */ + len = stream_get_endp(s) - cap_len - 1; + stream_putc_at(s, cap_len, len); + + if (bgp_debug_neighbor_events(peer)) + zlog_debug("%pBP sending CAPABILITY has %s %s for afi/safi: %s/%s", + peer, + action == CAPABILITY_ACTION_SET + ? "Advertising" + : "Removing", + capability, iana_afi2str(pkt_afi), + iana_safi2str(pkt_safi)); + break; + case CAPABILITY_CODE_MP: + stream_putc(s, action); + stream_putc(s, CAPABILITY_CODE_MP); + stream_putc(s, CAPABILITY_CODE_MP_LEN); + stream_putw(s, pkt_afi); + stream_putc(s, 0); + stream_putc(s, pkt_safi); + + if (bgp_debug_neighbor_events(peer)) + zlog_debug("%pBP sending CAPABILITY has %s %s for afi/safi: %s/%s", + peer, + action == CAPABILITY_ACTION_SET + ? 
"Advertising" + : "Removing", + capability, iana_afi2str(pkt_afi), + iana_safi2str(pkt_safi)); + break; + case CAPABILITY_CODE_RESTART: + if (!CHECK_FLAG(peer->flags, PEER_FLAG_GRACEFUL_RESTART) && + !CHECK_FLAG(peer->flags, PEER_FLAG_GRACEFUL_RESTART_HELPER)) + return; + + SET_FLAG(peer->cap, PEER_CAP_RESTART_ADV); + stream_putc(s, action); + stream_putc(s, CAPABILITY_CODE_RESTART); + cap_len = stream_get_endp(s); + stream_putc(s, 0); + gr_restart_time = peer->bgp->restart_time; + + if (peer->bgp->t_startup) { + SET_FLAG(gr_restart_time, GRACEFUL_RESTART_R_BIT); + SET_FLAG(peer->cap, PEER_CAP_GRACEFUL_RESTART_R_BIT_ADV); + } + + if (CHECK_FLAG(peer->bgp->flags, + BGP_FLAG_GRACEFUL_NOTIFICATION)) { + SET_FLAG(gr_restart_time, GRACEFUL_RESTART_N_BIT); + SET_FLAG(peer->cap, PEER_CAP_GRACEFUL_RESTART_N_BIT_ADV); + } + + stream_putw(s, gr_restart_time); + + if (CHECK_FLAG(peer->flags, PEER_FLAG_GRACEFUL_RESTART)) { + FOREACH_AFI_SAFI (afi, safi) { + if (!peer->afc[afi][safi]) + continue; + + bgp_map_afi_safi_int2iana(afi, safi, &pkt_afi, + &pkt_safi); + stream_putw(s, pkt_afi); + stream_putc(s, pkt_safi); + if (CHECK_FLAG(peer->bgp->flags, + BGP_FLAG_GR_PRESERVE_FWD)) + stream_putc(s, GRACEFUL_RESTART_F_BIT); + else + stream_putc(s, 0); + } + } + + len = stream_get_endp(s) - cap_len - 1; + stream_putc_at(s, cap_len, len); + + if (bgp_debug_neighbor_events(peer)) + zlog_debug("%pBP sending CAPABILITY has %s %s for afi/safi: %s/%s", + peer, + action == CAPABILITY_ACTION_SET + ? 
"Advertising" + : "Removing", + capability, iana_afi2str(pkt_afi), + iana_safi2str(pkt_safi)); + + break; + case CAPABILITY_CODE_LLGR: + if (!CHECK_FLAG(peer->cap, PEER_CAP_RESTART_ADV)) + return; + + SET_FLAG(peer->cap, PEER_CAP_LLGR_ADV); + + stream_putc(s, action); + stream_putc(s, CAPABILITY_CODE_LLGR); + cap_len = stream_get_endp(s); + stream_putc(s, 0); + + FOREACH_AFI_SAFI (afi, safi) { + if (!peer->afc[afi][safi]) + continue; + + bgp_map_afi_safi_int2iana(afi, safi, &pkt_afi, + &pkt_safi); + + stream_putw(s, pkt_afi); + stream_putc(s, pkt_safi); + stream_putc(s, LLGR_F_BIT); + stream_put3(s, peer->bgp->llgr_stale_time); + + SET_FLAG(peer->af_cap[afi][safi], PEER_CAP_LLGR_AF_ADV); + } + + len = stream_get_endp(s) - cap_len - 1; + stream_putc_at(s, cap_len, len); + + if (bgp_debug_neighbor_events(peer)) + zlog_debug("%pBP sending CAPABILITY has %s %s for afi/safi: %s/%s", + peer, + action == CAPABILITY_ACTION_SET + ? "Advertising" + : "Removing", + capability, iana_afi2str(pkt_afi), + iana_safi2str(pkt_safi)); + break; + case CAPABILITY_CODE_REFRESH: + case CAPABILITY_CODE_ORF: + case CAPABILITY_CODE_AS4: + case CAPABILITY_CODE_DYNAMIC: + case CAPABILITY_CODE_ADDPATH: + case CAPABILITY_CODE_ENHANCED_RR: + case CAPABILITY_CODE_FQDN: + case CAPABILITY_CODE_ENHE: + case CAPABILITY_CODE_EXT_MESSAGE: + break; + case CAPABILITY_CODE_ROLE: + if (peer->local_role != ROLE_UNDEFINED) { + SET_FLAG(peer->cap, PEER_CAP_ROLE_ADV); + stream_putc(s, action); + stream_putc(s, CAPABILITY_CODE_ROLE); + stream_putc(s, CAPABILITY_CODE_ROLE_LEN); + stream_putc(s, peer->local_role); + } + break; + default: + break; + } + + /* Set packet size. */ + bgp_packet_set_size(s); + + /* Add packet to the peer. */ + bgp_packet_add(peer->connection, peer, s); + + bgp_writes_on(peer->connection); +} + +/* RFC1771 6.8 Connection collision detection. 
*/
static int bgp_collision_detect(struct peer_connection *connection,
				struct peer *new, struct in_addr remote_id)
{
	struct peer *peer = new->doppelganger;
	struct peer_connection *other;
	uint32_t local_rid;
	uint32_t remote_rid;
	bool remote_preferred;
	bool accept_peer;
	bool drop_existing;

	/*
	 * Upon receipt of an OPEN message, the local system must examine
	 * all of its connections that are in the OpenConfirm state.  A BGP
	 * speaker may also examine connections in an OpenSent state if it
	 * knows the BGP Identifier of the peer by means outside of the
	 * protocol.  If among these connections there is a connection to a
	 * remote BGP speaker whose BGP Identifier equals the one in the
	 * OPEN message, then the local system performs the following
	 * collision resolution procedure:
	 */
	if (!peer)
		return 0;

	other = peer->connection;

	/*
	 * Do not accept the new connection in Established or Clearing
	 * states. Note that a peer GR is handled by closing the existing
	 * connection upon receipt of new one.
	 */
	if (peer_established(other) || other->status == Clearing) {
		bgp_notify_send(connection, BGP_NOTIFY_CEASE,
				BGP_NOTIFY_CEASE_COLLISION_RESOLUTION);
		return -1;
	}

	/* Only OpenConfirm / OpenSent connections can collide. */
	if (other->status != OpenConfirm && other->status != OpenSent)
		return 0;

	/*
	 * 1. The BGP Identifier of the local system is
	 * compared to the BGP Identifier of the remote
	 * system (as specified in the OPEN message).
	 *
	 * If the BGP Identifiers of the peers
	 * involved in the connection collision
	 * are identical, then the connection
	 * initiated by the BGP speaker with the
	 * larger AS number is preserved.
	 */
	local_rid = ntohl(peer->local_id.s_addr);
	remote_rid = ntohl(remote_id.s_addr);
	remote_preferred = (local_rid < remote_rid) ||
			   (local_rid == remote_rid &&
			    peer->local_as < peer->as);
	accept_peer = CHECK_FLAG(peer->sflags, PEER_STATUS_ACCEPT_PEER)
			      ? true
			      : false;

	if (remote_preferred) {
		/*
		 * 2. If the value of the local BGP
		 * Identifier is less than the remote one,
		 * the local system closes BGP connection
		 * that already exists (the one that is
		 * already in the OpenConfirm state),
		 * and accepts BGP connection initiated by
		 * the remote system.
		 */
		drop_existing = !accept_peer;
	} else {
		if (local_rid == remote_rid && peer->local_as == peer->as)
			flog_err(EC_BGP_ROUTER_ID_SAME,
				 "Peer's router-id %pI4 is the same as ours",
				 &remote_id);

		/*
		 * 3. Otherwise, the local system closes newly
		 * created BGP connection (the one associated with the
		 * newly received OPEN message), and continues to use
		 * the existing one (the one that is already in the
		 * OpenConfirm state).
		 */
		drop_existing = accept_peer;
	}

	if (drop_existing) {
		bgp_notify_send(other, BGP_NOTIFY_CEASE,
				BGP_NOTIFY_CEASE_COLLISION_RESOLUTION);
		return 1;
	}

	bgp_notify_send(connection, BGP_NOTIFY_CEASE,
			BGP_NOTIFY_CEASE_COLLISION_RESOLUTION);
	return -1;
}

/* Packet processing routines ---------------------------------------------- */
/*
 * This is a family of functions designed to be called from
 * bgp_process_packet(). These functions all share similar behavior and should
 * adhere to the following invariants and restrictions:
 *
 * Return codes
 * ------------
 * The return code of any one of those functions should be one of the FSM event
 * codes specified in bgpd.h. If a NOTIFY was sent, this event code MUST be
 * BGP_Stop. Otherwise, the code SHOULD correspond to the function's expected
 * packet type. For example, bgp_open_receive() should return BGP_Stop upon
 * error and Receive_OPEN_message otherwise.
 *
 * If no action is necessary, the correct return code is BGP_PACKET_NOOP as
 * defined below.
+ * + * Side effects + * ------------ + * - May send NOTIFY messages + * - May not modify peer->connection->status + * - May not call bgp_event_update() + */ + +#define BGP_PACKET_NOOP 0 + +/** + * Process BGP OPEN message for peer. + * + * If any errors are encountered in the OPEN message, immediately sends NOTIFY + * and returns BGP_Stop. + * + * @param peer + * @param size size of the packet + * @return as in summary + */ +static int bgp_open_receive(struct peer_connection *connection, + struct peer *peer, bgp_size_t size) +{ + int ret; + uint8_t version; + uint16_t optlen; + uint16_t holdtime; + uint16_t send_holdtime; + as_t remote_as; + as_t as4 = 0, as4_be; + struct in_addr remote_id; + int mp_capability; + uint8_t notify_data_remote_as[2]; + uint8_t notify_data_remote_as4[4]; + uint8_t notify_data_remote_id[4]; + uint16_t *holdtime_ptr; + + /* Parse open packet. */ + version = stream_getc(peer->curr); + memcpy(notify_data_remote_as, stream_pnt(peer->curr), 2); + remote_as = stream_getw(peer->curr); + holdtime_ptr = (uint16_t *)stream_pnt(peer->curr); + holdtime = stream_getw(peer->curr); + memcpy(notify_data_remote_id, stream_pnt(peer->curr), 4); + remote_id.s_addr = stream_get_ipv4(peer->curr); + + /* BEGIN to read the capability here, but dont do it yet */ + mp_capability = 0; + optlen = stream_getc(peer->curr); + + /* Extended Optional Parameters Length for BGP OPEN Message */ + if (optlen == BGP_OPEN_NON_EXT_OPT_LEN + || CHECK_FLAG(peer->flags, PEER_FLAG_EXTENDED_OPT_PARAMS)) { + uint8_t opttype; + + if (STREAM_READABLE(peer->curr) < 1) { + flog_err( + EC_BGP_PKT_OPEN, + "%s: stream does not have enough bytes for extended optional parameters", + peer->host); + bgp_notify_send(connection, BGP_NOTIFY_OPEN_ERR, + BGP_NOTIFY_OPEN_MALFORMED_ATTR); + return BGP_Stop; + } + + opttype = stream_getc(peer->curr); + if (opttype == BGP_OPEN_NON_EXT_OPT_TYPE_EXTENDED_LENGTH) { + if (STREAM_READABLE(peer->curr) < 2) { + flog_err( + EC_BGP_PKT_OPEN, + "%s: stream does 
not have enough bytes to read the extended optional parameters optlen", + peer->host); + bgp_notify_send(connection, BGP_NOTIFY_OPEN_ERR, + BGP_NOTIFY_OPEN_MALFORMED_ATTR); + return BGP_Stop; + } + optlen = stream_getw(peer->curr); + SET_FLAG(peer->sflags, + PEER_STATUS_EXT_OPT_PARAMS_LENGTH); + } + } + + /* Receive OPEN message log */ + if (bgp_debug_neighbor_events(peer)) + zlog_debug( + "%s rcv OPEN%s, version %d, remote-as (in open) %u, holdtime %d, id %pI4", + peer->host, + CHECK_FLAG(peer->sflags, + PEER_STATUS_EXT_OPT_PARAMS_LENGTH) + ? " (Extended)" + : "", + version, remote_as, holdtime, &remote_id); + + if (optlen != 0) { + /* If not enough bytes, it is an error. */ + if (STREAM_READABLE(peer->curr) < optlen) { + flog_err(EC_BGP_PKT_OPEN, + "%s: stream has not enough bytes (%u)", + peer->host, optlen); + bgp_notify_send(connection, BGP_NOTIFY_OPEN_ERR, + BGP_NOTIFY_OPEN_MALFORMED_ATTR); + return BGP_Stop; + } + + /* We need the as4 capability value *right now* because + * if it is there, we have not got the remote_as yet, and + * without + * that we do not know which peer is connecting to us now. + */ + as4 = peek_for_as4_capability(peer, optlen); + } + + as4_be = htonl(as4); + memcpy(notify_data_remote_as4, &as4_be, 4); + + /* Just in case we have a silly peer who sends AS4 capability set to 0 + */ + if (CHECK_FLAG(peer->cap, PEER_CAP_AS4_RCV) && !as4) { + flog_err(EC_BGP_PKT_OPEN, + "%s bad OPEN, got AS4 capability, but AS4 set to 0", + peer->host); + bgp_notify_send_with_data(connection, BGP_NOTIFY_OPEN_ERR, + BGP_NOTIFY_OPEN_BAD_PEER_AS, + notify_data_remote_as4, 4); + return BGP_Stop; + } + + /* Codification of AS 0 Processing */ + if (remote_as == BGP_AS_ZERO) { + flog_err(EC_BGP_PKT_OPEN, "%s bad OPEN, got AS set to 0", + peer->host); + bgp_notify_send(connection, BGP_NOTIFY_OPEN_ERR, + BGP_NOTIFY_OPEN_BAD_PEER_AS); + return BGP_Stop; + } + + if (remote_as == BGP_AS_TRANS) { + /* Take the AS4 from the capability. 
We must have received the + * capability now! Otherwise we have a asn16 peer who uses + * BGP_AS_TRANS, for some unknown reason. + */ + if (as4 == BGP_AS_TRANS) { + flog_err( + EC_BGP_PKT_OPEN, + "%s [AS4] NEW speaker using AS_TRANS for AS4, not allowed", + peer->host); + bgp_notify_send_with_data(connection, + BGP_NOTIFY_OPEN_ERR, + BGP_NOTIFY_OPEN_BAD_PEER_AS, + notify_data_remote_as4, 4); + return BGP_Stop; + } + + if (!as4 && BGP_DEBUG(as4, AS4)) + zlog_debug( + "%s [AS4] OPEN remote_as is AS_TRANS, but no AS4. Odd, but proceeding.", + peer->host); + else if (as4 < BGP_AS_MAX && BGP_DEBUG(as4, AS4)) + zlog_debug( + "%s [AS4] OPEN remote_as is AS_TRANS, but AS4 (%u) fits in 2-bytes, very odd peer.", + peer->host, as4); + if (as4) + remote_as = as4; + } else { + /* We may have a partner with AS4 who has an asno < BGP_AS_MAX + */ + /* If we have got the capability, peer->as4cap must match + * remote_as */ + if (CHECK_FLAG(peer->cap, PEER_CAP_AS4_RCV) + && as4 != remote_as) { + /* raise error, log this, close session */ + flog_err( + EC_BGP_PKT_OPEN, + "%s bad OPEN, got AS4 capability, but remote_as %u mismatch with 16bit 'myasn' %u in open", + peer->host, as4, remote_as); + bgp_notify_send_with_data(connection, + BGP_NOTIFY_OPEN_ERR, + BGP_NOTIFY_OPEN_BAD_PEER_AS, + notify_data_remote_as4, 4); + return BGP_Stop; + } + } + + /* rfc6286: + * If the BGP Identifier field of the OPEN message + * is zero, or if it is the same as the BGP Identifier + * of the local BGP speaker and the message is from an + * internal peer, then the Error Subcode is set to + * "Bad BGP Identifier". 
+ */ + if (remote_id.s_addr == INADDR_ANY + || (peer->sort == BGP_PEER_IBGP + && ntohl(peer->local_id.s_addr) == ntohl(remote_id.s_addr))) { + if (bgp_debug_neighbor_events(peer)) + zlog_debug("%s bad OPEN, wrong router identifier %pI4", + peer->host, &remote_id); + bgp_notify_send_with_data(connection, BGP_NOTIFY_OPEN_ERR, + BGP_NOTIFY_OPEN_BAD_BGP_IDENT, + notify_data_remote_id, 4); + return BGP_Stop; + } + + /* Peer BGP version check. */ + if (version != BGP_VERSION_4) { + uint16_t maxver = htons(BGP_VERSION_4); + /* XXX this reply may not be correct if version < 4 XXX */ + if (bgp_debug_neighbor_events(peer)) + zlog_debug( + "%s bad protocol version, remote requested %d, local request %d", + peer->host, version, BGP_VERSION_4); + /* Data must be in network byte order here */ + bgp_notify_send_with_data(connection, BGP_NOTIFY_OPEN_ERR, + BGP_NOTIFY_OPEN_UNSUP_VERSION, + (uint8_t *)&maxver, 2); + return BGP_Stop; + } + + /* Check neighbor as number. */ + if (peer->as_type == AS_UNSPECIFIED) { + if (bgp_debug_neighbor_events(peer)) + zlog_debug( + "%s bad OPEN, remote AS is unspecified currently", + peer->host); + bgp_notify_send_with_data(connection, BGP_NOTIFY_OPEN_ERR, + BGP_NOTIFY_OPEN_BAD_PEER_AS, + notify_data_remote_as, 2); + return BGP_Stop; + } else if (peer->as_type == AS_INTERNAL) { + if (remote_as != peer->bgp->as) { + if (bgp_debug_neighbor_events(peer)) + zlog_debug( + "%s bad OPEN, remote AS is %u, internal specified", + peer->host, remote_as); + bgp_notify_send_with_data(connection, + BGP_NOTIFY_OPEN_ERR, + BGP_NOTIFY_OPEN_BAD_PEER_AS, + notify_data_remote_as, 2); + return BGP_Stop; + } + peer->as = peer->local_as; + } else if (peer->as_type == AS_EXTERNAL) { + if (remote_as == peer->bgp->as) { + if (bgp_debug_neighbor_events(peer)) + zlog_debug( + "%s bad OPEN, remote AS is %u, external specified", + peer->host, remote_as); + bgp_notify_send_with_data(connection, + BGP_NOTIFY_OPEN_ERR, + BGP_NOTIFY_OPEN_BAD_PEER_AS, + notify_data_remote_as, 2); + 
return BGP_Stop; + } + peer->as = remote_as; + } else if ((peer->as_type == AS_SPECIFIED) && (remote_as != peer->as)) { + if (bgp_debug_neighbor_events(peer)) + zlog_debug("%s bad OPEN, remote AS is %u, expected %u", + peer->host, remote_as, peer->as); + bgp_notify_send_with_data(connection, BGP_NOTIFY_OPEN_ERR, + BGP_NOTIFY_OPEN_BAD_PEER_AS, + notify_data_remote_as, 2); + return BGP_Stop; + } + + /* + * When collision is detected and this peer is closed. + * Return immediately. + */ + ret = bgp_collision_detect(connection, peer, remote_id); + if (ret < 0) + return BGP_Stop; + + /* Get sockname. */ + if (bgp_getsockname(peer) < 0) { + flog_err_sys(EC_LIB_SOCKET, + "%s: bgp_getsockname() failed for peer: %s", + __func__, peer->host); + return BGP_Stop; + } + + /* Set remote router-id */ + peer->remote_id = remote_id; + + /* From the rfc: Upon receipt of an OPEN message, a BGP speaker MUST + calculate the value of the Hold Timer by using the smaller of its + configured Hold Time and the Hold Time received in the OPEN message. + The Hold Time MUST be either zero or at least three seconds. An + implementation may reject connections on the basis of the Hold Time. + */ + + if (holdtime < 3 && holdtime != 0) { + bgp_notify_send_with_data(connection, BGP_NOTIFY_OPEN_ERR, + BGP_NOTIFY_OPEN_UNACEP_HOLDTIME, + (uint8_t *)holdtime_ptr, 2); + return BGP_Stop; + } + + /* Send notification message when Hold Time received in the OPEN message + * is smaller than configured minimum Hold Time. */ + if (holdtime < peer->bgp->default_min_holdtime + && peer->bgp->default_min_holdtime != 0) { + bgp_notify_send_with_data(connection, BGP_NOTIFY_OPEN_ERR, + BGP_NOTIFY_OPEN_UNACEP_HOLDTIME, + (uint8_t *)holdtime_ptr, 2); + return BGP_Stop; + } + + /* From the rfc: A reasonable maximum time between KEEPALIVE messages + would be one third of the Hold Time interval. KEEPALIVE messages + MUST NOT be sent more frequently than one per second. 
An + implementation MAY adjust the rate at which it sends KEEPALIVE + messages as a function of the Hold Time interval. */ + + if (CHECK_FLAG(peer->flags, PEER_FLAG_TIMER)) + send_holdtime = peer->holdtime; + else + send_holdtime = peer->bgp->default_holdtime; + + if (holdtime < send_holdtime) + peer->v_holdtime = holdtime; + else + peer->v_holdtime = send_holdtime; + + /* Set effective keepalive to 1/3 the effective holdtime. + * Use configured keeplive when < effective keepalive. + */ + peer->v_keepalive = peer->v_holdtime / 3; + if (CHECK_FLAG(peer->flags, PEER_FLAG_TIMER)) { + if (peer->keepalive && peer->keepalive < peer->v_keepalive) + peer->v_keepalive = peer->keepalive; + } else { + if (peer->bgp->default_keepalive + && peer->bgp->default_keepalive < peer->v_keepalive) + peer->v_keepalive = peer->bgp->default_keepalive; + } + + /* If another side disabled sending Software Version capability, + * we MUST drop the previous from showing in the outputs to avoid + * stale information and due to security reasons. + */ + if (peer->soft_version) + XFREE(MTYPE_BGP_SOFT_VERSION, peer->soft_version); + + /* Open option part parse. */ + if (optlen != 0) { + if (bgp_open_option_parse(peer, optlen, &mp_capability) < 0) + return BGP_Stop; + } else { + if (bgp_debug_neighbor_events(peer)) + zlog_debug("%s rcvd OPEN w/ OPTION parameter len: 0", + peer->host); + } + + /* + * Assume that the peer supports the locally configured set of + * AFI/SAFIs if the peer did not send us any Mulitiprotocol + * capabilities, or if 'override-capability' is configured. 
+ */ + if (!mp_capability + || CHECK_FLAG(peer->flags, PEER_FLAG_OVERRIDE_CAPABILITY)) { + peer->afc_nego[AFI_IP][SAFI_UNICAST] = + peer->afc[AFI_IP][SAFI_UNICAST]; + peer->afc_nego[AFI_IP][SAFI_MULTICAST] = + peer->afc[AFI_IP][SAFI_MULTICAST]; + peer->afc_nego[AFI_IP][SAFI_LABELED_UNICAST] = + peer->afc[AFI_IP][SAFI_LABELED_UNICAST]; + peer->afc_nego[AFI_IP][SAFI_FLOWSPEC] = + peer->afc[AFI_IP][SAFI_FLOWSPEC]; + peer->afc_nego[AFI_IP6][SAFI_UNICAST] = + peer->afc[AFI_IP6][SAFI_UNICAST]; + peer->afc_nego[AFI_IP6][SAFI_MULTICAST] = + peer->afc[AFI_IP6][SAFI_MULTICAST]; + peer->afc_nego[AFI_IP6][SAFI_LABELED_UNICAST] = + peer->afc[AFI_IP6][SAFI_LABELED_UNICAST]; + peer->afc_nego[AFI_L2VPN][SAFI_EVPN] = + peer->afc[AFI_L2VPN][SAFI_EVPN]; + peer->afc_nego[AFI_IP6][SAFI_FLOWSPEC] = + peer->afc[AFI_IP6][SAFI_FLOWSPEC]; + } + + /* Verify valid local address present based on negotiated + * address-families. */ + if (peer->afc_nego[AFI_IP][SAFI_UNICAST] + || peer->afc_nego[AFI_IP][SAFI_LABELED_UNICAST] + || peer->afc_nego[AFI_IP][SAFI_MULTICAST] + || peer->afc_nego[AFI_IP][SAFI_MPLS_VPN] + || peer->afc_nego[AFI_IP][SAFI_ENCAP]) { + if (peer->nexthop.v4.s_addr == INADDR_ANY) { +#if defined(HAVE_CUMULUS) + zlog_warn("%s: No local IPv4 addr, BGP routing may not work", + peer->host); +#endif + } + } + if (peer->afc_nego[AFI_IP6][SAFI_UNICAST] + || peer->afc_nego[AFI_IP6][SAFI_LABELED_UNICAST] + || peer->afc_nego[AFI_IP6][SAFI_MULTICAST] + || peer->afc_nego[AFI_IP6][SAFI_MPLS_VPN] + || peer->afc_nego[AFI_IP6][SAFI_ENCAP]) { + if (IN6_IS_ADDR_UNSPECIFIED(&peer->nexthop.v6_global) && + !bm->v6_with_v4_nexthops) { + flog_err(EC_BGP_SND_FAIL, +"%s: No local IPv6 address, and zebra does not support V6 routing with v4 nexthops, BGP routing for V6 will not work", + peer->host); + bgp_notify_send(connection, BGP_NOTIFY_CEASE, + BGP_NOTIFY_SUBCODE_UNSPECIFIC); + return BGP_Stop; + } + } + peer->rtt = sockopt_tcp_rtt(connection->fd); + + return Receive_OPEN_message; +} + +/** + * Process 
BGP KEEPALIVE message for peer. + * + * @param peer + * @param size size of the packet + * @return as in summary + */ +static int bgp_keepalive_receive(struct peer_connection *connection, + struct peer *peer, bgp_size_t size) +{ + if (bgp_debug_keepalive(peer)) + zlog_debug("%s KEEPALIVE rcvd", peer->host); + + bgp_update_implicit_eors(peer); + + peer->rtt = sockopt_tcp_rtt(connection->fd); + + /* If the peer's RTT is higher than expected, shutdown + * the peer automatically. + */ + if (!CHECK_FLAG(peer->flags, PEER_FLAG_RTT_SHUTDOWN)) + return Receive_KEEPALIVE_message; + + if (peer->rtt > peer->rtt_expected) { + peer->rtt_keepalive_rcv++; + + if (peer->rtt_keepalive_rcv > peer->rtt_keepalive_conf) { + char rtt_shutdown_reason[BUFSIZ] = {}; + + snprintfrr( + rtt_shutdown_reason, + sizeof(rtt_shutdown_reason), + "shutdown due to high round-trip-time (%dms > %dms, hit %u times)", + peer->rtt, peer->rtt_expected, + peer->rtt_keepalive_rcv); + zlog_warn("%s %s", peer->host, rtt_shutdown_reason); + SET_FLAG(peer->sflags, PEER_STATUS_RTT_SHUTDOWN); + peer_tx_shutdown_message_set(peer, rtt_shutdown_reason); + peer_flag_set(peer, PEER_FLAG_SHUTDOWN); + } + } else { + if (peer->rtt_keepalive_rcv) + peer->rtt_keepalive_rcv--; + } + + return Receive_KEEPALIVE_message; +} + +static void bgp_refresh_stalepath_timer_expire(struct event *thread) +{ + struct peer_af *paf; + + paf = EVENT_ARG(thread); + + afi_t afi = paf->afi; + safi_t safi = paf->safi; + struct peer *peer = paf->peer; + + peer->t_refresh_stalepath = NULL; + + if (peer->nsf[afi][safi]) + bgp_clear_stale_route(peer, afi, safi); + + if (bgp_debug_neighbor_events(peer)) + zlog_debug( + "%pBP route-refresh (BoRR) timer expired for afi/safi: %d/%d", + peer, afi, safi); + + bgp_timer_set(peer->connection); +} + +/** + * Process BGP UPDATE message for peer. + * + * Parses UPDATE and creates attribute object. 
+ * + * @param peer + * @param size size of the packet + * @return as in summary + */ +static int bgp_update_receive(struct peer_connection *connection, + struct peer *peer, bgp_size_t size) +{ + int ret, nlri_ret; + uint8_t *end; + struct stream *s; + struct attr attr; + bgp_size_t attribute_len; + bgp_size_t update_len; + bgp_size_t withdraw_len; + bool restart = false; + + enum NLRI_TYPES { + NLRI_UPDATE, + NLRI_WITHDRAW, + NLRI_MP_UPDATE, + NLRI_MP_WITHDRAW, + NLRI_TYPE_MAX + }; + struct bgp_nlri nlris[NLRI_TYPE_MAX]; + + /* Status must be Established. */ + if (!peer_established(connection)) { + flog_err(EC_BGP_INVALID_STATUS, + "%s [FSM] Update packet received under status %s", + peer->host, + lookup_msg(bgp_status_msg, peer->connection->status, + NULL)); + bgp_notify_send(connection, BGP_NOTIFY_FSM_ERR, + bgp_fsm_error_subcode(peer->connection->status)); + return BGP_Stop; + } + + /* Set initial values. */ + memset(&attr, 0, sizeof(attr)); + attr.label_index = BGP_INVALID_LABEL_INDEX; + attr.label = MPLS_INVALID_LABEL; + memset(&nlris, 0, sizeof(nlris)); + memset(peer->rcvd_attr_str, 0, BUFSIZ); + peer->rcvd_attr_printed = 0; + + s = peer->curr; + end = stream_pnt(s) + size; + + /* RFC1771 6.3 If the Unfeasible Routes Length or Total Attribute + Length is too large (i.e., if Unfeasible Routes Length + Total + Attribute Length + 23 exceeds the message Length), then the Error + Subcode is set to Malformed Attribute List. */ + if (stream_pnt(s) + 2 > end) { + flog_err(EC_BGP_UPDATE_RCV, + "%s [Error] Update packet error (packet length is short for unfeasible length)", + peer->host); + bgp_notify_send(connection, BGP_NOTIFY_UPDATE_ERR, + BGP_NOTIFY_UPDATE_MAL_ATTR); + return BGP_Stop; + } + + /* Unfeasible Route Length. */ + withdraw_len = stream_getw(s); + + /* Unfeasible Route Length check. 
*/ + if (stream_pnt(s) + withdraw_len > end) { + flog_err(EC_BGP_UPDATE_RCV, + "%s [Error] Update packet error (packet unfeasible length overflow %d)", + peer->host, withdraw_len); + bgp_notify_send(connection, BGP_NOTIFY_UPDATE_ERR, + BGP_NOTIFY_UPDATE_MAL_ATTR); + return BGP_Stop; + } + + /* Unfeasible Route packet format check. */ + if (withdraw_len > 0) { + nlris[NLRI_WITHDRAW].afi = AFI_IP; + nlris[NLRI_WITHDRAW].safi = SAFI_UNICAST; + nlris[NLRI_WITHDRAW].nlri = stream_pnt(s); + nlris[NLRI_WITHDRAW].length = withdraw_len; + stream_forward_getp(s, withdraw_len); + } + + /* Attribute total length check. */ + if (stream_pnt(s) + 2 > end) { + flog_warn( + EC_BGP_UPDATE_PACKET_SHORT, + "%s [Error] Packet Error (update packet is short for attribute length)", + peer->host); + bgp_notify_send(peer->connection, BGP_NOTIFY_UPDATE_ERR, + BGP_NOTIFY_UPDATE_MAL_ATTR); + return BGP_Stop; + } + + /* Fetch attribute total length. */ + attribute_len = stream_getw(s); + + /* Attribute length check. */ + if (stream_pnt(s) + attribute_len > end) { + flog_warn( + EC_BGP_UPDATE_PACKET_LONG, + "%s [Error] Packet Error (update packet attribute length overflow %d)", + peer->host, attribute_len); + bgp_notify_send(connection, BGP_NOTIFY_UPDATE_ERR, + BGP_NOTIFY_UPDATE_MAL_ATTR); + return BGP_Stop; + } + + /* Certain attribute parsing errors should not be considered bad enough + * to reset the session for, most particularly any partial/optional + * attributes that have 'tunneled' over speakers that don't understand + * them. Instead we withdraw only the prefix concerned. + * + * Complicates the flow a little though.. + */ + enum bgp_attr_parse_ret attr_parse_ret = BGP_ATTR_PARSE_PROCEED; +/* This define morphs the update case into a withdraw when lower levels + * have signalled an error condition where this is best. + */ +#define NLRI_ATTR_ARG (attr_parse_ret != BGP_ATTR_PARSE_WITHDRAW ? &attr : NULL) + + /* Parse attribute when it exists. 
*/ + if (attribute_len) { + attr_parse_ret = bgp_attr_parse(peer, &attr, attribute_len, + &nlris[NLRI_MP_UPDATE], + &nlris[NLRI_MP_WITHDRAW]); + if (attr_parse_ret == BGP_ATTR_PARSE_ERROR) { + bgp_attr_unintern_sub(&attr); + return BGP_Stop; + } + } + + /* Logging the attribute. */ + if (attr_parse_ret == BGP_ATTR_PARSE_WITHDRAW + || BGP_DEBUG(update, UPDATE_IN) + || BGP_DEBUG(update, UPDATE_PREFIX)) { + ret = bgp_dump_attr(&attr, peer->rcvd_attr_str, + sizeof(peer->rcvd_attr_str)); + + peer->stat_upd_7606++; + + if (attr_parse_ret == BGP_ATTR_PARSE_WITHDRAW) + flog_err( + EC_BGP_UPDATE_RCV, + "%pBP rcvd UPDATE with errors in attr(s)!! Withdrawing route.", + peer); + + if (ret && bgp_debug_update(peer, NULL, NULL, 1)) { + zlog_debug("%pBP rcvd UPDATE w/ attr: %s", peer, + peer->rcvd_attr_str); + peer->rcvd_attr_printed = 1; + } + } + + /* Network Layer Reachability Information. */ + update_len = end - stream_pnt(s); + + /* If we received MP_UNREACH_NLRI attribute, but also NLRIs, then + * NLRIs should be handled as a new data. Though, if we received + * NLRIs without mandatory attributes, they should be ignored. + */ + if (update_len && attribute_len && + attr_parse_ret != BGP_ATTR_PARSE_MISSING_MANDATORY) { + /* Set NLRI portion to structure. */ + nlris[NLRI_UPDATE].afi = AFI_IP; + nlris[NLRI_UPDATE].safi = SAFI_UNICAST; + nlris[NLRI_UPDATE].nlri = stream_pnt(s); + nlris[NLRI_UPDATE].length = update_len; + stream_forward_getp(s, update_len); + + if (CHECK_FLAG(attr.flag, ATTR_FLAG_BIT(BGP_ATTR_MP_REACH_NLRI))) { + /* + * We skipped nexthop attribute validation earlier so + * validate the nexthop now. 
+ */ + if (bgp_attr_nexthop_valid(peer, &attr) < 0) { + bgp_attr_unintern_sub(&attr); + return BGP_Stop; + } + } + } + + if (BGP_DEBUG(update, UPDATE_IN)) + zlog_debug("%pBP rcvd UPDATE wlen %d attrlen %d alen %d", peer, + withdraw_len, attribute_len, update_len); + + /* Parse any given NLRIs */ + for (int i = NLRI_UPDATE; i < NLRI_TYPE_MAX; i++) { + if (!nlris[i].nlri) + continue; + + /* NLRI is processed iff the peer if configured for the specific + * afi/safi */ + if (!peer->afc[nlris[i].afi][nlris[i].safi]) { + zlog_info( + "%s [Info] UPDATE for non-enabled AFI/SAFI %u/%u", + peer->host, nlris[i].afi, nlris[i].safi); + continue; + } + + /* EoR handled later */ + if (nlris[i].length == 0) + continue; + + switch (i) { + case NLRI_UPDATE: + case NLRI_MP_UPDATE: + nlri_ret = bgp_nlri_parse(peer, NLRI_ATTR_ARG, + &nlris[i], 0); + break; + case NLRI_WITHDRAW: + case NLRI_MP_WITHDRAW: + nlri_ret = bgp_nlri_parse(peer, NLRI_ATTR_ARG, + &nlris[i], 1); + break; + default: + nlri_ret = BGP_NLRI_PARSE_ERROR; + } + + if (nlri_ret < BGP_NLRI_PARSE_OK + && nlri_ret != BGP_NLRI_PARSE_ERROR_PREFIX_OVERFLOW) { + flog_err(EC_BGP_UPDATE_RCV, + "%s [Error] Error parsing NLRI", peer->host); + if (peer_established(connection)) + bgp_notify_send(connection, + BGP_NOTIFY_UPDATE_ERR, + i <= NLRI_WITHDRAW + ? BGP_NOTIFY_UPDATE_INVAL_NETWORK + : BGP_NOTIFY_UPDATE_OPT_ATTR_ERR); + bgp_attr_unintern_sub(&attr); + return BGP_Stop; + } + } + + /* EoR checks + * + * Non-MP IPv4/Unicast EoR is a completely empty UPDATE + * and MP EoR should have only an empty MP_UNREACH + */ + if (!update_len && !withdraw_len && nlris[NLRI_MP_UPDATE].length == 0) { + afi_t afi = 0; + safi_t safi; + struct graceful_restart_info *gr_info; + + /* Restarting router */ + if (BGP_PEER_GRACEFUL_RESTART_CAPABLE(peer) + && BGP_PEER_RESTARTING_MODE(peer)) + restart = true; + + /* Non-MP IPv4/Unicast is a completely emtpy UPDATE - already + * checked + * update and withdraw NLRI lengths are 0. 
+ */ + if (!attribute_len) { + afi = AFI_IP; + safi = SAFI_UNICAST; + } else if (attr.flag & ATTR_FLAG_BIT(BGP_ATTR_MP_UNREACH_NLRI) + && nlris[NLRI_MP_WITHDRAW].length == 0) { + afi = nlris[NLRI_MP_WITHDRAW].afi; + safi = nlris[NLRI_MP_WITHDRAW].safi; + } + + if (afi && peer->afc[afi][safi]) { + struct vrf *vrf = vrf_lookup_by_id(peer->bgp->vrf_id); + + /* End-of-RIB received */ + if (!CHECK_FLAG(peer->af_sflags[afi][safi], + PEER_STATUS_EOR_RECEIVED)) { + SET_FLAG(peer->af_sflags[afi][safi], + PEER_STATUS_EOR_RECEIVED); + bgp_update_explicit_eors(peer); + /* Update graceful restart information */ + gr_info = &(peer->bgp->gr_info[afi][safi]); + if (restart) + gr_info->eor_received++; + /* If EOR received from all peers and selection + * deferral timer is running, cancel the timer + * and invoke the best path calculation + */ + if (gr_info->eor_required + == gr_info->eor_received) { + if (bgp_debug_neighbor_events(peer)) + zlog_debug( + "%s %d, %s %d", + "EOR REQ", + gr_info->eor_required, + "EOR RCV", + gr_info->eor_received); + if (gr_info->t_select_deferral) { + void *info = EVENT_ARG( + gr_info->t_select_deferral); + XFREE(MTYPE_TMP, info); + } + EVENT_OFF(gr_info->t_select_deferral); + gr_info->eor_required = 0; + gr_info->eor_received = 0; + /* Best path selection */ + bgp_best_path_select_defer(peer->bgp, + afi, safi); + } + } + + /* NSF delete stale route */ + if (peer->nsf[afi][safi]) + bgp_clear_stale_route(peer, afi, safi); + + zlog_info( + "%s: rcvd End-of-RIB for %s from %s in vrf %s", + __func__, get_afi_safi_str(afi, safi, false), + peer->host, vrf ? vrf->name : VRF_DEFAULT_NAME); + } + } + + /* Everything is done. We unintern temporary structures which + interned in bgp_attr_parse(). */ + bgp_attr_unintern_sub(&attr); + + peer->update_time = monotime(NULL); + + /* Notify BGP Conditional advertisement scanner process */ + peer->advmap_table_change = true; + + return Receive_UPDATE_message; +} + +/** + * Process BGP NOTIFY message for peer. 
+ * + * @param peer + * @param size size of the packet + * @return as in summary + */ +static int bgp_notify_receive(struct peer_connection *connection, + struct peer *peer, bgp_size_t size) +{ + struct bgp_notify outer = {}; + struct bgp_notify inner = {}; + bool hard_reset = false; + + if (peer->notify.data) { + XFREE(MTYPE_BGP_NOTIFICATION, peer->notify.data); + peer->notify.length = 0; + peer->notify.hard_reset = false; + } + + outer.code = stream_getc(peer->curr); + outer.subcode = stream_getc(peer->curr); + outer.length = size - 2; + outer.data = NULL; + outer.raw_data = NULL; + if (outer.length) { + outer.raw_data = XMALLOC(MTYPE_BGP_NOTIFICATION, outer.length); + memcpy(outer.raw_data, stream_pnt(peer->curr), outer.length); + } + + hard_reset = + bgp_notify_received_hard_reset(peer, outer.code, outer.subcode); + if (hard_reset && outer.length) { + inner = bgp_notify_decapsulate_hard_reset(&outer); + peer->notify.hard_reset = true; + } else { + inner = outer; + } + + /* Preserv notify code and sub code. */ + peer->notify.code = inner.code; + peer->notify.subcode = inner.subcode; + /* For further diagnostic record returned Data. 
*/ + if (inner.length) { + peer->notify.length = inner.length; + peer->notify.data = + XMALLOC(MTYPE_BGP_NOTIFICATION, inner.length); + memcpy(peer->notify.data, inner.raw_data, inner.length); + } + + /* For debug */ + { + int i; + int first = 0; + char c[4]; + + if (inner.length) { + inner.data = XMALLOC(MTYPE_BGP_NOTIFICATION, + inner.length * 3); + for (i = 0; i < inner.length; i++) + if (first) { + snprintf(c, sizeof(c), " %02x", + stream_getc(peer->curr)); + + strlcat(inner.data, c, + inner.length * 3); + + } else { + first = 1; + snprintf(c, sizeof(c), "%02x", + stream_getc(peer->curr)); + + strlcpy(inner.data, c, + inner.length * 3); + } + } + + bgp_notify_print(peer, &inner, "received", hard_reset); + if (inner.length) { + XFREE(MTYPE_BGP_NOTIFICATION, inner.data); + inner.length = 0; + } + if (outer.length) { + XFREE(MTYPE_BGP_NOTIFICATION, outer.data); + XFREE(MTYPE_BGP_NOTIFICATION, outer.raw_data); + + /* If this is a Hard Reset notification, we MUST free + * the inner (encapsulated) notification too. + */ + if (hard_reset) + XFREE(MTYPE_BGP_NOTIFICATION, inner.raw_data); + outer.length = 0; + } + } + + /* peer count update */ + atomic_fetch_add_explicit(&peer->notify_in, 1, memory_order_relaxed); + + peer->last_reset = PEER_DOWN_NOTIFY_RECEIVED; + + /* We have to check for Notify with Unsupported Optional Parameter. + in that case we fallback to open without the capability option. + But this done in bgp_stop. We just mark it here to avoid changing + the fsm tables. */ + if (inner.code == BGP_NOTIFY_OPEN_ERR && + inner.subcode == BGP_NOTIFY_OPEN_UNSUP_PARAM) + UNSET_FLAG(peer->sflags, PEER_STATUS_CAPABILITY_OPEN); + + /* If Graceful-Restart N-bit (Notification) is exchanged, + * and it's not a Hard Reset, let's retain the routes. 
+ */ + if (bgp_has_graceful_restart_notification(peer) && !hard_reset && + CHECK_FLAG(peer->sflags, PEER_STATUS_NSF_MODE)) + SET_FLAG(peer->sflags, PEER_STATUS_NSF_WAIT); + + bgp_peer_gr_flags_update(peer); + BGP_GR_ROUTER_DETECT_AND_SEND_CAPABILITY_TO_ZEBRA(peer->bgp, + peer->bgp->peer); + + return Receive_NOTIFICATION_message; +} + +/** + * Process BGP ROUTEREFRESH message for peer. + * + * @param peer + * @param size size of the packet + * @return as in summary + */ +static int bgp_route_refresh_receive(struct peer_connection *connection, + struct peer *peer, bgp_size_t size) +{ + iana_afi_t pkt_afi; + afi_t afi; + iana_safi_t pkt_safi; + safi_t safi; + struct stream *s; + struct peer_af *paf; + struct update_group *updgrp; + struct peer *updgrp_peer; + uint8_t subtype; + bool force_update = false; + bgp_size_t msg_length = + size - (BGP_MSG_ROUTE_REFRESH_MIN_SIZE - BGP_HEADER_SIZE); + + /* If peer does not have the capability, send notification. */ + if (!CHECK_FLAG(peer->cap, PEER_CAP_REFRESH_ADV)) { + flog_err(EC_BGP_NO_CAP, + "%s [Error] BGP route refresh is not enabled", + peer->host); + bgp_notify_send(connection, BGP_NOTIFY_HEADER_ERR, + BGP_NOTIFY_HEADER_BAD_MESTYPE); + return BGP_Stop; + } + + /* Status must be Established. */ + if (!peer_established(connection)) { + flog_err(EC_BGP_INVALID_STATUS, + "%s [Error] Route refresh packet received under status %s", + peer->host, + lookup_msg(bgp_status_msg, peer->connection->status, + NULL)); + bgp_notify_send(connection, BGP_NOTIFY_FSM_ERR, + bgp_fsm_error_subcode(peer->connection->status)); + return BGP_Stop; + } + + s = peer->curr; + + /* Parse packet. */ + pkt_afi = stream_getw(s); + subtype = stream_getc(s); + pkt_safi = stream_getc(s); + + /* Convert AFI, SAFI to internal values and check. 
*/ + if (bgp_map_afi_safi_iana2int(pkt_afi, pkt_safi, &afi, &safi)) { + zlog_info( + "%s REFRESH_REQ for unrecognized afi/safi: %s/%s - ignored", + peer->host, iana_afi2str(pkt_afi), + iana_safi2str(pkt_safi)); + return BGP_PACKET_NOOP; + } + + if (size != BGP_MSG_ROUTE_REFRESH_MIN_SIZE - BGP_HEADER_SIZE) { + uint8_t *end; + uint8_t when_to_refresh; + uint8_t orf_type; + uint16_t orf_len; + + if (subtype) { + /* If the length, excluding the fixed-size message + * header, of the received ROUTE-REFRESH message with + * Message Subtype 1 and 2 is not 4, then the BGP + * speaker MUST send a NOTIFICATION message with the + * Error Code of "ROUTE-REFRESH Message Error" and the + * subcode of "Invalid Message Length". + */ + if (msg_length != 4) { + zlog_err( + "%s Enhanced Route Refresh message length error", + peer->host); + bgp_notify_send(connection, + BGP_NOTIFY_ROUTE_REFRESH_ERR, + BGP_NOTIFY_ROUTE_REFRESH_INVALID_MSG_LEN); + } + + /* When the BGP speaker receives a ROUTE-REFRESH message + * with a "Message Subtype" field other than 0, 1, or 2, + * it MUST ignore the received ROUTE-REFRESH message. + */ + if (subtype > 2) + zlog_err( + "%s Enhanced Route Refresh invalid subtype", + peer->host); + } + + if (msg_length < 5) { + zlog_info("%s ORF route refresh length error", + peer->host); + bgp_notify_send(connection, BGP_NOTIFY_CEASE, + BGP_NOTIFY_SUBCODE_UNSPECIFIC); + return BGP_Stop; + } + + when_to_refresh = stream_getc(s); + end = stream_pnt(s) + (size - 5); + + while ((stream_pnt(s) + 2) < end) { + orf_type = stream_getc(s); + orf_len = stream_getw(s); + + /* orf_len in bounds? */ + if ((stream_pnt(s) + orf_len) > end) + break; /* XXX: Notify instead?? 
*/ + if (orf_type == ORF_TYPE_PREFIX) { + uint8_t *p_pnt = stream_pnt(s); + uint8_t *p_end = stream_pnt(s) + orf_len; + struct orf_prefix orfp; + uint8_t common = 0; + uint32_t seq; + int psize; + char name[BUFSIZ]; + int ret = CMD_SUCCESS; + + if (bgp_debug_neighbor_events(peer)) { + zlog_debug( + "%pBP rcvd Prefixlist ORF(%d) length %d", + peer, orf_type, orf_len); + } + + /* ORF prefix-list name */ + snprintf(name, sizeof(name), "%s.%d.%d", + peer->host, afi, safi); + + /* we're going to read at least 1 byte of common + * ORF header, + * and 7 bytes of ORF Address-filter entry from + * the stream + */ + if (p_pnt < p_end && + *p_pnt & ORF_COMMON_PART_REMOVE_ALL) { + if (bgp_debug_neighbor_events(peer)) + zlog_debug( + "%pBP rcvd Remove-All pfxlist ORF request", + peer); + prefix_bgp_orf_remove_all(afi, name); + break; + } + + if (orf_len < 7) + break; + + while (p_pnt < p_end) { + /* If the ORF entry is malformed, want + * to read as much of it + * as possible without going beyond the + * bounds of the entry, + * to maximise debug information. + */ + int ok; + memset(&orfp, 0, sizeof(orfp)); + common = *p_pnt++; + /* after ++: p_pnt <= p_end */ + ok = ((uint32_t)(p_end - p_pnt) + >= sizeof(uint32_t)); + if (ok) { + memcpy(&seq, p_pnt, + sizeof(uint32_t)); + p_pnt += sizeof(uint32_t); + orfp.seq = ntohl(seq); + } else + p_pnt = p_end; + + /* val checked in prefix_bgp_orf_set */ + if (p_pnt < p_end) + orfp.ge = *p_pnt++; + + /* val checked in prefix_bgp_orf_set */ + if (p_pnt < p_end) + orfp.le = *p_pnt++; + + if ((ok = (p_pnt < p_end))) + orfp.p.prefixlen = *p_pnt++; + + /* afi checked already */ + orfp.p.family = afi2family(afi); + + /* 0 if not ok */ + psize = PSIZE(orfp.p.prefixlen); + /* valid for family ? */ + if (psize > prefix_blen(&orfp.p)) { + ok = 0; + psize = prefix_blen(&orfp.p); + } + /* valid for packet ? 
*/ + if (psize > (p_end - p_pnt)) { + ok = 0; + psize = p_end - p_pnt; + } + + if (psize > 0) + memcpy(&orfp.p.u.prefix, p_pnt, + psize); + p_pnt += psize; + + if (bgp_debug_neighbor_events(peer)) { + char buf[INET6_BUFSIZ]; + + zlog_debug( + "%pBP rcvd %s %s seq %u %s/%d ge %d le %d%s", + peer, + (common & ORF_COMMON_PART_REMOVE + ? "Remove" + : "Add"), + (common & ORF_COMMON_PART_DENY + ? "deny" + : "permit"), + orfp.seq, + inet_ntop( + orfp.p.family, + &orfp.p.u.prefix, + buf, + INET6_BUFSIZ), + orfp.p.prefixlen, + orfp.ge, orfp.le, + ok ? "" : " MALFORMED"); + } + + if (ok) + ret = prefix_bgp_orf_set( + name, afi, &orfp, + (common & ORF_COMMON_PART_DENY + ? 0 + : 1), + (common & ORF_COMMON_PART_REMOVE + ? 0 + : 1)); + + if (!ok || (ok && ret != CMD_SUCCESS)) { + zlog_info( + "%pBP Received misformatted prefixlist ORF. Remove All pfxlist", + peer); + prefix_bgp_orf_remove_all(afi, + name); + break; + } + } + + peer->orf_plist[afi][safi] = + prefix_bgp_orf_lookup(afi, name); + } + stream_forward_getp(s, orf_len); + } + if (bgp_debug_neighbor_events(peer)) + zlog_debug("%pBP rcvd Refresh %s ORF request", peer, + when_to_refresh == REFRESH_DEFER + ? "Defer" + : "Immediate"); + if (when_to_refresh == REFRESH_DEFER) + return BGP_PACKET_NOOP; + } + + /* First update is deferred until ORF or ROUTE-REFRESH is received */ + if (CHECK_FLAG(peer->af_sflags[afi][safi], + PEER_STATUS_ORF_WAIT_REFRESH)) + UNSET_FLAG(peer->af_sflags[afi][safi], + PEER_STATUS_ORF_WAIT_REFRESH); + + paf = peer_af_find(peer, afi, safi); + if (paf && paf->subgroup) { + if (peer->orf_plist[afi][safi]) { + updgrp = PAF_UPDGRP(paf); + updgrp_peer = UPDGRP_PEER(updgrp); + updgrp_peer->orf_plist[afi][safi] = + peer->orf_plist[afi][safi]; + } + + /* Avoid supressing duplicate routes later + * when processing in subgroup_announce_table(). 
+ */ + force_update = true; + + /* If the peer is configured for default-originate clear the + * SUBGRP_STATUS_DEFAULT_ORIGINATE flag so that we will + * re-advertise the + * default + */ + if (CHECK_FLAG(paf->subgroup->sflags, + SUBGRP_STATUS_DEFAULT_ORIGINATE)) + UNSET_FLAG(paf->subgroup->sflags, + SUBGRP_STATUS_DEFAULT_ORIGINATE); + } + + if (subtype == BGP_ROUTE_REFRESH_BORR) { + /* A BGP speaker that has received the Graceful Restart + * Capability from its neighbor MUST ignore any BoRRs for + * an <AFI, SAFI> from the neighbor before the speaker + * receives the EoR for the given <AFI, SAFI> from the + * neighbor. + */ + if (CHECK_FLAG(peer->cap, PEER_CAP_RESTART_RCV) + && !CHECK_FLAG(peer->af_sflags[afi][safi], + PEER_STATUS_EOR_RECEIVED)) { + if (bgp_debug_neighbor_events(peer)) + zlog_debug( + "%pBP rcvd route-refresh (BoRR) for %s/%s before EoR", + peer, afi2str(afi), safi2str(safi)); + return BGP_PACKET_NOOP; + } + + if (peer->t_refresh_stalepath) { + if (bgp_debug_neighbor_events(peer)) + zlog_debug( + "%pBP rcvd route-refresh (BoRR) for %s/%s, whereas BoRR already received", + peer, afi2str(afi), safi2str(safi)); + return BGP_PACKET_NOOP; + } + + SET_FLAG(peer->af_sflags[afi][safi], PEER_STATUS_BORR_RECEIVED); + UNSET_FLAG(peer->af_sflags[afi][safi], + PEER_STATUS_EORR_RECEIVED); + + /* When a BGP speaker receives a BoRR message from + * a peer, it MUST mark all the routes with the given + * Address Family Identifier and Subsequent Address + * Family Identifier, <AFI, SAFI> [RFC2918], from + * that peer as stale. 
+ */ + if (peer_active_nego(peer)) { + SET_FLAG(peer->af_sflags[afi][safi], + PEER_STATUS_ENHANCED_REFRESH); + bgp_set_stale_route(peer, afi, safi); + } + + if (peer_established(peer->connection)) + event_add_timer(bm->master, + bgp_refresh_stalepath_timer_expire, paf, + peer->bgp->stalepath_time, + &peer->t_refresh_stalepath); + + if (bgp_debug_neighbor_events(peer)) + zlog_debug( + "%pBP rcvd route-refresh (BoRR) for %s/%s, triggering timer for %u seconds", + peer, afi2str(afi), safi2str(safi), + peer->bgp->stalepath_time); + } else if (subtype == BGP_ROUTE_REFRESH_EORR) { + if (!peer->t_refresh_stalepath) { + zlog_err( + "%pBP rcvd route-refresh (EoRR) for %s/%s, whereas no BoRR received", + peer, afi2str(afi), safi2str(safi)); + return BGP_PACKET_NOOP; + } + + EVENT_OFF(peer->t_refresh_stalepath); + + SET_FLAG(peer->af_sflags[afi][safi], PEER_STATUS_EORR_RECEIVED); + UNSET_FLAG(peer->af_sflags[afi][safi], + PEER_STATUS_BORR_RECEIVED); + + if (bgp_debug_neighbor_events(peer)) + zlog_debug( + "%pBP rcvd route-refresh (EoRR) for %s/%s, stopping BoRR timer", + peer, afi2str(afi), safi2str(safi)); + + if (peer->nsf[afi][safi]) + bgp_clear_stale_route(peer, afi, safi); + } else { + if (bgp_debug_neighbor_events(peer)) + zlog_debug( + "%pBP rcvd route-refresh (REQUEST) for %s/%s", + peer, afi2str(afi), safi2str(safi)); + + /* In response to a "normal route refresh request" from the + * peer, the speaker MUST send a BoRR message. + */ + if (CHECK_FLAG(peer->cap, PEER_CAP_ENHANCED_RR_RCV)) { + /* For a BGP speaker that supports the BGP Graceful + * Restart, it MUST NOT send a BoRR for an <AFI, SAFI> + * to a neighbor before it sends the EoR for the + * <AFI, SAFI> to the neighbor. 
+ */ + if (!CHECK_FLAG(peer->af_sflags[afi][safi], + PEER_STATUS_EOR_SEND)) { + if (bgp_debug_neighbor_events(peer)) + zlog_debug( + "%pBP rcvd route-refresh (REQUEST) for %s/%s before EoR", + peer, afi2str(afi), + safi2str(safi)); + /* Can't send BoRR now, postpone after EoR */ + SET_FLAG(peer->af_sflags[afi][safi], + PEER_STATUS_REFRESH_PENDING); + return BGP_PACKET_NOOP; + } + + bgp_route_refresh_send(peer, afi, safi, 0, 0, 0, + BGP_ROUTE_REFRESH_BORR); + + if (bgp_debug_neighbor_events(peer)) + zlog_debug( + "%pBP sending route-refresh (BoRR) for %s/%s", + peer, afi2str(afi), safi2str(safi)); + + /* Set flag Ready-To-Send to know when we can send EoRR + * message. + */ + SET_FLAG(peer->af_sflags[afi][safi], + PEER_STATUS_BORR_SEND); + UNSET_FLAG(peer->af_sflags[afi][safi], + PEER_STATUS_EORR_SEND); + } + } + + /* Perform route refreshment to the peer */ + bgp_announce_route(peer, afi, safi, force_update); + + /* No FSM action necessary */ + return BGP_PACKET_NOOP; +} + +static void bgp_dynamic_capability_llgr(uint8_t *pnt, int action, + struct capability_header *hdr, + struct peer *peer) +{ + uint8_t *data = pnt + 3; + uint8_t *end = data + hdr->length; + size_t len = end - data; + afi_t afi; + safi_t safi; + + if (action == CAPABILITY_ACTION_SET) { + if (len < BGP_CAP_LLGR_MIN_PACKET_LEN) { + zlog_err("%pBP: Received invalid Long-Lived Graceful-Restart capability length %zu", + peer, len); + return; + } + + SET_FLAG(peer->cap, PEER_CAP_LLGR_RCV); + + while (data + BGP_CAP_LLGR_MIN_PACKET_LEN <= end) { + afi_t afi; + safi_t safi; + iana_afi_t pkt_afi; + iana_safi_t pkt_safi; + struct graceful_restart_af graf; + + memcpy(&graf, data, sizeof(graf)); + pkt_afi = ntohs(graf.afi); + pkt_safi = safi_int2iana(graf.safi); + + /* Stale time is after AFI/SAFI/flags. + * It's encoded as 24 bits (= 3 bytes), so we need to + * put it into 32 bits. 
+ */ + uint32_t stale_time; + uint8_t *stale_time_ptr = data + 4; + + stale_time = stale_time_ptr[0] << 16; + stale_time |= stale_time_ptr[1] << 8; + stale_time |= stale_time_ptr[2]; + + if (bgp_map_afi_safi_iana2int(pkt_afi, pkt_safi, &afi, + &safi)) { + if (bgp_debug_neighbor_events(peer)) + zlog_debug("%s Addr-family %s/%s(afi/safi) not supported. Ignore the Long-lived Graceful Restart capability for this AFI/SAFI", + peer->host, + iana_afi2str(pkt_afi), + iana_safi2str(pkt_safi)); + } else if (!peer->afc[afi][safi] || + !CHECK_FLAG(peer->af_cap[afi][safi], + PEER_CAP_RESTART_AF_RCV)) { + if (bgp_debug_neighbor_events(peer)) + zlog_debug("%s Addr-family %s/%s(afi/safi) not enabled. Ignore the Long-lived Graceful Restart capability", + peer->host, + iana_afi2str(pkt_afi), + iana_safi2str(pkt_safi)); + } else { + if (bgp_debug_neighbor_events(peer)) + zlog_debug("%s Addr-family %s/%s(afi/safi) Long-lived Graceful Restart capability stale time %u sec", + peer->host, + iana_afi2str(pkt_afi), + iana_safi2str(pkt_safi), + stale_time); + + peer->llgr[afi][safi].flags = graf.flag; + peer->llgr[afi][safi].stale_time = + MIN(stale_time, + peer->bgp->llgr_stale_time); + SET_FLAG(peer->af_cap[afi][safi], + PEER_CAP_LLGR_AF_RCV); + } + + data += BGP_CAP_LLGR_MIN_PACKET_LEN; + } + } else { + FOREACH_AFI_SAFI (afi, safi) { + UNSET_FLAG(peer->af_cap[afi][safi], + PEER_CAP_LLGR_AF_RCV); + + peer->llgr[afi][safi].flags = 0; + peer->llgr[afi][safi].stale_time = + BGP_DEFAULT_LLGR_STALE_TIME; + } + + UNSET_FLAG(peer->cap, PEER_CAP_LLGR_RCV); + } +} + +static void bgp_dynamic_capability_graceful_restart(uint8_t *pnt, int action, + struct capability_header *hdr, + struct peer *peer) +{ +#define GRACEFUL_RESTART_CAPABILITY_PER_AFI_SAFI_SIZE 4 + uint16_t gr_restart_flag_time; + uint8_t *data = pnt + 3; + uint8_t *end = pnt + hdr->length; + size_t len = end - data; + afi_t afi; + safi_t safi; + + if (action == CAPABILITY_ACTION_SET) { + if (len < sizeof(gr_restart_flag_time)) { + 
zlog_err("%pBP: Received invalid Graceful-Restart capability length %d", + peer, hdr->length); + return; + } + + SET_FLAG(peer->cap, PEER_CAP_RESTART_RCV); + ptr_get_be16(data, &gr_restart_flag_time); + data += sizeof(gr_restart_flag_time); + + if (CHECK_FLAG(gr_restart_flag_time, GRACEFUL_RESTART_R_BIT)) + SET_FLAG(peer->cap, PEER_CAP_GRACEFUL_RESTART_R_BIT_RCV); + else + UNSET_FLAG(peer->cap, + PEER_CAP_GRACEFUL_RESTART_R_BIT_RCV); + + if (CHECK_FLAG(gr_restart_flag_time, GRACEFUL_RESTART_N_BIT)) + SET_FLAG(peer->cap, PEER_CAP_GRACEFUL_RESTART_N_BIT_RCV); + else + UNSET_FLAG(peer->cap, + PEER_CAP_GRACEFUL_RESTART_N_BIT_RCV); + + UNSET_FLAG(gr_restart_flag_time, 0xF000); + peer->v_gr_restart = gr_restart_flag_time; + + while (data + GRACEFUL_RESTART_CAPABILITY_PER_AFI_SAFI_SIZE <= + end) { + afi_t afi; + safi_t safi; + iana_afi_t pkt_afi; + iana_safi_t pkt_safi; + struct graceful_restart_af graf; + + memcpy(&graf, data, sizeof(graf)); + pkt_afi = ntohs(graf.afi); + pkt_safi = safi_int2iana(graf.safi); + + /* Convert AFI, SAFI to internal values, check. */ + if (bgp_map_afi_safi_iana2int(pkt_afi, pkt_safi, &afi, + &safi)) { + if (bgp_debug_neighbor_events(peer)) + zlog_debug("%pBP: Addr-family %s/%s(afi/safi) not supported. Ignore the Graceful Restart capability for this AFI/SAFI", + peer, iana_afi2str(pkt_afi), + iana_safi2str(pkt_safi)); + } else if (!peer->afc[afi][safi]) { + if (bgp_debug_neighbor_events(peer)) + zlog_debug("%pBP: Addr-family %s/%s(afi/safi) not enabled. Ignore the Graceful Restart capability", + peer, iana_afi2str(pkt_afi), + iana_safi2str(pkt_safi)); + } else { + if (bgp_debug_neighbor_events(peer)) + zlog_debug("%pBP: Address family %s is%spreserved", + peer, + get_afi_safi_str(afi, safi, + false), + CHECK_FLAG(peer->af_cap[afi] + [safi], + PEER_CAP_RESTART_AF_PRESERVE_RCV) + ? 
" " + : " not "); + + SET_FLAG(peer->af_cap[afi][safi], + PEER_CAP_RESTART_AF_RCV); + if (CHECK_FLAG(graf.flag, + GRACEFUL_RESTART_F_BIT)) + SET_FLAG(peer->af_cap[afi][safi], + PEER_CAP_RESTART_AF_PRESERVE_RCV); + } + + data += GRACEFUL_RESTART_CAPABILITY_PER_AFI_SAFI_SIZE; + } + } else { + FOREACH_AFI_SAFI (afi, safi) { + UNSET_FLAG(peer->af_cap[afi][safi], + PEER_CAP_RESTART_AF_RCV); + UNSET_FLAG(peer->af_cap[afi][safi], + PEER_CAP_RESTART_AF_PRESERVE_RCV); + } + + UNSET_FLAG(peer->cap, PEER_CAP_GRACEFUL_RESTART_R_BIT_RCV); + UNSET_FLAG(peer->cap, PEER_CAP_GRACEFUL_RESTART_N_BIT_RCV); + UNSET_FLAG(peer->cap, PEER_CAP_RESTART_RCV); + } +} + +static void bgp_dynamic_capability_software_version(uint8_t *pnt, int action, + struct capability_header *hdr, + struct peer *peer) +{ + uint8_t *data = pnt + 3; + uint8_t *end = data + hdr->length; + uint8_t len = *data; + char soft_version[BGP_MAX_SOFT_VERSION + 1] = {}; + + if (action == CAPABILITY_ACTION_SET) { + if (data + len > end) { + zlog_err("%pBP: Received invalid Software Version capability length %d", + peer, len); + return; + } + data++; + + if (len > BGP_MAX_SOFT_VERSION) + len = BGP_MAX_SOFT_VERSION; + + memcpy(&soft_version, data, len); + soft_version[len] = '\0'; + + XFREE(MTYPE_BGP_SOFT_VERSION, peer->soft_version); + peer->soft_version = XSTRDUP(MTYPE_BGP_SOFT_VERSION, + soft_version); + + SET_FLAG(peer->cap, PEER_CAP_SOFT_VERSION_RCV); + } else { + UNSET_FLAG(peer->cap, PEER_CAP_SOFT_VERSION_RCV); + XFREE(MTYPE_BGP_SOFT_VERSION, peer->soft_version); + } +} + +/** + * Parse BGP CAPABILITY message for peer. 
+ * + * @param peer + * @param size size of the packet + * @return as in summary + */ +static int bgp_capability_msg_parse(struct peer *peer, uint8_t *pnt, + bgp_size_t length) +{ + uint8_t *end; + struct capability_mp_data mpc; + struct capability_header *hdr; + uint8_t action; + iana_afi_t pkt_afi; + afi_t afi; + iana_safi_t pkt_safi; + safi_t safi; + const char *capability; + + end = pnt + length; + + while (pnt < end) { + /* We need at least action, capability code and capability + * length. */ + if (pnt + 3 > end) { + zlog_err("%pBP: Capability length error", peer); + bgp_notify_send(peer->connection, BGP_NOTIFY_CEASE, + BGP_NOTIFY_SUBCODE_UNSPECIFIC); + return BGP_Stop; + } + action = *pnt; + hdr = (struct capability_header *)(pnt + 1); + + /* Action value check. */ + if (action != CAPABILITY_ACTION_SET + && action != CAPABILITY_ACTION_UNSET) { + zlog_err("%pBP: Capability Action Value error %d", peer, + action); + bgp_notify_send(peer->connection, BGP_NOTIFY_CEASE, + BGP_NOTIFY_SUBCODE_UNSPECIFIC); + return BGP_Stop; + } + + if (bgp_debug_neighbor_events(peer)) + zlog_debug("%pBP: CAPABILITY has action: %d, code: %u, length %u", + peer, action, hdr->code, hdr->length); + + /* Capability length check. */ + if ((pnt + hdr->length + 3) > end) { + zlog_err("%pBP: Capability length error", peer); + bgp_notify_send(peer->connection, BGP_NOTIFY_CEASE, + BGP_NOTIFY_SUBCODE_UNSPECIFIC); + return BGP_Stop; + } + + /* Ignore capability when override-capability is set. 
*/ + if (CHECK_FLAG(peer->flags, PEER_FLAG_OVERRIDE_CAPABILITY)) + continue; + + capability = lookup_msg(capcode_str, hdr->code, "Unknown"); + + switch (hdr->code) { + case CAPABILITY_CODE_SOFT_VERSION: + bgp_dynamic_capability_software_version(pnt, action, + hdr, peer); + break; + case CAPABILITY_CODE_MP: + if (hdr->length < sizeof(struct capability_mp_data)) { + zlog_err("%pBP: Capability (%s) structure is not properly filled out, expected at least %zu bytes but header length specified is %d", + peer, capability, + sizeof(struct capability_mp_data), + hdr->length); + return BGP_Stop; + } + + memcpy(&mpc, pnt + 3, sizeof(struct capability_mp_data)); + pkt_afi = ntohs(mpc.afi); + pkt_safi = mpc.safi; + + /* Convert AFI, SAFI to internal values. */ + if (bgp_map_afi_safi_iana2int(pkt_afi, pkt_safi, &afi, + &safi)) { + if (bgp_debug_neighbor_events(peer)) + zlog_debug("%pBP: Dynamic Capability %s afi/safi invalid (%s/%s)", + peer, capability, + iana_afi2str(pkt_afi), + iana_safi2str(pkt_safi)); + continue; + } + + /* Address family check. */ + if (bgp_debug_neighbor_events(peer)) + zlog_debug("%pBP: CAPABILITY has %s %s CAP for afi/safi: %s/%s", + peer, + action == CAPABILITY_ACTION_SET + ? 
"Advertising" + : "Removing", + capability, iana_afi2str(pkt_afi), + iana_safi2str(pkt_safi)); + + if (action == CAPABILITY_ACTION_SET) { + peer->afc_recv[afi][safi] = 1; + if (peer->afc[afi][safi]) { + peer->afc_nego[afi][safi] = 1; + bgp_announce_route(peer, afi, safi, + false); + } + } else { + peer->afc_recv[afi][safi] = 0; + peer->afc_nego[afi][safi] = 0; + + if (peer_active_nego(peer)) + bgp_clear_route(peer, afi, safi); + else + return BGP_Stop; + } + break; + case CAPABILITY_CODE_RESTART: + if ((hdr->length - 2) % 4) { + zlog_err("%pBP: Received invalid Graceful-Restart capability length %d", + peer, hdr->length); + bgp_notify_send(peer->connection, + BGP_NOTIFY_CEASE, + BGP_NOTIFY_SUBCODE_UNSPECIFIC); + return BGP_Stop; + } + + bgp_dynamic_capability_graceful_restart(pnt, action, + hdr, peer); + break; + case CAPABILITY_CODE_LLGR: + bgp_dynamic_capability_llgr(pnt, action, hdr, peer); + break; + case CAPABILITY_CODE_REFRESH: + case CAPABILITY_CODE_ORF: + case CAPABILITY_CODE_AS4: + case CAPABILITY_CODE_DYNAMIC: + case CAPABILITY_CODE_ADDPATH: + case CAPABILITY_CODE_ENHANCED_RR: + case CAPABILITY_CODE_FQDN: + case CAPABILITY_CODE_ENHE: + case CAPABILITY_CODE_EXT_MESSAGE: + break; + case CAPABILITY_CODE_ROLE: + if (hdr->length != CAPABILITY_CODE_ROLE_LEN) { + zlog_err("%pBP: Capability (%s) length error", + peer, capability); + bgp_notify_send(peer->connection, + BGP_NOTIFY_CEASE, + BGP_NOTIFY_SUBCODE_UNSPECIFIC); + return BGP_Stop; + } + + uint8_t role; + + if (action == CAPABILITY_ACTION_SET) { + SET_FLAG(peer->cap, PEER_CAP_ROLE_RCV); + memcpy(&role, pnt + 3, sizeof(role)); + + peer->remote_role = role; + } else { + UNSET_FLAG(peer->cap, PEER_CAP_ROLE_RCV); + peer->remote_role = ROLE_UNDEFINED; + } + break; + default: + flog_warn(EC_BGP_UNRECOGNIZED_CAPABILITY, + "%pBP: unrecognized capability code: %d - ignored", + peer, hdr->code); + break; + } + + pnt += hdr->length + 3; + } + + /* No FSM action necessary */ + return BGP_PACKET_NOOP; +} + +/** + * 
Parse BGP CAPABILITY message for peer. + * + * Exported for unit testing. + * + * @param peer + * @param size size of the packet + * @return as in summary + */ +int bgp_capability_receive(struct peer_connection *connection, + struct peer *peer, bgp_size_t size) +{ + uint8_t *pnt; + + /* Fetch pointer. */ + pnt = stream_pnt(peer->curr); + + if (bgp_debug_neighbor_events(peer)) + zlog_debug("%s rcv CAPABILITY", peer->host); + + /* If peer does not have the capability, send notification. */ + if (!CHECK_FLAG(peer->cap, PEER_CAP_DYNAMIC_ADV)) { + flog_err(EC_BGP_NO_CAP, + "%s [Error] BGP dynamic capability is not enabled", + peer->host); + bgp_notify_send(connection, BGP_NOTIFY_HEADER_ERR, + BGP_NOTIFY_HEADER_BAD_MESTYPE); + return BGP_Stop; + } + + /* Status must be Established. */ + if (!peer_established(connection)) { + flog_err(EC_BGP_NO_CAP, + "%s [Error] Dynamic capability packet received under status %s", + peer->host, + lookup_msg(bgp_status_msg, connection->status, NULL)); + bgp_notify_send(connection, BGP_NOTIFY_FSM_ERR, + bgp_fsm_error_subcode(connection->status)); + return BGP_Stop; + } + + /* Parse packet. */ + return bgp_capability_msg_parse(peer, pnt, size); +} + +/** + * Processes a peer's input buffer. + * + * This function sidesteps the event loop and directly calls bgp_event_update() + * after processing each BGP message. This is necessary to ensure proper + * ordering of FSM events and unifies the behavior that was present previously, + * whereby some of the packet handling functions would update the FSM and some + * would not, making event flow difficult to understand. Please think twice + * before hacking this. + * + * Thread type: EVENT_EVENT + * @param thread + * @return 0 + */ +void bgp_process_packet(struct event *thread) +{ + /* Yes first of all get peer pointer. 
*/ + struct peer *peer; // peer + struct peer_connection *connection; + uint32_t rpkt_quanta_old; // how many packets to read + int fsm_update_result; // return code of bgp_event_update() + int mprc; // message processing return code + + connection = EVENT_ARG(thread); + peer = connection->peer; + rpkt_quanta_old = atomic_load_explicit(&peer->bgp->rpkt_quanta, + memory_order_relaxed); + fsm_update_result = 0; + + /* Guard against scheduled events that occur after peer deletion. */ + if (connection->status == Deleted || connection->status == Clearing) + return; + + unsigned int processed = 0; + + while (processed < rpkt_quanta_old) { + uint8_t type = 0; + bgp_size_t size; + char notify_data_length[2]; + + frr_with_mutex (&connection->io_mtx) { + peer->curr = stream_fifo_pop(connection->ibuf); + } + + if (peer->curr == NULL) // no packets to process, hmm... + return; + + /* skip the marker and copy the packet length */ + stream_forward_getp(peer->curr, BGP_MARKER_SIZE); + memcpy(notify_data_length, stream_pnt(peer->curr), 2); + + /* read in the packet length and type */ + size = stream_getw(peer->curr); + type = stream_getc(peer->curr); + + hook_call(bgp_packet_dump, peer, type, size, peer->curr); + + /* adjust size to exclude the marker + length + type */ + size -= BGP_HEADER_SIZE; + + /* Read rest of the packet and call each sort of packet routine + */ + switch (type) { + case BGP_MSG_OPEN: + frrtrace(2, frr_bgp, open_process, peer, size); + atomic_fetch_add_explicit(&peer->open_in, 1, + memory_order_relaxed); + mprc = bgp_open_receive(connection, peer, size); + if (mprc == BGP_Stop) + flog_err( + EC_BGP_PKT_OPEN, + "%s: BGP OPEN receipt failed for peer: %s", + __func__, peer->host); + break; + case BGP_MSG_UPDATE: + frrtrace(2, frr_bgp, update_process, peer, size); + atomic_fetch_add_explicit(&peer->update_in, 1, + memory_order_relaxed); + peer->readtime = monotime(NULL); + mprc = bgp_update_receive(connection, peer, size); + if (mprc == BGP_Stop) + flog_err( + 
EC_BGP_UPDATE_RCV, + "%s: BGP UPDATE receipt failed for peer: %s", + __func__, peer->host); + break; + case BGP_MSG_NOTIFY: + frrtrace(2, frr_bgp, notification_process, peer, size); + atomic_fetch_add_explicit(&peer->notify_in, 1, + memory_order_relaxed); + mprc = bgp_notify_receive(connection, peer, size); + if (mprc == BGP_Stop) + flog_err( + EC_BGP_NOTIFY_RCV, + "%s: BGP NOTIFY receipt failed for peer: %s", + __func__, peer->host); + break; + case BGP_MSG_KEEPALIVE: + frrtrace(2, frr_bgp, keepalive_process, peer, size); + peer->readtime = monotime(NULL); + atomic_fetch_add_explicit(&peer->keepalive_in, 1, + memory_order_relaxed); + mprc = bgp_keepalive_receive(connection, peer, size); + if (mprc == BGP_Stop) + flog_err( + EC_BGP_KEEP_RCV, + "%s: BGP KEEPALIVE receipt failed for peer: %s", + __func__, peer->host); + break; + case BGP_MSG_ROUTE_REFRESH_NEW: + case BGP_MSG_ROUTE_REFRESH_OLD: + frrtrace(2, frr_bgp, refresh_process, peer, size); + atomic_fetch_add_explicit(&peer->refresh_in, 1, + memory_order_relaxed); + mprc = bgp_route_refresh_receive(connection, peer, size); + if (mprc == BGP_Stop) + flog_err( + EC_BGP_RFSH_RCV, + "%s: BGP ROUTEREFRESH receipt failed for peer: %s", + __func__, peer->host); + break; + case BGP_MSG_CAPABILITY: + frrtrace(2, frr_bgp, capability_process, peer, size); + atomic_fetch_add_explicit(&peer->dynamic_cap_in, 1, + memory_order_relaxed); + mprc = bgp_capability_receive(connection, peer, size); + if (mprc == BGP_Stop) + flog_err( + EC_BGP_CAP_RCV, + "%s: BGP CAPABILITY receipt failed for peer: %s", + __func__, peer->host); + break; + default: + /* Suppress uninitialized variable warning */ + mprc = 0; + (void)mprc; + /* + * The message type should have been sanitized before + * we ever got here. Receipt of a message with an + * invalid header at this point is indicative of a + * security issue. 
+ */ + assert (!"Message of invalid type received during input processing"); + } + + /* delete processed packet */ + stream_free(peer->curr); + peer->curr = NULL; + processed++; + + /* Update FSM */ + if (mprc != BGP_PACKET_NOOP) + fsm_update_result = bgp_event_update(connection, mprc); + else + continue; + + /* + * If peer was deleted, do not process any more packets. This + * is usually due to executing BGP_Stop or a stub deletion. + */ + if (fsm_update_result == FSM_PEER_TRANSFERRED + || fsm_update_result == FSM_PEER_STOPPED) + break; + } + + if (fsm_update_result != FSM_PEER_TRANSFERRED + && fsm_update_result != FSM_PEER_STOPPED) { + frr_with_mutex (&connection->io_mtx) { + // more work to do, come back later + if (connection->ibuf->count > 0) + event_add_event(bm->master, bgp_process_packet, + connection, 0, + &connection->t_process_packet); + } + } +} + +/* Send EOR when routes are processed by selection deferral timer */ +void bgp_send_delayed_eor(struct bgp *bgp) +{ + struct peer *peer; + struct listnode *node, *nnode; + + /* EOR message sent in bgp_write_proceed_actions */ + for (ALL_LIST_ELEMENTS(bgp->peer, node, nnode, peer)) + bgp_write_proceed_actions(peer); +} + +/* + * Task callback to handle socket error encountered in the io pthread. We avoid + * having the io pthread try to enqueue fsm events or mess with the peer + * struct. 
+ */ +void bgp_packet_process_error(struct event *thread) +{ + struct peer_connection *connection; + struct peer *peer; + int code; + + connection = EVENT_ARG(thread); + peer = connection->peer; + code = EVENT_VAL(thread); + + if (bgp_debug_neighbor_events(peer)) + zlog_debug("%s [Event] BGP error %d on fd %d", peer->host, code, + connection->fd); + + /* Closed connection or error on the socket */ + if (peer_established(connection)) { + if ((CHECK_FLAG(peer->flags, PEER_FLAG_GRACEFUL_RESTART) + || CHECK_FLAG(peer->flags, + PEER_FLAG_GRACEFUL_RESTART_HELPER)) + && CHECK_FLAG(peer->sflags, PEER_STATUS_NSF_MODE)) { + peer->last_reset = PEER_DOWN_NSF_CLOSE_SESSION; + SET_FLAG(peer->sflags, PEER_STATUS_NSF_WAIT); + } else + peer->last_reset = PEER_DOWN_CLOSE_SESSION; + } + + bgp_event_update(connection, code); +} |