diff options
Diffstat (limited to 'zebra/dplane_fpm_nl.c')
-rw-r--r-- | zebra/dplane_fpm_nl.c | 1666 |
1 files changed, 1666 insertions, 0 deletions
diff --git a/zebra/dplane_fpm_nl.c b/zebra/dplane_fpm_nl.c new file mode 100644 index 0000000..2a87925 --- /dev/null +++ b/zebra/dplane_fpm_nl.c @@ -0,0 +1,1666 @@ +// SPDX-License-Identifier: GPL-2.0-or-later +/* + * Zebra dataplane plugin for Forwarding Plane Manager (FPM) using netlink. + * + * Copyright (C) 2019 Network Device Education Foundation, Inc. ("NetDEF") + * Rafael Zalamena + */ + +#ifdef HAVE_CONFIG_H +#include "config.h" /* Include this explicitly */ +#endif + +#include <arpa/inet.h> + +#include <sys/types.h> +#include <sys/socket.h> + +#include <errno.h> +#include <string.h> + +#include "lib/zebra.h" +#include "lib/json.h" +#include "lib/libfrr.h" +#include "lib/frratomic.h" +#include "lib/command.h" +#include "lib/memory.h" +#include "lib/network.h" +#include "lib/ns.h" +#include "lib/frr_pthread.h" +#include "zebra/debug.h" +#include "zebra/interface.h" +#include "zebra/zebra_dplane.h" +#include "zebra/zebra_mpls.h" +#include "zebra/zebra_router.h" +#include "zebra/interface.h" +#include "zebra/zebra_vxlan_private.h" +#include "zebra/zebra_evpn.h" +#include "zebra/zebra_evpn_mac.h" +#include "zebra/kernel_netlink.h" +#include "zebra/rt_netlink.h" +#include "zebra/debug.h" +#include "fpm/fpm.h" + +#define SOUTHBOUND_DEFAULT_ADDR INADDR_LOOPBACK +#define SOUTHBOUND_DEFAULT_PORT 2620 + +/** + * FPM header: + * { + * version: 1 byte (always 1), + * type: 1 byte (1 for netlink, 2 protobuf), + * len: 2 bytes (network order), + * } + * + * This header is used with any format to tell the users how many bytes to + * expect. + */ +#define FPM_HEADER_SIZE 4 + +static const char *prov_name = "dplane_fpm_nl"; + +struct fpm_nl_ctx { + /* data plane connection. */ + int socket; + bool disabled; + bool connecting; + bool use_nhg; + bool use_route_replace; + struct sockaddr_storage addr; + + /* data plane buffers. */ + struct stream *ibuf; + struct stream *obuf; + pthread_mutex_t obuf_mutex; + + /* + * data plane context queue: + * When a FPM server connection becomes a bottleneck, we must keep the + * data plane contexts until we get a chance to process them. + */ + struct dplane_ctx_list_head ctxqueue; + pthread_mutex_t ctxqueue_mutex; + + /* data plane events. */ + struct zebra_dplane_provider *prov; + struct frr_pthread *fthread; + struct event *t_connect; + struct event *t_read; + struct event *t_write; + struct event *t_event; + struct event *t_nhg; + struct event *t_dequeue; + + /* zebra events. */ + struct event *t_lspreset; + struct event *t_lspwalk; + struct event *t_nhgreset; + struct event *t_nhgwalk; + struct event *t_ribreset; + struct event *t_ribwalk; + struct event *t_rmacreset; + struct event *t_rmacwalk; + + /* Statistic counters. */ + struct { + /* Amount of bytes read into ibuf. */ + _Atomic uint32_t bytes_read; + /* Amount of bytes written from obuf. */ + _Atomic uint32_t bytes_sent; + /* Output buffer current usage. */ + _Atomic uint32_t obuf_bytes; + /* Output buffer peak usage. */ + _Atomic uint32_t obuf_peak; + + /* Amount of connection closes. */ + _Atomic uint32_t connection_closes; + /* Amount of connection errors. */ + _Atomic uint32_t connection_errors; + + /* Amount of user configurations: FNE_RECONNECT. */ + _Atomic uint32_t user_configures; + /* Amount of user disable requests: FNE_DISABLE. */ + _Atomic uint32_t user_disables; + + /* Amount of data plane context processed. */ + _Atomic uint32_t dplane_contexts; + /* Amount of data plane contexts enqueued. */ + _Atomic uint32_t ctxqueue_len; + /* Peak amount of data plane contexts enqueued. */ + _Atomic uint32_t ctxqueue_len_peak; + + /* Amount of buffer full events. */ + _Atomic uint32_t buffer_full; + } counters; +} *gfnc; + +enum fpm_nl_events { + /* Ask for FPM to reconnect the external server. */ + FNE_RECONNECT, + /* Disable FPM. */ + FNE_DISABLE, + /* Reset counters. */ + FNE_RESET_COUNTERS, + /* Toggle next hop group feature. */ + FNE_TOGGLE_NHG, + /* Reconnect request by our own code to avoid races. */ + FNE_INTERNAL_RECONNECT, + + /* LSP walk finished. */ + FNE_LSP_FINISHED, + /* Next hop groups walk finished. */ + FNE_NHG_FINISHED, + /* RIB walk finished. */ + FNE_RIB_FINISHED, + /* RMAC walk finished. */ + FNE_RMAC_FINISHED, +}; + +#define FPM_RECONNECT(fnc) \ + event_add_event((fnc)->fthread->master, fpm_process_event, (fnc), \ + FNE_INTERNAL_RECONNECT, &(fnc)->t_event) + +#define WALK_FINISH(fnc, ev) \ + event_add_event((fnc)->fthread->master, fpm_process_event, (fnc), \ + (ev), NULL) + +/* + * Prototypes. + */ +static void fpm_process_event(struct event *t); +static int fpm_nl_enqueue(struct fpm_nl_ctx *fnc, struct zebra_dplane_ctx *ctx); +static void fpm_lsp_send(struct event *t); +static void fpm_lsp_reset(struct event *t); +static void fpm_nhg_send(struct event *t); +static void fpm_nhg_reset(struct event *t); +static void fpm_rib_send(struct event *t); +static void fpm_rib_reset(struct event *t); +static void fpm_rmac_send(struct event *t); +static void fpm_rmac_reset(struct event *t); + +/* + * CLI. + */ +#define FPM_STR "Forwarding Plane Manager configuration\n" + +DEFUN(fpm_set_address, fpm_set_address_cmd, + "fpm address <A.B.C.D|X:X::X:X> [port (1-65535)]", + FPM_STR + "FPM remote listening server address\n" + "Remote IPv4 FPM server\n" + "Remote IPv6 FPM server\n" + "FPM remote listening server port\n" + "Remote FPM server port\n") +{ + struct sockaddr_in *sin; + struct sockaddr_in6 *sin6; + uint16_t port = 0; + uint8_t naddr[INET6_BUFSIZ]; + + if (argc == 5) + port = strtol(argv[4]->arg, NULL, 10); + + /* Handle IPv4 addresses. */ + if (inet_pton(AF_INET, argv[2]->arg, naddr) == 1) { + sin = (struct sockaddr_in *)&gfnc->addr; + + memset(sin, 0, sizeof(*sin)); + sin->sin_family = AF_INET; + sin->sin_port = + port ? htons(port) : htons(SOUTHBOUND_DEFAULT_PORT); +#ifdef HAVE_STRUCT_SOCKADDR_SA_LEN + sin->sin_len = sizeof(*sin); +#endif /* HAVE_STRUCT_SOCKADDR_SA_LEN */ + memcpy(&sin->sin_addr, naddr, sizeof(sin->sin_addr)); + + goto ask_reconnect; + } + + /* Handle IPv6 addresses. */ + if (inet_pton(AF_INET6, argv[2]->arg, naddr) != 1) { + vty_out(vty, "%% Invalid address: %s\n", argv[2]->arg); + return CMD_WARNING; + } + + sin6 = (struct sockaddr_in6 *)&gfnc->addr; + memset(sin6, 0, sizeof(*sin6)); + sin6->sin6_family = AF_INET6; + sin6->sin6_port = port ? htons(port) : htons(SOUTHBOUND_DEFAULT_PORT); +#ifdef HAVE_STRUCT_SOCKADDR_SA_LEN + sin6->sin6_len = sizeof(*sin6); +#endif /* HAVE_STRUCT_SOCKADDR_SA_LEN */ + memcpy(&sin6->sin6_addr, naddr, sizeof(sin6->sin6_addr)); + +ask_reconnect: + event_add_event(gfnc->fthread->master, fpm_process_event, gfnc, + FNE_RECONNECT, &gfnc->t_event); + return CMD_SUCCESS; +} + +DEFUN(no_fpm_set_address, no_fpm_set_address_cmd, + "no fpm address [<A.B.C.D|X:X::X:X> [port <1-65535>]]", + NO_STR + FPM_STR + "FPM remote listening server address\n" + "Remote IPv4 FPM server\n" + "Remote IPv6 FPM server\n" + "FPM remote listening server port\n" + "Remote FPM server port\n") +{ + event_add_event(gfnc->fthread->master, fpm_process_event, gfnc, + FNE_DISABLE, &gfnc->t_event); + return CMD_SUCCESS; +} + +DEFUN(fpm_use_nhg, fpm_use_nhg_cmd, + "fpm use-next-hop-groups", + FPM_STR + "Use netlink next hop groups feature.\n") +{ + /* Already enabled. */ + if (gfnc->use_nhg) + return CMD_SUCCESS; + + event_add_event(gfnc->fthread->master, fpm_process_event, gfnc, + FNE_TOGGLE_NHG, &gfnc->t_nhg); + + return CMD_SUCCESS; +} + +DEFUN(no_fpm_use_nhg, no_fpm_use_nhg_cmd, + "no fpm use-next-hop-groups", + NO_STR + FPM_STR + "Use netlink next hop groups feature.\n") +{ + /* Already disabled. */ + if (!gfnc->use_nhg) + return CMD_SUCCESS; + + event_add_event(gfnc->fthread->master, fpm_process_event, gfnc, + FNE_TOGGLE_NHG, &gfnc->t_nhg); + + return CMD_SUCCESS; +} + +DEFUN(fpm_use_route_replace, fpm_use_route_replace_cmd, + "fpm use-route-replace", + FPM_STR + "Use netlink route replace semantics\n") +{ + gfnc->use_route_replace = true; + return CMD_SUCCESS; +} + +DEFUN(no_fpm_use_route_replace, no_fpm_use_route_replace_cmd, + "no fpm use-route-replace", + NO_STR + FPM_STR + "Use netlink route replace semantics\n") +{ + gfnc->use_route_replace = false; + return CMD_SUCCESS; +} + +DEFUN(fpm_reset_counters, fpm_reset_counters_cmd, + "clear fpm counters", + CLEAR_STR + FPM_STR + "FPM statistic counters\n") +{ + event_add_event(gfnc->fthread->master, fpm_process_event, gfnc, + FNE_RESET_COUNTERS, &gfnc->t_event); + return CMD_SUCCESS; +} + +DEFUN(fpm_show_counters, fpm_show_counters_cmd, + "show fpm counters", + SHOW_STR + FPM_STR + "FPM statistic counters\n") +{ + vty_out(vty, "%30s\n%30s\n", "FPM counters", "============"); + +#define SHOW_COUNTER(label, counter) \ + vty_out(vty, "%28s: %u\n", (label), (counter)) + + SHOW_COUNTER("Input bytes", gfnc->counters.bytes_read); + SHOW_COUNTER("Output bytes", gfnc->counters.bytes_sent); + SHOW_COUNTER("Output buffer current size", gfnc->counters.obuf_bytes); + SHOW_COUNTER("Output buffer peak size", gfnc->counters.obuf_peak); + SHOW_COUNTER("Connection closes", gfnc->counters.connection_closes); + SHOW_COUNTER("Connection errors", gfnc->counters.connection_errors); + SHOW_COUNTER("Data plane items processed", + gfnc->counters.dplane_contexts); + SHOW_COUNTER("Data plane items enqueued", + gfnc->counters.ctxqueue_len); + SHOW_COUNTER("Data plane items queue peak", + gfnc->counters.ctxqueue_len_peak); + SHOW_COUNTER("Buffer full hits", gfnc->counters.buffer_full); + SHOW_COUNTER("User FPM configurations", gfnc->counters.user_configures); + SHOW_COUNTER("User FPM disable requests", gfnc->counters.user_disables); + +#undef SHOW_COUNTER + + return CMD_SUCCESS; +} + +DEFUN(fpm_show_counters_json, fpm_show_counters_json_cmd, + "show fpm counters json", + SHOW_STR + FPM_STR + "FPM statistic counters\n" + JSON_STR) +{ + struct json_object *jo; + + jo = json_object_new_object(); + json_object_int_add(jo, "bytes-read", gfnc->counters.bytes_read); + json_object_int_add(jo, "bytes-sent", gfnc->counters.bytes_sent); + json_object_int_add(jo, "obuf-bytes", gfnc->counters.obuf_bytes); + json_object_int_add(jo, "obuf-bytes-peak", gfnc->counters.obuf_peak); + json_object_int_add(jo, "connection-closes", + gfnc->counters.connection_closes); + json_object_int_add(jo, "connection-errors", + gfnc->counters.connection_errors); + json_object_int_add(jo, "data-plane-contexts", + gfnc->counters.dplane_contexts); + json_object_int_add(jo, "data-plane-contexts-queue", + gfnc->counters.ctxqueue_len); + json_object_int_add(jo, "data-plane-contexts-queue-peak", + gfnc->counters.ctxqueue_len_peak); + json_object_int_add(jo, "buffer-full-hits", gfnc->counters.buffer_full); + json_object_int_add(jo, "user-configures", + gfnc->counters.user_configures); + json_object_int_add(jo, "user-disables", gfnc->counters.user_disables); + vty_json(vty, jo); + + return CMD_SUCCESS; +} + +static int fpm_write_config(struct vty *vty) +{ + struct sockaddr_in *sin; + struct sockaddr_in6 *sin6; + int written = 0; + + if (gfnc->disabled) + return written; + + switch (gfnc->addr.ss_family) { + case AF_INET: + written = 1; + sin = (struct sockaddr_in *)&gfnc->addr; + vty_out(vty, "fpm address %pI4", &sin->sin_addr); + if (sin->sin_port != htons(SOUTHBOUND_DEFAULT_PORT)) + vty_out(vty, " port %d", ntohs(sin->sin_port)); + + vty_out(vty, "\n"); + break; + case AF_INET6: + written = 1; + sin6 = (struct sockaddr_in6 *)&gfnc->addr; + vty_out(vty, "fpm address %pI6", &sin6->sin6_addr); + if (sin6->sin6_port != htons(SOUTHBOUND_DEFAULT_PORT)) + vty_out(vty, " port %d", ntohs(sin6->sin6_port)); + + vty_out(vty, "\n"); + break; + + default: + break; + } + + if (!gfnc->use_nhg) { + vty_out(vty, "no fpm use-next-hop-groups\n"); + written = 1; + } + + if (!gfnc->use_route_replace) { + vty_out(vty, "no fpm use-route-replace\n"); + written = 1; + } + + return written; +} + +static struct cmd_node fpm_node = { + .name = "fpm", + .node = FPM_NODE, + .prompt = "", + .config_write = fpm_write_config, +}; + +/* + * FPM functions. + */ +static void fpm_connect(struct event *t); + +static void fpm_reconnect(struct fpm_nl_ctx *fnc) +{ + /* Cancel all zebra threads first. */ + event_cancel_async(zrouter.master, &fnc->t_lspreset, NULL); + event_cancel_async(zrouter.master, &fnc->t_lspwalk, NULL); + event_cancel_async(zrouter.master, &fnc->t_nhgreset, NULL); + event_cancel_async(zrouter.master, &fnc->t_nhgwalk, NULL); + event_cancel_async(zrouter.master, &fnc->t_ribreset, NULL); + event_cancel_async(zrouter.master, &fnc->t_ribwalk, NULL); + event_cancel_async(zrouter.master, &fnc->t_rmacreset, NULL); + event_cancel_async(zrouter.master, &fnc->t_rmacwalk, NULL); + + /* + * Grab the lock to empty the streams (data plane might try to + * enqueue updates while we are closing). + */ + frr_mutex_lock_autounlock(&fnc->obuf_mutex); + + /* Avoid calling close on `-1`. */ + if (fnc->socket != -1) { + close(fnc->socket); + fnc->socket = -1; + } + + stream_reset(fnc->ibuf); + stream_reset(fnc->obuf); + EVENT_OFF(fnc->t_read); + EVENT_OFF(fnc->t_write); + + /* FPM is disabled, don't attempt to connect. */ + if (fnc->disabled) + return; + + event_add_timer(fnc->fthread->master, fpm_connect, fnc, 3, + &fnc->t_connect); +} + +static void fpm_read(struct event *t) +{ + struct fpm_nl_ctx *fnc = EVENT_ARG(t); + fpm_msg_hdr_t fpm; + ssize_t rv; + char buf[65535]; + struct nlmsghdr *hdr; + struct zebra_dplane_ctx *ctx; + size_t available_bytes; + size_t hdr_available_bytes; + + /* Let's ignore the input at the moment. */ + rv = stream_read_try(fnc->ibuf, fnc->socket, + STREAM_WRITEABLE(fnc->ibuf)); + if (rv == 0) { + atomic_fetch_add_explicit(&fnc->counters.connection_closes, 1, + memory_order_relaxed); + + if (IS_ZEBRA_DEBUG_FPM) + zlog_debug("%s: connection closed", __func__); + + FPM_RECONNECT(fnc); + return; + } + if (rv == -1) { + atomic_fetch_add_explicit(&fnc->counters.connection_errors, 1, + memory_order_relaxed); + zlog_warn("%s: connection failure: %s", __func__, + strerror(errno)); + FPM_RECONNECT(fnc); + return; + } + + /* Schedule the next read */ + event_add_read(fnc->fthread->master, fpm_read, fnc, fnc->socket, + &fnc->t_read); + + /* We've got an interruption. */ + if (rv == -2) + return; + + + /* Account all bytes read. */ + atomic_fetch_add_explicit(&fnc->counters.bytes_read, rv, + memory_order_relaxed); + + available_bytes = STREAM_READABLE(fnc->ibuf); + while (available_bytes) { + if (available_bytes < (ssize_t)FPM_MSG_HDR_LEN) { + stream_pulldown(fnc->ibuf); + return; + } + + fpm.version = stream_getc(fnc->ibuf); + fpm.msg_type = stream_getc(fnc->ibuf); + fpm.msg_len = stream_getw(fnc->ibuf); + + if (fpm.version != FPM_PROTO_VERSION && + fpm.msg_type != FPM_MSG_TYPE_NETLINK) { + stream_reset(fnc->ibuf); + zlog_warn( + "%s: Received version/msg_type %u/%u, expected 1/1", + __func__, fpm.version, fpm.msg_type); + + FPM_RECONNECT(fnc); + return; + } + + /* + * If the passed in length doesn't even fill in the header + * something is wrong and reset. + */ + if (fpm.msg_len < FPM_MSG_HDR_LEN) { + zlog_warn( + "%s: Received message length: %u that does not even fill the FPM header", + __func__, fpm.msg_len); + FPM_RECONNECT(fnc); + return; + } + + /* + * If we have not received the whole payload, reset the stream + * back to the beginning of the header and move it to the + * top. + */ + if (fpm.msg_len > available_bytes) { + stream_rewind_getp(fnc->ibuf, FPM_MSG_HDR_LEN); + stream_pulldown(fnc->ibuf); + return; + } + + available_bytes -= FPM_MSG_HDR_LEN; + + /* + * Place the data from the stream into a buffer + */ + hdr = (struct nlmsghdr *)buf; + stream_get(buf, fnc->ibuf, fpm.msg_len - FPM_MSG_HDR_LEN); + hdr_available_bytes = fpm.msg_len - FPM_MSG_HDR_LEN; + available_bytes -= hdr_available_bytes; + + /* Sanity check: must be at least header size. */ + if (hdr->nlmsg_len < sizeof(*hdr)) { + zlog_warn( + "%s: [seq=%u] invalid message length %u (< %zu)", + __func__, hdr->nlmsg_seq, hdr->nlmsg_len, + sizeof(*hdr)); + continue; + } + if (hdr->nlmsg_len > fpm.msg_len) { + zlog_warn( + "%s: Received a inner header length of %u that is greater than the fpm total length of %u", + __func__, hdr->nlmsg_len, fpm.msg_len); + FPM_RECONNECT(fnc); + } + /* Not enough bytes available. */ + if (hdr->nlmsg_len > hdr_available_bytes) { + zlog_warn( + "%s: [seq=%u] invalid message length %u (> %zu)", + __func__, hdr->nlmsg_seq, hdr->nlmsg_len, + available_bytes); + continue; + } + + if (!(hdr->nlmsg_flags & NLM_F_REQUEST)) { + if (IS_ZEBRA_DEBUG_FPM) + zlog_debug( + "%s: [seq=%u] not a request, skipping", + __func__, hdr->nlmsg_seq); + + /* + * This request is a bust, go to the next one + */ + continue; + } + + switch (hdr->nlmsg_type) { + case RTM_NEWROUTE: + ctx = dplane_ctx_alloc(); + dplane_ctx_route_init(ctx, DPLANE_OP_ROUTE_NOTIFY, NULL, + NULL); + if (netlink_route_change_read_unicast_internal( + hdr, 0, false, ctx) != 1) { + dplane_ctx_fini(&ctx); + stream_pulldown(fnc->ibuf); + /* + * Let's continue to read other messages + * Even if we ignore this one. + */ + } + break; + default: + if (IS_ZEBRA_DEBUG_FPM) + zlog_debug( + "%s: Received message type %u which is not currently handled", + __func__, hdr->nlmsg_type); + break; + } + } + + stream_reset(fnc->ibuf); +} + +static void fpm_write(struct event *t) +{ + struct fpm_nl_ctx *fnc = EVENT_ARG(t); + socklen_t statuslen; + ssize_t bwritten; + int rv, status; + size_t btotal; + + if (fnc->connecting == true) { + status = 0; + statuslen = sizeof(status); + + rv = getsockopt(fnc->socket, SOL_SOCKET, SO_ERROR, &status, + &statuslen); + if (rv == -1 || status != 0) { + if (rv != -1) + zlog_warn("%s: connection failed: %s", __func__, + strerror(status)); + else + zlog_warn("%s: SO_ERROR failed: %s", __func__, + strerror(status)); + + atomic_fetch_add_explicit( + &fnc->counters.connection_errors, 1, + memory_order_relaxed); + + FPM_RECONNECT(fnc); + return; + } + + fnc->connecting = false; + + /* + * Starting with LSPs walk all FPM objects, marking them + * as unsent and then replaying them. + */ + event_add_timer(zrouter.master, fpm_lsp_reset, fnc, 0, + &fnc->t_lspreset); + + /* Permit receiving messages now. */ + event_add_read(fnc->fthread->master, fpm_read, fnc, fnc->socket, + &fnc->t_read); + } + + frr_mutex_lock_autounlock(&fnc->obuf_mutex); + + while (true) { + /* Stream is empty: reset pointers and return. */ + if (STREAM_READABLE(fnc->obuf) == 0) { + stream_reset(fnc->obuf); + break; + } + + /* Try to write all at once. */ + btotal = stream_get_endp(fnc->obuf) - + stream_get_getp(fnc->obuf); + bwritten = write(fnc->socket, stream_pnt(fnc->obuf), btotal); + if (bwritten == 0) { + atomic_fetch_add_explicit( + &fnc->counters.connection_closes, 1, + memory_order_relaxed); + + if (IS_ZEBRA_DEBUG_FPM) + zlog_debug("%s: connection closed", __func__); + break; + } + if (bwritten == -1) { + /* Attempt to continue if blocked by a signal. */ + if (errno == EINTR) + continue; + /* Receiver is probably slow, lets give it some time. */ + if (errno == EAGAIN || errno == EWOULDBLOCK) + break; + + atomic_fetch_add_explicit( + &fnc->counters.connection_errors, 1, + memory_order_relaxed); + zlog_warn("%s: connection failure: %s", __func__, + strerror(errno)); + + FPM_RECONNECT(fnc); + return; + } + + /* Account all bytes sent. */ + atomic_fetch_add_explicit(&fnc->counters.bytes_sent, bwritten, + memory_order_relaxed); + + /* Account number of bytes free. */ + atomic_fetch_sub_explicit(&fnc->counters.obuf_bytes, bwritten, + memory_order_relaxed); + + stream_forward_getp(fnc->obuf, (size_t)bwritten); + } + + /* Stream is not empty yet, we must schedule more writes. */ + if (STREAM_READABLE(fnc->obuf)) { + stream_pulldown(fnc->obuf); + event_add_write(fnc->fthread->master, fpm_write, fnc, + fnc->socket, &fnc->t_write); + return; + } +} + +static void fpm_connect(struct event *t) +{ + struct fpm_nl_ctx *fnc = EVENT_ARG(t); + struct sockaddr_in *sin = (struct sockaddr_in *)&fnc->addr; + struct sockaddr_in6 *sin6 = (struct sockaddr_in6 *)&fnc->addr; + socklen_t slen; + int rv, sock; + char addrstr[INET6_ADDRSTRLEN]; + + sock = socket(fnc->addr.ss_family, SOCK_STREAM, 0); + if (sock == -1) { + zlog_err("%s: fpm socket failed: %s", __func__, + strerror(errno)); + event_add_timer(fnc->fthread->master, fpm_connect, fnc, 3, + &fnc->t_connect); + return; + } + + set_nonblocking(sock); + + if (fnc->addr.ss_family == AF_INET) { + inet_ntop(AF_INET, &sin->sin_addr, addrstr, sizeof(addrstr)); + slen = sizeof(*sin); + } else { + inet_ntop(AF_INET6, &sin6->sin6_addr, addrstr, sizeof(addrstr)); + slen = sizeof(*sin6); + } + + if (IS_ZEBRA_DEBUG_FPM) + zlog_debug("%s: attempting to connect to %s:%d", __func__, + addrstr, ntohs(sin->sin_port)); + + rv = connect(sock, (struct sockaddr *)&fnc->addr, slen); + if (rv == -1 && errno != EINPROGRESS) { + atomic_fetch_add_explicit(&fnc->counters.connection_errors, 1, + memory_order_relaxed); + close(sock); + zlog_warn("%s: fpm connection failed: %s", __func__, + strerror(errno)); + event_add_timer(fnc->fthread->master, fpm_connect, fnc, 3, + &fnc->t_connect); + return; + } + + fnc->connecting = (errno == EINPROGRESS); + fnc->socket = sock; + if (!fnc->connecting) + event_add_read(fnc->fthread->master, fpm_read, fnc, sock, + &fnc->t_read); + event_add_write(fnc->fthread->master, fpm_write, fnc, sock, + &fnc->t_write); + + /* + * Starting with LSPs walk all FPM objects, marking them + * as unsent and then replaying them. + * + * If we are not connected, then delay the objects reset/send. + */ + if (!fnc->connecting) + event_add_timer(zrouter.master, fpm_lsp_reset, fnc, 0, + &fnc->t_lspreset); +} + +/** + * Encode data plane operation context into netlink and enqueue it in the FPM + * output buffer. + * + * @param fnc the netlink FPM context. + * @param ctx the data plane operation context data. + * @return 0 on success or -1 on not enough space. + */ +static int fpm_nl_enqueue(struct fpm_nl_ctx *fnc, struct zebra_dplane_ctx *ctx) +{ + uint8_t nl_buf[NL_PKT_BUF_SIZE]; + size_t nl_buf_len; + ssize_t rv; + uint64_t obytes, obytes_peak; + enum dplane_op_e op = dplane_ctx_get_op(ctx); + + /* + * If we were configured to not use next hop groups, then quit as soon + * as possible. + */ + if ((!fnc->use_nhg) + && (op == DPLANE_OP_NH_DELETE || op == DPLANE_OP_NH_INSTALL + || op == DPLANE_OP_NH_UPDATE)) + return 0; + + nl_buf_len = 0; + + frr_mutex_lock_autounlock(&fnc->obuf_mutex); + + /* + * If route replace is enabled then directly encode the install which + * is going to use `NLM_F_REPLACE` (instead of delete/add operations). + */ + if (fnc->use_route_replace && op == DPLANE_OP_ROUTE_UPDATE) + op = DPLANE_OP_ROUTE_INSTALL; + + switch (op) { + case DPLANE_OP_ROUTE_UPDATE: + case DPLANE_OP_ROUTE_DELETE: + rv = netlink_route_multipath_msg_encode(RTM_DELROUTE, ctx, + nl_buf, sizeof(nl_buf), + true, fnc->use_nhg, + false); + if (rv <= 0) { + zlog_err( + "%s: netlink_route_multipath_msg_encode failed", + __func__); + return 0; + } + + nl_buf_len = (size_t)rv; + + /* UPDATE operations need a INSTALL, otherwise just quit. */ + if (op == DPLANE_OP_ROUTE_DELETE) + break; + + /* FALL THROUGH */ + case DPLANE_OP_ROUTE_INSTALL: + rv = netlink_route_multipath_msg_encode(RTM_NEWROUTE, ctx, + &nl_buf[nl_buf_len], + sizeof(nl_buf) - + nl_buf_len, + true, fnc->use_nhg, + fnc->use_route_replace); + if (rv <= 0) { + zlog_err( + "%s: netlink_route_multipath_msg_encode failed", + __func__); + return 0; + } + + nl_buf_len += (size_t)rv; + break; + + case DPLANE_OP_MAC_INSTALL: + case DPLANE_OP_MAC_DELETE: + rv = netlink_macfdb_update_ctx(ctx, nl_buf, sizeof(nl_buf)); + if (rv <= 0) { + zlog_err("%s: netlink_macfdb_update_ctx failed", + __func__); + return 0; + } + + nl_buf_len = (size_t)rv; + break; + + case DPLANE_OP_NH_DELETE: + rv = netlink_nexthop_msg_encode(RTM_DELNEXTHOP, ctx, nl_buf, + sizeof(nl_buf), true); + if (rv <= 0) { + zlog_err("%s: netlink_nexthop_msg_encode failed", + __func__); + return 0; + } + + nl_buf_len = (size_t)rv; + break; + case DPLANE_OP_NH_INSTALL: + case DPLANE_OP_NH_UPDATE: + rv = netlink_nexthop_msg_encode(RTM_NEWNEXTHOP, ctx, nl_buf, + sizeof(nl_buf), true); + if (rv <= 0) { + zlog_err("%s: netlink_nexthop_msg_encode failed", + __func__); + return 0; + } + + nl_buf_len = (size_t)rv; + break; + + case DPLANE_OP_LSP_INSTALL: + case DPLANE_OP_LSP_UPDATE: + case DPLANE_OP_LSP_DELETE: + rv = netlink_lsp_msg_encoder(ctx, nl_buf, sizeof(nl_buf)); + if (rv <= 0) { + zlog_err("%s: netlink_lsp_msg_encoder failed", + __func__); + return 0; + } + + nl_buf_len += (size_t)rv; + break; + + /* Un-handled by FPM at this time. */ + case DPLANE_OP_PW_INSTALL: + case DPLANE_OP_PW_UNINSTALL: + case DPLANE_OP_ADDR_INSTALL: + case DPLANE_OP_ADDR_UNINSTALL: + case DPLANE_OP_NEIGH_INSTALL: + case DPLANE_OP_NEIGH_UPDATE: + case DPLANE_OP_NEIGH_DELETE: + case DPLANE_OP_VTEP_ADD: + case DPLANE_OP_VTEP_DELETE: + case DPLANE_OP_SYS_ROUTE_ADD: + case DPLANE_OP_SYS_ROUTE_DELETE: + case DPLANE_OP_ROUTE_NOTIFY: + case DPLANE_OP_LSP_NOTIFY: + case DPLANE_OP_RULE_ADD: + case DPLANE_OP_RULE_DELETE: + case DPLANE_OP_RULE_UPDATE: + case DPLANE_OP_NEIGH_DISCOVER: + case DPLANE_OP_BR_PORT_UPDATE: + case DPLANE_OP_IPTABLE_ADD: + case DPLANE_OP_IPTABLE_DELETE: + case DPLANE_OP_IPSET_ADD: + case DPLANE_OP_IPSET_DELETE: + case DPLANE_OP_IPSET_ENTRY_ADD: + case DPLANE_OP_IPSET_ENTRY_DELETE: + case DPLANE_OP_NEIGH_IP_INSTALL: + case DPLANE_OP_NEIGH_IP_DELETE: + case DPLANE_OP_NEIGH_TABLE_UPDATE: + case DPLANE_OP_GRE_SET: + case DPLANE_OP_INTF_ADDR_ADD: + case DPLANE_OP_INTF_ADDR_DEL: + case DPLANE_OP_INTF_NETCONFIG: + case DPLANE_OP_INTF_INSTALL: + case DPLANE_OP_INTF_UPDATE: + case DPLANE_OP_INTF_DELETE: + case DPLANE_OP_TC_QDISC_INSTALL: + case DPLANE_OP_TC_QDISC_UNINSTALL: + case DPLANE_OP_TC_CLASS_ADD: + case DPLANE_OP_TC_CLASS_DELETE: + case DPLANE_OP_TC_CLASS_UPDATE: + case DPLANE_OP_TC_FILTER_ADD: + case DPLANE_OP_TC_FILTER_DELETE: + case DPLANE_OP_TC_FILTER_UPDATE: + case DPLANE_OP_NONE: + case DPLANE_OP_STARTUP_STAGE: + break; + + } + + /* Skip empty enqueues. */ + if (nl_buf_len == 0) + return 0; + + /* We must know if someday a message goes beyond 65KiB. */ + assert((nl_buf_len + FPM_HEADER_SIZE) <= UINT16_MAX); + + /* Check if we have enough buffer space. */ + if (STREAM_WRITEABLE(fnc->obuf) < (nl_buf_len + FPM_HEADER_SIZE)) { + atomic_fetch_add_explicit(&fnc->counters.buffer_full, 1, + memory_order_relaxed); + + if (IS_ZEBRA_DEBUG_FPM) + zlog_debug( + "%s: buffer full: wants to write %zu but has %zu", + __func__, nl_buf_len + FPM_HEADER_SIZE, + STREAM_WRITEABLE(fnc->obuf)); + + return -1; + } + + /* + * Fill in the FPM header information. + * + * See FPM_HEADER_SIZE definition for more information. + */ + stream_putc(fnc->obuf, 1); + stream_putc(fnc->obuf, 1); + stream_putw(fnc->obuf, nl_buf_len + FPM_HEADER_SIZE); + + /* Write current data. */ + stream_write(fnc->obuf, nl_buf, (size_t)nl_buf_len); + + /* Account number of bytes waiting to be written. */ + atomic_fetch_add_explicit(&fnc->counters.obuf_bytes, + nl_buf_len + FPM_HEADER_SIZE, + memory_order_relaxed); + obytes = atomic_load_explicit(&fnc->counters.obuf_bytes, + memory_order_relaxed); + obytes_peak = atomic_load_explicit(&fnc->counters.obuf_peak, + memory_order_relaxed); + if (obytes_peak < obytes) + atomic_store_explicit(&fnc->counters.obuf_peak, obytes, + memory_order_relaxed); + + /* Tell the thread to start writing. */ + event_add_write(fnc->fthread->master, fpm_write, fnc, fnc->socket, + &fnc->t_write); + + return 0; +} + +/* + * LSP walk/send functions + */ +struct fpm_lsp_arg { + struct zebra_dplane_ctx *ctx; + struct fpm_nl_ctx *fnc; + bool complete; +}; + +static int fpm_lsp_send_cb(struct hash_bucket *bucket, void *arg) +{ + struct zebra_lsp *lsp = bucket->data; + struct fpm_lsp_arg *fla = arg; + + /* Skip entries which have already been sent */ + if (CHECK_FLAG(lsp->flags, LSP_FLAG_FPM)) + return HASHWALK_CONTINUE; + + dplane_ctx_reset(fla->ctx); + dplane_ctx_lsp_init(fla->ctx, DPLANE_OP_LSP_INSTALL, lsp); + + if (fpm_nl_enqueue(fla->fnc, fla->ctx) == -1) { + fla->complete = false; + return HASHWALK_ABORT; + } + + /* Mark entry as sent */ + SET_FLAG(lsp->flags, LSP_FLAG_FPM); + return HASHWALK_CONTINUE; +} + +static void fpm_lsp_send(struct event *t) +{ + struct fpm_nl_ctx *fnc = EVENT_ARG(t); + struct zebra_vrf *zvrf = zebra_vrf_lookup_by_id(VRF_DEFAULT); + struct fpm_lsp_arg fla; + + fla.fnc = fnc; + fla.ctx = dplane_ctx_alloc(); + fla.complete = true; + + hash_walk(zvrf->lsp_table, fpm_lsp_send_cb, &fla); + + dplane_ctx_fini(&fla.ctx); + + if (fla.complete) { + WALK_FINISH(fnc, FNE_LSP_FINISHED); + + /* Now move onto routes */ + event_add_timer(zrouter.master, fpm_nhg_reset, fnc, 0, + &fnc->t_nhgreset); + } else { + /* Didn't finish - reschedule LSP walk */ + event_add_timer(zrouter.master, fpm_lsp_send, fnc, 0, + &fnc->t_lspwalk); + } +} + +/* + * Next hop walk/send functions. + */ +struct fpm_nhg_arg { + struct zebra_dplane_ctx *ctx; + struct fpm_nl_ctx *fnc; + bool complete; +}; + +static int fpm_nhg_send_cb(struct hash_bucket *bucket, void *arg) +{ + struct nhg_hash_entry *nhe = bucket->data; + struct fpm_nhg_arg *fna = arg; + + /* This entry was already sent, skip it. */ + if (CHECK_FLAG(nhe->flags, NEXTHOP_GROUP_FPM)) + return HASHWALK_CONTINUE; + + /* Reset ctx to reuse allocated memory, take a snapshot and send it. */ + dplane_ctx_reset(fna->ctx); + dplane_ctx_nexthop_init(fna->ctx, DPLANE_OP_NH_INSTALL, nhe); + if (fpm_nl_enqueue(fna->fnc, fna->ctx) == -1) { + /* Our buffers are full, lets give it some cycles. */ + fna->complete = false; + return HASHWALK_ABORT; + } + + /* Mark group as sent, so it doesn't get sent again. */ + SET_FLAG(nhe->flags, NEXTHOP_GROUP_FPM); + + return HASHWALK_CONTINUE; +} + +static void fpm_nhg_send(struct event *t) +{ + struct fpm_nl_ctx *fnc = EVENT_ARG(t); + struct fpm_nhg_arg fna; + + fna.fnc = fnc; + fna.ctx = dplane_ctx_alloc(); + fna.complete = true; + + /* Send next hops. */ + if (fnc->use_nhg) + hash_walk(zrouter.nhgs_id, fpm_nhg_send_cb, &fna); + + /* `free()` allocated memory. */ + dplane_ctx_fini(&fna.ctx); + + /* We are done sending next hops, lets install the routes now. */ + if (fna.complete) { + WALK_FINISH(fnc, FNE_NHG_FINISHED); + event_add_timer(zrouter.master, fpm_rib_reset, fnc, 0, + &fnc->t_ribreset); + } else /* Otherwise reschedule next hop group again. */ + event_add_timer(zrouter.master, fpm_nhg_send, fnc, 0, + &fnc->t_nhgwalk); +} + +/** + * Send all RIB installed routes to the connected data plane. + */ +static void fpm_rib_send(struct event *t) +{ + struct fpm_nl_ctx *fnc = EVENT_ARG(t); + rib_dest_t *dest; + struct route_node *rn; + struct route_table *rt; + struct zebra_dplane_ctx *ctx; + rib_tables_iter_t rt_iter; + + /* Allocate temporary context for all transactions. */ + ctx = dplane_ctx_alloc(); + + rt_iter.state = RIB_TABLES_ITER_S_INIT; + while ((rt = rib_tables_iter_next(&rt_iter))) { + for (rn = route_top(rt); rn; rn = srcdest_route_next(rn)) { + dest = rib_dest_from_rnode(rn); + /* Skip bad route entries. */ + if (dest == NULL || dest->selected_fib == NULL) + continue; + + /* Check for already sent routes. */ + if (CHECK_FLAG(dest->flags, RIB_DEST_UPDATE_FPM)) + continue; + + /* Enqueue route install. */ + dplane_ctx_reset(ctx); + dplane_ctx_route_init(ctx, DPLANE_OP_ROUTE_INSTALL, rn, + dest->selected_fib); + if (fpm_nl_enqueue(fnc, ctx) == -1) { + /* Free the temporary allocated context. */ + dplane_ctx_fini(&ctx); + + event_add_timer(zrouter.master, fpm_rib_send, + fnc, 1, &fnc->t_ribwalk); + return; + } + + /* Mark as sent. */ + SET_FLAG(dest->flags, RIB_DEST_UPDATE_FPM); + } + } + + /* Free the temporary allocated context. */ + dplane_ctx_fini(&ctx); + + /* All RIB routes sent! */ + WALK_FINISH(fnc, FNE_RIB_FINISHED); + + /* Schedule next event: RMAC reset. */ + event_add_event(zrouter.master, fpm_rmac_reset, fnc, 0, + &fnc->t_rmacreset); +} + +/* + * The next three functions will handle RMAC enqueue. + */ +struct fpm_rmac_arg { + struct zebra_dplane_ctx *ctx; + struct fpm_nl_ctx *fnc; + struct zebra_l3vni *zl3vni; + bool complete; +}; + +static void fpm_enqueue_rmac_table(struct hash_bucket *bucket, void *arg) +{ + struct fpm_rmac_arg *fra = arg; + struct zebra_mac *zrmac = bucket->data; + struct zebra_if *zif = fra->zl3vni->vxlan_if->info; + struct zebra_vxlan_vni *vni; + struct zebra_if *br_zif; + vlanid_t vid; + bool sticky; + + /* Entry already sent. */ + if (CHECK_FLAG(zrmac->flags, ZEBRA_MAC_FPM_SENT) || !fra->complete) + return; + + sticky = !!CHECK_FLAG(zrmac->flags, + (ZEBRA_MAC_STICKY | ZEBRA_MAC_REMOTE_DEF_GW)); + br_zif = (struct zebra_if *)(zif->brslave_info.br_if->info); + vni = zebra_vxlan_if_vni_find(zif, fra->zl3vni->vni); + vid = IS_ZEBRA_IF_BRIDGE_VLAN_AWARE(br_zif) ? vni->access_vlan : 0; + + dplane_ctx_reset(fra->ctx); + dplane_ctx_set_op(fra->ctx, DPLANE_OP_MAC_INSTALL); + dplane_mac_init(fra->ctx, fra->zl3vni->vxlan_if, + zif->brslave_info.br_if, vid, &zrmac->macaddr, vni->vni, + zrmac->fwd_info.r_vtep_ip, sticky, 0 /*nhg*/, + 0 /*update_flags*/); + if (fpm_nl_enqueue(fra->fnc, fra->ctx) == -1) { + event_add_timer(zrouter.master, fpm_rmac_send, fra->fnc, 1, + &fra->fnc->t_rmacwalk); + fra->complete = false; + } +} + +static void fpm_enqueue_l3vni_table(struct hash_bucket *bucket, void *arg) +{ + struct fpm_rmac_arg *fra = arg; + struct zebra_l3vni *zl3vni = bucket->data; + + fra->zl3vni = zl3vni; + hash_iterate(zl3vni->rmac_table, fpm_enqueue_rmac_table, zl3vni); +} + +static void fpm_rmac_send(struct event *t) +{ + struct fpm_rmac_arg fra; + + fra.fnc = EVENT_ARG(t); + fra.ctx = dplane_ctx_alloc(); + fra.complete = true; + hash_iterate(zrouter.l3vni_table, fpm_enqueue_l3vni_table, &fra); + dplane_ctx_fini(&fra.ctx); + + /* RMAC walk completed. */ + if (fra.complete) + WALK_FINISH(fra.fnc, FNE_RMAC_FINISHED); +} + +/* + * Resets the next hop FPM flags so we send all next hops again. + */ +static void fpm_nhg_reset_cb(struct hash_bucket *bucket, void *arg) +{ + struct nhg_hash_entry *nhe = bucket->data; + + /* Unset FPM installation flag so it gets installed again. */ + UNSET_FLAG(nhe->flags, NEXTHOP_GROUP_FPM); +} + +static void fpm_nhg_reset(struct event *t) +{ + struct fpm_nl_ctx *fnc = EVENT_ARG(t); + + hash_iterate(zrouter.nhgs_id, fpm_nhg_reset_cb, NULL); + + /* Schedule next step: send next hop groups. */ + event_add_event(zrouter.master, fpm_nhg_send, fnc, 0, &fnc->t_nhgwalk); +} + +/* + * Resets the LSP FPM flag so we send all LSPs again. + */ +static void fpm_lsp_reset_cb(struct hash_bucket *bucket, void *arg) +{ + struct zebra_lsp *lsp = bucket->data; + + UNSET_FLAG(lsp->flags, LSP_FLAG_FPM); +} + +static void fpm_lsp_reset(struct event *t) +{ + struct fpm_nl_ctx *fnc = EVENT_ARG(t); + struct zebra_vrf *zvrf = zebra_vrf_lookup_by_id(VRF_DEFAULT); + + hash_iterate(zvrf->lsp_table, fpm_lsp_reset_cb, NULL); + + /* Schedule next step: send LSPs */ + event_add_event(zrouter.master, fpm_lsp_send, fnc, 0, &fnc->t_lspwalk); +} + +/** + * Resets the RIB FPM flags so we send all routes again. + */ +static void fpm_rib_reset(struct event *t) +{ + struct fpm_nl_ctx *fnc = EVENT_ARG(t); + rib_dest_t *dest; + struct route_node *rn; + struct route_table *rt; + rib_tables_iter_t rt_iter; + + rt_iter.state = RIB_TABLES_ITER_S_INIT; + while ((rt = rib_tables_iter_next(&rt_iter))) { + for (rn = route_top(rt); rn; rn = srcdest_route_next(rn)) { + dest = rib_dest_from_rnode(rn); + /* Skip bad route entries. */ + if (dest == NULL) + continue; + + UNSET_FLAG(dest->flags, RIB_DEST_UPDATE_FPM); + } + } + + /* Schedule next step: send RIB routes. */ + event_add_event(zrouter.master, fpm_rib_send, fnc, 0, &fnc->t_ribwalk); +} + +/* + * The next three function will handle RMAC table reset. + */ +static void fpm_unset_rmac_table(struct hash_bucket *bucket, void *arg) +{ + struct zebra_mac *zrmac = bucket->data; + + UNSET_FLAG(zrmac->flags, ZEBRA_MAC_FPM_SENT); +} + +static void fpm_unset_l3vni_table(struct hash_bucket *bucket, void *arg) +{ + struct zebra_l3vni *zl3vni = bucket->data; + + hash_iterate(zl3vni->rmac_table, fpm_unset_rmac_table, zl3vni); +} + +static void fpm_rmac_reset(struct event *t) +{ + struct fpm_nl_ctx *fnc = EVENT_ARG(t); + + hash_iterate(zrouter.l3vni_table, fpm_unset_l3vni_table, NULL); + + /* Schedule next event: send RMAC entries. */ + event_add_event(zrouter.master, fpm_rmac_send, fnc, 0, + &fnc->t_rmacwalk); +} + +static void fpm_process_queue(struct event *t) +{ + struct fpm_nl_ctx *fnc = EVENT_ARG(t); + struct zebra_dplane_ctx *ctx; + bool no_bufs = false; + uint64_t processed_contexts = 0; + + while (true) { + /* No space available yet. */ + if (STREAM_WRITEABLE(fnc->obuf) < NL_PKT_BUF_SIZE) { + no_bufs = true; + break; + } + + /* Dequeue next item or quit processing. */ + frr_with_mutex (&fnc->ctxqueue_mutex) { + ctx = dplane_ctx_dequeue(&fnc->ctxqueue); + } + if (ctx == NULL) + break; + + /* + * Intentionally ignoring the return value + * as that we are ensuring that we can write to + * the output data in the STREAM_WRITEABLE + * check above, so we can ignore the return + */ + if (fnc->socket != -1) + (void)fpm_nl_enqueue(fnc, ctx); + + /* Account the processed entries. */ + processed_contexts++; + atomic_fetch_sub_explicit(&fnc->counters.ctxqueue_len, 1, + memory_order_relaxed); + + dplane_ctx_set_status(ctx, ZEBRA_DPLANE_REQUEST_SUCCESS); + dplane_provider_enqueue_out_ctx(fnc->prov, ctx); + } + + /* Update count of processed contexts */ + atomic_fetch_add_explicit(&fnc->counters.dplane_contexts, + processed_contexts, memory_order_relaxed); + + /* Re-schedule if we ran out of buffer space */ + if (no_bufs) + event_add_timer(fnc->fthread->master, fpm_process_queue, fnc, 0, + &fnc->t_dequeue); + + /* + * Let the dataplane thread know if there are items in the + * output queue to be processed. Otherwise they may sit + * until the dataplane thread gets scheduled for new, + * unrelated work. + */ + if (dplane_provider_out_ctx_queue_len(fnc->prov) > 0) + dplane_provider_work_ready(); +} + +/** + * Handles external (e.g. CLI, data plane or others) events. + */ +static void fpm_process_event(struct event *t) +{ + struct fpm_nl_ctx *fnc = EVENT_ARG(t); + enum fpm_nl_events event = EVENT_VAL(t); + + switch (event) { + case FNE_DISABLE: + zlog_info("%s: manual FPM disable event", __func__); + fnc->disabled = true; + atomic_fetch_add_explicit(&fnc->counters.user_disables, 1, + memory_order_relaxed); + + /* Call reconnect to disable timers and clean up context. */ + fpm_reconnect(fnc); + break; + + case FNE_RECONNECT: + zlog_info("%s: manual FPM reconnect event", __func__); + fnc->disabled = false; + atomic_fetch_add_explicit(&fnc->counters.user_configures, 1, + memory_order_relaxed); + fpm_reconnect(fnc); + break; + + case FNE_RESET_COUNTERS: + zlog_info("%s: manual FPM counters reset event", __func__); + memset(&fnc->counters, 0, sizeof(fnc->counters)); + break; + + case FNE_TOGGLE_NHG: + zlog_info("%s: toggle next hop groups support", __func__); + fnc->use_nhg = !fnc->use_nhg; + fpm_reconnect(fnc); + break; + + case FNE_INTERNAL_RECONNECT: + fpm_reconnect(fnc); + break; + + case FNE_NHG_FINISHED: + if (IS_ZEBRA_DEBUG_FPM) + zlog_debug("%s: next hop groups walk finished", + __func__); + break; + case FNE_RIB_FINISHED: + if (IS_ZEBRA_DEBUG_FPM) + zlog_debug("%s: RIB walk finished", __func__); + break; + case FNE_RMAC_FINISHED: + if (IS_ZEBRA_DEBUG_FPM) + zlog_debug("%s: RMAC walk finished", __func__); + break; + case FNE_LSP_FINISHED: + if (IS_ZEBRA_DEBUG_FPM) + zlog_debug("%s: LSP walk finished", __func__); + break; + } +} + +/* + * Data plane functions. + */ +static int fpm_nl_start(struct zebra_dplane_provider *prov) +{ + struct fpm_nl_ctx *fnc; + + fnc = dplane_provider_get_data(prov); + fnc->fthread = frr_pthread_new(NULL, prov_name, prov_name); + assert(frr_pthread_run(fnc->fthread, NULL) == 0); + fnc->ibuf = stream_new(NL_PKT_BUF_SIZE); + fnc->obuf = stream_new(NL_PKT_BUF_SIZE * 128); + pthread_mutex_init(&fnc->obuf_mutex, NULL); + fnc->socket = -1; + fnc->disabled = true; + fnc->prov = prov; + dplane_ctx_q_init(&fnc->ctxqueue); + pthread_mutex_init(&fnc->ctxqueue_mutex, NULL); + + /* Set default values. */ + fnc->use_nhg = true; + fnc->use_route_replace = true; + + return 0; +} + +static int fpm_nl_finish_early(struct fpm_nl_ctx *fnc) +{ + /* Disable all events and close socket. */ + EVENT_OFF(fnc->t_lspreset); + EVENT_OFF(fnc->t_lspwalk); + EVENT_OFF(fnc->t_nhgreset); + EVENT_OFF(fnc->t_nhgwalk); + EVENT_OFF(fnc->t_ribreset); + EVENT_OFF(fnc->t_ribwalk); + EVENT_OFF(fnc->t_rmacreset); + EVENT_OFF(fnc->t_rmacwalk); + EVENT_OFF(fnc->t_event); + EVENT_OFF(fnc->t_nhg); + event_cancel_async(fnc->fthread->master, &fnc->t_read, NULL); + event_cancel_async(fnc->fthread->master, &fnc->t_write, NULL); + event_cancel_async(fnc->fthread->master, &fnc->t_connect, NULL); + + if (fnc->socket != -1) { + close(fnc->socket); + fnc->socket = -1; + } + + return 0; +} + +static int fpm_nl_finish_late(struct fpm_nl_ctx *fnc) +{ + /* Stop the running thread. */ + frr_pthread_stop(fnc->fthread, NULL); + + /* Free all allocated resources. */ + pthread_mutex_destroy(&fnc->obuf_mutex); + pthread_mutex_destroy(&fnc->ctxqueue_mutex); + stream_free(fnc->ibuf); + stream_free(fnc->obuf); + free(gfnc); + gfnc = NULL; + + return 0; +} + +static int fpm_nl_finish(struct zebra_dplane_provider *prov, bool early) +{ + struct fpm_nl_ctx *fnc; + + fnc = dplane_provider_get_data(prov); + if (early) + return fpm_nl_finish_early(fnc); + + return fpm_nl_finish_late(fnc); +} + +static int fpm_nl_process(struct zebra_dplane_provider *prov) +{ + struct zebra_dplane_ctx *ctx; + struct fpm_nl_ctx *fnc; + int counter, limit; + uint64_t cur_queue, peak_queue = 0, stored_peak_queue; + + fnc = dplane_provider_get_data(prov); + limit = dplane_provider_get_work_limit(prov); + for (counter = 0; counter < limit; counter++) { + ctx = dplane_provider_dequeue_in_ctx(prov); + if (ctx == NULL) + break; + + /* + * Skip all notifications if not connected, we'll walk the RIB + * anyway. + */ + if (fnc->socket != -1 && fnc->connecting == false) { + /* + * Update the number of queued contexts *before* + * enqueueing, to ensure counter consistency. + */ + atomic_fetch_add_explicit(&fnc->counters.ctxqueue_len, + 1, memory_order_relaxed); + + frr_with_mutex (&fnc->ctxqueue_mutex) { + dplane_ctx_enqueue_tail(&fnc->ctxqueue, ctx); + } + + cur_queue = atomic_load_explicit( + &fnc->counters.ctxqueue_len, + memory_order_relaxed); + if (peak_queue < cur_queue) + peak_queue = cur_queue; + continue; + } + + dplane_ctx_set_status(ctx, ZEBRA_DPLANE_REQUEST_SUCCESS); + dplane_provider_enqueue_out_ctx(prov, ctx); + } + + /* Update peak queue length, if we just observed a new peak */ + stored_peak_queue = atomic_load_explicit( + &fnc->counters.ctxqueue_len_peak, memory_order_relaxed); + if (stored_peak_queue < peak_queue) + atomic_store_explicit(&fnc->counters.ctxqueue_len_peak, + peak_queue, memory_order_relaxed); + + if (atomic_load_explicit(&fnc->counters.ctxqueue_len, + memory_order_relaxed) + > 0) + event_add_timer(fnc->fthread->master, fpm_process_queue, fnc, 0, + &fnc->t_dequeue); + + /* Ensure dataplane thread is rescheduled if we hit the work limit */ + if (counter >= limit) + dplane_provider_work_ready(); + + return 0; +} + +static int fpm_nl_new(struct event_loop *tm) +{ + struct zebra_dplane_provider *prov = NULL; + int rv; + + gfnc = calloc(1, sizeof(*gfnc)); + rv = dplane_provider_register(prov_name, DPLANE_PRIO_POSTPROCESS, + DPLANE_PROV_FLAG_THREADED, fpm_nl_start, + fpm_nl_process, fpm_nl_finish, gfnc, + &prov); + + if (IS_ZEBRA_DEBUG_DPLANE) + zlog_debug("%s register status: %d", prov_name, rv); + + install_node(&fpm_node); + install_element(ENABLE_NODE, &fpm_show_counters_cmd); + install_element(ENABLE_NODE, &fpm_show_counters_json_cmd); + install_element(ENABLE_NODE, &fpm_reset_counters_cmd); + install_element(CONFIG_NODE, &fpm_set_address_cmd); + install_element(CONFIG_NODE, &no_fpm_set_address_cmd); + install_element(CONFIG_NODE, &fpm_use_nhg_cmd); + install_element(CONFIG_NODE, &no_fpm_use_nhg_cmd); + install_element(CONFIG_NODE, &fpm_use_route_replace_cmd); + install_element(CONFIG_NODE, &no_fpm_use_route_replace_cmd); + + return 0; +} + +static int fpm_nl_init(void) +{ + hook_register(frr_late_init, fpm_nl_new); + return 0; +} + +FRR_MODULE_SETUP( + .name = "dplane_fpm_nl", + .version = "0.0.1", + .description = "Data plane plugin for FPM using netlink.", + .init = fpm_nl_init, +); |