diff options
Diffstat (limited to '')
-rw-r--r-- | zebra/kernel_socket.c | 1644 |
1 files changed, 1644 insertions, 0 deletions
diff --git a/zebra/kernel_socket.c b/zebra/kernel_socket.c new file mode 100644 index 0000000..e76d8c0 --- /dev/null +++ b/zebra/kernel_socket.c @@ -0,0 +1,1644 @@ +/* Kernel communication using routing socket. + * Copyright (C) 1999 Kunihiro Ishiguro + * + * This file is part of GNU Zebra. + * + * GNU Zebra is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License as published by the + * Free Software Foundation; either version 2, or (at your option) any + * later version. + * + * GNU Zebra is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. + * + * You should have received a copy of the GNU General Public License along + * with this program; see the file COPYING; if not, write to the Free Software + * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA + */ + +#include <zebra.h> + +#ifndef HAVE_NETLINK + +#include <net/if_types.h> +#ifdef __OpenBSD__ +#include <netmpls/mpls.h> +#endif + +#include "if.h" +#include "prefix.h" +#include "sockunion.h" +#include "connected.h" +#include "memory.h" +#include "ioctl.h" +#include "log.h" +#include "table.h" +#include "rib.h" +#include "privs.h" +#include "vrf.h" +#include "lib_errors.h" + +#include "zebra/rt.h" +#include "zebra/interface.h" +#include "zebra/zebra_router.h" +#include "zebra/debug.h" +#include "zebra/kernel_socket.h" +#include "zebra/rib.h" +#include "zebra/zebra_errors.h" +#include "zebra/zebra_ptm.h" + +extern struct zebra_privs_t zserv_privs; + +/* + * Historically, the BSD routing socket has aligned data following a + * struct sockaddr to sizeof(long), which was 4 bytes on some + * platforms, and 8 bytes on others. NetBSD 6 changed the routing + * socket to align to sizeof(uint64_t), which is 8 bytes. OS X + * appears to align to sizeof(int), which is 4 bytes. + * + * Alignment of zero-sized sockaddrs is nonsensical, but historically + * BSD defines RT_ROUNDUP(0) to be the alignment interval (rather than + * 0). We follow this practice without questioning it, but it is a + * bug if frr calls ROUNDUP with 0. + */ +#ifdef __APPLE__ +#define ROUNDUP_TYPE int +#else +#define ROUNDUP_TYPE long +#endif + +/* + * Because of these varying conventions, the only sane approach is for + * the <net/route.h> header to define some flavor of ROUNDUP macro. + */ + +/* OS X (Xcode as of 2014-12) is known not to define RT_ROUNDUP */ +#if defined(RT_ROUNDUP) +#define ROUNDUP(a) RT_ROUNDUP(a) +#endif /* defined(RT_ROUNDUP) */ + +/* + * If ROUNDUP has not yet been defined in terms of platform-provided + * defines, attempt to cope with heuristics. + */ +#if !defined(ROUNDUP) + +/* + * If you're porting to a platform that changed RT_ROUNDUP but doesn't + * have it in its headers, this will break rather obviously and you'll + * have to fix it here. + */ +#define ROUNDUP(a) \ + ((a) > 0 ? (1 + (((a)-1) | (sizeof(ROUNDUP_TYPE) - 1))) \ + : sizeof(ROUNDUP_TYPE)) + +#endif /* defined(ROUNDUP) */ + + +#if defined(SA_SIZE) +/* SAROUNDUP is the only thing we need, and SA_SIZE provides that */ +#define SAROUNDUP(a) SA_SIZE(a) +#else /* !SA_SIZE */ +/* + * Given a pointer (sockaddr or void *), return the number of bytes + * taken up by the sockaddr and any padding needed for alignment. + */ +#if defined(HAVE_STRUCT_SOCKADDR_SA_LEN) +#define SAROUNDUP(X) ROUNDUP(((struct sockaddr *)(X))->sa_len) +#else +/* + * One would hope all fixed-size structure definitions are aligned, + * but round them up nonetheless. + */ +#define SAROUNDUP(X) \ + (((struct sockaddr *)(X))->sa_family == AF_INET \ + ? ROUNDUP(sizeof(struct sockaddr_in)) \ + : (((struct sockaddr *)(X))->sa_family == AF_INET6 \ + ? ROUNDUP(sizeof(struct sockaddr_in6)) \ + : (((struct sockaddr *)(X))->sa_family == AF_LINK \ + ? ROUNDUP(sizeof(struct sockaddr_dl)) \ + : sizeof(struct sockaddr)))) +#endif /* HAVE_STRUCT_SOCKADDR_SA_LEN */ + +#endif /* !SA_SIZE */ + +/* Routing socket message types. */ +const struct message rtm_type_str[] = {{RTM_ADD, "RTM_ADD"}, + {RTM_DELETE, "RTM_DELETE"}, + {RTM_CHANGE, "RTM_CHANGE"}, + {RTM_GET, "RTM_GET"}, + {RTM_LOSING, "RTM_LOSING"}, + {RTM_REDIRECT, "RTM_REDIRECT"}, + {RTM_MISS, "RTM_MISS"}, +#ifdef RTM_LOCK + {RTM_LOCK, "RTM_LOCK"}, +#endif /* RTM_LOCK */ +#ifdef OLDADD + {RTM_OLDADD, "RTM_OLDADD"}, +#endif /* RTM_OLDADD */ +#ifdef RTM_OLDDEL + {RTM_OLDDEL, "RTM_OLDDEL"}, +#endif /* RTM_OLDDEL */ +#ifdef RTM_RESOLVE + {RTM_RESOLVE, "RTM_RESOLVE"}, +#endif /* RTM_RESOLVE */ + {RTM_NEWADDR, "RTM_NEWADDR"}, + {RTM_DELADDR, "RTM_DELADDR"}, + {RTM_IFINFO, "RTM_IFINFO"}, +#ifdef RTM_OIFINFO + {RTM_OIFINFO, "RTM_OIFINFO"}, +#endif /* RTM_OIFINFO */ +#ifdef RTM_NEWMADDR + {RTM_NEWMADDR, "RTM_NEWMADDR"}, +#endif /* RTM_NEWMADDR */ +#ifdef RTM_DELMADDR + {RTM_DELMADDR, "RTM_DELMADDR"}, +#endif /* RTM_DELMADDR */ +#ifdef RTM_IFANNOUNCE + {RTM_IFANNOUNCE, "RTM_IFANNOUNCE"}, +#endif /* RTM_IFANNOUNCE */ +#ifdef RTM_IEEE80211 + {RTM_IEEE80211, "RTM_IEEE80211"}, +#endif + {0}}; + +static const struct message rtm_flag_str[] = {{RTF_UP, "UP"}, + {RTF_GATEWAY, "GATEWAY"}, + {RTF_HOST, "HOST"}, + {RTF_REJECT, "REJECT"}, + {RTF_DYNAMIC, "DYNAMIC"}, + {RTF_MODIFIED, "MODIFIED"}, + {RTF_DONE, "DONE"}, +#ifdef RTF_MASK + {RTF_MASK, "MASK"}, +#endif /* RTF_MASK */ +#ifdef RTF_CLONING + {RTF_CLONING, "CLONING"}, +#endif /* RTF_CLONING */ +#ifdef RTF_XRESOLVE + {RTF_XRESOLVE, "XRESOLVE"}, +#endif /* RTF_XRESOLVE */ +#ifdef RTF_LLINFO + {RTF_LLINFO, "LLINFO"}, +#endif /* RTF_LLINFO */ + {RTF_STATIC, "STATIC"}, + {RTF_BLACKHOLE, "BLACKHOLE"}, +#ifdef RTF_PRIVATE + {RTF_PRIVATE, "PRIVATE"}, +#endif /* RTF_PRIVATE */ + {RTF_PROTO1, "PROTO1"}, + {RTF_PROTO2, "PROTO2"}, +#ifdef RTF_PRCLONING + {RTF_PRCLONING, "PRCLONING"}, +#endif /* RTF_PRCLONING */ +#ifdef RTF_WASCLONED + {RTF_WASCLONED, "WASCLONED"}, +#endif /* RTF_WASCLONED */ +#ifdef RTF_PROTO3 + {RTF_PROTO3, "PROTO3"}, +#endif /* RTF_PROTO3 */ +#ifdef RTF_PINNED + {RTF_PINNED, "PINNED"}, +#endif /* RTF_PINNED */ +#ifdef RTF_LOCAL + {RTF_LOCAL, "LOCAL"}, +#endif /* RTF_LOCAL */ +#ifdef RTF_BROADCAST + {RTF_BROADCAST, "BROADCAST"}, +#endif /* RTF_BROADCAST */ +#ifdef RTF_MULTICAST + {RTF_MULTICAST, "MULTICAST"}, +#endif /* RTF_MULTICAST */ +#ifdef RTF_MULTIRT + {RTF_MULTIRT, "MULTIRT"}, +#endif /* RTF_MULTIRT */ +#ifdef RTF_SETSRC + {RTF_SETSRC, "SETSRC"}, +#endif /* RTF_SETSRC */ + {0}}; + +/* Kernel routing update socket. */ +int routing_sock = -1; + +/* Kernel dataplane routing update socket, used in the dataplane pthread + * context. + */ +int dplane_routing_sock = -1; + +/* Yes I'm checking ugly routing socket behavior. */ +/* #define DEBUG */ + +size_t _rta_get(caddr_t sap, void *destp, size_t destlen, bool checkaf); +size_t rta_get(caddr_t sap, void *dest, size_t destlen); +size_t rta_getattr(caddr_t sap, void *destp, size_t destlen); +size_t rta_getsdlname(caddr_t sap, void *dest, short *destlen); +const char *rtatostr(unsigned int flags, char *buf, size_t buflen); + +/* Supported address family check. */ +static inline int af_check(int family) +{ + if (family == AF_INET) + return 1; + if (family == AF_INET6) + return 1; + return 0; +} + +size_t _rta_get(caddr_t sap, void *destp, size_t destlen, bool checkaf) +{ + struct sockaddr *sa = (struct sockaddr *)sap; + struct sockaddr_dl *sdl; + uint8_t *dest = destp; + size_t tlen, copylen; + +#ifdef HAVE_STRUCT_SOCKADDR_SA_LEN + copylen = sa->sa_len; + tlen = (copylen == 0) ? sizeof(ROUNDUP_TYPE) : ROUNDUP(copylen); +#else /* !HAVE_STRUCT_SOCKADDR_SA_LEN */ + copylen = tlen = SAROUNDUP(sap); +#endif /* !HAVE_STRUCT_SOCKADDR_SA_LEN */ + + if (copylen > 0 && dest != NULL) { + if (checkaf && af_check(sa->sa_family) == 0) + return tlen; + /* + * Handle sockaddr_dl corner case: + * RTA_NETMASK might be AF_LINK, but it doesn't anything + * relevant (e.g. zeroed out fields). Check for this + * case and avoid warning log message. + */ + if (sa->sa_family == AF_LINK) { + sdl = (struct sockaddr_dl *)sa; + if (sdl->sdl_index == 0 || sdl->sdl_nlen == 0) + copylen = destlen; + } + + if (copylen > destlen) { + zlog_warn( + "%s: destination buffer too small (%zu vs %zu)", + __func__, copylen, destlen); + memcpy(dest, sap, destlen); + } else + memcpy(dest, sap, copylen); + } + + return tlen; +} + +size_t rta_get(caddr_t sap, void *destp, size_t destlen) +{ + return _rta_get(sap, destp, destlen, true); +} + +size_t rta_getattr(caddr_t sap, void *destp, size_t destlen) +{ + return _rta_get(sap, destp, destlen, false); +} + +size_t rta_getsdlname(caddr_t sap, void *destp, short *destlen) +{ + struct sockaddr_dl *sdl = (struct sockaddr_dl *)sap; + uint8_t *dest = destp; + size_t tlen, copylen; + + copylen = sdl->sdl_nlen; +#ifdef HAVE_STRUCT_SOCKADDR_SA_LEN + struct sockaddr *sa = (struct sockaddr *)sap; + + tlen = (sa->sa_len == 0) ? sizeof(ROUNDUP_TYPE) : ROUNDUP(sa->sa_len); +#else /* !HAVE_STRUCT_SOCKADDR_SA_LEN */ + tlen = SAROUNDUP(sap); +#endif /* !HAVE_STRUCT_SOCKADDR_SA_LEN */ + + if (copylen > 0 && dest != NULL && sdl->sdl_family == AF_LINK) { + if (copylen > IFNAMSIZ) { + zlog_warn( + "%s: destination buffer too small (%zu vs %d)", + __func__, copylen, IFNAMSIZ); + memcpy(dest, sdl->sdl_data, IFNAMSIZ); + dest[IFNAMSIZ] = 0; + *destlen = IFNAMSIZ; + } else { + memcpy(dest, sdl->sdl_data, copylen); + dest[copylen] = 0; + *destlen = copylen; + } + } else + *destlen = 0; + + return tlen; +} + +const char *rtatostr(unsigned int flags, char *buf, size_t buflen) +{ + const char *flagstr, *bufstart; + int bit, wlen; + char ustr[32]; + + /* Hold the pointer to the buffer beginning. */ + bufstart = buf; + + for (bit = 1; bit; bit <<= 1) { + if ((flags & bit) == 0) + continue; + + switch (bit) { + case RTA_DST: + flagstr = "DST"; + break; + case RTA_GATEWAY: + flagstr = "GATEWAY"; + break; + case RTA_NETMASK: + flagstr = "NETMASK"; + break; +#ifdef RTA_GENMASK + case RTA_GENMASK: + flagstr = "GENMASK"; + break; +#endif /* RTA_GENMASK */ + case RTA_IFP: + flagstr = "IFP"; + break; + case RTA_IFA: + flagstr = "IFA"; + break; +#ifdef RTA_AUTHOR + case RTA_AUTHOR: + flagstr = "AUTHOR"; + break; +#endif /* RTA_AUTHOR */ + case RTA_BRD: + flagstr = "BRD"; + break; +#ifdef RTA_SRC + case RTA_SRC: + flagstr = "SRC"; + break; +#endif /* RTA_SRC */ +#ifdef RTA_SRCMASK + case RTA_SRCMASK: + flagstr = "SRCMASK"; + break; +#endif /* RTA_SRCMASK */ +#ifdef RTA_LABEL + case RTA_LABEL: + flagstr = "LABEL"; + break; +#endif /* RTA_LABEL */ + + default: + snprintf(ustr, sizeof(ustr), "0x%x", bit); + flagstr = ustr; + break; + } + + wlen = snprintf(buf, buflen, "%s,", flagstr); + buf += wlen; + buflen -= wlen; + } + + /* Check for empty buffer. */ + if (bufstart != buf) + buf--; + + /* Remove the last comma. */ + *buf = 0; + + return bufstart; +} + +/* Dump routing table flag for debug purpose. */ +static void rtm_flag_dump(int flag) +{ + const struct message *mes; + static char buf[BUFSIZ]; + + buf[0] = '\0'; + for (mes = rtm_flag_str; mes->key != 0; mes++) { + if (mes->key & flag) { + strlcat(buf, mes->str, BUFSIZ); + strlcat(buf, " ", BUFSIZ); + } + } + zlog_debug("Kernel: %s", buf); +} + +#ifdef RTM_IFANNOUNCE +/* Interface adding function */ +static int ifan_read(struct if_announcemsghdr *ifan) +{ + struct interface *ifp; + + ifp = if_lookup_by_index(ifan->ifan_index, VRF_DEFAULT); + + if (ifp) + assert((ifp->ifindex == ifan->ifan_index) + || (ifp->ifindex == IFINDEX_INTERNAL)); + + if ((ifp == NULL) || ((ifp->ifindex == IFINDEX_INTERNAL) + && (ifan->ifan_what == IFAN_ARRIVAL))) { + if (IS_ZEBRA_DEBUG_KERNEL) + zlog_debug( + "%s: creating interface for ifindex %d, name %s", + __func__, ifan->ifan_index, ifan->ifan_name); + + /* Create Interface */ + ifp = if_get_by_name(ifan->ifan_name, VRF_DEFAULT, + VRF_DEFAULT_NAME); + if_set_index(ifp, ifan->ifan_index); + + if_get_metric(ifp); + if_add_update(ifp); + } else if (ifp != NULL && ifan->ifan_what == IFAN_DEPARTURE) + if_delete_update(&ifp); + + if (ifp) { + if_get_flags(ifp); + if_get_mtu(ifp); + if_get_metric(ifp); + } + if (IS_ZEBRA_DEBUG_KERNEL) + zlog_debug("%s: interface %s index %d", __func__, + ifan->ifan_name, ifan->ifan_index); + + return 0; +} +#endif /* RTM_IFANNOUNCE */ + +#ifdef HAVE_BSD_IFI_LINK_STATE +/* BSD link detect translation */ +static void bsd_linkdetect_translate(struct if_msghdr *ifm) +{ + if ((ifm->ifm_data.ifi_link_state >= LINK_STATE_UP) + || (ifm->ifm_data.ifi_link_state == LINK_STATE_UNKNOWN)) + SET_FLAG(ifm->ifm_flags, IFF_RUNNING); + else + UNSET_FLAG(ifm->ifm_flags, IFF_RUNNING); +} +#endif /* HAVE_BSD_IFI_LINK_STATE */ + +static enum zebra_link_type sdl_to_zebra_link_type(unsigned int sdlt) +{ + switch (sdlt) { + case IFT_ETHER: + return ZEBRA_LLT_ETHER; + case IFT_X25: + return ZEBRA_LLT_X25; + case IFT_FDDI: + return ZEBRA_LLT_FDDI; + case IFT_PPP: + return ZEBRA_LLT_PPP; + case IFT_LOOP: + return ZEBRA_LLT_LOOPBACK; + case IFT_SLIP: + return ZEBRA_LLT_SLIP; + case IFT_ARCNET: + return ZEBRA_LLT_ARCNET; + case IFT_ATM: + return ZEBRA_LLT_ATM; + case IFT_LOCALTALK: + return ZEBRA_LLT_LOCALTLK; + case IFT_HIPPI: + return ZEBRA_LLT_HIPPI; +#ifdef IFT_IEEE1394 + case IFT_IEEE1394: + return ZEBRA_LLT_IEEE1394; +#endif + + default: + return ZEBRA_LLT_UNKNOWN; + } +} + +/* + * Handle struct if_msghdr obtained from reading routing socket or + * sysctl (from interface_list). There may or may not be sockaddrs + * present after the header. + */ +int ifm_read(struct if_msghdr *ifm) +{ + struct interface *ifp = NULL; + struct sockaddr_dl *sdl = NULL; + char ifname[IFNAMSIZ]; + short ifnlen = 0; + int maskbit; + caddr_t cp; + char fbuf[64]; + + /* terminate ifname at head (for strnlen) and tail (for safety) */ + ifname[IFNAMSIZ - 1] = '\0'; + + /* paranoia: sanity check structure */ + if (ifm->ifm_msglen < sizeof(struct if_msghdr)) { + flog_err(EC_ZEBRA_NETLINK_LENGTH_ERROR, + "%s: ifm->ifm_msglen %d too short", __func__, + ifm->ifm_msglen); + return -1; + } + + /* + * Check for a sockaddr_dl following the message. First, point to + * where a socakddr might be if one follows the message. + */ + cp = (void *)(ifm + 1); + + /* Look up for RTA_IFP and skip others. */ + for (maskbit = 1; maskbit; maskbit <<= 1) { + if ((maskbit & ifm->ifm_addrs) == 0) + continue; + if (maskbit != RTA_IFP) { + cp += rta_get(cp, NULL, 0); + continue; + } + + /* Save the pointer to the structure. */ + sdl = (struct sockaddr_dl *)cp; + cp += rta_getsdlname(cp, ifname, &ifnlen); + } + + if (IS_ZEBRA_DEBUG_KERNEL) + zlog_debug("%s: sdl ifname %s addrs {%s}", __func__, + (ifnlen ? ifname : "(nil)"), + rtatostr(ifm->ifm_addrs, fbuf, sizeof(fbuf))); + + /* + * Look up on ifindex first, because ifindices are the primary handle + * for + * interfaces across the user/kernel boundary, for most systems. (Some + * messages, such as up/down status changes on NetBSD, do not include a + * sockaddr_dl). + */ + if ((ifp = if_lookup_by_index(ifm->ifm_index, VRF_DEFAULT)) != NULL) { + /* we have an ifp, verify that the name matches as some systems, + * eg Solaris, have a 1:many association of ifindex:ifname + * if they dont match, we dont have the correct ifp and should + * set it back to NULL to let next check do lookup by name + */ + if (ifnlen && (strncmp(ifp->name, ifname, IFNAMSIZ) != 0)) { + if (IS_ZEBRA_DEBUG_KERNEL) + zlog_debug( + "%s: ifp name %s doesn't match sdl name %s", + __func__, ifp->name, ifname); + ifp = NULL; + } + } + + /* + * If we dont have an ifp, try looking up by name. Particularly as some + * systems (Solaris) have a 1:many mapping of ifindex:ifname - the + * ifname + * is therefore our unique handle to that interface. + * + * Interfaces specified in the configuration file for which the ifindex + * has not been determined will have ifindex == IFINDEX_INTERNAL, and + * such + * interfaces are found by this search, and then their ifindex values + * can + * be filled in. + */ + if ((ifp == NULL) && ifnlen) + ifp = if_lookup_by_name(ifname, VRF_DEFAULT); + + /* + * If ifp still does not exist or has an invalid index + * (IFINDEX_INTERNAL), + * create or fill in an interface. + */ + if ((ifp == NULL) || (ifp->ifindex == IFINDEX_INTERNAL)) { + /* + * To create or fill in an interface, a sockaddr_dl (via + * RTA_IFP) is required. + */ + if (!ifnlen) { + zlog_debug("Interface index %d (new) missing ifname", + ifm->ifm_index); + return -1; + } + +#ifndef RTM_IFANNOUNCE + /* Down->Down interface should be ignored here. + * See further comment below. + */ + if (!CHECK_FLAG(ifm->ifm_flags, IFF_UP)) + return 0; +#endif /* !RTM_IFANNOUNCE */ + + if (ifp == NULL) { + /* Interface that zebra was not previously aware of, so + * create. */ + ifp = if_get_by_name(ifname, VRF_DEFAULT, + VRF_DEFAULT_NAME); + if (IS_ZEBRA_DEBUG_KERNEL) + zlog_debug("%s: creating ifp for ifindex %d", + __func__, ifm->ifm_index); + } + + if (IS_ZEBRA_DEBUG_KERNEL) + zlog_debug( + "%s: updated/created ifp, ifname %s, ifindex %d", + __func__, ifp->name, ifp->ifindex); + /* + * Fill in newly created interface structure, or larval + * structure with ifindex IFINDEX_INTERNAL. + */ + if_set_index(ifp, ifm->ifm_index); + +#ifdef HAVE_BSD_IFI_LINK_STATE /* translate BSD kernel msg for link-state */ + bsd_linkdetect_translate(ifm); +#endif /* HAVE_BSD_IFI_LINK_STATE */ + + if_flags_update(ifp, ifm->ifm_flags); +#if defined(__bsdi__) + if_kvm_get_mtu(ifp); +#else + if_get_mtu(ifp); +#endif /* __bsdi__ */ + if_get_metric(ifp); + + /* + * XXX sockaddr_dl contents can be larger than the structure + * definition. There are 2 big families here: + * - BSD has sdl_len + sdl_data[16] + overruns sdl_data + * we MUST use sdl_len here or we'll truncate data. + * - Solaris has no sdl_len, but sdl_data[244] + * presumably, it's not going to run past that, so sizeof() + * is fine here. + * a nonzero ifnlen from rta_getsdlname() means sdl is valid + */ + ifp->ll_type = ZEBRA_LLT_UNKNOWN; + ifp->hw_addr_len = 0; + if (ifnlen) { +#ifdef HAVE_STRUCT_SOCKADDR_DL_SDL_LEN + memcpy(&((struct zebra_if *)ifp->info)->sdl, sdl, + sdl->sdl_len); +#else + memcpy(&((struct zebra_if *)ifp->info)->sdl, sdl, + sizeof(struct sockaddr_dl)); +#endif /* HAVE_STRUCT_SOCKADDR_DL_SDL_LEN */ + + ifp->ll_type = sdl_to_zebra_link_type(sdl->sdl_type); + if (sdl->sdl_alen <= sizeof(ifp->hw_addr)) { + memcpy(ifp->hw_addr, LLADDR(sdl), + sdl->sdl_alen); + ifp->hw_addr_len = sdl->sdl_alen; + } + } + + if_add_update(ifp); + } else + /* + * Interface structure exists. Adjust stored flags from + * notification. If interface has up->down or down->up + * transition, call state change routines (to adjust routes, + * notify routing daemons, etc.). (Other flag changes are stored + * but apparently do not trigger action.) + */ + { + if (ifp->ifindex != ifm->ifm_index) { + zlog_debug( + "%s: index mismatch, ifname %s, ifp index %d, ifm index %d", + __func__, ifp->name, ifp->ifindex, + ifm->ifm_index); + return -1; + } + +#ifdef HAVE_BSD_IFI_LINK_STATE /* translate BSD kernel msg for link-state */ + bsd_linkdetect_translate(ifm); +#endif /* HAVE_BSD_IFI_LINK_STATE */ + + /* update flags and handle operative->inoperative transition, if + * any */ + if_flags_update(ifp, ifm->ifm_flags); + +#ifndef RTM_IFANNOUNCE + if (!if_is_up(ifp)) { + /* No RTM_IFANNOUNCE on this platform, so we can never + * distinguish between ~IFF_UP and delete. We must + * presume + * it has been deleted. + * Eg, Solaris will not notify us of unplumb. + * + * XXX: Fixme - this should be runtime detected + * So that a binary compiled on a system with IFANNOUNCE + * will still behave correctly if run on a platform + * without + */ + if_delete_update(&ifp); + } +#endif /* RTM_IFANNOUNCE */ + if (ifp && if_is_up(ifp)) { +#if defined(__bsdi__) + if_kvm_get_mtu(ifp); +#else + if_get_mtu(ifp); +#endif /* __bsdi__ */ + if_get_metric(ifp); + } + } + + if (ifp) { +#ifdef HAVE_NET_RT_IFLIST + ifp->stats = ifm->ifm_data; +#endif /* HAVE_NET_RT_IFLIST */ + ifp->speed = ifm->ifm_data.ifi_baudrate / 1000000; + + if (IS_ZEBRA_DEBUG_KERNEL) + zlog_debug("%s: interface %s index %d", __func__, + ifp->name, ifp->ifindex); + } + + return 0; +} + +/* Address read from struct ifa_msghdr. */ +static void ifam_read_mesg(struct ifa_msghdr *ifm, union sockunion *addr, + union sockunion *mask, union sockunion *brd, + char *ifname, short *ifnlen) +{ + caddr_t pnt, end; + union sockunion dst; + union sockunion gateway; + int maskbit; + char fbuf[64]; + + pnt = (caddr_t)(ifm + 1); + end = ((caddr_t)ifm) + ifm->ifam_msglen; + + /* Be sure structure is cleared */ + memset(mask, 0, sizeof(union sockunion)); + memset(addr, 0, sizeof(union sockunion)); + memset(brd, 0, sizeof(union sockunion)); + memset(&dst, 0, sizeof(union sockunion)); + memset(&gateway, 0, sizeof(union sockunion)); + + /* We fetch each socket variable into sockunion. */ + for (maskbit = 1; maskbit; maskbit <<= 1) { + if ((maskbit & ifm->ifam_addrs) == 0) + continue; + + switch (maskbit) { + case RTA_DST: + pnt += rta_get(pnt, &dst, sizeof(dst)); + break; + case RTA_GATEWAY: + pnt += rta_get(pnt, &gateway, sizeof(gateway)); + break; + case RTA_NETMASK: + pnt += rta_getattr(pnt, mask, sizeof(*mask)); + break; + case RTA_IFP: + pnt += rta_getsdlname(pnt, ifname, ifnlen); + break; + case RTA_IFA: + pnt += rta_get(pnt, addr, sizeof(*addr)); + break; + case RTA_BRD: + pnt += rta_get(pnt, brd, sizeof(*brd)); + break; + + default: + pnt += rta_get(pnt, NULL, 0); + break; + } + + if (pnt > end) { + zlog_warn("%s: overflow detected (pnt:%p end:%p)", + __func__, pnt, end); + break; + } + } + + if (IS_ZEBRA_DEBUG_KERNEL) { + switch (sockunion_family(addr)) { + case AF_INET: + case AF_INET6: { + int masklen = + (sockunion_family(addr) == AF_INET) + ? ip_masklen(mask->sin.sin_addr) + : ip6_masklen(mask->sin6.sin6_addr); + zlog_debug( + "%s: ifindex %d, ifname %s, ifam_addrs {%s}, ifam_flags 0x%x, addr %pSU/%d broad %pSU dst %pSU gateway %pSU", + __func__, ifm->ifam_index, + (ifnlen ? ifname : "(nil)"), + rtatostr(ifm->ifam_addrs, fbuf, sizeof(fbuf)), + ifm->ifam_flags, addr, masklen, brd, &dst, + &gateway); + } break; + default: + zlog_debug("%s: ifindex %d, ifname %s, ifam_addrs {%s}", + __func__, ifm->ifam_index, + (ifnlen ? ifname : "(nil)"), + rtatostr(ifm->ifam_addrs, fbuf, + sizeof(fbuf))); + break; + } + } + + /* Assert read up end point matches to end point */ + pnt = (caddr_t)ROUNDUP((size_t)pnt); + if (pnt != (caddr_t)ROUNDUP((size_t)end)) + zlog_debug("ifam_read() doesn't read all socket data"); +} + +/* Interface's address information get. */ +int ifam_read(struct ifa_msghdr *ifam) +{ + struct interface *ifp = NULL; + union sockunion addr, mask, brd; + bool dest_same = false; + char ifname[INTERFACE_NAMSIZ]; + short ifnlen = 0; + bool isalias = false; + uint32_t flags = 0; + + ifname[0] = ifname[INTERFACE_NAMSIZ - 1] = '\0'; + + /* Allocate and read address information. */ + ifam_read_mesg(ifam, &addr, &mask, &brd, ifname, &ifnlen); + + if ((ifp = if_lookup_by_index(ifam->ifam_index, VRF_DEFAULT)) == NULL) { + flog_warn(EC_ZEBRA_UNKNOWN_INTERFACE, + "%s: no interface for ifname %s, index %d", __func__, + ifname, ifam->ifam_index); + return -1; + } + + if (ifnlen && strncmp(ifp->name, ifname, INTERFACE_NAMSIZ)) + isalias = true; + + /* + * Mark the alias prefixes as secondary + */ + if (isalias) + SET_FLAG(flags, ZEBRA_IFA_SECONDARY); + + /* N.B. The info in ifa_msghdr does not tell us whether the RTA_BRD + field contains a broadcast address or a peer address, so we are + forced to + rely upon the interface type. */ + if (if_is_pointopoint(ifp)) + SET_FLAG(flags, ZEBRA_IFA_PEER); + else { + if (memcmp(&addr, &brd, sizeof(addr)) == 0) + dest_same = true; + } + +#if 0 + /* it might seem cute to grab the interface metric here, however + * we're processing an address update message, and so some systems + * (e.g. FBSD) dont bother to fill in ifam_metric. Disabled, but left + * in deliberately, as comment. + */ + ifp->metric = ifam->ifam_metric; +#endif + + /* Add connected address. */ + switch (sockunion_family(&addr)) { + case AF_INET: + if (ifam->ifam_type == RTM_NEWADDR) + connected_add_ipv4(ifp, flags, &addr.sin.sin_addr, + ip_masklen(mask.sin.sin_addr), + dest_same ? NULL : &brd.sin.sin_addr, + (isalias ? ifname : NULL), + METRIC_MAX); + else + connected_delete_ipv4(ifp, flags, &addr.sin.sin_addr, + ip_masklen(mask.sin.sin_addr), + dest_same ? NULL + : &brd.sin.sin_addr); + break; + case AF_INET6: + /* Unset interface index from link-local address when IPv6 stack + is KAME. */ + if (IN6_IS_ADDR_LINKLOCAL(&addr.sin6.sin6_addr)) { + SET_IN6_LINKLOCAL_IFINDEX(addr.sin6.sin6_addr, 0); + } + + if (ifam->ifam_type == RTM_NEWADDR) + connected_add_ipv6(ifp, flags, &addr.sin6.sin6_addr, + NULL, + ip6_masklen(mask.sin6.sin6_addr), + (isalias ? ifname : NULL), + METRIC_MAX); + else + connected_delete_ipv6(ifp, &addr.sin6.sin6_addr, NULL, + ip6_masklen(mask.sin6.sin6_addr)); + break; + default: + /* Unsupported family silently ignore... */ + break; + } + + /* Check interface flag for implicit up of the interface. */ + if_refresh(ifp); + + return 0; +} + +/* Interface function for reading kernel routing table information. */ +static int rtm_read_mesg(struct rt_msghdr *rtm, union sockunion *dest, + union sockunion *mask, union sockunion *gate, + char *ifname, short *ifnlen) +{ + caddr_t pnt, end; + int maskbit; + + /* Pnt points out socket data start point. */ + pnt = (caddr_t)(rtm + 1); + end = ((caddr_t)rtm) + rtm->rtm_msglen; + + /* rt_msghdr version check. */ + if (rtm->rtm_version != RTM_VERSION) + flog_warn(EC_ZEBRA_RTM_VERSION_MISMATCH, + "Routing message version different %d should be %d.This may cause problem", + rtm->rtm_version, RTM_VERSION); + + /* Be sure structure is cleared */ + memset(dest, 0, sizeof(union sockunion)); + memset(gate, 0, sizeof(union sockunion)); + memset(mask, 0, sizeof(union sockunion)); + + /* We fetch each socket variable into sockunion. */ + /* We fetch each socket variable into sockunion. */ + for (maskbit = 1; maskbit; maskbit <<= 1) { + if ((maskbit & rtm->rtm_addrs) == 0) + continue; + + switch (maskbit) { + case RTA_DST: + pnt += rta_get(pnt, dest, sizeof(*dest)); + break; + case RTA_GATEWAY: + pnt += rta_get(pnt, gate, sizeof(*gate)); + break; + case RTA_NETMASK: + pnt += rta_getattr(pnt, mask, sizeof(*mask)); + break; + case RTA_IFP: + pnt += rta_getsdlname(pnt, ifname, ifnlen); + break; + + default: + pnt += rta_get(pnt, NULL, 0); + break; + } + + if (pnt > end) { + zlog_warn("%s: overflow detected (pnt:%p end:%p)", + __func__, pnt, end); + break; + } + } + + /* If there is netmask information set it's family same as + destination family*/ + if (rtm->rtm_addrs & RTA_NETMASK) + mask->sa.sa_family = dest->sa.sa_family; + + /* Assert read up to the end of pointer. */ + if (pnt != end) + zlog_debug("rtm_read() doesn't read all socket data."); + + return rtm->rtm_flags; +} + +void rtm_read(struct rt_msghdr *rtm) +{ + int flags; + uint32_t zebra_flags; + union sockunion dest, mask, gate; + char ifname[INTERFACE_NAMSIZ + 1]; + short ifnlen = 0; + struct nexthop nh; + struct prefix p; + ifindex_t ifindex = 0; + afi_t afi; + char fbuf[64]; + int32_t proto = ZEBRA_ROUTE_KERNEL; + uint8_t distance = 0; + + zebra_flags = 0; + + /* Read destination and netmask and gateway from rtm message + structure. */ + flags = rtm_read_mesg(rtm, &dest, &mask, &gate, ifname, &ifnlen); + if (!(flags & RTF_DONE)) + return; + if (IS_ZEBRA_DEBUG_KERNEL) + zlog_debug("%s: got rtm of type %d (%s) addrs {%s}", __func__, + rtm->rtm_type, + lookup_msg(rtm_type_str, rtm->rtm_type, NULL), + rtatostr(rtm->rtm_addrs, fbuf, sizeof(fbuf))); + +#ifdef RTF_CLONED /*bsdi, netbsd 1.6*/ + if (flags & RTF_CLONED) + return; +#endif +#ifdef RTF_WASCLONED /*freebsd*/ + if (flags & RTF_WASCLONED) + return; +#endif + + if ((rtm->rtm_type == RTM_ADD || rtm->rtm_type == RTM_CHANGE) + && !(flags & RTF_UP)) + return; + + /* This is connected route. */ + if (!(flags & RTF_GATEWAY)) + return; + + if (flags & RTF_PROTO1) { + SET_FLAG(zebra_flags, ZEBRA_FLAG_SELFROUTE); + proto = ZEBRA_ROUTE_STATIC; + distance = 255; + } + + memset(&nh, 0, sizeof(nh)); + + nh.vrf_id = VRF_DEFAULT; + /* This is a reject or blackhole route */ + if (flags & RTF_REJECT) { + nh.type = NEXTHOP_TYPE_BLACKHOLE; + nh.bh_type = BLACKHOLE_REJECT; + } else if (flags & RTF_BLACKHOLE) { + nh.type = NEXTHOP_TYPE_BLACKHOLE; + nh.bh_type = BLACKHOLE_NULL; + } + + /* + * Ignore our own messages. + */ + if (rtm->rtm_type != RTM_GET && rtm->rtm_pid == pid) + return; + + if (dest.sa.sa_family == AF_INET) { + afi = AFI_IP; + p.family = AF_INET; + p.u.prefix4 = dest.sin.sin_addr; + if (flags & RTF_HOST) + p.prefixlen = IPV4_MAX_BITLEN; + else + p.prefixlen = ip_masklen(mask.sin.sin_addr); + + if (!nh.type) { + nh.type = NEXTHOP_TYPE_IPV4; + nh.gate.ipv4 = gate.sin.sin_addr; + } + } else if (dest.sa.sa_family == AF_INET6) { + afi = AFI_IP6; + p.family = AF_INET6; + p.u.prefix6 = dest.sin6.sin6_addr; + if (flags & RTF_HOST) + p.prefixlen = IPV6_MAX_BITLEN; + else + p.prefixlen = ip6_masklen(mask.sin6.sin6_addr); + +#ifdef KAME + if (IN6_IS_ADDR_LINKLOCAL(&gate.sin6.sin6_addr)) { + ifindex = IN6_LINKLOCAL_IFINDEX(gate.sin6.sin6_addr); + SET_IN6_LINKLOCAL_IFINDEX(gate.sin6.sin6_addr, 0); + } +#endif /* KAME */ + + if (!nh.type) { + nh.type = ifindex ? NEXTHOP_TYPE_IPV6_IFINDEX + : NEXTHOP_TYPE_IPV6; + nh.gate.ipv6 = gate.sin6.sin6_addr; + nh.ifindex = ifindex; + } + } else + return; + + if (rtm->rtm_type == RTM_GET || rtm->rtm_type == RTM_ADD + || rtm->rtm_type == RTM_CHANGE) + rib_add(afi, SAFI_UNICAST, VRF_DEFAULT, proto, 0, zebra_flags, + &p, NULL, &nh, 0, RT_TABLE_MAIN, 0, 0, distance, 0, + false); + else + rib_delete(afi, SAFI_UNICAST, VRF_DEFAULT, proto, 0, + zebra_flags, &p, NULL, &nh, 0, RT_TABLE_MAIN, 0, + distance, true); +} + +/* Interface function for the kernel routing table updates. Support + * for RTM_CHANGE will be needed. + * Exported only for rt_socket.c + */ +int rtm_write(int message, union sockunion *dest, union sockunion *mask, + union sockunion *gate, union sockunion *mpls, unsigned int index, + enum blackhole_type bh_type, int metric) +{ + int ret; + caddr_t pnt; + struct interface *ifp; + + /* Sequencial number of routing message. */ + static int msg_seq = 0; + + /* Struct of rt_msghdr and buffer for storing socket's data. */ + struct { + struct rt_msghdr rtm; + char buf[512]; + } msg; + + if (dplane_routing_sock < 0) + return ZEBRA_ERR_EPERM; + + /* Clear and set rt_msghdr values */ + memset(&msg, 0, sizeof(msg)); + msg.rtm.rtm_version = RTM_VERSION; + msg.rtm.rtm_type = message; + msg.rtm.rtm_seq = msg_seq++; + msg.rtm.rtm_addrs = RTA_DST; + msg.rtm.rtm_addrs |= RTA_GATEWAY; + msg.rtm.rtm_flags = RTF_UP; +#ifdef __OpenBSD__ + msg.rtm.rtm_flags |= RTF_MPATH; + msg.rtm.rtm_fmask = RTF_MPLS; +#endif + msg.rtm.rtm_index = index; + + if (metric != 0) { + msg.rtm.rtm_rmx.rmx_hopcount = metric; + msg.rtm.rtm_inits |= RTV_HOPCOUNT; + } + + ifp = if_lookup_by_index(index, VRF_DEFAULT); + + if (gate && (message == RTM_ADD || message == RTM_CHANGE)) + msg.rtm.rtm_flags |= RTF_GATEWAY; + +/* When RTF_CLONING is unavailable on BSD, should we set some + * other flag instead? + */ +#ifdef RTF_CLONING + if (!gate && (message == RTM_ADD || message == RTM_CHANGE) && ifp + && (ifp->flags & IFF_POINTOPOINT) == 0) + msg.rtm.rtm_flags |= RTF_CLONING; +#endif /* RTF_CLONING */ + + /* If no protocol specific gateway is specified, use link + address for gateway. */ + if (!gate) { + if (!ifp) { + char dest_buf[INET_ADDRSTRLEN] = "NULL", + mask_buf[INET_ADDRSTRLEN] = "255.255.255.255"; + if (dest) + inet_ntop(AF_INET, &dest->sin.sin_addr, + dest_buf, INET_ADDRSTRLEN); + if (mask) + inet_ntop(AF_INET, &mask->sin.sin_addr, + mask_buf, INET_ADDRSTRLEN); + flog_warn( + EC_ZEBRA_RTM_NO_GATEWAY, + "%s: %s/%s: gate == NULL and no gateway found for ifindex %d", + __func__, dest_buf, mask_buf, index); + return -1; + } + gate = (union sockunion *)&((struct zebra_if *)ifp->info)->sdl; + } + + if (mask) + msg.rtm.rtm_addrs |= RTA_NETMASK; + else if (message == RTM_ADD || message == RTM_CHANGE) + msg.rtm.rtm_flags |= RTF_HOST; + +#ifdef __OpenBSD__ + if (mpls) { + msg.rtm.rtm_addrs |= RTA_SRC; + msg.rtm.rtm_flags |= RTF_MPLS; + + if (mpls->smpls.smpls_label + != htonl(MPLS_LABEL_IMPLICIT_NULL << MPLS_LABEL_OFFSET)) + msg.rtm.rtm_mpls = MPLS_OP_PUSH; + } +#endif + + /* Tagging route with flags */ + msg.rtm.rtm_flags |= (RTF_PROTO1); + + switch (bh_type) { + case BLACKHOLE_UNSPEC: + break; + case BLACKHOLE_REJECT: + msg.rtm.rtm_flags |= RTF_REJECT; + break; + default: + msg.rtm.rtm_flags |= RTF_BLACKHOLE; + break; + } + + +#define SOCKADDRSET(X, R) \ + if (msg.rtm.rtm_addrs & (R)) { \ + int len = SAROUNDUP(X); \ + memcpy(pnt, (caddr_t)(X), len); \ + pnt += len; \ + } + + pnt = (caddr_t)msg.buf; + + /* Write each socket data into rtm message buffer */ + SOCKADDRSET(dest, RTA_DST); + SOCKADDRSET(gate, RTA_GATEWAY); + SOCKADDRSET(mask, RTA_NETMASK); +#ifdef __OpenBSD__ + SOCKADDRSET(mpls, RTA_SRC); +#endif + + msg.rtm.rtm_msglen = pnt - (caddr_t)&msg; + + ret = write(dplane_routing_sock, &msg, msg.rtm.rtm_msglen); + + if (ret != msg.rtm.rtm_msglen) { + if (errno == EEXIST) + return ZEBRA_ERR_RTEXIST; + if (errno == ENETUNREACH) + return ZEBRA_ERR_RTUNREACH; + if (errno == ESRCH) + return ZEBRA_ERR_RTNOEXIST; + + flog_err_sys(EC_LIB_SOCKET, "%s: write : %s (%d)", __func__, + safe_strerror(errno), errno); + return ZEBRA_ERR_KERNEL; + } + return ZEBRA_ERR_NOERROR; +} + + +#include "thread.h" +#include "zebra/zserv.h" + +/* For debug purpose. */ +static void rtmsg_debug(struct rt_msghdr *rtm) +{ + char fbuf[64]; + + zlog_debug("Kernel: Len: %d Type: %s", rtm->rtm_msglen, + lookup_msg(rtm_type_str, rtm->rtm_type, NULL)); + rtm_flag_dump(rtm->rtm_flags); + zlog_debug("Kernel: message seq %d", rtm->rtm_seq); + zlog_debug("Kernel: pid %lld, rtm_addrs {%s}", (long long)rtm->rtm_pid, + rtatostr(rtm->rtm_addrs, fbuf, sizeof(fbuf))); +} + +/* This is pretty gross, better suggestions welcome -- mhandler */ +#ifndef RTAX_MAX +#ifdef RTA_NUMBITS +#define RTAX_MAX RTA_NUMBITS +#else +#define RTAX_MAX 8 +#endif /* RTA_NUMBITS */ +#endif /* RTAX_MAX */ + +/* Kernel routing table and interface updates via routing socket. */ +static void kernel_read(struct thread *thread) +{ + int sock; + int nbytes; + struct rt_msghdr *rtm; + + /* + * This must be big enough for any message the kernel might send. + * Rather than determining how many sockaddrs of what size might be + * in each particular message, just use RTAX_MAX of sockaddr_storage + * for each. Note that the sockaddrs must be after each message + * definition, or rather after whichever happens to be the largest, + * since the buffer needs to be big enough for a message and the + * sockaddrs together. + */ + union { + /* Routing information. */ + struct { + struct rt_msghdr rtm; + struct sockaddr_storage addr[RTAX_MAX]; + } r; + + /* Interface information. */ + struct { + struct if_msghdr ifm; + struct sockaddr_storage addr[RTAX_MAX]; + } im; + + /* Interface address information. */ + struct { + struct ifa_msghdr ifa; + struct sockaddr_storage addr[RTAX_MAX]; + } ia; + +#ifdef RTM_IFANNOUNCE + /* Interface arrival/departure */ + struct { + struct if_announcemsghdr ifan; + struct sockaddr_storage addr[RTAX_MAX]; + } ian; +#endif /* RTM_IFANNOUNCE */ + + } buf; + + /* Fetch routing socket. */ + sock = THREAD_FD(thread); + + nbytes = read(sock, &buf, sizeof(buf)); + + if (nbytes < 0) { + if (errno == ENOBUFS) { +#ifdef __FreeBSD__ + /* + * ENOBUFS indicates a temporary resource + * shortage and is not harmful for consistency of + * reading the routing socket. Ignore it. + */ + thread_add_read(zrouter.master, kernel_read, NULL, sock, + NULL); + return; +#else + flog_err(EC_ZEBRA_RECVMSG_OVERRUN, + "routing socket overrun: %s", + safe_strerror(errno)); + /* + * In this case we are screwed. + * There is no good way to + * recover zebra at this point. + */ + exit(-1); +#endif + } + if (errno != EAGAIN && errno != EWOULDBLOCK) + flog_err_sys(EC_LIB_SOCKET, "routing socket error: %s", + safe_strerror(errno)); + return; + } + + if (nbytes == 0) + return; + + thread_add_read(zrouter.master, kernel_read, NULL, sock, NULL); + + if (IS_ZEBRA_DEBUG_KERNEL) + rtmsg_debug(&buf.r.rtm); + + rtm = &buf.r.rtm; + + /* + * Ensure that we didn't drop any data, so that processing routines + * can assume they have the whole message. + */ + if (rtm->rtm_msglen != nbytes) { + zlog_debug("%s: rtm->rtm_msglen %d, nbytes %d, type %d", + __func__, rtm->rtm_msglen, nbytes, rtm->rtm_type); + return; + } + + switch (rtm->rtm_type) { + case RTM_ADD: + case RTM_DELETE: + case RTM_CHANGE: + rtm_read(rtm); + break; + case RTM_IFINFO: + ifm_read(&buf.im.ifm); + break; + case RTM_NEWADDR: + case RTM_DELADDR: + ifam_read(&buf.ia.ifa); + break; +#ifdef RTM_IFANNOUNCE + case RTM_IFANNOUNCE: + ifan_read(&buf.ian.ifan); + break; +#endif /* RTM_IFANNOUNCE */ + default: + if (IS_ZEBRA_DEBUG_KERNEL) + zlog_debug( + "Unprocessed RTM_type: %s(%d)", + lookup_msg(rtm_type_str, rtm->rtm_type, NULL), + rtm->rtm_type); + break; + } +} + +/* Make routing socket. */ +static void routing_socket(struct zebra_ns *zns) +{ + uint32_t default_rcvbuf; + socklen_t optlen; + + frr_with_privs(&zserv_privs) { + routing_sock = ns_socket(AF_ROUTE, SOCK_RAW, 0, zns->ns_id); + + dplane_routing_sock = + ns_socket(AF_ROUTE, SOCK_RAW, 0, zns->ns_id); + } + + if (routing_sock < 0) { + flog_err_sys(EC_LIB_SOCKET, "Can't init kernel routing socket"); + return; + } + + if (dplane_routing_sock < 0) { + flog_err_sys(EC_LIB_SOCKET, + "Can't init kernel dataplane routing socket"); + return; + } + +#ifdef SO_RERROR + /* Allow reporting of route(4) buffer overflow errors */ + int n = 1; + + if (setsockopt(routing_sock, SOL_SOCKET, SO_RERROR, &n, sizeof(n)) < 0) + flog_err_sys(EC_LIB_SOCKET, + "Can't set SO_RERROR on routing socket"); +#endif + + /* XXX: Socket should be NONBLOCK, however as we currently + * discard failed writes, this will lead to inconsistencies. + * For now, socket must be blocking. + */ + /*if (fcntl (routing_sock, F_SETFL, O_NONBLOCK) < 0) + zlog_warn ("Can't set O_NONBLOCK to routing socket");*/ + + /* + * Attempt to set a more useful receive buffer size + */ + optlen = sizeof(default_rcvbuf); + if (getsockopt(routing_sock, SOL_SOCKET, SO_RCVBUF, &default_rcvbuf, + &optlen) == -1) + flog_err_sys(EC_LIB_SOCKET, + "routing_sock sockopt SOL_SOCKET SO_RCVBUF"); + else { + for (; rcvbufsize > default_rcvbuf && + setsockopt(routing_sock, SOL_SOCKET, SO_RCVBUF, + &rcvbufsize, sizeof(rcvbufsize)) == -1 && + errno == ENOBUFS; + rcvbufsize /= 2) + ; + } + + /* kernel_read needs rewrite. */ + thread_add_read(zrouter.master, kernel_read, NULL, routing_sock, NULL); +} + +/* Exported interface function. This function simply calls + routing_socket (). */ +void kernel_init(struct zebra_ns *zns) +{ + routing_socket(zns); +} + +void kernel_terminate(struct zebra_ns *zns, bool complete) +{ + return; +} + +/* + * Global init for platform-/OS-specific things + */ +void kernel_router_init(void) +{ +} + +/* + * Global deinit for platform-/OS-specific things + */ +void kernel_router_terminate(void) +{ +} + +/* + * Called by the dplane pthread to read incoming OS messages and dispatch them. + */ +int kernel_dplane_read(struct zebra_dplane_info *info) +{ + return 0; +} + +void kernel_update_multi(struct dplane_ctx_q *ctx_list) +{ + struct zebra_dplane_ctx *ctx; + struct dplane_ctx_q handled_list; + enum zebra_dplane_result res = ZEBRA_DPLANE_REQUEST_SUCCESS; + + TAILQ_INIT(&handled_list); + + while (true) { + ctx = dplane_ctx_dequeue(ctx_list); + if (ctx == NULL) + break; + + /* + * A previous provider plugin may have asked to skip the + * kernel update. + */ + if (dplane_ctx_is_skip_kernel(ctx)) { + res = ZEBRA_DPLANE_REQUEST_SUCCESS; + goto skip_one; + } + + switch (dplane_ctx_get_op(ctx)) { + + case DPLANE_OP_ROUTE_INSTALL: + case DPLANE_OP_ROUTE_UPDATE: + case DPLANE_OP_ROUTE_DELETE: + res = kernel_route_update(ctx); + break; + + case DPLANE_OP_NH_INSTALL: + case DPLANE_OP_NH_UPDATE: + case DPLANE_OP_NH_DELETE: + res = kernel_nexthop_update(ctx); + break; + + case DPLANE_OP_LSP_INSTALL: + case DPLANE_OP_LSP_UPDATE: + case DPLANE_OP_LSP_DELETE: + res = kernel_lsp_update(ctx); + break; + + case DPLANE_OP_PW_INSTALL: + case DPLANE_OP_PW_UNINSTALL: + res = kernel_pw_update(ctx); + break; + + case DPLANE_OP_ADDR_INSTALL: + case DPLANE_OP_ADDR_UNINSTALL: + res = kernel_address_update_ctx(ctx); + break; + + case DPLANE_OP_MAC_INSTALL: + case DPLANE_OP_MAC_DELETE: + res = kernel_mac_update_ctx(ctx); + break; + + case DPLANE_OP_NEIGH_INSTALL: + case DPLANE_OP_NEIGH_UPDATE: + case DPLANE_OP_NEIGH_DELETE: + case DPLANE_OP_VTEP_ADD: + case DPLANE_OP_VTEP_DELETE: + case DPLANE_OP_NEIGH_DISCOVER: + res = kernel_neigh_update_ctx(ctx); + break; + + case DPLANE_OP_RULE_ADD: + case DPLANE_OP_RULE_DELETE: + case DPLANE_OP_RULE_UPDATE: + res = kernel_pbr_rule_update(ctx); + break; + + case DPLANE_OP_INTF_INSTALL: + case DPLANE_OP_INTF_UPDATE: + case DPLANE_OP_INTF_DELETE: + res = kernel_intf_update(ctx); + break; + + case DPLANE_OP_TC_INSTALL: + case DPLANE_OP_TC_UPDATE: + case DPLANE_OP_TC_DELETE: + res = kernel_tc_update(ctx); + break; + + /* Ignore 'notifications' - no-op */ + case DPLANE_OP_SYS_ROUTE_ADD: + case DPLANE_OP_SYS_ROUTE_DELETE: + case DPLANE_OP_ROUTE_NOTIFY: + case DPLANE_OP_LSP_NOTIFY: + res = ZEBRA_DPLANE_REQUEST_SUCCESS; + break; + + case DPLANE_OP_INTF_NETCONFIG: + res = kernel_intf_netconf_update(ctx); + break; + + case DPLANE_OP_NONE: + case DPLANE_OP_BR_PORT_UPDATE: + case DPLANE_OP_IPTABLE_ADD: + case DPLANE_OP_IPTABLE_DELETE: + case DPLANE_OP_IPSET_ADD: + case DPLANE_OP_IPSET_DELETE: + case DPLANE_OP_IPSET_ENTRY_ADD: + case DPLANE_OP_IPSET_ENTRY_DELETE: + case DPLANE_OP_NEIGH_IP_INSTALL: + case DPLANE_OP_NEIGH_IP_DELETE: + case DPLANE_OP_NEIGH_TABLE_UPDATE: + case DPLANE_OP_GRE_SET: + case DPLANE_OP_INTF_ADDR_ADD: + case DPLANE_OP_INTF_ADDR_DEL: + zlog_err("Unhandled dplane data for %s", + dplane_op2str(dplane_ctx_get_op(ctx))); + res = ZEBRA_DPLANE_REQUEST_FAILURE; + } + + skip_one: + dplane_ctx_set_status(ctx, res); + + dplane_ctx_enqueue_tail(&handled_list, ctx); + } + + TAILQ_INIT(ctx_list); + dplane_ctx_list_append(ctx_list, &handled_list); +} + +#endif /* !HAVE_NETLINK */ |