diff options
Diffstat (limited to '')
-rw-r--r-- | src/sock_inet.c | 521 |
1 files changed, 521 insertions, 0 deletions
diff --git a/src/sock_inet.c b/src/sock_inet.c new file mode 100644 index 0000000..028ffaa --- /dev/null +++ b/src/sock_inet.c @@ -0,0 +1,521 @@ +/* + * AF_INET/AF_INET6 socket management + * + * Copyright 2000-2020 Willy Tarreau <w@1wt.eu> + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version + * 2 of the License, or (at your option) any later version. + * + */ + +#include <errno.h> +#include <string.h> +#include <unistd.h> + +#include <sys/param.h> +#include <sys/socket.h> +#include <sys/types.h> + +#include <netinet/tcp.h> +#include <netinet/in.h> + +#include <haproxy/api.h> +#include <haproxy/errors.h> +#include <haproxy/fd.h> +#include <haproxy/global.h> +#include <haproxy/namespace.h> +#include <haproxy/receiver-t.h> +#include <haproxy/sock.h> +#include <haproxy/sock_inet.h> +#include <haproxy/tools.h> + +struct proto_fam proto_fam_inet4 = { + .name = "inet4", + .sock_domain = PF_INET, + .sock_family = AF_INET, + .sock_addrlen = sizeof(struct sockaddr_in), + .l3_addrlen = 32/8, + .addrcmp = sock_inet4_addrcmp, + .bind = sock_inet_bind_receiver, + .get_src = sock_get_src, + .get_dst = sock_inet_get_dst, + .set_port = sock_inet_set_port, +}; + +struct proto_fam proto_fam_inet6 = { + .name = "inet6", + .sock_domain = PF_INET6, + .sock_family = AF_INET6, + .sock_addrlen = sizeof(struct sockaddr_in6), + .l3_addrlen = 128/8, + .addrcmp = sock_inet6_addrcmp, + .bind = sock_inet_bind_receiver, + .get_src = sock_get_src, + .get_dst = sock_get_dst, + .set_port = sock_inet_set_port, +}; + +/* PLEASE NOTE for function below: + * - sock_inet4_* is solely for AF_INET (IPv4) + * - sock_inet6_* is solely for AF_INET6 (IPv6) + * - sock_inet_* is for either + * + * The address family SHOULD always be checked. In some cases a function will + * be used in a situation where the address family is guaranteed (e.g. protocol + * definitions), so the test may be avoided. This special case must then be + * mentioned in the comment before the function definition. + */ + +/* determine if the operating system uses IPV6_V6ONLY by default. 0=no, 1=yes. + * It also remains if IPv6 is not enabled/configured. + */ +int sock_inet6_v6only_default = 0; + +/* Default TCPv4/TCPv6 MSS settings. -1=unknown. */ +int sock_inet_tcp_maxseg_default = -1; +int sock_inet6_tcp_maxseg_default = -1; + +/* Compares two AF_INET sockaddr addresses. Returns 0 if they match or non-zero + * if they do not match. + */ +int sock_inet4_addrcmp(const struct sockaddr_storage *a, const struct sockaddr_storage *b) +{ + const struct sockaddr_in *a4 = (const struct sockaddr_in *)a; + const struct sockaddr_in *b4 = (const struct sockaddr_in *)b; + + if (a->ss_family != b->ss_family) + return -1; + + if (a->ss_family != AF_INET) + return -1; + + if (a4->sin_port != b4->sin_port) + return -1; + + return memcmp(&a4->sin_addr, &b4->sin_addr, sizeof(a4->sin_addr)); +} + +/* Compares two AF_INET6 sockaddr addresses. Returns 0 if they match or + * non-zero if they do not match. + */ +int sock_inet6_addrcmp(const struct sockaddr_storage *a, const struct sockaddr_storage *b) +{ + const struct sockaddr_in6 *a6 = (const struct sockaddr_in6 *)a; + const struct sockaddr_in6 *b6 = (const struct sockaddr_in6 *)b; + + if (a->ss_family != b->ss_family) + return -1; + + if (a->ss_family != AF_INET6) + return -1; + + if (a6->sin6_port != b6->sin6_port) + return -1; + + return memcmp(&a6->sin6_addr, &b6->sin6_addr, sizeof(a6->sin6_addr)); +} + +/* Sets the port <port> on IPv4 or IPv6 address <addr>. The address family is + * determined from the sockaddr_storage's address family. Nothing is done for + * other families. + */ +void sock_inet_set_port(struct sockaddr_storage *addr, int port) +{ + if (addr->ss_family == AF_INET) + ((struct sockaddr_in *)addr)->sin_port = htons(port); + else if (addr->ss_family == AF_INET6) + ((struct sockaddr_in6 *)addr)->sin6_port = htons(port); +} + +/* + * Retrieves the original destination address for the socket <fd> which must be + * of family AF_INET (not AF_INET6), with <dir> indicating if we're a listener + * (=0) or an initiator (!=0). In the case of a listener, if the original + * destination address was translated, the original address is retrieved. It + * returns 0 in case of success, -1 in case of error. The socket's source + * address is stored in <sa> for <salen> bytes. + */ +int sock_inet_get_dst(int fd, struct sockaddr *sa, socklen_t salen, int dir) +{ + if (dir) + return getpeername(fd, sa, &salen); + else { + int ret = getsockname(fd, sa, &salen); + + if (ret < 0) + return ret; + +#if defined(USE_TPROXY) && defined(SO_ORIGINAL_DST) + /* For TPROXY and Netfilter's NAT, we can retrieve the original + * IPv4 address before DNAT/REDIRECT. We must not do that with + * other families because v6-mapped IPv4 addresses are still + * reported as v4. + */ + if (getsockopt(fd, IPPROTO_IP, SO_ORIGINAL_DST, sa, &salen) == 0) + return 0; +#endif + return ret; + } +} + +/* Returns true if the passed FD corresponds to a socket bound with RX_O_FOREIGN + * according to the various supported socket options. The socket's address family + * must be passed in <family>. + */ +int sock_inet_is_foreign(int fd, sa_family_t family) +{ + int val __maybe_unused; + socklen_t len __maybe_unused; + + switch (family) { + case AF_INET: +#if defined(IP_TRANSPARENT) + val = 0; len = sizeof(val); + if (getsockopt(fd, IPPROTO_IP, IP_TRANSPARENT, &val, &len) == 0 && val) + return 1; +#endif +#if defined(IP_FREEBIND) + val = 0; len = sizeof(val); + if (getsockopt(fd, IPPROTO_IP, IP_FREEBIND, &val, &len) == 0 && val) + return 1; +#endif +#if defined(IP_BINDANY) + val = 0; len = sizeof(val); + if (getsockopt(fd, IPPROTO_IP, IP_BINDANY, &val, &len) == 0 && val) + return 1; +#endif +#if defined(SO_BINDANY) + val = 0; len = sizeof(val); + if (getsockopt(fd, SOL_SOCKET, SO_BINDANY, &val, &len) == 0 && val) + return 1; +#endif + break; + + case AF_INET6: +#if defined(IPV6_TRANSPARENT) + val = 0; len = sizeof(val); + if (getsockopt(fd, IPPROTO_IPV6, IPV6_TRANSPARENT, &val, &len) == 0 && val) + return 1; +#endif +#if defined(IP_FREEBIND) + val = 0; len = sizeof(val); + if (getsockopt(fd, IPPROTO_IP, IP_FREEBIND, &val, &len) == 0 && val) + return 1; +#endif +#if defined(IPV6_BINDANY) + val = 0; len = sizeof(val); + if (getsockopt(fd, IPPROTO_IPV6, IPV6_BINDANY, &val, &len) == 0 && val) + return 1; +#endif +#if defined(SO_BINDANY) + val = 0; len = sizeof(val); + if (getsockopt(fd, SOL_SOCKET, SO_BINDANY, &val, &len) == 0 && val) + return 1; +#endif + break; + } + return 0; +} + +/* Attempt all known socket options to prepare an AF_INET4 socket to be bound + * to a foreign address. The socket must already exist and must not be bound. + * 1 is returned on success, 0 on failure. The caller must check the address + * family before calling this function. + */ +int sock_inet4_make_foreign(int fd) +{ + return +#if defined(IP_TRANSPARENT) + setsockopt(fd, IPPROTO_IP, IP_TRANSPARENT, &one, sizeof(one)) == 0 || +#endif +#if defined(IP_FREEBIND) + setsockopt(fd, IPPROTO_IP, IP_FREEBIND, &one, sizeof(one)) == 0 || +#endif +#if defined(IP_BINDANY) + setsockopt(fd, IPPROTO_IP, IP_BINDANY, &one, sizeof(one)) == 0 || +#endif +#if defined(SO_BINDANY) + setsockopt(fd, SOL_SOCKET, SO_BINDANY, &one, sizeof(one)) == 0 || +#endif + 0; +} + +/* Attempt all known socket options to prepare an AF_INET6 socket to be bound + * to a foreign address. The socket must already exist and must not be bound. + * 1 is returned on success, 0 on failure. The caller must check the address + * family before calling this function. + */ +int sock_inet6_make_foreign(int fd) +{ + return +#if defined(IPV6_TRANSPARENT) + setsockopt(fd, IPPROTO_IPV6, IPV6_TRANSPARENT, &one, sizeof(one)) == 0 || +#endif +#if defined(IP_FREEBIND) + setsockopt(fd, IPPROTO_IP, IP_FREEBIND, &one, sizeof(one)) == 0 || +#endif +#if defined(IPV6_BINDANY) + setsockopt(fd, IPPROTO_IPV6, IPV6_BINDANY, &one, sizeof(one)) == 0 || +#endif +#if defined(SO_BINDANY) + setsockopt(fd, SOL_SOCKET, SO_BINDANY, &one, sizeof(one)) == 0 || +#endif + 0; +} + +/* Binds receiver <rx>, and assigns rx->iocb and rx->owner as the callback and + * context, respectively. Returns and error code made of ERR_* bits on failure + * or ERR_NONE on success. On failure, an error message may be passed into + * <errmsg>. + */ +int sock_inet_bind_receiver(struct receiver *rx, char **errmsg) +{ + int fd, err, ext; + /* copy listener addr because sometimes we need to switch family */ + struct sockaddr_storage addr_inet = rx->addr; + + /* force to classic sock family, not AF_CUST_* */ + addr_inet.ss_family = rx->proto->fam->sock_family; + + /* ensure we never return garbage */ + if (errmsg) + *errmsg = 0; + + err = ERR_NONE; + + if (rx->flags & RX_F_BOUND) + return ERR_NONE; + + if (rx->flags & RX_F_MUST_DUP) { + /* this is a secondary receiver that is an exact copy of a + * reference which must already be bound (or has failed). + * We'll try to dup() the other one's FD and take it. We + * try hard not to reconfigure the socket since it's shared. + */ + BUG_ON(!rx->shard_info); + if (!(rx->shard_info->ref->flags & RX_F_BOUND)) { + /* it's assumed that the first one has already reported + * the error, let's not spam with another one, and do + * not set ERR_ALERT. + */ + err |= ERR_RETRYABLE; + goto bind_ret_err; + } + /* taking the other one's FD will result in it being marked + * extern and being dup()ed. Let's mark the receiver as + * inherited so that it properly bypasses all second-stage + * setup and avoids being passed to new processes. + */ + rx->flags |= RX_F_INHERITED; + rx->fd = rx->shard_info->ref->fd; + } + + /* if no FD was assigned yet, we'll have to either find a compatible + * one or create a new one. + */ + if (rx->fd == -1) + rx->fd = sock_find_compatible_fd(rx); + + /* if the receiver now has an fd assigned, then we were offered the fd + * by an external process (most likely the parent), and we don't want + * to create a new socket. However we still want to set a few flags on + * the socket. + */ + fd = rx->fd; + ext = (fd >= 0); + + if (!ext) { + fd = my_socketat(rx->settings->netns, rx->proto->fam->sock_domain, + rx->proto->sock_type, rx->proto->sock_prot); + if (fd == -1) { + err |= ERR_RETRYABLE | ERR_ALERT; + memprintf(errmsg, "cannot create receiving socket (%s)", strerror(errno)); + goto bind_return; + } + } + + if (ext && fd < global.maxsock && fdtab[fd].owner) { + /* This FD was already bound so this means that it was already + * known and registered before parsing, hence it's an inherited + * FD. The only reason why it's already known here is that it + * has been registered multiple times (multiple listeners on the + * same, or a "shards" directive on the line). There cannot be + * multiple listeners on one FD but at least we can create a + * new one from the original one. We won't reconfigure it, + * however, as this was already done for the first one. + */ + fd = dup(fd); + if (fd == -1) { + err |= ERR_RETRYABLE | ERR_ALERT; + memprintf(errmsg, "cannot dup() receiving socket (%s)", strerror(errno)); + goto bind_return; + } + } + + if (fd >= global.maxsock) { + err |= ERR_FATAL | ERR_ABORT | ERR_ALERT; + memprintf(errmsg, "not enough free sockets (raise '-n' parameter)"); + goto bind_close_return; + } + + if (fd_set_nonblock(fd) == -1) { + err |= ERR_FATAL | ERR_ALERT; + memprintf(errmsg, "cannot make socket non-blocking"); + goto bind_close_return; + } + + if (!ext && setsockopt(fd, SOL_SOCKET, SO_REUSEADDR, &one, sizeof(one)) == -1) { + /* not fatal but should be reported */ + memprintf(errmsg, "cannot do so_reuseaddr"); + err |= ERR_ALERT; + } + +#ifdef SO_REUSEPORT + /* OpenBSD and Linux 3.9 support this. As it's present in old libc versions of + * Linux, it might return an error that we will silently ignore. + */ + if (!ext && (rx->proto->flags & PROTO_F_REUSEPORT_SUPPORTED)) + setsockopt(fd, SOL_SOCKET, SO_REUSEPORT, &one, sizeof(one)); +#endif + +#ifdef SO_REUSEPORT_LB + /* FreeBSD 12 and above use this to load-balance incoming connections. + * This is limited to 256 listeners per group however. + */ + if (!ext && (rx->proto->flags & PROTO_F_REUSEPORT_SUPPORTED)) + setsockopt(fd, SOL_SOCKET, SO_REUSEPORT_LB, &one, sizeof(one)); +#endif + + if (!ext && (rx->settings->options & RX_O_FOREIGN)) { + switch (addr_inet.ss_family) { + case AF_INET: + if (!sock_inet4_make_foreign(fd)) { + memprintf(errmsg, "cannot make receiving socket transparent"); + err |= ERR_ALERT; + } + break; + case AF_INET6: + if (!sock_inet6_make_foreign(fd)) { + memprintf(errmsg, "cannot make receiving socket transparent"); + err |= ERR_ALERT; + } + break; + } + } + +#ifdef SO_BINDTODEVICE + /* Note: this might fail if not CAP_NET_RAW */ + if (!ext && rx->settings->interface) { + if (setsockopt(fd, SOL_SOCKET, SO_BINDTODEVICE, + rx->settings->interface, + strlen(rx->settings->interface) + 1) == -1) { + memprintf(errmsg, "cannot bind receiver to device '%s' (%s)", rx->settings->interface, strerror(errno)); + err |= ERR_WARN; + } + } +#endif + +#if defined(IPV6_V6ONLY) + if (addr_inet.ss_family == AF_INET6 && !ext) { + /* Prepare to match the v6only option against what we really want. Note + * that sadly the two options are not exclusive to each other and that + * v6only is stronger than v4v6. + */ + if ((rx->settings->options & RX_O_V6ONLY) || + (sock_inet6_v6only_default && !(rx->settings->options & RX_O_V4V6))) + setsockopt(fd, IPPROTO_IPV6, IPV6_V6ONLY, &one, sizeof(one)); + else + setsockopt(fd, IPPROTO_IPV6, IPV6_V6ONLY, &zero, sizeof(zero)); + } +#endif + + if (!ext && bind(fd, (struct sockaddr *)&addr_inet, rx->proto->fam->sock_addrlen) == -1) { + err |= ERR_RETRYABLE | ERR_ALERT; + memprintf(errmsg, "cannot bind socket (%s)", strerror(errno)); + goto bind_close_return; + } + + rx->fd = fd; + rx->flags |= RX_F_BOUND; + + fd_insert(fd, rx->owner, rx->iocb, rx->bind_tgroup, rx->bind_thread); + + /* for now, all regularly bound TCP listeners are exportable */ + if (!(rx->flags & RX_F_INHERITED)) + HA_ATOMIC_OR(&fdtab[fd].state, FD_EXPORTED); + + bind_return: + if (errmsg && *errmsg) { + char pn[INET6_ADDRSTRLEN]; + + addr_to_str(&addr_inet, pn, sizeof(pn)); + memprintf(errmsg, "%s for [%s:%d]", *errmsg, pn, get_host_port(&addr_inet)); + } + bind_ret_err: + return err; + + bind_close_return: + close(fd); + goto bind_return; +} + +static void sock_inet_prepare() +{ + int fd, val; + socklen_t len; + + fd = socket(AF_INET, SOCK_STREAM, 0); + if (fd >= 0) { +#ifdef TCP_MAXSEG + /* retrieve the OS' default mss for TCPv4 */ + len = sizeof(val); + if (getsockopt(fd, IPPROTO_TCP, TCP_MAXSEG, &val, &len) == 0) + sock_inet_tcp_maxseg_default = val; +#endif + close(fd); + } + + fd = socket(AF_INET6, SOCK_STREAM, 0); + if (fd >= 0) { +#if defined(IPV6_V6ONLY) + /* retrieve the OS' bindv6only value */ + len = sizeof(val); + if (getsockopt(fd, IPPROTO_IPV6, IPV6_V6ONLY, &val, &len) == 0 && val > 0) + sock_inet6_v6only_default = 1; +#endif + +#ifdef TCP_MAXSEG + /* retrieve the OS' default mss for TCPv6 */ + len = sizeof(val); + if (getsockopt(fd, IPPROTO_TCP, TCP_MAXSEG, &val, &len) == 0) + sock_inet6_tcp_maxseg_default = val; +#endif + close(fd); + } +} + +INITCALL0(STG_PREPARE, sock_inet_prepare); + + +REGISTER_BUILD_OPTS("Built with transparent proxy support using:" +#if defined(IP_TRANSPARENT) + " IP_TRANSPARENT" +#endif +#if defined(IPV6_TRANSPARENT) + " IPV6_TRANSPARENT" +#endif +#if defined(IP_FREEBIND) + " IP_FREEBIND" +#endif +#if defined(IP_BINDANY) + " IP_BINDANY" +#endif +#if defined(IPV6_BINDANY) + " IPV6_BINDANY" +#endif +#if defined(SO_BINDANY) + " SO_BINDANY" +#endif + ""); |