/*
 * AF_INET/AF_INET6 socket management
 *
 * Copyright 2000-2020 Willy Tarreau
 *
 * This program is free software; you can redistribute it and/or
 * modify it under the terms of the GNU General Public License
 * as published by the Free Software Foundation; either version
 * 2 of the License, or (at your option) any later version.
 *
 */

/* NOTE(review): every #include directive below has lost its header name in
 * this copy of the file. They must be restored from the original source
 * before this file can be compiled.
 */
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include

/* Address family descriptor for IPv4 (AF_INET) sockets. Note that get_dst
 * points to the IPv4-specific sock_inet_get_dst() defined in this file,
 * whereas the IPv6 descriptor below uses the generic sock_get_dst().
 */
struct proto_fam proto_fam_inet4 = {
	.name = "inet4",
	.sock_domain = PF_INET,
	.sock_family = AF_INET,
	.sock_addrlen = sizeof(struct sockaddr_in),
	.l3_addrlen = 32/8,                 /* 32 bits expressed in bytes */
	.addrcmp = sock_inet4_addrcmp,
	.bind = sock_inet_bind_receiver,
	.get_src = sock_get_src,
	.get_dst = sock_inet_get_dst,
	.set_port = sock_inet_set_port,
};

/* Address family descriptor for IPv6 (AF_INET6) sockets. It shares the bind
 * and set_port callbacks with the IPv4 descriptor.
 */
struct proto_fam proto_fam_inet6 = {
	.name = "inet6",
	.sock_domain = PF_INET6,
	.sock_family = AF_INET6,
	.sock_addrlen = sizeof(struct sockaddr_in6),
	.l3_addrlen = 128/8,                /* 128 bits expressed in bytes */
	.addrcmp = sock_inet6_addrcmp,
	.bind = sock_inet_bind_receiver,
	.get_src = sock_get_src,
	.get_dst = sock_get_dst,
	.set_port = sock_inet_set_port,
};

/* PLEASE NOTE for the functions below:
 *   - sock_inet4_* is solely for AF_INET (IPv4)
 *   - sock_inet6_* is solely for AF_INET6 (IPv6)
 *   - sock_inet_*  is for either
 *
 * The address family SHOULD always be checked. In some cases a function will
 * be used in a situation where the address family is guaranteed (e.g. protocol
 * definitions), so the test may be avoided. This special case must then be
 * mentioned in the comment before the function definition.
 */

/* determine if the operating system uses IPV6_V6ONLY by default. 0=no, 1=yes.
 * It also remains 0 if IPv6 is not enabled/configured, since it is only set
 * to 1 once an AF_INET6 socket could be created and probed.
 */
int sock_inet6_v6only_default = 0;

/* Default TCPv4/TCPv6 MSS settings. -1=unknown. */
int sock_inet_tcp_maxseg_default = -1;
int sock_inet6_tcp_maxseg_default = -1;

/* Compares two AF_INET sockaddr addresses (family, port and IPv4 address).
 * Returns 0 if they match or non-zero if they do not match.
*/
int sock_inet4_addrcmp(const struct sockaddr_storage *a, const struct sockaddr_storage *b)
{
	const struct sockaddr_in *lhs, *rhs;

	/* both sides must be IPv4, anything else is reported as a mismatch */
	if (a->ss_family != AF_INET || b->ss_family != AF_INET)
		return -1;

	lhs = (const struct sockaddr_in *)a;
	rhs = (const struct sockaddr_in *)b;

	if (lhs->sin_port != rhs->sin_port)
		return -1;

	return memcmp(&lhs->sin_addr, &rhs->sin_addr, sizeof(lhs->sin_addr));
}

/* Compares two AF_INET6 sockaddr addresses (family, port and IPv6 address).
 * Returns 0 if they match or non-zero if they do not match.
 */
int sock_inet6_addrcmp(const struct sockaddr_storage *a, const struct sockaddr_storage *b)
{
	const struct sockaddr_in6 *lhs, *rhs;

	/* both sides must be IPv6, anything else is reported as a mismatch */
	if (a->ss_family != AF_INET6 || b->ss_family != AF_INET6)
		return -1;

	lhs = (const struct sockaddr_in6 *)a;
	rhs = (const struct sockaddr_in6 *)b;

	if (lhs->sin6_port != rhs->sin6_port)
		return -1;

	return memcmp(&lhs->sin6_addr, &rhs->sin6_addr, sizeof(lhs->sin6_addr));
}

/* Sets the port <port> on IPv4 or IPv6 address <addr>. The address family is
 * determined from the sockaddr_storage's address family, and the port is
 * stored in network byte order. Nothing is done for other families.
 */
void sock_inet_set_port(struct sockaddr_storage *addr, int port)
{
	switch (addr->ss_family) {
	case AF_INET: {
		struct sockaddr_in *in4 = (struct sockaddr_in *)addr;

		in4->sin_port = htons(port);
		break;
	}
	case AF_INET6: {
		struct sockaddr_in6 *in6 = (struct sockaddr_in6 *)addr;

		in6->sin6_port = htons(port);
		break;
	}
	default:
		/* unsupported family: leave the address untouched */
		break;
	}
}

/*
 * Retrieves the original destination address for the socket <fd> which must be
 * of family AF_INET (not AF_INET6), with <dir> indicating if we're a listener
 * (=0) or an initiator (!=0). In the case of a listener, if the original
 * destination address was translated, the original address is retrieved. It
 * returns 0 in case of success, -1 in case of error. The retrieved address is
 * stored in <sa>, whose size is <salen> bytes.
*/ int sock_inet_get_dst(int fd, struct sockaddr *sa, socklen_t salen, int dir) { if (dir) return getpeername(fd, sa, &salen); else { int ret = getsockname(fd, sa, &salen); if (ret < 0) return ret; #if defined(USE_TPROXY) && defined(SO_ORIGINAL_DST) /* For TPROXY and Netfilter's NAT, we can retrieve the original * IPv4 address before DNAT/REDIRECT. We must not do that with * other families because v6-mapped IPv4 addresses are still * reported as v4. */ if (getsockopt(fd, IPPROTO_IP, SO_ORIGINAL_DST, sa, &salen) == 0) return 0; #endif return ret; } } /* Returns true if the passed FD corresponds to a socket bound with RX_O_FOREIGN * according to the various supported socket options. The socket's address family * must be passed in . */ int sock_inet_is_foreign(int fd, sa_family_t family) { int val __maybe_unused; socklen_t len __maybe_unused; switch (family) { case AF_INET: #if defined(IP_TRANSPARENT) val = 0; len = sizeof(val); if (getsockopt(fd, IPPROTO_IP, IP_TRANSPARENT, &val, &len) == 0 && val) return 1; #endif #if defined(IP_FREEBIND) val = 0; len = sizeof(val); if (getsockopt(fd, IPPROTO_IP, IP_FREEBIND, &val, &len) == 0 && val) return 1; #endif #if defined(IP_BINDANY) val = 0; len = sizeof(val); if (getsockopt(fd, IPPROTO_IP, IP_BINDANY, &val, &len) == 0 && val) return 1; #endif #if defined(SO_BINDANY) val = 0; len = sizeof(val); if (getsockopt(fd, SOL_SOCKET, SO_BINDANY, &val, &len) == 0 && val) return 1; #endif break; case AF_INET6: #if defined(IPV6_TRANSPARENT) val = 0; len = sizeof(val); if (getsockopt(fd, IPPROTO_IPV6, IPV6_TRANSPARENT, &val, &len) == 0 && val) return 1; #endif #if defined(IP_FREEBIND) val = 0; len = sizeof(val); if (getsockopt(fd, IPPROTO_IP, IP_FREEBIND, &val, &len) == 0 && val) return 1; #endif #if defined(IPV6_BINDANY) val = 0; len = sizeof(val); if (getsockopt(fd, IPPROTO_IPV6, IPV6_BINDANY, &val, &len) == 0 && val) return 1; #endif #if defined(SO_BINDANY) val = 0; len = sizeof(val); if (getsockopt(fd, SOL_SOCKET, SO_BINDANY, &val, 
&len) == 0 && val) return 1; #endif break; } return 0; } /* Attempt all known socket options to prepare an AF_INET4 socket to be bound * to a foreign address. The socket must already exist and must not be bound. * 1 is returned on success, 0 on failure. The caller must check the address * family before calling this function. */ int sock_inet4_make_foreign(int fd) { return #if defined(IP_TRANSPARENT) setsockopt(fd, IPPROTO_IP, IP_TRANSPARENT, &one, sizeof(one)) == 0 || #endif #if defined(IP_FREEBIND) setsockopt(fd, IPPROTO_IP, IP_FREEBIND, &one, sizeof(one)) == 0 || #endif #if defined(IP_BINDANY) setsockopt(fd, IPPROTO_IP, IP_BINDANY, &one, sizeof(one)) == 0 || #endif #if defined(SO_BINDANY) setsockopt(fd, SOL_SOCKET, SO_BINDANY, &one, sizeof(one)) == 0 || #endif 0; } /* Attempt all known socket options to prepare an AF_INET6 socket to be bound * to a foreign address. The socket must already exist and must not be bound. * 1 is returned on success, 0 on failure. The caller must check the address * family before calling this function. */ int sock_inet6_make_foreign(int fd) { return #if defined(IPV6_TRANSPARENT) setsockopt(fd, IPPROTO_IPV6, IPV6_TRANSPARENT, &one, sizeof(one)) == 0 || #endif #if defined(IP_FREEBIND) setsockopt(fd, IPPROTO_IP, IP_FREEBIND, &one, sizeof(one)) == 0 || #endif #if defined(IPV6_BINDANY) setsockopt(fd, IPPROTO_IPV6, IPV6_BINDANY, &one, sizeof(one)) == 0 || #endif #if defined(SO_BINDANY) setsockopt(fd, SOL_SOCKET, SO_BINDANY, &one, sizeof(one)) == 0 || #endif 0; } /* Binds receiver , and assigns rx->iocb and rx->owner as the callback and * context, respectively. Returns and error code made of ERR_* bits on failure * or ERR_NONE on success. On failure, an error message may be passed into * . 
*/
int sock_inet_bind_receiver(struct receiver *rx, char **errmsg)
{
	int fd, err, ext;
	/* copy listener addr because sometimes we need to switch family */
	struct sockaddr_storage addr_inet = rx->addr;

	/* force to classic sock family, not AF_CUST_* */
	addr_inet.ss_family = rx->proto->fam->sock_family;

	/* ensure we never return garbage */
	if (errmsg)
		*errmsg = 0;

	err = ERR_NONE;

	/* nothing to do for a receiver which is already bound */
	if (rx->flags & RX_F_BOUND)
		return ERR_NONE;

	if (rx->flags & RX_F_MUST_DUP) {
		/* this is a secondary receiver that is an exact copy of a
		 * reference which must already be bound (or has failed).
		 * We'll try to dup() the other one's FD and take it. We
		 * try hard not to reconfigure the socket since it's shared.
		 */
		BUG_ON(!rx->shard_info);
		if (!(rx->shard_info->ref->flags & RX_F_BOUND)) {
			/* it's assumed that the first one has already reported
			 * the error, let's not spam with another one, and do
			 * not set ERR_ALERT.
			 */
			err |= ERR_RETRYABLE;
			goto bind_ret_err;
		}
		/* taking the other one's FD will result in it being marked
		 * extern and being dup()ed. Let's mark the receiver as
		 * inherited so that it properly bypasses all second-stage
		 * setup and avoids being passed to new processes.
		 */
		rx->flags |= RX_F_INHERITED;
		rx->fd = rx->shard_info->ref->fd;
	}

	/* if no FD was assigned yet, we'll have to either find a compatible
	 * one or create a new one.
	 */
	if (rx->fd == -1)
		rx->fd = sock_find_compatible_fd(rx);

	/* if the receiver now has an fd assigned, then we were offered the fd
	 * by an external process (most likely the parent), and we don't want
	 * to create a new socket. However we still want to set a few flags on
	 * the socket.
	 */
	fd = rx->fd;
	/* <ext> is non-zero when the FD was provided to us rather than created
	 * here; every socket option and the bind() call below are skipped in
	 * that case (they are all guarded by !ext).
	 */
	ext = (fd >= 0);

	if (!ext) {
		fd = my_socketat(rx->settings->netns, rx->proto->fam->sock_domain,
		                 rx->proto->sock_type, rx->proto->sock_prot);
		if (fd == -1) {
			err |= ERR_RETRYABLE | ERR_ALERT;
			memprintf(errmsg, "cannot create receiving socket (%s)", strerror(errno));
			goto bind_return;
		}
	}

	if (ext && fd < global.maxsock && fdtab[fd].owner) {
		/* This FD was already bound so this means that it was already
		 * known and registered before parsing, hence it's an inherited
		 * FD. The only reason why it's already known here is that it
		 * has been registered multiple times (multiple listeners on the
		 * same, or a "shards" directive on the line). There cannot be
		 * multiple listeners on one FD but at least we can create a
		 * new one from the original one. We won't reconfigure it,
		 * however, as this was already done for the first one.
		 */
		fd = dup(fd);
		if (fd == -1) {
			err |= ERR_RETRYABLE | ERR_ALERT;
			memprintf(errmsg, "cannot dup() receiving socket (%s)", strerror(errno));
			goto bind_return;
		}
	}

	/* from this point on, failures go through bind_close_return so that
	 * <fd> gets closed before the error is reported.
	 */
	if (fd >= global.maxsock) {
		err |= ERR_FATAL | ERR_ABORT | ERR_ALERT;
		memprintf(errmsg, "not enough free sockets (raise '-n' parameter)");
		goto bind_close_return;
	}

	/* the non-blocking mode is enforced on external FDs as well */
	if (fd_set_nonblock(fd) == -1) {
		err |= ERR_FATAL | ERR_ALERT;
		memprintf(errmsg, "cannot make socket non-blocking");
		goto bind_close_return;
	}

	if (!ext && setsockopt(fd, SOL_SOCKET, SO_REUSEADDR, &one, sizeof(one)) == -1) {
		/* not fatal but should be reported */
		memprintf(errmsg, "cannot do so_reuseaddr");
		err |= ERR_ALERT;
	}

#ifdef SO_REUSEPORT
	/* OpenBSD and Linux 3.9 support this. As it's present in old libc versions of
	 * Linux, it might return an error that we will silently ignore.
	 */
	if (!ext && (rx->proto->flags & PROTO_F_REUSEPORT_SUPPORTED))
		setsockopt(fd, SOL_SOCKET, SO_REUSEPORT, &one, sizeof(one));
#endif

#ifdef SO_REUSEPORT_LB
	/* FreeBSD 12 and above use this to load-balance incoming connections.
	 * This is limited to 256 listeners per group however.
	 */
	if (!ext && (rx->proto->flags & PROTO_F_REUSEPORT_SUPPORTED))
		setsockopt(fd, SOL_SOCKET, SO_REUSEPORT_LB, &one, sizeof(one));
#endif

	/* a failure to make the socket foreign is only reported (ERR_ALERT),
	 * the bind() below is still attempted.
	 */
	if (!ext && (rx->settings->options & RX_O_FOREIGN)) {
		switch (addr_inet.ss_family) {
		case AF_INET:
			if (!sock_inet4_make_foreign(fd)) {
				memprintf(errmsg, "cannot make receiving socket transparent");
				err |= ERR_ALERT;
			}
			break;
		case AF_INET6:
			if (!sock_inet6_make_foreign(fd)) {
				memprintf(errmsg, "cannot make receiving socket transparent");
				err |= ERR_ALERT;
			}
			break;
		}
	}

#ifdef SO_BINDTODEVICE
	/* Note: this might fail if not CAP_NET_RAW */
	if (!ext && rx->settings->interface) {
		/* the length passed includes the trailing zero (+1); a failure
		 * only raises a warning.
		 */
		if (setsockopt(fd, SOL_SOCKET, SO_BINDTODEVICE,
		               rx->settings->interface,
		               strlen(rx->settings->interface) + 1) == -1) {
			memprintf(errmsg, "cannot bind receiver to device '%s' (%s)", rx->settings->interface, strerror(errno));
			err |= ERR_WARN;
		}
	}
#endif

#if defined(IPV6_V6ONLY)
	if (addr_inet.ss_family == AF_INET6 && !ext) {
		/* Prepare to match the v6only option against what we really want. Note
		 * that sadly the two options are not exclusive to each other and that
		 * v6only is stronger than v4v6.
		 */
		if ((rx->settings->options & RX_O_V6ONLY) ||
		    (sock_inet6_v6only_default && !(rx->settings->options & RX_O_V4V6)))
			setsockopt(fd, IPPROTO_IPV6, IPV6_V6ONLY, &one, sizeof(one));
		else
			setsockopt(fd, IPPROTO_IPV6, IPV6_V6ONLY, &zero, sizeof(zero));
	}
#endif

	if (!ext && bind(fd, (struct sockaddr *)&addr_inet, rx->proto->fam->sock_addrlen) == -1) {
		err |= ERR_RETRYABLE | ERR_ALERT;
		memprintf(errmsg, "cannot bind socket (%s)", strerror(errno));
		goto bind_close_return;
	}

	/* success: record the FD, mark the receiver bound, and register the
	 * FD with rx->owner and rx->iocb as its context and callback.
	 */
	rx->fd = fd;
	rx->flags |= RX_F_BOUND;

	fd_insert(fd, rx->owner, rx->iocb, rx->bind_tgroup, rx->bind_thread);

	/* for now, all regularly bound TCP listeners are exportable */
	if (!(rx->flags & RX_F_INHERITED))
		HA_ATOMIC_OR(&fdtab[fd].state, FD_EXPORTED);

 bind_return:
	/* when a message was produced (error or non-fatal report), append the
	 * address and port of the receiver to it.
	 */
	if (errmsg && *errmsg) {
		char pn[INET6_ADDRSTRLEN];

		addr_to_str(&addr_inet, pn, sizeof(pn));
		memprintf(errmsg, "%s for [%s:%d]", *errmsg, pn, get_host_port(&addr_inet));
	}
 bind_ret_err:
	return err;

 bind_close_return:
	/* error path: release the FD, then complete the message as above */
	close(fd);
	goto bind_return;
}

/* Probes the operating system's defaults using throw-away TCP sockets: the
 * default MSS for IPv4 and IPv6 (when TCP_MAXSEG is available) and the default
 * IPV6_V6ONLY setting (when IPV6_V6ONLY is available). The corresponding
 * globals keep their initial value when a socket cannot be created or an
 * option cannot be read. It is registered below through INITCALL0() for the
 * STG_PREPARE stage.
 */
static void sock_inet_prepare()
{
	int fd, val;
	socklen_t len;

	fd = socket(AF_INET, SOCK_STREAM, 0);
	if (fd >= 0) {
#ifdef TCP_MAXSEG
		/* retrieve the OS' default mss for TCPv4 */
		len = sizeof(val);
		if (getsockopt(fd, IPPROTO_TCP, TCP_MAXSEG, &val, &len) == 0)
			sock_inet_tcp_maxseg_default = val;
#endif
		close(fd);
	}

	fd = socket(AF_INET6, SOCK_STREAM, 0);
	if (fd >= 0) {
#if defined(IPV6_V6ONLY)
		/* retrieve the OS' bindv6only value */
		len = sizeof(val);
		if (getsockopt(fd, IPPROTO_IPV6, IPV6_V6ONLY, &val, &len) == 0 && val > 0)
			sock_inet6_v6only_default = 1;
#endif

#ifdef TCP_MAXSEG
		/* retrieve the OS' default mss for TCPv6 */
		len = sizeof(val);
		if (getsockopt(fd, IPPROTO_TCP, TCP_MAXSEG, &val, &len) == 0)
			sock_inet6_tcp_maxseg_default = val;
#endif
		close(fd);
	}
}

INITCALL0(STG_PREPARE, sock_inet_prepare);

/* Build options string: lists the foreign-bind socket options which were
 * defined at build time, by concatenating one string literal per option.
 */
REGISTER_BUILD_OPTS("Built with transparent proxy support using:"
#if defined(IP_TRANSPARENT)
		" IP_TRANSPARENT"
#endif
#if defined(IPV6_TRANSPARENT)
		" IPV6_TRANSPARENT"
#endif
#if defined(IP_FREEBIND)
		" IP_FREEBIND"
#endif
#if defined(IP_BINDANY)
		" IP_BINDANY"
#endif
#if defined(IPV6_BINDANY)
		" IPV6_BINDANY"
#endif
#if defined(SO_BINDANY)
		" SO_BINDANY"
#endif
		"");