diff options
Diffstat (limited to '')
-rw-r--r-- | iputils.cc | 545 |
1 files changed, 545 insertions, 0 deletions
diff --git a/iputils.cc b/iputils.cc new file mode 100644 index 0000000..1c04228 --- /dev/null +++ b/iputils.cc @@ -0,0 +1,545 @@ +/* + * This file is part of PowerDNS or dnsdist. + * Copyright -- PowerDNS.COM B.V. and its contributors + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of version 2 of the GNU General Public License as + * published by the Free Software Foundation. + * + * In addition, for the avoidance of any doubt, permission is granted to + * link this program with OpenSSL and to (re)distribute the binaries + * produced as the result of such linking. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. + */ +#ifdef HAVE_CONFIG_H +#include "config.h" +#endif +#include "iputils.hh" +#include <sys/socket.h> + +/** these functions provide a very lightweight wrapper to the Berkeley sockets API. Errors -> exceptions! */ + +static void RuntimeError(const boost::format& fmt) +{ + throw runtime_error(fmt.str()); +} + +static void NetworkErr(const boost::format& fmt) +{ + throw NetworkError(fmt.str()); +} + +int SSocket(int family, int type, int flags) +{ + int ret = socket(family, type, flags); + if(ret < 0) + RuntimeError(boost::format("creating socket of type %d: %s") % family % stringerror()); + return ret; +} + +int SConnect(int sockfd, const ComboAddress& remote) +{ + int ret = connect(sockfd, reinterpret_cast<const struct sockaddr*>(&remote), remote.getSocklen()); + if(ret < 0) { + int savederrno = errno; + RuntimeError(boost::format("connecting socket to %s: %s") % remote.toStringWithPort() % strerror(savederrno)); + } + return ret; +} + +int SConnectWithTimeout(int sockfd, const ComboAddress& remote, const struct timeval& timeout) +{ + int ret = connect(sockfd, reinterpret_cast<const struct sockaddr*>(&remote), remote.getSocklen()); + if(ret < 0) { + int savederrno = errno; + if (savederrno == EINPROGRESS) { + if (timeout <= timeval{0,0}) { + return savederrno; + } + + /* we wait until the connection has been established */ + bool error = false; + bool disconnected = false; + int res = waitForRWData(sockfd, false, timeout.tv_sec, timeout.tv_usec, &error, &disconnected); + if (res == 1) { + if (error) { + savederrno = 0; + socklen_t errlen = sizeof(savederrno); + if (getsockopt(sockfd, SOL_SOCKET, SO_ERROR, (void *)&savederrno, &errlen) == 0) { + NetworkErr(boost::format("connecting to %s failed: %s") % remote.toStringWithPort() % string(strerror(savederrno))); + } + else { + NetworkErr(boost::format("connecting to %s failed") % remote.toStringWithPort()); + } + } + if (disconnected) { + NetworkErr(boost::format("%s closed the connection") % remote.toStringWithPort()); + } + return 0; + } + else if (res == 0) { + NetworkErr(boost::format("timeout while connecting to %s") % remote.toStringWithPort()); + } else if (res < 0) { + savederrno = errno; + NetworkErr(boost::format("waiting to connect to %s: %s") % remote.toStringWithPort() % string(strerror(savederrno))); + } + } + else { + NetworkErr(boost::format("connecting to %s: %s") % remote.toStringWithPort() % string(strerror(savederrno))); + } + } + + return 0; +} + +int SBind(int sockfd, const ComboAddress& local) +{ + int ret = bind(sockfd, (struct sockaddr*)&local, local.getSocklen()); + if(ret < 0) { + int savederrno = errno; + RuntimeError(boost::format("binding socket to %s: %s") % local.toStringWithPort() % strerror(savederrno)); + } + return ret; +} + +int SAccept(int sockfd, ComboAddress& remote) +{ + socklen_t remlen = remote.getSocklen(); + + int ret = accept(sockfd, (struct sockaddr*)&remote, &remlen); + if(ret < 0) + RuntimeError(boost::format("accepting new connection on socket: %s") % stringerror()); + return ret; +} + +int SListen(int sockfd, int limit) +{ + int ret = listen(sockfd, limit); + if(ret < 0) + RuntimeError(boost::format("setting socket to listen: %s") % stringerror()); + return ret; +} + +int SSetsockopt(int sockfd, int level, int opname, int value) +{ + int ret = setsockopt(sockfd, level, opname, &value, sizeof(value)); + if(ret < 0) + RuntimeError(boost::format("setsockopt for level %d and opname %d to %d failed: %s") % level % opname % value % stringerror()); + return ret; +} + +void setSocketIgnorePMTU(int sockfd, int family) +{ + if (family == AF_INET) { +#if defined(IP_MTU_DISCOVER) && defined(IP_PMTUDISC_DONT) +#ifdef IP_PMTUDISC_OMIT + /* Linux 3.15+ has IP_PMTUDISC_OMIT, which discards PMTU information to prevent + poisoning, but still allows fragmentation if the packet size exceeds the + outgoing interface MTU, which is good. + */ + try { + SSetsockopt(sockfd, IPPROTO_IP, IP_MTU_DISCOVER, IP_PMTUDISC_OMIT); + return; + } + catch(const std::exception& e) { + /* failed, let's try IP_PMTUDISC_DONT instead */ + } +#endif /* IP_PMTUDISC_OMIT */ + + /* IP_PMTUDISC_DONT disables Path MTU discovery */ + SSetsockopt(sockfd, IPPROTO_IP, IP_MTU_DISCOVER, IP_PMTUDISC_DONT); +#endif /* defined(IP_MTU_DISCOVER) && defined(IP_PMTUDISC_DONT) */ + } + else { + #if defined(IPV6_MTU_DISCOVER) && defined(IPV6_PMTUDISC_DONT) +#ifdef IPV6_PMTUDISC_OMIT + /* Linux 3.15+ has IPV6_PMTUDISC_OMIT, which discards PMTU information to prevent + poisoning, but still allows fragmentation if the packet size exceeds the + outgoing interface MTU, which is good. + */ + try { + SSetsockopt(sockfd, IPPROTO_IPV6, IPV6_MTU_DISCOVER, IPV6_PMTUDISC_OMIT); + return; + } + catch(const std::exception& e) { + /* failed, let's try IP_PMTUDISC_DONT instead */ + } +#endif /* IPV6_PMTUDISC_OMIT */ + + /* IPV6_PMTUDISC_DONT disables Path MTU discovery */ + SSetsockopt(sockfd, IPPROTO_IPV6, IPV6_MTU_DISCOVER, IPV6_PMTUDISC_DONT); +#endif /* defined(IPV6_MTU_DISCOVER) && defined(IPV6_PMTUDISC_DONT) */ + } +} + + +bool setReusePort(int sockfd) +{ +#if defined(SO_REUSEPORT_LB) + try { + SSetsockopt(sockfd, SOL_SOCKET, SO_REUSEPORT_LB, 1); + return true; + } + catch (const std::exception& e) { + return false; + } +#elif defined(SO_REUSEPORT) + try { + SSetsockopt(sockfd, SOL_SOCKET, SO_REUSEPORT, 1); + return true; + } + catch (const std::exception& e) { + return false; + } +#endif + return false; +} + +bool HarvestTimestamp(struct msghdr* msgh, struct timeval* tv) +{ +#ifdef SO_TIMESTAMP + struct cmsghdr *cmsg; + for (cmsg = CMSG_FIRSTHDR(msgh); cmsg != nullptr; cmsg = CMSG_NXTHDR(msgh,cmsg)) { + if ((cmsg->cmsg_level == SOL_SOCKET) && (cmsg->cmsg_type == SO_TIMESTAMP || cmsg->cmsg_type == SCM_TIMESTAMP) && + CMSG_LEN(sizeof(*tv)) == cmsg->cmsg_len) { + memcpy(tv, CMSG_DATA(cmsg), sizeof(*tv)); + return true; + } + } +#endif + return false; +} +bool HarvestDestinationAddress(const struct msghdr* msgh, ComboAddress* destination) +{ + destination->reset(); +#ifdef __NetBSD__ + struct cmsghdr* cmsg; +#else + const struct cmsghdr* cmsg; +#endif + for (cmsg = CMSG_FIRSTHDR(msgh); cmsg != nullptr; cmsg = CMSG_NXTHDR(const_cast<struct msghdr*>(msgh), const_cast<struct cmsghdr*>(cmsg))) { +#if defined(IP_PKTINFO) + if ((cmsg->cmsg_level == IPPROTO_IP) && (cmsg->cmsg_type == IP_PKTINFO)) { + struct in_pktinfo *i = (struct in_pktinfo *) CMSG_DATA(cmsg); + destination->sin4.sin_addr = i->ipi_addr; + destination->sin4.sin_family = AF_INET; + return true; + } +#elif defined(IP_RECVDSTADDR) + if ((cmsg->cmsg_level == IPPROTO_IP) && (cmsg->cmsg_type == IP_RECVDSTADDR)) { + struct in_addr *i = (struct in_addr *) CMSG_DATA(cmsg); + destination->sin4.sin_addr = *i; + destination->sin4.sin_family = AF_INET; + return true; + } +#endif + + if ((cmsg->cmsg_level == IPPROTO_IPV6) && (cmsg->cmsg_type == IPV6_PKTINFO)) { + struct in6_pktinfo *i = (struct in6_pktinfo *) CMSG_DATA(cmsg); + destination->sin6.sin6_addr = i->ipi6_addr; + destination->sin4.sin_family = AF_INET6; + return true; + } + } + return false; +} + +bool IsAnyAddress(const ComboAddress& addr) +{ + if(addr.sin4.sin_family == AF_INET) + return addr.sin4.sin_addr.s_addr == 0; + else if(addr.sin4.sin_family == AF_INET6) + return !memcmp(&addr.sin6.sin6_addr, &in6addr_any, sizeof(addr.sin6.sin6_addr)); + + return false; +} +int sendOnNBSocket(int fd, const struct msghdr *msgh) +{ + int sendErr = 0; +#ifdef __OpenBSD__ + // OpenBSD can and does return EAGAIN on non-blocking datagram sockets + for (int i = 0; i < 10; i++) { // Arbitrary upper bound + if (sendmsg(fd, msgh, 0) != -1) { + sendErr = 0; + break; + } + sendErr = errno; + if (sendErr != EAGAIN) { + break; + } + } +#else + if (sendmsg(fd, msgh, 0) == -1) { + sendErr = errno; + } +#endif + return sendErr; +} + +ssize_t sendfromto(int sock, const void* data, size_t len, int flags, const ComboAddress& from, const ComboAddress& to) +{ + struct msghdr msgh; + struct iovec iov; + cmsgbuf_aligned cbuf; + + /* Set up iov and msgh structures. */ + memset(&msgh, 0, sizeof(struct msghdr)); + iov.iov_base = const_cast<void*>(data); + iov.iov_len = len; + msgh.msg_iov = &iov; + msgh.msg_iovlen = 1; + msgh.msg_name = (struct sockaddr*)&to; + msgh.msg_namelen = to.getSocklen(); + + if(from.sin4.sin_family) { + addCMsgSrcAddr(&msgh, &cbuf, &from, 0); + } + else { + msgh.msg_control=nullptr; + } + return sendmsg(sock, &msgh, flags); +} + +// be careful: when using this for receive purposes, make sure addr->sin4.sin_family is set appropriately so getSocklen works! +// be careful: when using this function for *send* purposes, be sure to set cbufsize to 0! +// be careful: if you don't call addCMsgSrcAddr after fillMSGHdr, make sure to set msg_control to NULL +void fillMSGHdr(struct msghdr* msgh, struct iovec* iov, cmsgbuf_aligned* cbuf, size_t cbufsize, char* data, size_t datalen, ComboAddress* addr) +{ + iov->iov_base = data; + iov->iov_len = datalen; + + memset(msgh, 0, sizeof(struct msghdr)); + + msgh->msg_control = cbuf; + msgh->msg_controllen = cbufsize; + msgh->msg_name = addr; + msgh->msg_namelen = addr->getSocklen(); + msgh->msg_iov = iov; + msgh->msg_iovlen = 1; + msgh->msg_flags = 0; +} + +// warning: various parts of PowerDNS assume 'truncate' will never throw +void ComboAddress::truncate(unsigned int bits) noexcept +{ + uint8_t* start; + int len=4; + if(sin4.sin_family==AF_INET) { + if(bits >= 32) + return; + start = (uint8_t*)&sin4.sin_addr.s_addr; + len=4; + } + else { + if(bits >= 128) + return; + start = (uint8_t*)&sin6.sin6_addr.s6_addr; + len=16; + } + + auto tozero= len*8 - bits; // if set to 22, this will clear 1 byte, as it should + + memset(start + len - tozero/8, 0, tozero/8); // blot out the whole bytes on the right + + auto bitsleft=tozero % 8; // 2 bits left to clear + + // a b c d, to truncate to 22 bits, we just zeroed 'd' and need to zero 2 bits from c + // so and by '11111100', which is ~((1<<2)-1) = ~3 + uint8_t* place = start + len - 1 - tozero/8; + *place &= (~((1<<bitsleft)-1)); +} + +size_t sendMsgWithOptions(int fd, const char* buffer, size_t len, const ComboAddress* dest, const ComboAddress* local, unsigned int localItf, int flags) +{ + struct msghdr msgh; + struct iovec iov; + cmsgbuf_aligned cbuf; + + /* Set up iov and msgh structures. */ + memset(&msgh, 0, sizeof(struct msghdr)); + msgh.msg_control = nullptr; + msgh.msg_controllen = 0; + if (dest) { + msgh.msg_name = reinterpret_cast<void*>(const_cast<ComboAddress*>(dest)); + msgh.msg_namelen = dest->getSocklen(); + } + else { + msgh.msg_name = nullptr; + msgh.msg_namelen = 0; + } + + msgh.msg_flags = 0; + + if (localItf != 0 && local) { + addCMsgSrcAddr(&msgh, &cbuf, local, localItf); + } + + iov.iov_base = reinterpret_cast<void*>(const_cast<char*>(buffer)); + iov.iov_len = len; + msgh.msg_iov = &iov; + msgh.msg_iovlen = 1; + msgh.msg_flags = 0; + + size_t sent = 0; + bool firstTry = true; + + do { + +#ifdef MSG_FASTOPEN + if (flags & MSG_FASTOPEN && firstTry == false) { + flags &= ~MSG_FASTOPEN; + } +#endif /* MSG_FASTOPEN */ + + ssize_t res = sendmsg(fd, &msgh, flags); + + if (res > 0) { + size_t written = static_cast<size_t>(res); + sent += written; + + if (sent == len) { + return sent; + } + + /* partial write */ + firstTry = false; + iov.iov_len -= written; + iov.iov_base = reinterpret_cast<void*>(reinterpret_cast<char*>(iov.iov_base) + written); + } + else if (res == 0) { + return res; + } + else if (res == -1) { + int err = errno; + if (err == EINTR) { + continue; + } + else if (err == EAGAIN || err == EWOULDBLOCK || err == EINPROGRESS || err == ENOTCONN) { + /* EINPROGRESS might happen with non blocking socket, + especially with TCP Fast Open */ + return sent; + } + else { + unixDie("failed in sendMsgWithTimeout"); + } + } + } + while (true); + + return 0; +} + +template class NetmaskTree<bool, Netmask>; + +/* requires a non-blocking socket. + On Linux, we could use MSG_DONTWAIT on a blocking socket + but this is not portable. +*/ +bool isTCPSocketUsable(int sock) +{ + int err = 0; + char buf = '\0'; + size_t buf_size = sizeof(buf); + + do { + ssize_t got = recv(sock, &buf, buf_size, MSG_PEEK); + + if (got > 0) { + /* socket is usable, some data is even waiting to be read */ + return true; + } + else if (got == 0) { + /* other end has closed the socket */ + return false; + } + else { + err = errno; + + if (err == EAGAIN || err == EWOULDBLOCK) { + /* socket is usable, no data waiting */ + return true; + } + else { + if (err != EINTR) { + /* something is wrong, could be ECONNRESET, + ENOTCONN, EPIPE, but anyway this socket is + not usable. */ + return false; + } + } + } + } while (err == EINTR); + + return false; +} +/* mission in life: parse four cases + 1) [2002::1]:53 + 2) 1.2.3.4 + 3) 1.2.3.4:5300 + 4) 2001::1 no port allowed +*/ + +ComboAddress parseIPAndPort(const std::string& input, uint16_t port) +{ + if (input[0] == '[') { // case 1 + auto both = splitField(input.substr(1), ']'); + return ComboAddress(both.first, both.second.empty() ? port : static_cast<uint16_t>(pdns_stou(both.second.substr(1)))); + } + + string::size_type count = 0; + for (char c : input) { + if (c == ':') { + count++; + } + if (count > 1) { + break; + } + } + switch (count) { + case 0: // case 2 + return ComboAddress(input, port); + case 1: { // case 3 + string::size_type cpos = input.rfind(':'); + pair<std::string,std::string> both; + both.first = input.substr(0, cpos); + both.second = input.substr(cpos + 1); + + uint16_t newport = static_cast<uint16_t>(pdns_stou(both.second)); + return ComboAddress(both.first, newport); + } + default: // case 4 + return ComboAddress(input, port); + } +} + +void setSocketBuffer(int fd, int optname, uint32_t size) +{ + uint32_t psize = 0; + socklen_t len = sizeof(psize); + + if (!getsockopt(fd, SOL_SOCKET, optname, &psize, &len) && psize > size) { + throw std::runtime_error("Not decreasing socket buffer size from " + std::to_string(psize) + " to " + std::to_string(size)); + } + + if (setsockopt(fd, SOL_SOCKET, optname, &size, sizeof(size)) < 0) { + throw std::runtime_error("Unable to raise socket buffer size to " + std::to_string(size) + ": " + stringerror()); + } +} + +void setSocketReceiveBuffer(int fd, uint32_t size) +{ + setSocketBuffer(fd, SO_RCVBUF, size); +} + +void setSocketSendBuffer(int fd, uint32_t size) +{ + setSocketBuffer(fd, SO_SNDBUF, size); +} |