/* anet.c -- Basic TCP socket stuff made a bit less boring
 *
 * Copyright (c) 2006-2012, Salvatore Sanfilippo
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions are met:
 *
 *   * Redistributions of source code must retain the above copyright notice,
 *     this list of conditions and the following disclaimer.
 *   * Redistributions in binary form must reproduce the above copyright
 *     notice, this list of conditions and the following disclaimer in the
 *     documentation and/or other materials provided with the distribution.
 *   * Neither the name of Redis nor the names of its contributors may be used
 *     to endorse or promote products derived from this software without
 *     specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
 * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
 * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
 * POSSIBILITY OF SUCH DAMAGE.
 */

#include "fmacros.h"

#include <sys/types.h>
#include <sys/socket.h>
#include <sys/stat.h>
#include <sys/un.h>
#include <sys/time.h>
#include <netinet/in.h>
#include <netinet/tcp.h>
#include <arpa/inet.h>
#include <unistd.h>
#include <fcntl.h>
#include <string.h>
#include <netdb.h>
#include <errno.h>
#include <stdarg.h>
#include <stdio.h>

#include "anet.h"
#include "config.h"

#define UNUSED(x) (void)(x)

/* Format a printf-style message into the caller supplied error buffer.
 *
 * 'err' may be NULL, in which case nothing is written. Otherwise at most
 * ANET_ERR_LEN bytes (terminator included) are stored, so the buffer must
 * be at least that large. */
static void anetSetError(char *err, const char *fmt, ...)
{
    va_list ap;

    if (!err) return;
    va_start(ap, fmt);
    vsnprintf(err, ANET_ERR_LEN, fmt, ap);
    va_end(ap);
}

/* Put 'fd' in non-blocking mode when 'non_block' is non-zero, or in
 * blocking mode when it is zero.
 *
 * Returns ANET_OK on success (including when the descriptor was already in
 * the requested mode) and ANET_ERR, with a message in 'err', when fcntl()
 * fails. */
int anetSetBlock(char *err, int fd, int non_block) {
    int flags;

    /* Set the socket blocking (if non_block is zero) or non-blocking.
     * Note that fcntl(2) for F_GETFL and F_SETFL can't be
     * interrupted by a signal. */
    if ((flags = fcntl(fd, F_GETFL)) == -1) {
        anetSetError(err, "fcntl(F_GETFL): %s", strerror(errno));
        return ANET_ERR;
    }

    /* Check if this flag has been set or unset, if so,
     * then there is no need to call fcntl to set/unset it again. */
    if (!!(flags & O_NONBLOCK) == !!non_block)
        return ANET_OK;

    if (non_block)
        flags |= O_NONBLOCK;
    else
        flags &= ~O_NONBLOCK;

    if (fcntl(fd, F_SETFL, flags) == -1) {
        anetSetError(err, "fcntl(F_SETFL,O_NONBLOCK): %s", strerror(errno));
        return ANET_ERR;
    }
    return ANET_OK;
}

/* Shortcut for anetSetBlock(err, fd, 1). */
int anetNonBlock(char *err, int fd) {
    return anetSetBlock(err,fd,1);
}

/* Shortcut for anetSetBlock(err, fd, 0). */
int anetBlock(char *err, int fd) {
    return anetSetBlock(err,fd,0);
}

/* Enable the FD_CLOEXEC on the given fd to avoid fd leaks.
 * This function should be invoked for fd's on specific places
 * where fork + execve system calls are called.
 *
 * Both fcntl() calls are retried when interrupted (EINTR). Returns -1 when
 * fcntl() fails, and a non-negative value otherwise: the current descriptor
 * flags when FD_CLOEXEC was already set, or the result of F_SETFD. */
int anetCloexec(int fd) {
    int r;
    int flags;

    do {
        r = fcntl(fd, F_GETFD);
    } while (r == -1 && errno == EINTR);

    /* Nothing to do on failure or when the flag is already present. */
    if (r == -1 || (r & FD_CLOEXEC))
        return r;

    flags = r | FD_CLOEXEC;

    do {
        r = fcntl(fd, F_SETFD, flags);
    } while (r == -1 && errno == EINTR);

    return r;
}

/* Set TCP keep alive option to detect dead peers. The 'interval' argument
 * is only used where platform specific options are available: on Linux it
 * drives the probe send time, interval and count; on macOS it sets the idle
 * time. Elsewhere only SO_KEEPALIVE is enabled and 'interval' is ignored.
 *
 * Returns ANET_OK on success, ANET_ERR (with a message in 'err') as soon as
 * one of the setsockopt() calls fails. */
int anetKeepAlive(char *err, int fd, int interval)
{
    int val = 1;

    if (setsockopt(fd, SOL_SOCKET, SO_KEEPALIVE, &val, sizeof(val)) == -1)
    {
        anetSetError(err, "setsockopt SO_KEEPALIVE: %s", strerror(errno));
        return ANET_ERR;
    }

#ifdef __linux__
    /* Default settings are more or less garbage, with the keepalive time
     * set to 7200 by default on Linux. Modify settings to make the feature
     * actually useful. */

    /* Send first probe after interval. */
    val = interval;
    if (setsockopt(fd, IPPROTO_TCP, TCP_KEEPIDLE, &val, sizeof(val)) < 0) {
        anetSetError(err, "setsockopt TCP_KEEPIDLE: %s", strerror(errno));
        return ANET_ERR;
    }

    /* Send next probes after the specified interval. Note that we set the
     * delay as interval / 3, as we send three probes before detecting
     * an error (see the next setsockopt call). */
    val = interval/3;
    if (val == 0) val = 1;
    if (setsockopt(fd, IPPROTO_TCP, TCP_KEEPINTVL, &val, sizeof(val)) < 0) {
        anetSetError(err, "setsockopt TCP_KEEPINTVL: %s", strerror(errno));
        return ANET_ERR;
    }

    /* Consider the socket in error state after we send three ACK
     * probes without getting a reply. */
    val = 3;
    if (setsockopt(fd, IPPROTO_TCP, TCP_KEEPCNT, &val, sizeof(val)) < 0) {
        anetSetError(err, "setsockopt TCP_KEEPCNT: %s", strerror(errno));
        return ANET_ERR;
    }
#elif defined(__APPLE__)
    /* Set idle time with interval */
    val = interval;
    if (setsockopt(fd, IPPROTO_TCP, TCP_KEEPALIVE, &val, sizeof(val)) < 0) {
        anetSetError(err, "setsockopt TCP_KEEPALIVE: %s", strerror(errno));
        return ANET_ERR;
    }
#else
    ((void) interval); /* Avoid unused var warning for other systems. */
#endif

    return ANET_OK;
}

/* Set the TCP_NODELAY option of 'fd' to 'val' (1 enables it, 0 disables
 * it). Returns ANET_OK, or ANET_ERR with a message in 'err'. */
static int anetSetTcpNoDelay(char *err, int fd, int val)
{
    if (setsockopt(fd, IPPROTO_TCP, TCP_NODELAY, &val, sizeof(val)) == -1)
    {
        anetSetError(err, "setsockopt TCP_NODELAY: %s", strerror(errno));
        return ANET_ERR;
    }
    return ANET_OK;
}

/* Turn TCP_NODELAY on for 'fd'. */
int anetEnableTcpNoDelay(char *err, int fd)
{
    return anetSetTcpNoDelay(err, fd, 1);
}

/* Turn TCP_NODELAY off for 'fd'. */
int anetDisableTcpNoDelay(char *err, int fd)
{
    return anetSetTcpNoDelay(err, fd, 0);
}

/* Set the socket send timeout (SO_SNDTIMEO socket option) to the specified
 * number of milliseconds, or disable it if the 'ms' argument is zero.
*/
int anetSendTimeout(char *err, int fd, long long ms) {
    struct timeval timeout;

    /* Whole seconds go in tv_sec, the remaining milliseconds are converted
     * to microseconds for tv_usec. */
    timeout.tv_sec = ms / 1000;
    timeout.tv_usec = (ms % 1000) * 1000;

    if (setsockopt(fd, SOL_SOCKET, SO_SNDTIMEO, &timeout, sizeof(timeout)) != -1)
        return ANET_OK;

    anetSetError(err, "setsockopt SO_SNDTIMEO: %s", strerror(errno));
    return ANET_ERR;
}

/* Receive side counterpart of anetSendTimeout(): program the SO_RCVTIMEO
 * socket option of 'fd' with 'ms' milliseconds, where zero disables the
 * timeout. Returns ANET_OK, or ANET_ERR with a message stored in 'err'
 * when setsockopt() fails. */
int anetRecvTimeout(char *err, int fd, long long ms) {
    struct timeval timeout;

    /* Same seconds / microseconds split used for the send timeout. */
    timeout.tv_sec = ms / 1000;
    timeout.tv_usec = (ms % 1000) * 1000;

    if (setsockopt(fd, SOL_SOCKET, SO_RCVTIMEO, &timeout, sizeof(timeout)) != -1)
        return ANET_OK;

    anetSetError(err, "setsockopt SO_RCVTIMEO: %s", strerror(errno));
    return ANET_ERR;
}

/* Resolve the hostname "host" and set the string representation of the
 * IP address into the buffer pointed by "ipbuf".
 *
 * If flags is set to ANET_IP_ONLY the function only resolves hostnames
 * that are actually already IPv4 or IPv6 addresses. This turns the function
 * into a validating / normalizing function.
*/ int anetResolve(char *err, char *host, char *ipbuf, size_t ipbuf_len, int flags) { struct addrinfo hints, *info; int rv; memset(&hints,0,sizeof(hints)); if (flags & ANET_IP_ONLY) hints.ai_flags = AI_NUMERICHOST; hints.ai_family = AF_UNSPEC; hints.ai_socktype = SOCK_STREAM; /* specify socktype to avoid dups */ if ((rv = getaddrinfo(host, NULL, &hints, &info)) != 0) { anetSetError(err, "%s", gai_strerror(rv)); return ANET_ERR; } if (info->ai_family == AF_INET) { struct sockaddr_in *sa = (struct sockaddr_in *)info->ai_addr; inet_ntop(AF_INET, &(sa->sin_addr), ipbuf, ipbuf_len); } else { struct sockaddr_in6 *sa = (struct sockaddr_in6 *)info->ai_addr; inet_ntop(AF_INET6, &(sa->sin6_addr), ipbuf, ipbuf_len); } freeaddrinfo(info); return ANET_OK; } static int anetSetReuseAddr(char *err, int fd) { int yes = 1; /* Make sure connection-intensive things like the redis benchmark * will be able to close/open sockets a zillion of times */ if (setsockopt(fd, SOL_SOCKET, SO_REUSEADDR, &yes, sizeof(yes)) == -1) { anetSetError(err, "setsockopt SO_REUSEADDR: %s", strerror(errno)); return ANET_ERR; } return ANET_OK; } static int anetCreateSocket(char *err, int domain) { int s; if ((s = socket(domain, SOCK_STREAM, 0)) == -1) { anetSetError(err, "creating socket: %s", strerror(errno)); return ANET_ERR; } /* Make sure connection-intensive things like the redis benchmark * will be able to close/open sockets a zillion of times */ if (anetSetReuseAddr(err,s) == ANET_ERR) { close(s); return ANET_ERR; } return s; } #define ANET_CONNECT_NONE 0 #define ANET_CONNECT_NONBLOCK 1 #define ANET_CONNECT_BE_BINDING 2 /* Best effort binding. 
*/

/* Resolve 'addr' and try to connect a TCP socket to 'port' on each of the
 * returned addresses in turn, stopping at the first one that works.
 *
 * 'source_addr', when not NULL, is resolved too and the socket is bound to
 * the first of its addresses that bind() accepts before connecting.
 * 'flags' is a combination of the ANET_CONNECT_* bits:
 *
 *   ANET_CONNECT_NONBLOCK    the socket is made non-blocking before
 *                            connect(), and EINPROGRESS is then treated as
 *                            success.
 *   ANET_CONNECT_BE_BINDING  if the attempt with 'source_addr' fails, retry
 *                            once without any source address.
 *
 * Returns the socket descriptor, or ANET_ERR with a message in 'err'. */
static int anetTcpGenericConnect(char *err, const char *addr, int port,
                                 const char *source_addr, int flags)
{
    int s = ANET_ERR, rv;
    char portstr[6];  /* strlen("65535") + 1; */
    struct addrinfo hints, *servinfo, *bservinfo, *p, *b;

    snprintf(portstr,sizeof(portstr),"%d",port);
    memset(&hints,0,sizeof(hints));
    hints.ai_family = AF_UNSPEC;
    hints.ai_socktype = SOCK_STREAM;

    if ((rv = getaddrinfo(addr,portstr,&hints,&servinfo)) != 0) {
        anetSetError(err, "%s", gai_strerror(rv));
        return ANET_ERR;
    }
    for (p = servinfo; p != NULL; p = p->ai_next) {
        /* Try to create the socket and to connect it.
         * If we fail in the socket() call, or on connect(), we retry with
         * the next entry in servinfo. */
        if ((s = socket(p->ai_family,p->ai_socktype,p->ai_protocol)) == -1)
            continue;
        if (anetSetReuseAddr(err,s) == ANET_ERR) goto error;
        if (flags & ANET_CONNECT_NONBLOCK && anetNonBlock(err,s) != ANET_OK)
            goto error;
        if (source_addr) {
            int bound = 0;
            int bind_errno = 0;
            /* Using getaddrinfo saves us from self-determining IPv4 vs IPv6 */
            if ((rv = getaddrinfo(source_addr, NULL, &hints, &bservinfo)) != 0)
            {
                anetSetError(err, "%s", gai_strerror(rv));
                goto error;
            }
            for (b = bservinfo; b != NULL; b = b->ai_next) {
                if (bind(s,b->ai_addr,b->ai_addrlen) != -1) {
                    bound = 1;
                    break;
                }
            }
            /* Remember why the last bind() failed before freeaddrinfo()
             * gets a chance to modify errno. */
            bind_errno = errno;
            freeaddrinfo(bservinfo);
            if (!bound) {
                anetSetError(err, "bind: %s", strerror(bind_errno));
                goto error;
            }
        }
        if (connect(s,p->ai_addr,p->ai_addrlen) == -1) {
            int connect_errno = errno;

            /* If the socket is non-blocking, it is ok for connect() to
             * return an EINPROGRESS error here. */
            if (connect_errno == EINPROGRESS && flags & ANET_CONNECT_NONBLOCK)
                goto end;
            close(s);
            s = ANET_ERR;
            /* Keep the connect() error for the message reported once all
             * the addresses have been tried; close() may have changed it. */
            errno = connect_errno;
            continue;
        }

        /* If we ended an iteration of the for loop without errors, we
         * have a connected socket. Let's return to the caller. */
        goto end;
    }
    if (p == NULL)
        anetSetError(err, "creating socket: %s", strerror(errno));

error:
    if (s != ANET_ERR) {
        close(s);
        s = ANET_ERR;
    }

end:
    freeaddrinfo(servinfo);

    /* Handle best effort binding: if a binding address was used, but it is
     * not possible to create a socket, try again without a binding address. */
    if (s == ANET_ERR && source_addr && (flags & ANET_CONNECT_BE_BINDING)) {
        return anetTcpGenericConnect(err,addr,port,NULL,flags);
    } else {
        return s;
    }
}

/* Start a non-blocking TCP connection to addr:port, without binding to a
 * source address. Returns the socket or ANET_ERR. */
int anetTcpNonBlockConnect(char *err, const char *addr, int port)
{
    return anetTcpGenericConnect(err,addr,port,NULL,ANET_CONNECT_NONBLOCK);
}

/* Start a non-blocking TCP connection to addr:port, binding to
 * 'source_addr' on a best effort basis: if that attempt fails the
 * connection is retried without a source address. Returns the socket or
 * ANET_ERR. */
int anetTcpNonBlockBestEffortBindConnect(char *err, const char *addr, int port,
                                         const char *source_addr)
{
    return anetTcpGenericConnect(err,addr,port,source_addr,
            ANET_CONNECT_NONBLOCK|ANET_CONNECT_BE_BINDING);
}

/* Connect to the Unix domain socket at 'path'. When 'flags' contains
 * ANET_CONNECT_NONBLOCK the socket is made non-blocking first and an
 * EINPROGRESS result from connect() is treated as success.
 *
 * At most sizeof(sun_path)-1 bytes of 'path' are used. Returns the socket,
 * or ANET_ERR with a message in 'err'. */
int anetUnixGenericConnect(char *err, const char *path, int flags)
{
    int s;
    struct sockaddr_un sa;

    if ((s = anetCreateSocket(err,AF_LOCAL)) == ANET_ERR)
        return ANET_ERR;

    /* Zero the whole address first: strncpy() below copies at most
     * sizeof(sun_path)-1 bytes and does not terminate the string when the
     * source is that long, so the zeroed last byte is what guarantees a
     * terminated sun_path. */
    memset(&sa,0,sizeof(sa));
    sa.sun_family = AF_LOCAL;
    strncpy(sa.sun_path,path,sizeof(sa.sun_path)-1);
    if (flags & ANET_CONNECT_NONBLOCK) {
        if (anetNonBlock(err,s) != ANET_OK) {
            close(s);
            return ANET_ERR;
        }
    }
    if (connect(s,(struct sockaddr*)&sa,sizeof(sa)) == -1) {
        if (errno == EINPROGRESS &&
            flags & ANET_CONNECT_NONBLOCK)
            return s;

        anetSetError(err, "connect: %s", strerror(errno));
        close(s);
        return ANET_ERR;
    }
    return s;
}

/* Bind socket 's' to 'sa' and start listening with the given 'backlog'.
 * For AF_LOCAL addresses a non-zero 'perm' is applied to the socket path
 * with chmod(); the result of that call is not checked.
 *
 * Returns ANET_OK on success. On failure the socket is closed, a message is
 * stored in 'err' and ANET_ERR is returned. */
static int anetListen(char *err, int s, struct sockaddr *sa, socklen_t len, int backlog, mode_t perm) {
    if (bind(s,sa,len) == -1) {
        anetSetError(err, "bind: %s", strerror(errno));
        close(s);
        return ANET_ERR;
    }

    if (sa->sa_family == AF_LOCAL && perm)
        chmod(((struct sockaddr_un *) sa)->sun_path, perm);

    if (listen(s, backlog) == -1) {
        anetSetError(err, "listen: %s", strerror(errno));
        close(s);
        return ANET_ERR;
    }
    return ANET_OK;
}

/* Enable the IPV6_V6ONLY option on socket 's'. Returns ANET_OK, or ANET_ERR
 * with a message in 'err'. The socket is left open on failure. */
static int anetV6Only(char *err, int s) {
    int yes = 1;
    if (setsockopt(s,IPPROTO_IPV6,IPV6_V6ONLY,&yes,sizeof(yes)) == -1) {
        anetSetError(err, "setsockopt: %s", strerror(errno));
        return ANET_ERR;
    }
    return ANET_OK;
}

/* Create a listening TCP socket for address family 'af' on 'port'.
 *
 * 'bindaddr' selects the local address; NULL, "*" and (for AF_INET6 only)
 * "::*" all mean that no specific address is passed to getaddrinfo(). Only
 * the first address for which socket() succeeds is tried.
 *
 * Returns the listening socket, or ANET_ERR with a message in 'err'. */
static int _anetTcpServer(char *err, int port, char *bindaddr, int af, int backlog)
{
    int s = -1, rv;
    char _port[6];  /* strlen("65535") + 1 */
    struct addrinfo hints, *servinfo, *p;

    snprintf(_port,sizeof(_port),"%d",port);
    memset(&hints,0,sizeof(hints));
    hints.ai_family = af;
    hints.ai_socktype = SOCK_STREAM;
    hints.ai_flags = AI_PASSIVE;    /* No effect if bindaddr != NULL */
    if (bindaddr && !strcmp("*", bindaddr))
        bindaddr = NULL;
    if (af == AF_INET6 && bindaddr && !strcmp("::*", bindaddr))
        bindaddr = NULL;

    if ((rv = getaddrinfo(bindaddr,_port,&hints,&servinfo)) != 0) {
        anetSetError(err, "%s", gai_strerror(rv));
        return ANET_ERR;
    }
    for (p = servinfo; p != NULL; p = p->ai_next) {
        if ((s = socket(p->ai_family,p->ai_socktype,p->ai_protocol)) == -1)
            continue;

        if (af == AF_INET6 && anetV6Only(err,s) == ANET_ERR) goto error;
        if (anetSetReuseAddr(err,s) == ANET_ERR) goto error;
        /* anetListen() closes the socket itself when it fails, so jump
         * straight to 'end' rather than to 'error'. */
        if (anetListen(err,s,p->ai_addr,p->ai_addrlen,backlog,0) == ANET_ERR) s = ANET_ERR;
        goto end;
    }
    if (p == NULL) {
        anetSetError(err, "unable to bind socket, errno: %d", errno);
        goto error;
    }

error:
    if (s != -1) close(s);
    s = ANET_ERR;
end:
    freeaddrinfo(servinfo);
    return s;
}

/* Create an IPv4 listening socket. See _anetTcpServer(). */
int anetTcpServer(char *err, int port, char *bindaddr, int backlog)
{
    return _anetTcpServer(err, port, bindaddr, AF_INET, backlog);
}

/* Create an IPv6 listening socket. See _anetTcpServer(). */
int anetTcp6Server(char *err, int port, char *bindaddr, int backlog)
{
    return _anetTcpServer(err, port, bindaddr, AF_INET6, backlog);
}

/* Create a listening Unix domain socket bound to 'path'. A non-zero 'perm'
 * is applied to the socket file by anetListen().
 *
 * Paths that do not fit in sun_path together with their terminator are
 * rejected. Returns the listening socket, or ANET_ERR with a message in
 * 'err'. */
int anetUnixServer(char *err, char *path, mode_t perm, int backlog)
{
    int s;
    struct sockaddr_un sa;

    if (strlen(path) > sizeof(sa.sun_path)-1) {
        anetSetError(err,"unix socket path too long (%zu), must be under %zu", strlen(path), sizeof(sa.sun_path));
        return ANET_ERR;
    }
    if ((s = anetCreateSocket(err,AF_LOCAL)) == ANET_ERR)
        return ANET_ERR;

    memset(&sa,0,sizeof(sa));
    sa.sun_family = AF_LOCAL;
    strncpy(sa.sun_path,path,sizeof(sa.sun_path)-1);
    /* No close() here on failure: anetListen() already closed the socket. */
    if (anetListen(err,s,(struct sockaddr*)&sa,sizeof(sa),backlog,perm) == ANET_ERR)
        return ANET_ERR;
    return s;
}

/* Accept a connection and also make sure the socket is non-blocking, and CLOEXEC.
 * The peer address is stored in 'sa' / 'len' as accept() does. The call is
 * retried when interrupted by a signal.
 * returns the new socket FD, or ANET_ERR on error. */
static int anetGenericAccept(char *err, int s, struct sockaddr *sa, socklen_t *len) {
    int fd;
    do {
        /* Use the accept4() call on linux to simultaneously accept and
         * set a socket as non-blocking. */
#ifdef HAVE_ACCEPT4
        fd = accept4(s, sa, len,  SOCK_NONBLOCK | SOCK_CLOEXEC);
#else
        fd = accept(s,sa,len);
#endif
    } while(fd == -1 && errno == EINTR);
    if (fd == -1) {
        anetSetError(err, "accept: %s", strerror(errno));
        return ANET_ERR;
    }
#ifndef HAVE_ACCEPT4
    /* Without accept4() the two flags have to be set one at a time. */
    if (anetCloexec(fd) == -1) {
        anetSetError(err, "anetCloexec: %s", strerror(errno));
        close(fd);
        return ANET_ERR;
    }
    if (anetNonBlock(err, fd) != ANET_OK) {
        close(fd);
        return ANET_ERR;
    }
#endif
    return fd;
}

/* Accept a connection and also make sure the socket is non-blocking, and CLOEXEC.
 * When not NULL, 'ip' (of size 'ip_len') receives the textual peer address
 * and 'port' the peer port. Any family other than AF_INET is handled as
 * IPv6.
 * returns the new socket FD, or ANET_ERR on error. */
int anetTcpAccept(char *err, int serversock, char *ip, size_t ip_len, int *port) {
    int fd;
    struct sockaddr_storage sa;
    socklen_t salen = sizeof(sa);
    if ((fd = anetGenericAccept(err,serversock,(struct sockaddr*)&sa,&salen)) == ANET_ERR)
        return ANET_ERR;

    if (sa.ss_family == AF_INET) {
        struct sockaddr_in *s = (struct sockaddr_in *)&sa;
        if (ip) inet_ntop(AF_INET,(void*)&(s->sin_addr),ip,ip_len);
        if (port) *port = ntohs(s->sin_port);
    } else {
        struct sockaddr_in6 *s = (struct sockaddr_in6 *)&sa;
        if (ip) inet_ntop(AF_INET6,(void*)&(s->sin6_addr),ip,ip_len);
        if (port) *port = ntohs(s->sin6_port);
    }
    return fd;
}

/* Accept a connection and also make sure the socket is non-blocking, and CLOEXEC.
 * returns the new socket FD, or -1 on error.
*/
int anetUnixAccept(char *err, int s) {
    struct sockaddr_un peer;
    socklen_t peer_len = sizeof(peer);
    int client = anetGenericAccept(err, s, (struct sockaddr *)&peer, &peer_len);

    /* The peer address is only needed as scratch space for the accept. */
    return (client == ANET_ERR) ? ANET_ERR : client;
}

/* Describe one end of socket 'fd' as an address string plus a port.
 *
 * The peer end is queried when 'fd_to_str_type' is FD_TO_PEER_NAME, the
 * local end otherwise. 'ip' (of size 'ip_len') and 'port' may each be NULL
 * when the caller is not interested in that part. Unix sockets are
 * reported as "/unixsocket" with port 0.
 *
 * Returns 0 on success. On failure -1 is returned, 'port' is set to 0 and
 * 'ip' is set to "?" (or to an empty string if only one byte is
 * available). */
int anetFdToString(int fd, char *ip, size_t ip_len, int *port, int fd_to_str_type) {
    struct sockaddr_storage addr;
    socklen_t addr_len = sizeof(addr);
    int rc;

    if (fd_to_str_type == FD_TO_PEER_NAME)
        rc = getpeername(fd, (struct sockaddr *)&addr, &addr_len);
    else
        rc = getsockname(fd, (struct sockaddr *)&addr, &addr_len);
    if (rc == -1) goto error;

    switch (addr.ss_family) {
    case AF_INET: {
        struct sockaddr_in *in4 = (struct sockaddr_in *)&addr;

        if (ip && inet_ntop(AF_INET, (void *)&(in4->sin_addr), ip, ip_len) == NULL)
            goto error;
        if (port) *port = ntohs(in4->sin_port);
        break;
    }
    case AF_INET6: {
        struct sockaddr_in6 *in6 = (struct sockaddr_in6 *)&addr;

        if (ip && inet_ntop(AF_INET6, (void *)&(in6->sin6_addr), ip, ip_len) == NULL)
            goto error;
        if (port) *port = ntohs(in6->sin6_port);
        break;
    }
    case AF_UNIX:
        if (ip) {
            /* A truncated label counts as a failure. */
            int written = snprintf(ip, ip_len, "/unixsocket");

            if (written < 0 || (unsigned int) written >= ip_len) goto error;
        }
        if (port) *port = 0;
        break;
    default:
        goto error;
    }
    return 0;

error:
    /* Leave a well defined placeholder in the output arguments, as far as
     * the size of the buffer allows. */
    if (ip && ip_len >= 2) {
        ip[0] = '?';
        ip[1] = '\0';
    } else if (ip && ip_len == 1) {
        ip[0] = '\0';
    }
    if (port) *port = 0;
    return -1;
}

/* Write 'ip' and 'port' into 'buf' as a single, easy to parse string.
 * An 'ip' containing ':' is taken to be IPv6 and wrapped in square
 * brackets; address and port are separated by a colon either way. This is
 * the standard way to display addresses within Redis.
 *
 * Returns the value returned by snprintf(). */
int anetFormatAddr(char *buf, size_t buf_len, char *ip, int port) {
    if (strchr(ip, ':') != NULL)
        return snprintf(buf, buf_len, "[%s]:%d", ip, port);
    return snprintf(buf, buf_len, "%s:%d", ip, port);
}

/* Like anetFormatAddr() but extract ip and port from the socket's peer/sockname.
*/ int anetFormatFdAddr(int fd, char *buf, size_t buf_len, int fd_to_str_type) { char ip[INET6_ADDRSTRLEN]; int port; anetFdToString(fd,ip,sizeof(ip),&port,fd_to_str_type); return anetFormatAddr(buf, buf_len, ip, port); } /* Create a pipe buffer with given flags for read end and write end. * Note that it supports the file flags defined by pipe2() and fcntl(F_SETFL), * and one of the use cases is O_CLOEXEC|O_NONBLOCK. */ int anetPipe(int fds[2], int read_flags, int write_flags) { int pipe_flags = 0; #if defined(__linux__) || defined(__FreeBSD__) /* When possible, try to leverage pipe2() to apply flags that are common to both ends. * There is no harm to set O_CLOEXEC to prevent fd leaks. */ pipe_flags = O_CLOEXEC | (read_flags & write_flags); if (pipe2(fds, pipe_flags)) { /* Fail on real failures, and fallback to simple pipe if pipe2 is unsupported. */ if (errno != ENOSYS && errno != EINVAL) return -1; pipe_flags = 0; } else { /* If the flags on both ends are identical, no need to do anything else. */ if ((O_CLOEXEC | read_flags) == (O_CLOEXEC | write_flags)) return 0; /* Clear the flags which have already been set using pipe2. */ read_flags &= ~pipe_flags; write_flags &= ~pipe_flags; } #endif /* When we reach here with pipe_flags of 0, it means pipe2 failed (or was not attempted), * so we try to use pipe. Otherwise, we skip and proceed to set specific flags below. */ if (pipe_flags == 0 && pipe(fds)) return -1; /* File descriptor flags. * Currently, only one such flag is defined: FD_CLOEXEC, the close-on-exec flag. */ if (read_flags & O_CLOEXEC) if (fcntl(fds[0], F_SETFD, FD_CLOEXEC)) goto error; if (write_flags & O_CLOEXEC) if (fcntl(fds[1], F_SETFD, FD_CLOEXEC)) goto error; /* File status flags after clearing the file descriptor flag O_CLOEXEC. 
*/ read_flags &= ~O_CLOEXEC; if (read_flags) if (fcntl(fds[0], F_SETFL, read_flags)) goto error; write_flags &= ~O_CLOEXEC; if (write_flags) if (fcntl(fds[1], F_SETFL, write_flags)) goto error; return 0; error: close(fds[0]); close(fds[1]); return -1; } int anetSetSockMarkId(char *err, int fd, uint32_t id) { #ifdef HAVE_SOCKOPTMARKID if (setsockopt(fd, SOL_SOCKET, SOCKOPTMARKID, (void *)&id, sizeof(id)) == -1) { anetSetError(err, "setsockopt: %s", strerror(errno)); return ANET_ERR; } return ANET_OK; #else UNUSED(fd); UNUSED(id); anetSetError(err,"anetSetSockMarkid unsupported on this platform"); return ANET_OK; #endif }