diff options
Diffstat (limited to 'lib/tsocket/tsocket_bsd.c')
-rw-r--r-- | lib/tsocket/tsocket_bsd.c | 2845 |
1 files changed, 2845 insertions, 0 deletions
diff --git a/lib/tsocket/tsocket_bsd.c b/lib/tsocket/tsocket_bsd.c new file mode 100644 index 0000000..4483b03 --- /dev/null +++ b/lib/tsocket/tsocket_bsd.c @@ -0,0 +1,2845 @@ +/* + Unix SMB/CIFS implementation. + + Copyright (C) Stefan Metzmacher 2009 + + ** NOTE! The following LGPL license applies to the tsocket + ** library. This does NOT imply that all of Samba is released + ** under the LGPL + + This library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 3 of the License, or (at your option) any later version. + + This library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with this library; if not, see <http://www.gnu.org/licenses/>. +*/ + +#include "replace.h" +#include "system/filesys.h" +#include "system/network.h" +#include "tsocket.h" +#include "tsocket_internal.h" +#include "lib/util/iov_buf.h" +#include "lib/util/blocking.h" +#include "lib/util/util_net.h" +#include "lib/util/samba_util.h" + +static int tsocket_bsd_error_from_errno(int ret, + int sys_errno, + bool *retry) +{ + *retry = false; + + if (ret >= 0) { + return 0; + } + + if (ret != -1) { + return EIO; + } + + if (sys_errno == 0) { + return EIO; + } + + if (sys_errno == EINTR) { + *retry = true; + return sys_errno; + } + + if (sys_errno == EINPROGRESS) { + *retry = true; + return sys_errno; + } + + if (sys_errno == EAGAIN) { + *retry = true; + return sys_errno; + } + + /* ENOMEM is retryable on Solaris/illumos, and possibly other systems. */ + if (sys_errno == ENOMEM) { + *retry = true; + return sys_errno; + } + +#ifdef EWOULDBLOCK + if (sys_errno == EWOULDBLOCK) { + *retry = true; + return sys_errno; + } +#endif + + return sys_errno; +} + +static int tsocket_bsd_common_prepare_fd(int fd, bool high_fd) +{ + int i; + int sys_errno = 0; + int fds[3]; + int num_fds = 0; + + int result; + bool ok; + + if (fd == -1) { + return -1; + } + + /* first make a fd >= 3 */ + if (high_fd) { + while (fd < 3) { + fds[num_fds++] = fd; + fd = dup(fd); + if (fd == -1) { + sys_errno = errno; + break; + } + } + for (i=0; i<num_fds; i++) { + close(fds[i]); + } + if (fd == -1) { + errno = sys_errno; + return fd; + } + } + + result = set_blocking(fd, false); + if (result == -1) { + goto fail; + } + + ok = smb_set_close_on_exec(fd); + if (!ok) { + goto fail; + } + + return fd; + + fail: + if (fd != -1) { + sys_errno = errno; + close(fd); + errno = sys_errno; + } + return -1; +} + +#ifdef HAVE_LINUX_RTNETLINK_H +/** + * Get the amount of pending bytes from a netlink socket + * + * For some reason netlink sockets don't support querying the amount of pending + * data via ioctl with FIONREAD, which is what we use in tsocket_bsd_pending() + * below. + * + * We know we are on Linux as we're using netlink, which means we have a working + * MSG_TRUNC flag to recvmsg() as well, so we use that together with MSG_PEEK. + **/ +static ssize_t tsocket_bsd_netlink_pending(int fd) +{ + struct iovec iov; + struct msghdr msg; + char buf[1]; + + iov = (struct iovec) { + .iov_base = buf, + .iov_len = sizeof(buf) + }; + + msg = (struct msghdr) { + .msg_iov = &iov, + .msg_iovlen = 1 + }; + + return recvmsg(fd, &msg, MSG_PEEK | MSG_TRUNC); +} +#else +static ssize_t tsocket_bsd_netlink_pending(int fd) +{ + errno = ENOSYS; + return -1; +} +#endif + +static ssize_t tsocket_bsd_pending(int fd) +{ + int ret; + int value = 0; + + ret = ioctl(fd, FIONREAD, &value); + if (ret == -1) { + return ret; + } + + if (ret != 0) { + /* this should not be reached */ + errno = EIO; + return -1; + } + + if (value != 0) { + return value; + } + + return samba_socket_poll_or_sock_error(fd); +} + +static const struct tsocket_address_ops tsocket_address_bsd_ops; + +int _tsocket_address_bsd_from_sockaddr(TALLOC_CTX *mem_ctx, + const struct sockaddr *sa, + size_t sa_socklen, + struct tsocket_address **_addr, + const char *location) +{ + struct tsocket_address *addr; + struct samba_sockaddr *bsda = NULL; + + if (sa_socklen < sizeof(sa->sa_family)) { + errno = EINVAL; + return -1; + } + + switch (sa->sa_family) { + case AF_UNIX: + if (sa_socklen > sizeof(struct sockaddr_un)) { + sa_socklen = sizeof(struct sockaddr_un); + } + break; + case AF_INET: + if (sa_socklen < sizeof(struct sockaddr_in)) { + errno = EINVAL; + return -1; + } + sa_socklen = sizeof(struct sockaddr_in); + break; +#ifdef HAVE_IPV6 + case AF_INET6: + if (sa_socklen < sizeof(struct sockaddr_in6)) { + errno = EINVAL; + return -1; + } + sa_socklen = sizeof(struct sockaddr_in6); + break; +#endif + default: + errno = EAFNOSUPPORT; + return -1; + } + + if (sa_socklen > sizeof(struct sockaddr_storage)) { + errno = EINVAL; + return -1; + } + + addr = tsocket_address_create(mem_ctx, + &tsocket_address_bsd_ops, + &bsda, + struct samba_sockaddr, + location); + if (!addr) { + errno = ENOMEM; + return -1; + } + + ZERO_STRUCTP(bsda); + + memcpy(&bsda->u.ss, sa, sa_socklen); + + bsda->sa_socklen = sa_socklen; +#ifdef HAVE_STRUCT_SOCKADDR_SA_LEN + bsda->u.sa.sa_len = bsda->sa_socklen; +#endif + + *_addr = addr; + return 0; +} + +int _tsocket_address_bsd_from_samba_sockaddr(TALLOC_CTX *mem_ctx, + const struct samba_sockaddr *xs_addr, + struct tsocket_address **t_addr, + const char *location) +{ + return _tsocket_address_bsd_from_sockaddr(mem_ctx, + &xs_addr->u.sa, + xs_addr->sa_socklen, + t_addr, + location); +} + +ssize_t tsocket_address_bsd_sockaddr(const struct tsocket_address *addr, + struct sockaddr *sa, + size_t sa_socklen) +{ + struct samba_sockaddr *bsda = talloc_get_type(addr->private_data, + struct samba_sockaddr); + + if (!bsda) { + errno = EINVAL; + return -1; + } + + if (sa_socklen < bsda->sa_socklen) { + errno = EINVAL; + return -1; + } + + if (sa_socklen > bsda->sa_socklen) { + memset(sa, 0, sa_socklen); + sa_socklen = bsda->sa_socklen; + } + + memcpy(sa, &bsda->u.ss, sa_socklen); +#ifdef HAVE_STRUCT_SOCKADDR_SA_LEN + sa->sa_len = sa_socklen; +#endif + return sa_socklen; +} + +bool tsocket_address_is_inet(const struct tsocket_address *addr, const char *fam) +{ + struct samba_sockaddr *bsda = talloc_get_type(addr->private_data, + struct samba_sockaddr); + + if (!bsda) { + return false; + } + + switch (bsda->u.sa.sa_family) { + case AF_INET: + if (strcasecmp(fam, "ip") == 0) { + return true; + } + + if (strcasecmp(fam, "ipv4") == 0) { + return true; + } + + return false; +#ifdef HAVE_IPV6 + case AF_INET6: + if (strcasecmp(fam, "ip") == 0) { + return true; + } + + if (strcasecmp(fam, "ipv6") == 0) { + return true; + } + + return false; +#endif + } + + return false; +} + +int _tsocket_address_inet_from_strings(TALLOC_CTX *mem_ctx, + const char *fam, + const char *addr, + uint16_t port, + struct tsocket_address **_addr, + const char *location) +{ + struct addrinfo hints; + struct addrinfo *result = NULL; + char port_str[6]; + int ret; + + ZERO_STRUCT(hints); + /* + * we use SOCKET_STREAM here to get just one result + * back from getaddrinfo(). + */ + hints.ai_socktype = SOCK_STREAM; + hints.ai_flags = AI_NUMERICHOST | AI_NUMERICSERV; + + if (strcasecmp(fam, "ip") == 0) { + hints.ai_family = AF_UNSPEC; + if (!addr) { +#ifdef HAVE_IPV6 + addr = "::"; +#else + addr = "0.0.0.0"; +#endif + } + } else if (strcasecmp(fam, "ipv4") == 0) { + hints.ai_family = AF_INET; + if (!addr) { + addr = "0.0.0.0"; + } +#ifdef HAVE_IPV6 + } else if (strcasecmp(fam, "ipv6") == 0) { + hints.ai_family = AF_INET6; + if (!addr) { + addr = "::"; + } +#endif + } else { + errno = EAFNOSUPPORT; + return -1; + } + + snprintf(port_str, sizeof(port_str), "%u", port); + + ret = getaddrinfo(addr, port_str, &hints, &result); + if (ret != 0) { + switch (ret) { + case EAI_FAIL: + case EAI_NONAME: +#ifdef EAI_ADDRFAMILY + case EAI_ADDRFAMILY: +#endif + errno = EINVAL; + break; + } + ret = -1; + goto done; + } + + if (result->ai_socktype != SOCK_STREAM) { + errno = EINVAL; + ret = -1; + goto done; + } + + ret = _tsocket_address_bsd_from_sockaddr(mem_ctx, + result->ai_addr, + result->ai_addrlen, + _addr, + location); + +done: + if (result) { + freeaddrinfo(result); + } + return ret; +} + +int _tsocket_address_inet_from_hostport_strings(TALLOC_CTX *mem_ctx, + const char *fam, + const char *host_port_addr, + uint16_t default_port, + struct tsocket_address **_addr, + const char *location) +{ + char *pl_sq = NULL; + char *pr_sq = NULL; + char *pl_period = NULL; + char *port_sep = NULL; + char *cport = NULL; + char *buf = NULL; + uint64_t port = 0; + int ret; + char *s_addr = NULL; + uint16_t s_port = default_port; + bool conv_ret; + bool is_ipv6_by_squares = false; + + if (host_port_addr == NULL) { + /* got straight to next function if host_port_addr is NULL */ + goto get_addr; + } + buf = talloc_strdup(mem_ctx, host_port_addr); + if (buf == NULL) { + errno = ENOMEM; + return -1; + } + pl_period = strchr_m(buf, '.'); + port_sep = strrchr_m(buf, ':'); + pl_sq = strchr_m(buf, '['); + pr_sq = strrchr_m(buf, ']'); + /* See if its IPv4 or IPv6 */ + /* Only parse IPv6 with squares with/without port, and IPv4 with port */ + /* Everything else, let tsocket_address_inet_from string() */ + /* find parsing errors */ +#ifdef HAVE_IPV6 + is_ipv6_by_squares = (pl_sq != NULL && pr_sq != NULL && pr_sq > pl_sq); +#endif + if (is_ipv6_by_squares) { + /* IPv6 possibly with port - squares detected */ + port_sep = pr_sq + 1; + if (*port_sep == '\0') { + s_addr = pl_sq + 1; + *pr_sq = 0; + s_port = default_port; + goto get_addr; + } + if (*port_sep != ':') { + errno = EINVAL; + return -1; + } + cport = port_sep + 1; + conv_ret = conv_str_u64(cport, &port); + if (!conv_ret) { + errno = EINVAL; + return -1; + } + if (port > 65535) { + errno = EINVAL; + return -1; + } + s_port = (uint16_t)port; + *port_sep = 0; + *pr_sq = 0; + s_addr = pl_sq + 1; + *pl_sq = 0; + goto get_addr; + } else if (pl_period != NULL && port_sep != NULL) { + /* IPv4 with port - more than one period in string */ + cport = port_sep + 1; + conv_ret = conv_str_u64(cport, &port); + if (!conv_ret) { + errno = EINVAL; + return -1; + } + if (port > 65535) { + errno = EINVAL; + return -1; + } + s_port = (uint16_t)port; + *port_sep = 0; + s_addr = buf; + goto get_addr; + } else { + /* Everything else, let tsocket_address_inet_from string() */ + /* find parsing errors */ + s_addr = buf; + s_port = default_port; + goto get_addr; + } +get_addr: + ret = _tsocket_address_inet_from_strings( + mem_ctx, fam, s_addr, s_port, _addr, location); + + return ret; +} + +char *tsocket_address_inet_addr_string(const struct tsocket_address *addr, + TALLOC_CTX *mem_ctx) +{ + struct samba_sockaddr *bsda = talloc_get_type(addr->private_data, + struct samba_sockaddr); + char addr_str[INET6_ADDRSTRLEN+1]; + const char *str; + + if (!bsda) { + errno = EINVAL; + return NULL; + } + + switch (bsda->u.sa.sa_family) { + case AF_INET: + str = inet_ntop(bsda->u.in.sin_family, + &bsda->u.in.sin_addr, + addr_str, sizeof(addr_str)); + break; +#ifdef HAVE_IPV6 + case AF_INET6: + str = inet_ntop(bsda->u.in6.sin6_family, + &bsda->u.in6.sin6_addr, + addr_str, sizeof(addr_str)); + break; +#endif + default: + errno = EINVAL; + return NULL; + } + + if (!str) { + return NULL; + } + + return talloc_strdup(mem_ctx, str); +} + +uint16_t tsocket_address_inet_port(const struct tsocket_address *addr) +{ + struct samba_sockaddr *bsda = talloc_get_type(addr->private_data, + struct samba_sockaddr); + uint16_t port = 0; + + if (!bsda) { + errno = EINVAL; + return 0; + } + + switch (bsda->u.sa.sa_family) { + case AF_INET: + port = ntohs(bsda->u.in.sin_port); + break; +#ifdef HAVE_IPV6 + case AF_INET6: + port = ntohs(bsda->u.in6.sin6_port); + break; +#endif + default: + errno = EINVAL; + return 0; + } + + return port; +} + +int tsocket_address_inet_set_port(struct tsocket_address *addr, + uint16_t port) +{ + struct samba_sockaddr *bsda = talloc_get_type(addr->private_data, + struct samba_sockaddr); + + if (!bsda) { + errno = EINVAL; + return -1; + } + + switch (bsda->u.sa.sa_family) { + case AF_INET: + bsda->u.in.sin_port = htons(port); + break; +#ifdef HAVE_IPV6 + case AF_INET6: + bsda->u.in6.sin6_port = htons(port); + break; +#endif + default: + errno = EINVAL; + return -1; + } + + return 0; +} + +bool tsocket_address_is_unix(const struct tsocket_address *addr) +{ + struct samba_sockaddr *bsda = talloc_get_type(addr->private_data, + struct samba_sockaddr); + + if (!bsda) { + return false; + } + + switch (bsda->u.sa.sa_family) { + case AF_UNIX: + return true; + } + + return false; +} + +int _tsocket_address_unix_from_path(TALLOC_CTX *mem_ctx, + const char *path, + struct tsocket_address **_addr, + const char *location) +{ + struct sockaddr_un un; + void *p = &un; + int ret; + + if (!path) { + path = ""; + } + + if (strlen(path) > sizeof(un.sun_path)-1) { + errno = ENAMETOOLONG; + return -1; + } + + ZERO_STRUCT(un); + un.sun_family = AF_UNIX; + strncpy(un.sun_path, path, sizeof(un.sun_path)-1); + + ret = _tsocket_address_bsd_from_sockaddr(mem_ctx, + (struct sockaddr *)p, + sizeof(un), + _addr, + location); + + return ret; +} + +char *tsocket_address_unix_path(const struct tsocket_address *addr, + TALLOC_CTX *mem_ctx) +{ + struct samba_sockaddr *bsda = talloc_get_type(addr->private_data, + struct samba_sockaddr); + const char *str; + + if (!bsda) { + errno = EINVAL; + return NULL; + } + + switch (bsda->u.sa.sa_family) { + case AF_UNIX: + str = bsda->u.un.sun_path; + break; + default: + errno = EINVAL; + return NULL; + } + + return talloc_strdup(mem_ctx, str); +} + +static char *tsocket_address_bsd_string(const struct tsocket_address *addr, + TALLOC_CTX *mem_ctx) +{ + struct samba_sockaddr *bsda = talloc_get_type(addr->private_data, + struct samba_sockaddr); + char *str; + char *addr_str; + const char *prefix = NULL; + uint16_t port; + + switch (bsda->u.sa.sa_family) { + case AF_UNIX: + return talloc_asprintf(mem_ctx, "unix:%s", + bsda->u.un.sun_path); + case AF_INET: + prefix = "ipv4"; + break; +#ifdef HAVE_IPV6 + case AF_INET6: + prefix = "ipv6"; + break; +#endif + default: + errno = EINVAL; + return NULL; + } + + addr_str = tsocket_address_inet_addr_string(addr, mem_ctx); + if (!addr_str) { + return NULL; + } + + port = tsocket_address_inet_port(addr); + + str = talloc_asprintf(mem_ctx, "%s:%s:%u", + prefix, addr_str, port); + talloc_free(addr_str); + + return str; +} + +static struct tsocket_address *tsocket_address_bsd_copy(const struct tsocket_address *addr, + TALLOC_CTX *mem_ctx, + const char *location) +{ + struct samba_sockaddr *bsda = talloc_get_type(addr->private_data, + struct samba_sockaddr); + struct tsocket_address *copy; + int ret; + + ret = _tsocket_address_bsd_from_sockaddr(mem_ctx, + &bsda->u.sa, + bsda->sa_socklen, + ©, + location); + if (ret != 0) { + return NULL; + } + + return copy; +} + +static const struct tsocket_address_ops tsocket_address_bsd_ops = { + .name = "bsd", + .string = tsocket_address_bsd_string, + .copy = tsocket_address_bsd_copy, +}; + +struct tdgram_bsd { + int fd; + + void *event_ptr; + struct tevent_fd *fde; + bool optimize_recvfrom; + bool netlink; + + void *readable_private; + void (*readable_handler)(void *private_data); + void *writeable_private; + void (*writeable_handler)(void *private_data); +}; + +bool tdgram_bsd_optimize_recvfrom(struct tdgram_context *dgram, + bool on) +{ + struct tdgram_bsd *bsds = + talloc_get_type(_tdgram_context_data(dgram), + struct tdgram_bsd); + bool old; + + if (bsds == NULL) { + /* not a bsd socket */ + return false; + } + + old = bsds->optimize_recvfrom; + bsds->optimize_recvfrom = on; + + return old; +} + +static void tdgram_bsd_fde_handler(struct tevent_context *ev, + struct tevent_fd *fde, + uint16_t flags, + void *private_data) +{ + struct tdgram_bsd *bsds = talloc_get_type_abort(private_data, + struct tdgram_bsd); + + if (flags & TEVENT_FD_WRITE) { + bsds->writeable_handler(bsds->writeable_private); + return; + } + if (flags & TEVENT_FD_READ) { + if (!bsds->readable_handler) { + TEVENT_FD_NOT_READABLE(bsds->fde); + return; + } + bsds->readable_handler(bsds->readable_private); + return; + } +} + +static int tdgram_bsd_set_readable_handler(struct tdgram_bsd *bsds, + struct tevent_context *ev, + void (*handler)(void *private_data), + void *private_data) +{ + if (ev == NULL) { + if (handler) { + errno = EINVAL; + return -1; + } + if (!bsds->readable_handler) { + return 0; + } + bsds->readable_handler = NULL; + bsds->readable_private = NULL; + + return 0; + } + + /* read and write must use the same tevent_context */ + if (bsds->event_ptr != ev) { + if (bsds->readable_handler || bsds->writeable_handler) { + errno = EINVAL; + return -1; + } + bsds->event_ptr = NULL; + TALLOC_FREE(bsds->fde); + } + + if (tevent_fd_get_flags(bsds->fde) == 0) { + TALLOC_FREE(bsds->fde); + + bsds->fde = tevent_add_fd(ev, bsds, + bsds->fd, TEVENT_FD_READ, + tdgram_bsd_fde_handler, + bsds); + if (!bsds->fde) { + errno = ENOMEM; + return -1; + } + + /* cache the event context we're running on */ + bsds->event_ptr = ev; + } else if (!bsds->readable_handler) { + TEVENT_FD_READABLE(bsds->fde); + } + + bsds->readable_handler = handler; + bsds->readable_private = private_data; + + return 0; +} + +static int tdgram_bsd_set_writeable_handler(struct tdgram_bsd *bsds, + struct tevent_context *ev, + void (*handler)(void *private_data), + void *private_data) +{ + if (ev == NULL) { + if (handler) { + errno = EINVAL; + return -1; + } + if (!bsds->writeable_handler) { + return 0; + } + bsds->writeable_handler = NULL; + bsds->writeable_private = NULL; + TEVENT_FD_NOT_WRITEABLE(bsds->fde); + + return 0; + } + + /* read and write must use the same tevent_context */ + if (bsds->event_ptr != ev) { + if (bsds->readable_handler || bsds->writeable_handler) { + errno = EINVAL; + return -1; + } + bsds->event_ptr = NULL; + TALLOC_FREE(bsds->fde); + } + + if (tevent_fd_get_flags(bsds->fde) == 0) { + TALLOC_FREE(bsds->fde); + + bsds->fde = tevent_add_fd(ev, bsds, + bsds->fd, TEVENT_FD_WRITE, + tdgram_bsd_fde_handler, + bsds); + if (!bsds->fde) { + errno = ENOMEM; + return -1; + } + + /* cache the event context we're running on */ + bsds->event_ptr = ev; + } else if (!bsds->writeable_handler) { + TEVENT_FD_WRITEABLE(bsds->fde); + } + + bsds->writeable_handler = handler; + bsds->writeable_private = private_data; + + return 0; +} + +struct tdgram_bsd_recvfrom_state { + struct tdgram_context *dgram; + bool first_try; + uint8_t *buf; + size_t len; + struct tsocket_address *src; +}; + +static int tdgram_bsd_recvfrom_destructor(struct tdgram_bsd_recvfrom_state *state) +{ + struct tdgram_bsd *bsds = tdgram_context_data(state->dgram, + struct tdgram_bsd); + + tdgram_bsd_set_readable_handler(bsds, NULL, NULL, NULL); + + return 0; +} + +static void tdgram_bsd_recvfrom_handler(void *private_data); + +static struct tevent_req *tdgram_bsd_recvfrom_send(TALLOC_CTX *mem_ctx, + struct tevent_context *ev, + struct tdgram_context *dgram) +{ + struct tevent_req *req; + struct tdgram_bsd_recvfrom_state *state; + struct tdgram_bsd *bsds = tdgram_context_data(dgram, struct tdgram_bsd); + int ret; + + req = tevent_req_create(mem_ctx, &state, + struct tdgram_bsd_recvfrom_state); + if (!req) { + return NULL; + } + + state->dgram = dgram; + state->first_try= true; + state->buf = NULL; + state->len = 0; + state->src = NULL; + + talloc_set_destructor(state, tdgram_bsd_recvfrom_destructor); + + if (bsds->fd == -1) { + tevent_req_error(req, ENOTCONN); + goto post; + } + + + /* + * this is a fast path, not waiting for the + * socket to become explicit readable gains + * about 10%-20% performance in benchmark tests. + */ + if (bsds->optimize_recvfrom) { + /* + * We only do the optimization on + * recvfrom if the caller asked for it. + * + * This is needed because in most cases + * we prefer to flush send buffers before + * receiving incoming requests. + */ + tdgram_bsd_recvfrom_handler(req); + if (!tevent_req_is_in_progress(req)) { + goto post; + } + } + + ret = tdgram_bsd_set_readable_handler(bsds, ev, + tdgram_bsd_recvfrom_handler, + req); + if (ret == -1) { + tevent_req_error(req, errno); + goto post; + } + + return req; + + post: + tevent_req_post(req, ev); + return req; +} + +static void tdgram_bsd_recvfrom_handler(void *private_data) +{ + struct tevent_req *req = talloc_get_type_abort(private_data, + struct tevent_req); + struct tdgram_bsd_recvfrom_state *state = tevent_req_data(req, + struct tdgram_bsd_recvfrom_state); + struct tdgram_context *dgram = state->dgram; + struct tdgram_bsd *bsds = tdgram_context_data(dgram, struct tdgram_bsd); + struct samba_sockaddr *bsda = NULL; + ssize_t ret; + int err; + bool retry; + + if (bsds->netlink) { + ret = tsocket_bsd_netlink_pending(bsds->fd); + } else { + ret = tsocket_bsd_pending(bsds->fd); + } + + if (state->first_try && ret == 0) { + state->first_try = false; + /* retry later */ + return; + } + state->first_try = false; + + err = tsocket_bsd_error_from_errno(ret, errno, &retry); + if (retry) { + /* retry later */ + return; + } + if (tevent_req_error(req, err)) { + return; + } + + /* note that 'ret' can be 0 here */ + state->buf = talloc_array(state, uint8_t, ret); + if (tevent_req_nomem(state->buf, req)) { + return; + } + state->len = ret; + + state->src = tsocket_address_create(state, + &tsocket_address_bsd_ops, + &bsda, + struct samba_sockaddr, + __location__ "bsd_recvfrom"); + if (tevent_req_nomem(state->src, req)) { + return; + } + + ZERO_STRUCTP(bsda); + bsda->sa_socklen = sizeof(bsda->u.ss); +#ifdef HAVE_STRUCT_SOCKADDR_SA_LEN + bsda->u.sa.sa_len = bsda->sa_socklen; +#endif + + ret = recvfrom(bsds->fd, state->buf, state->len, 0, + &bsda->u.sa, &bsda->sa_socklen); + err = tsocket_bsd_error_from_errno(ret, errno, &retry); + if (retry) { + /* retry later */ + return; + } + if (tevent_req_error(req, err)) { + return; + } + + /* + * Some systems (FreeBSD, see bug #7115) return too much + * bytes in tsocket_bsd_pending()/ioctl(fd, FIONREAD, ...), + * the return value includes some IP/UDP header bytes, + * while recvfrom() just returns the payload. + */ + state->buf = talloc_realloc(state, state->buf, uint8_t, ret); + if (tevent_req_nomem(state->buf, req)) { + return; + } + state->len = ret; + + tevent_req_done(req); +} + +static ssize_t tdgram_bsd_recvfrom_recv(struct tevent_req *req, + int *perrno, + TALLOC_CTX *mem_ctx, + uint8_t **buf, + struct tsocket_address **src) +{ + struct tdgram_bsd_recvfrom_state *state = tevent_req_data(req, + struct tdgram_bsd_recvfrom_state); + ssize_t ret; + + ret = tsocket_simple_int_recv(req, perrno); + if (ret == 0) { + *buf = talloc_move(mem_ctx, &state->buf); + ret = state->len; + if (src) { + *src = talloc_move(mem_ctx, &state->src); + } + } + + tevent_req_received(req); + return ret; +} + +struct tdgram_bsd_sendto_state { + struct tdgram_context *dgram; + + const uint8_t *buf; + size_t len; + const struct tsocket_address *dst; + + ssize_t ret; +}; + +static int tdgram_bsd_sendto_destructor(struct tdgram_bsd_sendto_state *state) +{ + struct tdgram_bsd *bsds = tdgram_context_data(state->dgram, + struct tdgram_bsd); + + tdgram_bsd_set_writeable_handler(bsds, NULL, NULL, NULL); + + return 0; +} + +static void tdgram_bsd_sendto_handler(void *private_data); + +static struct tevent_req *tdgram_bsd_sendto_send(TALLOC_CTX *mem_ctx, + struct tevent_context *ev, + struct tdgram_context *dgram, + const uint8_t *buf, + size_t len, + const struct tsocket_address *dst) +{ + struct tevent_req *req; + struct tdgram_bsd_sendto_state *state; + struct tdgram_bsd *bsds = tdgram_context_data(dgram, struct tdgram_bsd); + int ret; + + req = tevent_req_create(mem_ctx, &state, + struct tdgram_bsd_sendto_state); + if (!req) { + return NULL; + } + + state->dgram = dgram; + state->buf = buf; + state->len = len; + state->dst = dst; + state->ret = -1; + + talloc_set_destructor(state, tdgram_bsd_sendto_destructor); + + if (bsds->fd == -1) { + tevent_req_error(req, ENOTCONN); + goto post; + } + + /* + * this is a fast path, not waiting for the + * socket to become explicit writeable gains + * about 10%-20% performance in benchmark tests. + */ + tdgram_bsd_sendto_handler(req); + if (!tevent_req_is_in_progress(req)) { + goto post; + } + + ret = tdgram_bsd_set_writeable_handler(bsds, ev, + tdgram_bsd_sendto_handler, + req); + if (ret == -1) { + tevent_req_error(req, errno); + goto post; + } + + return req; + + post: + tevent_req_post(req, ev); + return req; +} + +static void tdgram_bsd_sendto_handler(void *private_data) +{ + struct tevent_req *req = talloc_get_type_abort(private_data, + struct tevent_req); + struct tdgram_bsd_sendto_state *state = tevent_req_data(req, + struct tdgram_bsd_sendto_state); + struct tdgram_context *dgram = state->dgram; + struct tdgram_bsd *bsds = tdgram_context_data(dgram, struct tdgram_bsd); + struct sockaddr *sa = NULL; + socklen_t sa_socklen = 0; + ssize_t ret; + int err; + bool retry; + + if (state->dst) { + struct samba_sockaddr *bsda = + talloc_get_type(state->dst->private_data, + struct samba_sockaddr); + + sa = &bsda->u.sa; + sa_socklen = bsda->sa_socklen; + } + + ret = sendto(bsds->fd, state->buf, state->len, 0, sa, sa_socklen); + err = tsocket_bsd_error_from_errno(ret, errno, &retry); + if (retry) { + /* retry later */ + return; + } + + if (err == EMSGSIZE) { + /* round up in 1K increments */ + int bufsize = ((state->len + 1023) & (~1023)); + + ret = setsockopt(bsds->fd, SOL_SOCKET, SO_SNDBUF, &bufsize, + sizeof(bufsize)); + if (ret == 0) { + /* + * We do the retry here, rather then via the + * handler, as we only want to retry once for + * this condition, so if there is a mismatch + * between what setsockopt() accepts and what can + * actually be sent, we do not end up in a + * loop. + */ + + ret = sendto(bsds->fd, state->buf, state->len, + 0, sa, sa_socklen); + err = tsocket_bsd_error_from_errno(ret, errno, &retry); + if (retry) { /* retry later */ + return; + } + } + } + + if (tevent_req_error(req, err)) { + return; + } + + state->ret = ret; + + tevent_req_done(req); +} + +static ssize_t tdgram_bsd_sendto_recv(struct tevent_req *req, int *perrno) +{ + struct tdgram_bsd_sendto_state *state = tevent_req_data(req, + struct tdgram_bsd_sendto_state); + ssize_t ret; + + ret = tsocket_simple_int_recv(req, perrno); + if (ret == 0) { + ret = state->ret; + } + + tevent_req_received(req); + return ret; +} + +struct tdgram_bsd_disconnect_state { + uint8_t __dummy; +}; + +static struct tevent_req *tdgram_bsd_disconnect_send(TALLOC_CTX *mem_ctx, + struct tevent_context *ev, + struct tdgram_context *dgram) +{ + struct tdgram_bsd *bsds = tdgram_context_data(dgram, struct tdgram_bsd); + struct tevent_req *req; + struct tdgram_bsd_disconnect_state *state; + int ret; + int err; + bool dummy; + + req = tevent_req_create(mem_ctx, &state, + struct tdgram_bsd_disconnect_state); + if (req == NULL) { + return NULL; + } + + if (bsds->fd == -1) { + tevent_req_error(req, ENOTCONN); + goto post; + } + + TALLOC_FREE(bsds->fde); + ret = close(bsds->fd); + bsds->fd = -1; + err = tsocket_bsd_error_from_errno(ret, errno, &dummy); + if (tevent_req_error(req, err)) { + goto post; + } + + tevent_req_done(req); +post: + tevent_req_post(req, ev); + return req; +} + +static int tdgram_bsd_disconnect_recv(struct tevent_req *req, + int *perrno) +{ + int ret; + + ret = tsocket_simple_int_recv(req, perrno); + + tevent_req_received(req); + return ret; +} + +static const struct tdgram_context_ops tdgram_bsd_ops = { + .name = "bsd", + + .recvfrom_send = tdgram_bsd_recvfrom_send, + .recvfrom_recv = tdgram_bsd_recvfrom_recv, + + .sendto_send = tdgram_bsd_sendto_send, + .sendto_recv = tdgram_bsd_sendto_recv, + + .disconnect_send = tdgram_bsd_disconnect_send, + .disconnect_recv = tdgram_bsd_disconnect_recv, +}; + +static int tdgram_bsd_destructor(struct tdgram_bsd *bsds) +{ + TALLOC_FREE(bsds->fde); + if (bsds->fd != -1) { + close(bsds->fd); + bsds->fd = -1; + } + return 0; +} + +static int tdgram_bsd_dgram_socket(const struct tsocket_address *local, + const struct tsocket_address *remote, + bool broadcast, + TALLOC_CTX *mem_ctx, + struct tdgram_context **_dgram, + const char *location) +{ + struct samba_sockaddr *lbsda = + talloc_get_type_abort(local->private_data, + struct samba_sockaddr); + struct samba_sockaddr *rbsda = NULL; + struct tdgram_context *dgram; + struct tdgram_bsd *bsds; + int fd; + int ret; + bool do_bind = false; + bool do_reuseaddr = false; + bool do_ipv6only = false; + bool is_inet = false; + int sa_fam = lbsda->u.sa.sa_family; + + if (remote) { + rbsda = talloc_get_type_abort(remote->private_data, + struct samba_sockaddr); + } + + switch (lbsda->u.sa.sa_family) { + case AF_UNIX: + if (broadcast) { + errno = EINVAL; + return -1; + } + if (lbsda->u.un.sun_path[0] != 0) { + do_reuseaddr = true; + do_bind = true; + } + break; + case AF_INET: + if (lbsda->u.in.sin_port != 0) { + do_reuseaddr = true; + do_bind = true; + } + if (lbsda->u.in.sin_addr.s_addr != INADDR_ANY) { + do_bind = true; + } + is_inet = true; + break; +#ifdef HAVE_IPV6 + case AF_INET6: + if (lbsda->u.in6.sin6_port != 0) { + do_reuseaddr = true; + do_bind = true; + } + if (memcmp(&in6addr_any, + &lbsda->u.in6.sin6_addr, + sizeof(in6addr_any)) != 0) { + do_bind = true; + } + is_inet = true; + do_ipv6only = true; + break; +#endif + default: + errno = EINVAL; + return -1; + } + + if (!do_bind && is_inet && rbsda) { + sa_fam = rbsda->u.sa.sa_family; + switch (sa_fam) { + case AF_INET: + do_ipv6only = false; + break; +#ifdef HAVE_IPV6 + case AF_INET6: + do_ipv6only = true; + break; +#endif + } + } + + fd = socket(sa_fam, SOCK_DGRAM, 0); + if (fd < 0) { + return -1; + } + + fd = tsocket_bsd_common_prepare_fd(fd, true); + if (fd < 0) { + return -1; + } + + dgram = tdgram_context_create(mem_ctx, + &tdgram_bsd_ops, + &bsds, + struct tdgram_bsd, + location); + if (!dgram) { + int saved_errno = errno; + close(fd); + errno = saved_errno; + return -1; + } + ZERO_STRUCTP(bsds); + bsds->fd = fd; + talloc_set_destructor(bsds, tdgram_bsd_destructor); + +#ifdef HAVE_IPV6 + if (do_ipv6only) { + int val = 1; + + ret = setsockopt(fd, IPPROTO_IPV6, IPV6_V6ONLY, + (const void *)&val, sizeof(val)); + if (ret == -1) { + int saved_errno = errno; + talloc_free(dgram); + errno = saved_errno; + return -1; + } + } +#endif + + if (broadcast) { + int val = 1; + + ret = setsockopt(fd, SOL_SOCKET, SO_BROADCAST, + (const void *)&val, sizeof(val)); + if (ret == -1) { + int saved_errno = errno; + talloc_free(dgram); + errno = saved_errno; + return -1; + } + } + + if (do_reuseaddr) { + int val = 1; + + ret = setsockopt(fd, SOL_SOCKET, SO_REUSEADDR, + (const void *)&val, sizeof(val)); + if (ret == -1) { + int saved_errno = errno; + talloc_free(dgram); + errno = saved_errno; + return -1; + } + } + + if (do_bind) { + ret = bind(fd, &lbsda->u.sa, lbsda->sa_socklen); + if (ret == -1) { + int saved_errno = errno; + talloc_free(dgram); + errno = saved_errno; + return -1; + } + } + + if (rbsda) { + if (rbsda->u.sa.sa_family != sa_fam) { + talloc_free(dgram); + errno = EINVAL; + return -1; + } + + ret = connect(fd, &rbsda->u.sa, rbsda->sa_socklen); + if (ret == -1) { + int saved_errno = errno; + talloc_free(dgram); + errno = saved_errno; + return -1; + } + } + + *_dgram = dgram; + return 0; +} + +int _tdgram_bsd_existing_socket(TALLOC_CTX *mem_ctx, + int fd, + struct tdgram_context **_dgram, + const char *location) +{ + struct tdgram_context *dgram; + struct tdgram_bsd *bsds; +#ifdef HAVE_LINUX_RTNETLINK_H + int result; + struct sockaddr sa; + socklen_t sa_len = sizeof(struct sockaddr); +#endif + + dgram = tdgram_context_create(mem_ctx, + &tdgram_bsd_ops, + &bsds, + struct tdgram_bsd, + location); + if (!dgram) { + return -1; + } + ZERO_STRUCTP(bsds); + bsds->fd = fd; + talloc_set_destructor(bsds, tdgram_bsd_destructor); + + *_dgram = dgram; + +#ifdef HAVE_LINUX_RTNETLINK_H + /* + * Try to determine the protocol family and remember if it's + * AF_NETLINK. We don't care if this fails. + */ + result = getsockname(fd, &sa, &sa_len); + if (result == 0 && sa.sa_family == AF_NETLINK) { + bsds->netlink = true; + } +#endif + + return 0; +} + +int _tdgram_inet_udp_socket(const struct tsocket_address *local, + const struct tsocket_address *remote, + TALLOC_CTX *mem_ctx, + struct tdgram_context **dgram, + const char *location) +{ + struct samba_sockaddr *lbsda = + talloc_get_type_abort(local->private_data, + struct samba_sockaddr); + int ret; + + switch (lbsda->u.sa.sa_family) { + case AF_INET: + break; +#ifdef HAVE_IPV6 + case AF_INET6: + break; +#endif + default: + errno = EINVAL; + return -1; + } + + ret = tdgram_bsd_dgram_socket(local, remote, false, + mem_ctx, dgram, location); + + return ret; +} + +int _tdgram_inet_udp_broadcast_socket(const struct tsocket_address *local, + TALLOC_CTX *mem_ctx, + struct tdgram_context **dgram, + const char *location) +{ + struct samba_sockaddr *lbsda = + talloc_get_type_abort(local->private_data, + struct samba_sockaddr); + int ret; + + switch (lbsda->u.sa.sa_family) { + case AF_INET: + break; +#ifdef HAVE_IPV6 + case AF_INET6: + /* only ipv4 */ + errno = EINVAL; + return -1; +#endif + default: + errno = EINVAL; + return -1; + } + + ret = tdgram_bsd_dgram_socket(local, NULL, true, + mem_ctx, dgram, location); + + return ret; +} + +int _tdgram_unix_socket(const struct tsocket_address *local, + const struct tsocket_address *remote, + TALLOC_CTX *mem_ctx, + struct tdgram_context **dgram, + const char *location) +{ + struct samba_sockaddr *lbsda = + talloc_get_type_abort(local->private_data, + struct samba_sockaddr); + int ret; + + switch (lbsda->u.sa.sa_family) { + case AF_UNIX: + break; + default: + errno = EINVAL; + return -1; + } + + ret = tdgram_bsd_dgram_socket(local, remote, false, + mem_ctx, dgram, location); + + return ret; +} + +struct tstream_bsd { + int fd; + int error; + + void *event_ptr; + struct tevent_fd *fde; + bool optimize_readv; + bool fail_readv_first_error; + + void *readable_private; + void (*readable_handler)(void *private_data); + void *writeable_private; + void (*writeable_handler)(void *private_data); +}; + +bool tstream_bsd_optimize_readv(struct tstream_context *stream, + bool on) +{ + struct tstream_bsd *bsds = + talloc_get_type(_tstream_context_data(stream), + struct tstream_bsd); + bool old; + + if (bsds == NULL) { + /* not a bsd socket */ + return false; + } + + old = bsds->optimize_readv; + bsds->optimize_readv = on; + + return old; +} + +bool tstream_bsd_fail_readv_first_error(struct tstream_context *stream, + bool on) +{ + struct tstream_bsd *bsds = + talloc_get_type(_tstream_context_data(stream), + struct tstream_bsd); + bool old; + + if (bsds == NULL) { + /* not a bsd socket */ + return false; + } + + old = bsds->fail_readv_first_error; + bsds->fail_readv_first_error = on; + + return old; +} + +static void tstream_bsd_fde_handler(struct tevent_context *ev, + struct tevent_fd *fde, + uint16_t flags, + void *private_data) +{ + struct tstream_bsd *bsds = talloc_get_type_abort(private_data, + struct tstream_bsd); + + if (flags & TEVENT_FD_ERROR) { + /* + * We lazily keep TEVENT_FD_READ alive + * in tstream_bsd_set_readable_handler() + * + * So we have to check TEVENT_FD_READ + * as well as bsds->readable_handler + * + * We only drain remaining data from the + * the recv queue if available and desired. + */ + if ((flags & TEVENT_FD_READ) && + !bsds->fail_readv_first_error && + (bsds->readable_handler != NULL)) + { + /* + * If there's still data to read + * we allow it to be read until + * we reach EOF (=> EPIPE). + */ + bsds->readable_handler(bsds->readable_private); + return; + } + + /* + * If there's no data left to read, + * we get the error. + * + * It means we no longer call any readv or + * writev, as bsds->error is checked first. + */ + if (bsds->error == 0) { + int ret = samba_socket_poll_or_sock_error(bsds->fd); + + if (ret == -1) { + bsds->error = errno; + } + /* fallback to EPIPE */ + if (bsds->error == 0) { + bsds->error = EPIPE; + } + } + + /* + * Let write to fail early. + * + * Note we only need to check TEVENT_FD_WRITE + * as tstream_bsd_set_writeable_handler() + * clear it together with the handler. + */ + if (flags & TEVENT_FD_WRITE) { + bsds->writeable_handler(bsds->writeable_private); + return; + } + + /* We prefer the readable handler to fire first. */ + if (bsds->readable_handler != NULL) { + bsds->readable_handler(bsds->readable_private); + return; + } + + /* As last resort we notify the writeable handler */ + if (bsds->writeable_handler != NULL) { + bsds->writeable_handler(bsds->writeable_private); + return; + } + + /* + * We may hit this because we don't clear TEVENT_FD_ERROR + * in tstream_bsd_set_readable_handler() nor + * tstream_bsd_set_writeable_handler(). + * + * As we already captured the error, we can remove + * the fde completely. + */ + TALLOC_FREE(bsds->fde); + return; + } + if (flags & TEVENT_FD_WRITE) { + bsds->writeable_handler(bsds->writeable_private); + return; + } + if (flags & TEVENT_FD_READ) { + if (!bsds->readable_handler) { + /* + * tstream_bsd_set_readable_handler + * doesn't clear TEVENT_FD_READ. + * + * In order to avoid cpu-spinning + * we need to clear it here. + */ + TEVENT_FD_NOT_READABLE(bsds->fde); + + /* + * Here we're lazy and keep TEVENT_FD_ERROR + * alive. If it's triggered the next time + * we'll handle it gracefully above + * and end up with TALLOC_FREE(bsds->fde); + * in order to spin on TEVENT_FD_ERROR. + */ + return; + } + bsds->readable_handler(bsds->readable_private); + return; + } +} + +static int tstream_bsd_set_readable_handler(struct tstream_bsd *bsds, + struct tevent_context *ev, + void (*handler)(void *private_data), + void *private_data) +{ + if (ev == NULL) { + if (handler) { + errno = EINVAL; + return -1; + } + if (!bsds->readable_handler) { + return 0; + } + bsds->readable_handler = NULL; + bsds->readable_private = NULL; + + /* + * Here we are lazy as it's very likely that the next + * tevent_readv_send() will come in shortly, + * so we keep TEVENT_FD_READ alive. + */ + return 0; + } + + /* read and write must use the same tevent_context */ + if (bsds->event_ptr != ev) { + if (bsds->readable_handler || bsds->writeable_handler) { + errno = EINVAL; + return -1; + } + bsds->event_ptr = NULL; + TALLOC_FREE(bsds->fde); + } + + if (tevent_fd_get_flags(bsds->fde) == 0) { + TALLOC_FREE(bsds->fde); + + bsds->fde = tevent_add_fd(ev, bsds, + bsds->fd, + TEVENT_FD_ERROR | TEVENT_FD_READ, + tstream_bsd_fde_handler, + bsds); + if (!bsds->fde) { + errno = ENOMEM; + return -1; + } + + /* cache the event context we're running on */ + bsds->event_ptr = ev; + } else if (!bsds->readable_handler) { + TEVENT_FD_READABLE(bsds->fde); + /* + * TEVENT_FD_ERROR is likely already set, so + * TEVENT_FD_WANTERROR() is most likely a no-op. + */ + TEVENT_FD_WANTERROR(bsds->fde); + } + + bsds->readable_handler = handler; + bsds->readable_private = private_data; + + return 0; +} + +static int tstream_bsd_set_writeable_handler(struct tstream_bsd *bsds, + struct tevent_context *ev, + void (*handler)(void *private_data), + void *private_data) +{ + if (ev == NULL) { + if (handler) { + errno = EINVAL; + return -1; + } + if (!bsds->writeable_handler) { + return 0; + } + bsds->writeable_handler = NULL; + bsds->writeable_private = NULL; + + /* + * The writeable handler is only + * set if we got EAGAIN or a short + * writev on the first try, so + * this isn't the hot path. + * + * Here we are lazy and leave TEVENT_FD_ERROR + * alive as it's shared with the readable + * handler. So we only clear TEVENT_FD_WRITE. + */ + TEVENT_FD_NOT_WRITEABLE(bsds->fde); + return 0; + } + + /* read and write must use the same tevent_context */ + if (bsds->event_ptr != ev) { + if (bsds->readable_handler || bsds->writeable_handler) { + errno = EINVAL; + return -1; + } + bsds->event_ptr = NULL; + TALLOC_FREE(bsds->fde); + } + + if (tevent_fd_get_flags(bsds->fde) == 0) { + TALLOC_FREE(bsds->fde); + + bsds->fde = tevent_add_fd(ev, bsds, + bsds->fd, + TEVENT_FD_ERROR | TEVENT_FD_WRITE, + tstream_bsd_fde_handler, + bsds); + if (!bsds->fde) { + errno = ENOMEM; + return -1; + } + + /* cache the event context we're running on */ + bsds->event_ptr = ev; + } else if (!bsds->writeable_handler) { + TEVENT_FD_WRITEABLE(bsds->fde); + /* + * TEVENT_FD_ERROR is likely already set, so + * TEVENT_FD_WANTERROR() is most likely a no-op. + */ + TEVENT_FD_WANTERROR(bsds->fde); + } + + bsds->writeable_handler = handler; + bsds->writeable_private = private_data; + + return 0; +} + +static ssize_t tstream_bsd_pending_bytes(struct tstream_context *stream) +{ + struct tstream_bsd *bsds = tstream_context_data(stream, + struct tstream_bsd); + ssize_t ret; + + if (bsds->fd == -1) { + errno = ENOTCONN; + return -1; + } + + if (bsds->error != 0) { + errno = bsds->error; + return -1; + } + + ret = tsocket_bsd_pending(bsds->fd); + if (ret == -1) { + /* + * remember the error and don't + * allow further requests + */ + bsds->error = errno; + } + + return ret; +} + +struct tstream_bsd_readv_state { + struct tstream_context *stream; + + struct iovec *vector; + size_t count; + + int ret; +}; + +static int tstream_bsd_readv_destructor(struct tstream_bsd_readv_state *state) +{ + struct tstream_bsd *bsds = tstream_context_data(state->stream, + struct tstream_bsd); + + tstream_bsd_set_readable_handler(bsds, NULL, NULL, NULL); + + return 0; +} + +static void tstream_bsd_readv_handler(void *private_data); + +static struct tevent_req *tstream_bsd_readv_send(TALLOC_CTX *mem_ctx, + struct tevent_context *ev, + struct tstream_context *stream, + struct iovec *vector, + size_t count) +{ + struct tevent_req *req; + struct tstream_bsd_readv_state *state; + struct tstream_bsd *bsds = tstream_context_data(stream, struct tstream_bsd); + int ret; + + req = tevent_req_create(mem_ctx, &state, + struct tstream_bsd_readv_state); + if (!req) { + return NULL; + } + + state->stream = stream; + /* we make a copy of the vector so that we can modify it */ + state->vector = talloc_array(state, struct iovec, count); + if (tevent_req_nomem(state->vector, req)) { + goto post; + } + memcpy(state->vector, vector, sizeof(struct iovec)*count); + state->count = count; + state->ret = 0; + + talloc_set_destructor(state, tstream_bsd_readv_destructor); + + if (bsds->fd == -1) { + tevent_req_error(req, ENOTCONN); + goto post; + } + + /* + * this is a fast path, not waiting for the + * socket to become explicit readable gains + * about 10%-20% performance in benchmark tests. + */ + if (bsds->optimize_readv) { + /* + * We only do the optimization on + * readv if the caller asked for it. + * + * This is needed because in most cases + * we prefer to flush send buffers before + * receiving incoming requests. + */ + tstream_bsd_readv_handler(req); + if (!tevent_req_is_in_progress(req)) { + goto post; + } + } + + ret = tstream_bsd_set_readable_handler(bsds, ev, + tstream_bsd_readv_handler, + req); + if (ret == -1) { + tevent_req_error(req, errno); + goto post; + } + + return req; + + post: + tevent_req_post(req, ev); + return req; +} + +static void tstream_bsd_readv_handler(void *private_data) +{ + struct tevent_req *req = talloc_get_type_abort(private_data, + struct tevent_req); + struct tstream_bsd_readv_state *state = tevent_req_data(req, + struct tstream_bsd_readv_state); + struct tstream_context *stream = state->stream; + struct tstream_bsd *bsds = tstream_context_data(stream, struct tstream_bsd); + int ret; + int err; + int _count; + bool ok, retry; + + if (bsds->error != 0) { + tevent_req_error(req, bsds->error); + return; + } + + ret = readv(bsds->fd, state->vector, state->count); + if (ret == 0) { + /* propagate end of file */ + bsds->error = EPIPE; + tevent_req_error(req, EPIPE); + return; + } + err = tsocket_bsd_error_from_errno(ret, errno, &retry); + if (retry) { + /* retry later */ + return; + } + if (err != 0) { + /* + * remember the error and don't + * allow further requests + */ + bsds->error = err; + } + if (tevent_req_error(req, err)) { + return; + } + + state->ret += ret; + + _count = state->count; /* tstream has size_t count, readv has int */ + ok = iov_advance(&state->vector, &_count, ret); + state->count = _count; + + if (!ok) { + tevent_req_error(req, EINVAL); + return; + } + + if (state->count > 0) { + /* we have more to read */ + return; + } + + tevent_req_done(req); +} + +static int tstream_bsd_readv_recv(struct tevent_req *req, + int *perrno) +{ + struct tstream_bsd_readv_state *state = tevent_req_data(req, + struct tstream_bsd_readv_state); + int ret; + + ret = tsocket_simple_int_recv(req, perrno); + if (ret == 0) { + ret = state->ret; + } + + tevent_req_received(req); + return ret; +} + +struct tstream_bsd_writev_state { + struct tstream_context *stream; + + struct iovec *vector; + size_t count; + + int ret; +}; + +static int tstream_bsd_writev_destructor(struct tstream_bsd_writev_state *state) +{ + struct tstream_bsd *bsds = tstream_context_data(state->stream, + struct tstream_bsd); + + tstream_bsd_set_writeable_handler(bsds, NULL, NULL, NULL); + + return 0; +} + +static void tstream_bsd_writev_handler(void *private_data); + +static struct tevent_req *tstream_bsd_writev_send(TALLOC_CTX *mem_ctx, + struct tevent_context *ev, + struct tstream_context *stream, + const struct iovec *vector, + size_t count) +{ + struct tevent_req *req; + struct tstream_bsd_writev_state *state; + struct tstream_bsd *bsds = tstream_context_data(stream, struct tstream_bsd); + int ret; + + req = tevent_req_create(mem_ctx, &state, + struct tstream_bsd_writev_state); + if (!req) { + return NULL; + } + + state->stream = stream; + /* we make a copy of the vector so that we can modify it */ + state->vector = talloc_array(state, struct iovec, count); + if (tevent_req_nomem(state->vector, req)) { + goto post; + } + memcpy(state->vector, vector, sizeof(struct iovec)*count); + state->count = count; + state->ret = 0; + + talloc_set_destructor(state, tstream_bsd_writev_destructor); + + if (bsds->fd == -1) { + tevent_req_error(req, ENOTCONN); + goto post; + } + + /* + * this is a fast path, not waiting for the + * socket to become explicit writeable gains + * about 10%-20% performance in benchmark tests. + */ + tstream_bsd_writev_handler(req); + if (!tevent_req_is_in_progress(req)) { + goto post; + } + + ret = tstream_bsd_set_writeable_handler(bsds, ev, + tstream_bsd_writev_handler, + req); + if (ret == -1) { + tevent_req_error(req, errno); + goto post; + } + + return req; + + post: + tevent_req_post(req, ev); + return req; +} + +static void tstream_bsd_writev_handler(void *private_data) +{ + struct tevent_req *req = talloc_get_type_abort(private_data, + struct tevent_req); + struct tstream_bsd_writev_state *state = tevent_req_data(req, + struct tstream_bsd_writev_state); + struct tstream_context *stream = state->stream; + struct tstream_bsd *bsds = tstream_context_data(stream, struct tstream_bsd); + ssize_t ret; + int err; + int _count; + bool ok, retry; + + if (bsds->error != 0) { + tevent_req_error(req, bsds->error); + return; + } + + ret = writev(bsds->fd, state->vector, state->count); + if (ret == 0) { + /* propagate end of file */ + bsds->error = EPIPE; + tevent_req_error(req, EPIPE); + return; + } + err = tsocket_bsd_error_from_errno(ret, errno, &retry); + if (retry) { + /* + * retry later... + */ + return; + } + if (err != 0) { + /* + * remember the error and don't + * allow further requests + */ + bsds->error = err; + } + if (tevent_req_error(req, err)) { + return; + } + + state->ret += ret; + + _count = state->count; /* tstream has size_t count, writev has int */ + ok = iov_advance(&state->vector, &_count, ret); + state->count = _count; + + if (!ok) { + tevent_req_error(req, EINVAL); + return; + } + + if (state->count > 0) { + /* + * we have more to write + */ + return; + } + + tevent_req_done(req); +} + +static int tstream_bsd_writev_recv(struct tevent_req *req, int *perrno) +{ + struct tstream_bsd_writev_state *state = tevent_req_data(req, + struct tstream_bsd_writev_state); + int ret; + + ret = tsocket_simple_int_recv(req, perrno); + if (ret == 0) { + ret = state->ret; + } + + tevent_req_received(req); + return ret; +} + +struct tstream_bsd_disconnect_state { + void *__dummy; +}; + +static struct tevent_req *tstream_bsd_disconnect_send(TALLOC_CTX *mem_ctx, + struct tevent_context *ev, + struct tstream_context *stream) +{ + struct tstream_bsd *bsds = tstream_context_data(stream, struct tstream_bsd); + struct tevent_req *req; + struct tstream_bsd_disconnect_state *state; + int ret; + int err; + bool dummy; + + req = tevent_req_create(mem_ctx, &state, + struct tstream_bsd_disconnect_state); + if (req == NULL) { + return NULL; + } + + if (bsds->fd == -1) { + tevent_req_error(req, ENOTCONN); + goto post; + } + + TALLOC_FREE(bsds->fde); + ret = close(bsds->fd); + bsds->fd = -1; + err = tsocket_bsd_error_from_errno(ret, errno, &dummy); + if (tevent_req_error(req, err)) { + goto post; + } + + tevent_req_done(req); +post: + tevent_req_post(req, ev); + return req; +} + +static int tstream_bsd_disconnect_recv(struct tevent_req *req, + int *perrno) +{ + int ret; + + ret = tsocket_simple_int_recv(req, perrno); + + tevent_req_received(req); + return ret; +} + +static const struct tstream_context_ops tstream_bsd_ops = { + .name = "bsd", + + .pending_bytes = tstream_bsd_pending_bytes, + + .readv_send = tstream_bsd_readv_send, + .readv_recv = tstream_bsd_readv_recv, + + .writev_send = tstream_bsd_writev_send, + .writev_recv = tstream_bsd_writev_recv, + + .disconnect_send = tstream_bsd_disconnect_send, + .disconnect_recv = tstream_bsd_disconnect_recv, +}; + +static int tstream_bsd_destructor(struct tstream_bsd *bsds) +{ + TALLOC_FREE(bsds->fde); + if (bsds->fd != -1) { + close(bsds->fd); + bsds->fd = -1; + } + return 0; +} + +int _tstream_bsd_existing_socket(TALLOC_CTX *mem_ctx, + int fd, + struct tstream_context **_stream, + const char *location) +{ + struct tstream_context *stream; + struct tstream_bsd *bsds; + + stream = tstream_context_create(mem_ctx, + &tstream_bsd_ops, + &bsds, + struct tstream_bsd, + location); + if (!stream) { + return -1; + } + ZERO_STRUCTP(bsds); + bsds->fd = fd; + talloc_set_destructor(bsds, tstream_bsd_destructor); + + *_stream = stream; + return 0; +} + +struct tstream_bsd_connect_state { + int fd; + struct tevent_fd *fde; + struct tstream_conext *stream; + struct tsocket_address *local; +}; + +static int tstream_bsd_connect_destructor(struct tstream_bsd_connect_state *state) +{ + TALLOC_FREE(state->fde); + if (state->fd != -1) { + close(state->fd); + state->fd = -1; + } + + return 0; +} + +static void tstream_bsd_connect_fde_handler(struct tevent_context *ev, + struct tevent_fd *fde, + uint16_t flags, + void *private_data); + +static struct tevent_req *tstream_bsd_connect_send(TALLOC_CTX *mem_ctx, + struct tevent_context *ev, + int sys_errno, + const struct tsocket_address *local, + const struct tsocket_address *remote) +{ + struct tevent_req *req; + struct tstream_bsd_connect_state *state; + struct samba_sockaddr *lbsda = + talloc_get_type_abort(local->private_data, + struct samba_sockaddr); + struct samba_sockaddr *lrbsda = NULL; + struct samba_sockaddr *rbsda = + talloc_get_type_abort(remote->private_data, + struct samba_sockaddr); + int ret; + bool do_bind = false; + bool do_reuseaddr = false; + bool do_ipv6only = false; + bool is_inet = false; + int sa_fam = lbsda->u.sa.sa_family; + + req = tevent_req_create(mem_ctx, &state, + struct tstream_bsd_connect_state); + if (!req) { + return NULL; + } + state->fd = -1; + state->fde = NULL; + + talloc_set_destructor(state, tstream_bsd_connect_destructor); + + /* give the wrappers a chance to report an error */ + if (sys_errno != 0) { + tevent_req_error(req, sys_errno); + goto post; + } + + switch (lbsda->u.sa.sa_family) { + case AF_UNIX: + if (lbsda->u.un.sun_path[0] != 0) { + do_reuseaddr = true; + do_bind = true; + } + break; + case AF_INET: + if (lbsda->u.in.sin_port != 0) { + do_reuseaddr = true; + do_bind = true; + } + if (lbsda->u.in.sin_addr.s_addr != INADDR_ANY) { + do_bind = true; + } + is_inet = true; + break; +#ifdef HAVE_IPV6 + case AF_INET6: + if (lbsda->u.in6.sin6_port != 0) { + do_reuseaddr = true; + do_bind = true; + } + if (memcmp(&in6addr_any, + &lbsda->u.in6.sin6_addr, + sizeof(in6addr_any)) != 0) { + do_bind = true; + } + is_inet = true; + do_ipv6only = true; + break; +#endif + default: + tevent_req_error(req, EINVAL); + goto post; + } + + if (!do_bind && is_inet) { + sa_fam = rbsda->u.sa.sa_family; + switch (sa_fam) { + case AF_INET: + do_ipv6only = false; + break; +#ifdef HAVE_IPV6 + case AF_INET6: + do_ipv6only = true; + break; +#endif + } + } + + if (is_inet) { + state->local = tsocket_address_create(state, + &tsocket_address_bsd_ops, + &lrbsda, + struct samba_sockaddr, + __location__ "bsd_connect"); + if (tevent_req_nomem(state->local, req)) { + goto post; + } + + ZERO_STRUCTP(lrbsda); + lrbsda->sa_socklen = sizeof(lrbsda->u.ss); +#ifdef HAVE_STRUCT_SOCKADDR_SA_LEN + lrbsda->u.sa.sa_len = lrbsda->sa_socklen; +#endif + } + + state->fd = socket(sa_fam, SOCK_STREAM, 0); + if (state->fd == -1) { + tevent_req_error(req, errno); + goto post; + } + + state->fd = tsocket_bsd_common_prepare_fd(state->fd, true); + if (state->fd == -1) { + tevent_req_error(req, errno); + goto post; + } + +#ifdef HAVE_IPV6 + if (do_ipv6only) { + int val = 1; + + ret = setsockopt(state->fd, IPPROTO_IPV6, IPV6_V6ONLY, + (const void *)&val, sizeof(val)); + if (ret == -1) { + tevent_req_error(req, errno); + goto post; + } + } +#endif + + if (do_reuseaddr) { + int val = 1; + + ret = setsockopt(state->fd, SOL_SOCKET, SO_REUSEADDR, + (const void *)&val, sizeof(val)); + if (ret == -1) { + tevent_req_error(req, errno); + goto post; + } + } + + if (do_bind) { + ret = bind(state->fd, &lbsda->u.sa, lbsda->sa_socklen); + if (ret == -1) { + tevent_req_error(req, errno); + goto post; + } + } + + if (rbsda->u.sa.sa_family != sa_fam) { + tevent_req_error(req, EINVAL); + goto post; + } + + ret = connect(state->fd, &rbsda->u.sa, rbsda->sa_socklen); + if (ret == -1) { + if (errno == EINPROGRESS) { + goto async; + } + tevent_req_error(req, errno); + goto post; + } + + if (!state->local) { + tevent_req_done(req); + goto post; + } + + if (lrbsda != NULL) { + ret = getsockname(state->fd, + &lrbsda->u.sa, + &lrbsda->sa_socklen); + if (ret == -1) { + tevent_req_error(req, errno); + goto post; + } + } + + tevent_req_done(req); + goto post; + + async: + + /* + * Note for historic reasons TEVENT_FD_WRITE is not enough + * to get notified for POLLERR or EPOLLHUP even if they + * come together with POLLOUT. That means we need to + * use TEVENT_FD_READ in addition until we have + * TEVENT_FD_ERROR. + */ + state->fde = tevent_add_fd(ev, state, + state->fd, + TEVENT_FD_ERROR | TEVENT_FD_WRITE, + tstream_bsd_connect_fde_handler, + req); + if (tevent_req_nomem(state->fde, req)) { + goto post; + } + + return req; + + post: + tevent_req_post(req, ev); + return req; +} + +static void tstream_bsd_connect_fde_handler(struct tevent_context *ev, + struct tevent_fd *fde, + uint16_t flags, + void *private_data) +{ + struct tevent_req *req = talloc_get_type_abort(private_data, + struct tevent_req); + struct tstream_bsd_connect_state *state = tevent_req_data(req, + struct tstream_bsd_connect_state); + struct samba_sockaddr *lrbsda = NULL; + int ret; + int err; + bool retry; + + ret = samba_socket_sock_error(state->fd); + err = tsocket_bsd_error_from_errno(ret, errno, &retry); + if (retry) { + /* retry later */ + return; + } + if (tevent_req_error(req, err)) { + return; + } + + if (!state->local) { + tevent_req_done(req); + return; + } + + lrbsda = talloc_get_type_abort(state->local->private_data, + struct samba_sockaddr); + + ret = getsockname(state->fd, &lrbsda->u.sa, &lrbsda->sa_socklen); + if (ret == -1) { + tevent_req_error(req, errno); + return; + } + + tevent_req_done(req); +} + +static int tstream_bsd_connect_recv(struct tevent_req *req, + int *perrno, + TALLOC_CTX *mem_ctx, + struct tstream_context **stream, + struct tsocket_address **local, + const char *location) +{ + struct tstream_bsd_connect_state *state = tevent_req_data(req, + struct tstream_bsd_connect_state); + int ret; + + ret = tsocket_simple_int_recv(req, perrno); + if (ret == 0) { + ret = _tstream_bsd_existing_socket(mem_ctx, + state->fd, + stream, + location); + if (ret == -1) { + *perrno = errno; + goto done; + } + TALLOC_FREE(state->fde); + state->fd = -1; + + if (local) { + *local = talloc_move(mem_ctx, &state->local); + } + } + +done: + tevent_req_received(req); + return ret; +} + +struct tevent_req * tstream_inet_tcp_connect_send(TALLOC_CTX *mem_ctx, + struct tevent_context *ev, + const struct tsocket_address *local, + const struct tsocket_address *remote) +{ + struct samba_sockaddr *lbsda = + talloc_get_type_abort(local->private_data, + struct samba_sockaddr); + struct tevent_req *req; + int sys_errno = 0; + + switch (lbsda->u.sa.sa_family) { + case AF_INET: + break; +#ifdef HAVE_IPV6 + case AF_INET6: + break; +#endif + default: + sys_errno = EINVAL; + break; + } + + req = tstream_bsd_connect_send(mem_ctx, ev, sys_errno, local, remote); + + return req; +} + +int _tstream_inet_tcp_connect_recv(struct tevent_req *req, + int *perrno, + TALLOC_CTX *mem_ctx, + struct tstream_context **stream, + struct tsocket_address **local, + const char *location) +{ + return tstream_bsd_connect_recv(req, perrno, + mem_ctx, stream, local, + location); +} + +struct tevent_req * tstream_unix_connect_send(TALLOC_CTX *mem_ctx, + struct tevent_context *ev, + const struct tsocket_address *local, + const struct tsocket_address *remote) +{ + struct samba_sockaddr *lbsda = + talloc_get_type_abort(local->private_data, + struct samba_sockaddr); + struct tevent_req *req; + int sys_errno = 0; + + switch (lbsda->u.sa.sa_family) { + case AF_UNIX: + break; + default: + sys_errno = EINVAL; + break; + } + + req = tstream_bsd_connect_send(mem_ctx, ev, sys_errno, local, remote); + + return req; +} + +int _tstream_unix_connect_recv(struct tevent_req *req, + int *perrno, + TALLOC_CTX *mem_ctx, + struct tstream_context **stream, + const char *location) +{ + return tstream_bsd_connect_recv(req, perrno, + mem_ctx, stream, NULL, + location); +} + +int _tstream_unix_socketpair(TALLOC_CTX *mem_ctx1, + struct tstream_context **_stream1, + TALLOC_CTX *mem_ctx2, + struct tstream_context **_stream2, + const char *location) +{ + int ret; + int fds[2]; + int fd1; + int fd2; + struct tstream_context *stream1 = NULL; + struct tstream_context *stream2 = NULL; + + ret = socketpair(AF_UNIX, SOCK_STREAM, 0, fds); + if (ret == -1) { + return -1; + } + fd1 = fds[0]; + fd2 = fds[1]; + + fd1 = tsocket_bsd_common_prepare_fd(fd1, true); + if (fd1 == -1) { + int sys_errno = errno; + close(fd2); + errno = sys_errno; + return -1; + } + + fd2 = tsocket_bsd_common_prepare_fd(fd2, true); + if (fd2 == -1) { + int sys_errno = errno; + close(fd1); + errno = sys_errno; + return -1; + } + + ret = _tstream_bsd_existing_socket(mem_ctx1, + fd1, + &stream1, + location); + if (ret == -1) { + int sys_errno = errno; + close(fd1); + close(fd2); + errno = sys_errno; + return -1; + } + + ret = _tstream_bsd_existing_socket(mem_ctx2, + fd2, + &stream2, + location); + if (ret == -1) { + int sys_errno = errno; + talloc_free(stream1); + close(fd2); + errno = sys_errno; + return -1; + } + + *_stream1 = stream1; + *_stream2 = stream2; + return 0; +} + |