1
0
Fork 0
knot-resolver/daemon/network.c
Daniel Baumann fbc604e215
Adding upstream version 5.7.5.
Signed-off-by: Daniel Baumann <daniel.baumann@progress-linux.org>
2025-06-21 13:56:17 +02:00

933 lines
26 KiB
C

/* Copyright (C) CZ.NIC, z.s.p.o. <knot-resolver@labs.nic.cz>
* SPDX-License-Identifier: GPL-3.0-or-later
*/
#include "daemon/network.h"
#include "contrib/cleanup.h"
#include "daemon/bindings/impl.h"
#include "daemon/io.h"
#include "daemon/tls.h"
#include "daemon/worker.h"
#include "lib/utils.h"
#if ENABLE_XDP
#include <libknot/xdp/eth.h>
#endif
#include <libgen.h>
#include <net/if.h>
#include <sys/un.h>
#include <unistd.h>
/** Determines the type of `struct endpoint_key`. */
enum endpoint_key_type
{
ENDPOINT_KEY_SOCKADDR = 1,
ENDPOINT_KEY_IFNAME = 2,
};
/** Used as a key in the `struct network::endpoints` trie. */
struct endpoint_key {
enum endpoint_key_type type;
char data[];
};
struct __attribute__((packed)) endpoint_key_sockaddr {
enum endpoint_key_type type;
struct kr_sockaddr_key_storage sa_key;
};
struct __attribute__((packed)) endpoint_key_ifname {
enum endpoint_key_type type;
char ifname[128];
};
/** Used for reserving enough storage for `endpoint_key`. */
struct endpoint_key_storage {
union {
enum endpoint_key_type type;
struct endpoint_key_sockaddr sa;
struct endpoint_key_ifname ifname;
char bytes[1]; /* for easier casting */
};
};
static_assert(_Alignof(struct endpoint_key) <= 4, "endpoint_key must be aligned to <=4");
static_assert(_Alignof(struct endpoint_key_sockaddr) <= 4, "endpoint_key must be aligned to <=4");
static_assert(_Alignof(struct endpoint_key_ifname) <= 4, "endpoint_key must be aligned to <=4");
void network_init(struct network *net, uv_loop_t *loop, int tcp_backlog)
{
if (net != NULL) {
net->loop = loop;
net->endpoints = trie_create(NULL);
net->endpoint_kinds = trie_create(NULL);
net->proxy_all4 = false;
net->proxy_all6 = false;
net->proxy_addrs4 = trie_create(NULL);
net->proxy_addrs6 = trie_create(NULL);
net->tls_client_params = NULL;
net->tls_session_ticket_ctx = /* unsync. random, by default */
tls_session_ticket_ctx_create(loop, NULL, 0);
net->tcp.in_idle_timeout = 10000;
net->tcp.tls_handshake_timeout = TLS_MAX_HANDSHAKE_TIME;
net->tcp_backlog = tcp_backlog;
net->tcp.user_timeout = 1000; // 1s should be more than enough
// On Linux, unset means some auto-tuning mechanism also depending on RAM,
// which might be OK default (together with the user_timeout above)
//net->listen_{tcp,udp}_buflens.{snd,rcv}
}
}
/** Notify the registered function about endpoint getting open.
* If log_port < 1, don't log it. */
static int endpoint_open_lua_cb(struct network *net, struct endpoint *ep,
const char *log_addr)
{
const bool ok = ep->flags.kind && !ep->handle && !ep->engaged && ep->fd != -1;
if (kr_fails_assert(ok))
return kr_error(EINVAL);
/* First find callback in the endpoint registry. */
lua_State *L = the_worker->engine->L;
void **pp = trie_get_try(net->endpoint_kinds, ep->flags.kind,
strlen(ep->flags.kind));
if (!pp && net->missing_kind_is_error) {
kr_log_error(NETWORK, "error: network socket kind '%s' not handled when opening '%s",
ep->flags.kind, log_addr);
if (ep->family != AF_UNIX)
kr_log_error(NETWORK, "#%d", ep->port);
kr_log_error(NETWORK, "'\n");
return kr_error(ENOENT);
}
if (!pp) return kr_ok();
/* Now execute the callback. */
const int fun_id = (char *)*pp - (char *)NULL;
lua_rawgeti(L, LUA_REGISTRYINDEX, fun_id);
lua_pushboolean(L, true /* open */);
lua_pushpointer(L, ep);
if (ep->family == AF_UNIX) {
lua_pushstring(L, log_addr);
} else {
lua_pushfstring(L, "%s#%d", log_addr, ep->port);
}
if (lua_pcall(L, 3, 0, 0)) {
kr_log_error(NETWORK, "error opening %s: %s\n", log_addr, lua_tostring(L, -1));
return kr_error(ENOSYS); /* TODO: better value? */
}
ep->engaged = true;
return kr_ok();
}
static int engage_endpoint_array(const char *b_key, uint32_t key_len, trie_val_t *val, void *net)
{
const char *log_addr = network_endpoint_key_str((struct endpoint_key *) b_key);
if (!log_addr)
log_addr = "[unknown]";
endpoint_array_t *eps = *val;
for (int i = 0; i < eps->len; ++i) {
struct endpoint *ep = &eps->at[i];
const bool match = !ep->engaged && ep->flags.kind;
if (!match) continue;
int ret = endpoint_open_lua_cb(net, ep, log_addr);
if (ret) return ret;
}
return 0;
}
int network_engage_endpoints(struct network *net)
{
if (net->missing_kind_is_error)
return kr_ok(); /* maybe weird, but let's make it idempotent */
net->missing_kind_is_error = true;
int ret = trie_apply_with_key(net->endpoints, engage_endpoint_array, net);
if (ret) {
net->missing_kind_is_error = false; /* avoid the same errors when closing */
return ret;
}
return kr_ok();
}
const char *network_endpoint_key_str(const struct endpoint_key *key)
{
switch (key->type)
{
case ENDPOINT_KEY_SOCKADDR:;
const struct endpoint_key_sockaddr *sa_key =
(struct endpoint_key_sockaddr *) key;
struct sockaddr_storage sa_storage;
struct sockaddr *sa = kr_sockaddr_from_key(&sa_storage, (const char *) &sa_key->sa_key);
return kr_straddr(sa);
case ENDPOINT_KEY_IFNAME:;
const struct endpoint_key_ifname *if_key =
(struct endpoint_key_ifname *) key;
return if_key->ifname;
default:
kr_assert(false);
return NULL;
}
}
/** Notify the registered function about endpoint about to be closed. */
static void endpoint_close_lua_cb(struct network *net, struct endpoint *ep)
{
lua_State *L = the_worker->engine->L;
void **pp = trie_get_try(net->endpoint_kinds, ep->flags.kind,
strlen(ep->flags.kind));
if (!pp && net->missing_kind_is_error) {
kr_log_error(NETWORK, "internal error: missing kind '%s' in endpoint registry\n",
ep->flags.kind);
return;
}
if (!pp) return;
const int fun_id = (char *)*pp - (char *)NULL;
lua_rawgeti(L, LUA_REGISTRYINDEX, fun_id);
lua_pushboolean(L, false /* close */);
lua_pushpointer(L, ep);
lua_pushstring(L, "FIXME:endpoint-identifier");
if (lua_pcall(L, 3, 0, 0)) {
kr_log_error(NETWORK, "failed to close FIXME:endpoint-identifier: %s\n",
lua_tostring(L, -1));
}
}
static void endpoint_close(struct network *net, struct endpoint *ep, bool force)
{
const bool is_control = ep->flags.kind && strcmp(ep->flags.kind, "control") == 0;
const bool is_xdp = ep->family == AF_XDP;
if (ep->family == AF_UNIX) { /* The FS name would be left behind. */
/* Extract local address for this socket. */
struct sockaddr_un sa;
sa.sun_path[0] = '\0'; /*< probably only for lint:scan-build */
socklen_t addr_len = sizeof(sa);
if (getsockname(ep->fd, (struct sockaddr *)&sa, &addr_len)
|| unlink(sa.sun_path)) {
kr_log_error(NETWORK, "error (ignored) when closing unix socket (fd = %d): %s\n",
ep->fd, strerror(errno));
return;
}
}
if (ep->flags.kind && !is_control && !is_xdp) {
kr_assert(!ep->handle);
/* Special lua-handled endpoint. */
if (ep->engaged) {
endpoint_close_lua_cb(net, ep);
}
if (ep->fd > 0) {
close(ep->fd); /* nothing to do with errors */
}
free_const(ep->flags.kind);
return;
}
free_const(ep->flags.kind); /* needed if (is_control) */
kr_require(ep->handle);
if (force) { /* Force close if event loop isn't running. */
if (ep->fd >= 0) {
close(ep->fd);
}
if (ep->handle) {
ep->handle->loop = NULL;
io_free(ep->handle);
}
} else { /* Asynchronous close */
uv_close(ep->handle, io_free);
}
}
/** Endpoint visitor (see @file trie.h) */
static int close_key(trie_val_t *val, void* net)
{
endpoint_array_t *ep_array = *val;
for (int i = 0; i < ep_array->len; ++i) {
endpoint_close(net, &ep_array->at[i], true);
}
return 0;
}
static int free_key(trie_val_t *val, void* ext)
{
endpoint_array_t *ep_array = *val;
array_clear(*ep_array);
free(ep_array);
return kr_ok();
}
int kind_unregister(trie_val_t *tv, void *L)
{
int fun_id = (char *)*tv - (char *)NULL;
luaL_unref(L, LUA_REGISTRYINDEX, fun_id);
return 0;
}
void network_close_force(struct network *net)
{
if (net != NULL) {
trie_apply(net->endpoints, close_key, net);
trie_apply(net->endpoints, free_key, NULL);
trie_clear(net->endpoints);
}
}
/** Frees all the `struct net_proxy_data` in the specified trie. */
void network_proxy_free_addr_data(trie_t* trie)
{
trie_it_t *it;
for (it = trie_it_begin(trie); !trie_it_finished(it); trie_it_next(it)) {
struct net_proxy_data *data = *trie_it_val(it);
free(data);
}
trie_it_free(it);
}
void network_deinit(struct network *net)
{
if (net != NULL) {
network_close_force(net);
trie_apply(net->endpoint_kinds, kind_unregister, the_worker->engine->L);
trie_free(net->endpoint_kinds);
trie_free(net->endpoints);
network_proxy_free_addr_data(net->proxy_addrs4);
trie_free(net->proxy_addrs4);
network_proxy_free_addr_data(net->proxy_addrs6);
trie_free(net->proxy_addrs6);
tls_credentials_free(net->tls_credentials);
tls_client_params_free(net->tls_client_params);
tls_session_ticket_ctx_destroy(net->tls_session_ticket_ctx);
#ifndef NDEBUG
memset(net, 0, sizeof(*net));
#endif
}
}
/** Creates an endpoint key for use with a `trie_t` and stores it into `dst`.
* Returns the actual length of the generated key. */
static ssize_t endpoint_key_create(struct endpoint_key_storage *dst,
const char *addr_str,
const struct sockaddr *sa)
{
memset(dst, 0, sizeof(*dst));
if (sa) {
struct endpoint_key_sockaddr *key = &dst->sa;
key->type = ENDPOINT_KEY_SOCKADDR;
ssize_t keylen = kr_sockaddr_key(&key->sa_key, sa);
if (keylen < 0)
return keylen;
return sizeof(struct endpoint_key) + keylen;
} else {
struct endpoint_key_ifname *key = &dst->ifname;
key->type = ENDPOINT_KEY_IFNAME;
/* The subtractions and additions of 1 are here to account for
* null-terminators. */
strncpy(key->ifname, addr_str, sizeof(key->ifname) - 1);
return sizeof(struct endpoint_key) + strlen(key->ifname) + 1;
}
}
/** Fetch or create endpoint array and insert endpoint (shallow memcpy). */
static int insert_endpoint(struct network *net, const char *addr_str,
const struct sockaddr *addr, struct endpoint *ep)
{
/* Fetch or insert address into map */
struct endpoint_key_storage key;
ssize_t keylen = endpoint_key_create(&key, addr_str, addr);
if (keylen < 0)
return keylen;
trie_val_t *val = trie_get_ins(net->endpoints, key.bytes, keylen);
endpoint_array_t *ep_array;
if (*val) {
ep_array = *val;
} else {
ep_array = malloc(sizeof(*ep_array));
kr_require(ep_array);
array_init(*ep_array);
*val = ep_array;
}
if (array_reserve(*ep_array, ep_array->len + 1)) {
return kr_error(ENOMEM);
}
memcpy(&ep_array->at[ep_array->len++], ep, sizeof(*ep));
return kr_ok();
}
/** Open endpoint protocols. ep->flags were pre-set.
* \p addr_str is only used for logging or for XDP "address". */
static int open_endpoint(struct network *net, const char *addr_str,
struct endpoint *ep, const struct sockaddr *sa)
{
const bool is_control = ep->flags.kind && strcmp(ep->flags.kind, "control") == 0;
const bool is_xdp = ep->family == AF_XDP;
bool ok = (!is_xdp)
|| (sa == NULL && ep->fd == -1 && ep->nic_queue >= 0
&& ep->flags.sock_type == SOCK_DGRAM && !ep->flags.tls);
if (kr_fails_assert(ok))
return kr_error(EINVAL);
if (ep->handle) {
return kr_error(EEXIST);
}
if (sa && ep->fd == -1) {
if (sa->sa_family == AF_UNIX) {
struct sockaddr_un *sun = (struct sockaddr_un*)sa;
char *dirc = strdup(sun->sun_path);
char *dname = dirname(dirc);
(void)unlink(sun->sun_path); /** Attempt to unlink if socket path exists. */
(void)mkdir(dname, S_IRWXU|S_IRWXG); /** Attempt to create dir. */
free(dirc);
}
ep->fd = io_bind(sa, ep->flags.sock_type, &ep->flags);
if (ep->fd < 0) return ep->fd;
}
if (ep->flags.kind && !is_control && !is_xdp) {
/* This EP isn't to be managed internally after binding. */
return endpoint_open_lua_cb(net, ep, addr_str);
} else {
ep->engaged = true;
/* .engaged seems not really meaningful in this case, but... */
}
int ret;
if (is_control) {
uv_pipe_t *ep_handle = malloc(sizeof(uv_pipe_t));
ep->handle = (uv_handle_t *)ep_handle;
ret = !ep->handle ? ENOMEM
: io_listen_pipe(net->loop, ep_handle, ep->fd);
goto finish_ret;
}
if (ep->family == AF_UNIX) {
/* Some parts of connection handling would need more work,
* so let's support AF_UNIX only with .kind != NULL for now. */
kr_log_error(NETWORK, "AF_UNIX only supported with set { kind = '...' }\n");
ret = EAFNOSUPPORT;
goto finish_ret;
/*
uv_pipe_t *ep_handle = malloc(sizeof(uv_pipe_t));
*/
}
if (is_xdp) {
#if ENABLE_XDP
uv_poll_t *ep_handle = malloc(sizeof(uv_poll_t));
ep->handle = (uv_handle_t *)ep_handle;
ret = !ep->handle ? ENOMEM
: io_listen_xdp(net->loop, ep, addr_str);
#else
ret = ESOCKTNOSUPPORT;
#endif
goto finish_ret;
} /* else */
if (ep->flags.sock_type == SOCK_DGRAM) {
if (kr_fails_assert(!ep->flags.tls))
return kr_error(EINVAL);
uv_udp_t *ep_handle = malloc(sizeof(uv_udp_t));
ep->handle = (uv_handle_t *)ep_handle;
ret = !ep->handle ? ENOMEM
: io_listen_udp(net->loop, ep_handle, ep->fd);
goto finish_ret;
} /* else */
if (ep->flags.sock_type == SOCK_STREAM) {
uv_tcp_t *ep_handle = malloc(sizeof(uv_tcp_t));
ep->handle = (uv_handle_t *)ep_handle;
ret = !ep->handle ? ENOMEM
: io_listen_tcp(net->loop, ep_handle, ep->fd,
net->tcp_backlog, ep->flags.tls, ep->flags.http);
goto finish_ret;
} /* else */
kr_assert(false);
return kr_error(EINVAL);
finish_ret:
if (!ret) return ret;
free(ep->handle);
ep->handle = NULL;
return kr_error(ret);
}
/** @internal Fetch a pointer to endpoint of given parameters (or NULL).
* Beware that there might be multiple matches, though that's not common.
* The matching isn't really precise in the sense that it might not find
* and endpoint that would *collide* the passed one. */
static struct endpoint * endpoint_get(struct network *net,
const char *addr_str,
const struct sockaddr *sa,
endpoint_flags_t flags)
{
struct endpoint_key_storage key;
ssize_t keylen = endpoint_key_create(&key, addr_str, sa);
if (keylen < 0)
return NULL;
trie_val_t *val = trie_get_try(net->endpoints, key.bytes, keylen);
if (!val)
return NULL;
endpoint_array_t *ep_array = *val;
uint16_t port = kr_inaddr_port(sa);
for (int i = 0; i < ep_array->len; ++i) {
struct endpoint *ep = &ep_array->at[i];
if ((flags.xdp || ep->port == port) && endpoint_flags_eq(ep->flags, flags)) {
return ep;
}
}
return NULL;
}
/** \note pass (either sa != NULL xor ep.fd != -1) or XDP case (neither sa nor ep.fd)
* \note in XDP case addr_str is interface name
* \note ownership of ep.flags.* is taken on success. */
static int create_endpoint(struct network *net, const char *addr_str,
struct endpoint *ep, const struct sockaddr *sa)
{
int ret = open_endpoint(net, addr_str, ep, sa);
if (ret == 0) {
ret = insert_endpoint(net, addr_str, sa, ep);
}
if (ret != 0 && ep->handle) {
endpoint_close(net, ep, false);
}
return ret;
}
int network_listen_fd(struct network *net, int fd, endpoint_flags_t flags)
{
if (kr_fails_assert(!flags.xdp))
return kr_error(EINVAL);
/* Extract fd's socket type. */
socklen_t len = sizeof(flags.sock_type);
int ret = getsockopt(fd, SOL_SOCKET, SO_TYPE, &flags.sock_type, &len);
if (ret != 0)
return kr_error(errno);
const bool is_dtls = flags.sock_type == SOCK_DGRAM && !flags.kind && flags.tls;
if (kr_fails_assert(!is_dtls))
return kr_error(EINVAL); /* Perhaps DTLS some day. */
if (flags.sock_type != SOCK_DGRAM && flags.sock_type != SOCK_STREAM)
return kr_error(EBADF);
/* Extract local address for this socket. */
struct sockaddr_storage ss = { .ss_family = AF_UNSPEC };
socklen_t addr_len = sizeof(ss);
ret = getsockname(fd, (struct sockaddr *)&ss, &addr_len);
if (ret != 0)
return kr_error(errno);
struct endpoint ep = {
.flags = flags,
.family = ss.ss_family,
.fd = fd,
};
/* Extract address string and port. */
char addr_buf[INET6_ADDRSTRLEN]; /* https://tools.ietf.org/html/rfc4291 */
const char *addr_str;
switch (ep.family) {
case AF_INET:
ret = uv_ip4_name((const struct sockaddr_in*)&ss, addr_buf, sizeof(addr_buf));
addr_str = addr_buf;
ep.port = ntohs(((struct sockaddr_in *)&ss)->sin_port);
break;
case AF_INET6:
ret = uv_ip6_name((const struct sockaddr_in6*)&ss, addr_buf, sizeof(addr_buf));
addr_str = addr_buf;
ep.port = ntohs(((struct sockaddr_in6 *)&ss)->sin6_port);
break;
case AF_UNIX:
/* No SOCK_DGRAM with AF_UNIX support, at least for now. */
ret = flags.sock_type == SOCK_STREAM ? kr_ok() : kr_error(EAFNOSUPPORT);
addr_str = ((struct sockaddr_un *)&ss)->sun_path;
break;
default:
ret = kr_error(EAFNOSUPPORT);
}
if (ret) return ret;
/* always create endpoint for supervisor supplied fd
* even if addr+port is not unique */
return create_endpoint(net, addr_str, &ep, (struct sockaddr *) &ss);
}
/** Try selecting XDP queue automatically. */
static int16_t nic_queue_auto(void)
{
const char *inst_str = getenv("SYSTEMD_INSTANCE");
if (!inst_str)
return 0; // should work OK for simple (single-kresd) deployments
char *endp;
errno = 0; // strtol() is special in this respect
long inst = strtol(inst_str, &endp, 10);
if (!errno && *endp == '\0' && inst > 0 && inst < UINT16_MAX)
return inst - 1; // 1-based vs. 0-based indexing conventions
return -1;
}
int network_listen(struct network *net, const char *addr, uint16_t port,
int16_t nic_queue, endpoint_flags_t flags)
{
if (kr_fails_assert(net != NULL && addr != 0 && nic_queue >= -1))
return kr_error(EINVAL);
if (flags.xdp && nic_queue < 0) {
nic_queue = nic_queue_auto();
if (nic_queue < 0) {
return kr_error(EINVAL);
}
}
// Try parsing the address.
const struct sockaddr *sa = kr_straddr_socket(addr, port, NULL);
if (!sa && !flags.xdp) { // unusable address spec
return kr_error(EINVAL);
}
char ifname_buf[64] UNUSED;
if (sa && flags.xdp) { // auto-detection: address -> interface
#if ENABLE_XDP
int ret = knot_eth_name_from_addr((const struct sockaddr_storage *)sa,
ifname_buf, sizeof(ifname_buf));
// even on success we don't want to pass `sa` on
free_const(sa);
sa = NULL;
if (ret) {
return kr_error(ret);
}
addr = ifname_buf;
#else
return kr_error(ESOCKTNOSUPPORT);
#endif
}
// XDP: if addr failed to parse as address, we assume it's an interface name.
if (endpoint_get(net, addr, sa, flags)) {
return kr_error(EADDRINUSE); // Already listening
}
struct endpoint ep = { 0 };
ep.flags = flags;
ep.fd = -1;
ep.port = port;
ep.family = flags.xdp ? AF_XDP : sa->sa_family;
ep.nic_queue = nic_queue;
int ret = create_endpoint(net, addr, &ep, sa);
// Error reporting: more precision.
if (ret == KNOT_EINVAL && !sa && flags.xdp && ENABLE_XDP) {
if (!if_nametoindex(addr) && errno == ENODEV) {
ret = kr_error(ENODEV);
}
}
free_const(sa);
return ret;
}
int network_proxy_allow(struct network *net, const char* addr)
{
if (kr_fails_assert(net != NULL && addr != NULL))
return kr_error(EINVAL);
int family = kr_straddr_family(addr);
if (family < 0) {
kr_log_error(NETWORK, "Wrong address format for proxy_allowed: %s\n",
addr);
return kr_error(EINVAL);
} else if (family == AF_UNIX) {
kr_log_error(NETWORK, "Unix sockets not supported for proxy_allowed: %s\n",
addr);
return kr_error(EINVAL);
}
union kr_in_addr ia;
int netmask = kr_straddr_subnet(&ia, addr);
if (netmask < 0) {
kr_log_error(NETWORK, "Wrong netmask format for proxy_allowed: %s\n", addr);
return kr_error(EINVAL);
} else if (netmask == 0) {
/* Netmask is zero: allow all addresses to use PROXYv2 */
switch (family) {
case AF_INET:
net->proxy_all4 = true;
break;
case AF_INET6:
net->proxy_all6 = true;
break;
default:
kr_assert(false);
return kr_error(EINVAL);
}
return kr_ok();
}
size_t addr_length;
trie_t *trie;
switch (family) {
case AF_INET:
addr_length = sizeof(ia.ip4);
trie = net->proxy_addrs4;
break;
case AF_INET6:
addr_length = sizeof(ia.ip6);
trie = net->proxy_addrs6;
break;
default:
kr_assert(false);
return kr_error(EINVAL);
}
kr_bitmask((unsigned char *) &ia, addr_length, netmask);
trie_val_t *val = trie_get_ins(trie, (char *) &ia, addr_length);
if (!val)
return kr_error(ENOMEM);
struct net_proxy_data *data = *val;
if (!data) {
/* Allocate data if the entry is new in the trie */
*val = malloc(sizeof(struct net_proxy_data));
data = *val;
data->netmask = 0;
}
if (data->netmask == 0) {
memcpy(&data->addr, &ia, addr_length);
data->netmask = netmask;
} else if (data->netmask > netmask) {
/* A more relaxed netmask configured - replace it */
data->netmask = netmask;
}
return kr_ok();
}
void network_proxy_reset(struct network *net)
{
net->proxy_all4 = false;
network_proxy_free_addr_data(net->proxy_addrs4);
trie_clear(net->proxy_addrs4);
net->proxy_all6 = false;
network_proxy_free_addr_data(net->proxy_addrs6);
trie_clear(net->proxy_addrs6);
}
static int endpoints_close(struct network *net,
struct endpoint_key_storage *key, ssize_t keylen,
endpoint_array_t *ep_array, int port)
{
size_t i = 0;
bool matched = false; /*< at least one match */
while (i < ep_array->len) {
struct endpoint *ep = &ep_array->at[i];
if (port < 0 || ep->port == port) {
endpoint_close(net, ep, false);
array_del(*ep_array, i);
matched = true;
/* do not advance i */
} else {
++i;
}
}
if (!matched) {
return kr_error(ENOENT);
}
return kr_ok();
}
static bool endpoint_key_addr_matches(struct endpoint_key_storage *key_a,
struct endpoint_key_storage *key_b)
{
if (key_a->type != key_b->type)
return false;
if (key_a->type == ENDPOINT_KEY_IFNAME)
return strncmp(key_a->ifname.ifname,
key_b->ifname.ifname,
sizeof(key_a->ifname.ifname)) == 0;
if (key_a->type == ENDPOINT_KEY_SOCKADDR) {
return kr_sockaddr_key_same_addr(
key_a->sa.sa_key.bytes, key_b->sa.sa_key.bytes);
}
kr_assert(false);
return kr_error(EINVAL);
}
struct endpoint_key_with_len {
struct endpoint_key_storage key;
size_t keylen;
};
typedef array_t(struct endpoint_key_with_len) endpoint_key_array_t;
struct endpoint_close_wildcard_context {
struct network *net;
struct endpoint_key_storage *match_key;
endpoint_key_array_t del;
int ret;
};
static int endpoints_close_wildcard(const char *s_key, uint32_t keylen, trie_val_t *val, void *baton)
{
struct endpoint_close_wildcard_context *ctx = baton;
struct endpoint_key_storage *key = (struct endpoint_key_storage *)s_key;
if (!endpoint_key_addr_matches(key, ctx->match_key))
return kr_ok();
endpoint_array_t *ep_array = *val;
int ret = endpoints_close(ctx->net, key, keylen, ep_array, -1);
if (ret)
ctx->ret = ret;
if (ep_array->len == 0) {
struct endpoint_key_with_len to_del = {
.key = *key,
.keylen = keylen
};
array_push(ctx->del, to_del);
}
return kr_ok();
}
int network_close(struct network *net, const char *addr_str, int port)
{
auto_free struct sockaddr *addr = kr_straddr_socket(addr_str, port, NULL);
struct endpoint_key_storage key;
ssize_t keylen = endpoint_key_create(&key, addr_str, addr);
if (keylen < 0)
return keylen;
if (port < 0) {
struct endpoint_close_wildcard_context ctx = {
.net = net,
.match_key = &key
};
array_init(ctx.del);
trie_apply_with_key(net->endpoints, endpoints_close_wildcard, &ctx);
for (size_t i = 0; i < ctx.del.len; i++) {
trie_val_t val;
trie_del(net->endpoints,
ctx.del.at[i].key.bytes, ctx.del.at[i].keylen,
&val);
if (val) {
array_clear(*(endpoint_array_t *) val);
free(val);
}
}
return ctx.ret;
}
trie_val_t *val = trie_get_try(net->endpoints, key.bytes, keylen);
if (!val)
return kr_error(ENOENT);
endpoint_array_t *ep_array = *val;
int ret = endpoints_close(net, &key, keylen, ep_array, port);
/* Collapse key if it has no endpoint. */
if (ep_array->len == 0) {
array_clear(*ep_array);
free(ep_array);
trie_del(net->endpoints, key.bytes, keylen, NULL);
}
return ret;
}
void network_new_hostname(struct network *net, struct engine *engine)
{
if (net->tls_credentials &&
net->tls_credentials->ephemeral_servicename) {
struct tls_credentials *newcreds;
newcreds = tls_get_ephemeral_credentials(engine);
if (newcreds) {
tls_credentials_release(net->tls_credentials);
net->tls_credentials = newcreds;
kr_log_info(TLS, "Updated ephemeral X.509 cert with new hostname\n");
} else {
kr_log_error(TLS, "Failed to update ephemeral X.509 cert with new hostname, using existing one\n");
}
}
}
#ifdef SO_ATTACH_BPF
static int set_bpf_cb(trie_val_t *val, void *ctx)
{
endpoint_array_t *endpoints = *val;
int *bpffd = (int *)ctx;
if (kr_fails_assert(endpoints && bpffd))
return kr_error(EINVAL);
for (size_t i = 0; i < endpoints->len; i++) {
struct endpoint *endpoint = &endpoints->at[i];
uv_os_fd_t sockfd = -1;
if (endpoint->handle != NULL)
uv_fileno(endpoint->handle, &sockfd);
kr_require(sockfd != -1);
if (setsockopt(sockfd, SOL_SOCKET, SO_ATTACH_BPF, bpffd, sizeof(int)) != 0) {
return 1; /* return error (and stop iterating over net->endpoints) */
}
}
return 0; /* OK */
}
#endif
int network_set_bpf(struct network *net, int bpf_fd)
{
#ifdef SO_ATTACH_BPF
if (trie_apply(net->endpoints, set_bpf_cb, &bpf_fd) != 0) {
/* set_bpf_cb() has returned error. */
network_clear_bpf(net);
return 0;
}
#else
kr_log_error(NETWORK, "SO_ATTACH_BPF socket option doesn't supported\n");
(void)net;
(void)bpf_fd;
return 0;
#endif
return 1;
}
#ifdef SO_DETACH_BPF
static int clear_bpf_cb(trie_val_t *val, void *ctx)
{
endpoint_array_t *endpoints = *val;
if (kr_fails_assert(endpoints))
return kr_error(EINVAL);
for (size_t i = 0; i < endpoints->len; i++) {
struct endpoint *endpoint = &endpoints->at[i];
uv_os_fd_t sockfd = -1;
if (endpoint->handle != NULL)
uv_fileno(endpoint->handle, &sockfd);
kr_require(sockfd != -1);
if (setsockopt(sockfd, SOL_SOCKET, SO_DETACH_BPF, NULL, 0) != 0) {
kr_log_error(NETWORK, "failed to clear SO_DETACH_BPF socket option\n");
}
/* Proceed even if setsockopt() failed,
* as we want to process all opened sockets. */
}
return 0;
}
#endif
void network_clear_bpf(struct network *net)
{
#ifdef SO_DETACH_BPF
trie_apply(net->endpoints, clear_bpf_cb, NULL);
#else
kr_log_error(NETWORK, "SO_DETACH_BPF socket option doesn't supported\n");
(void)net;
#endif
}