diff options
author | Daniel Baumann <daniel.baumann@progress-linux.org> | 2024-04-10 21:30:40 +0000 |
---|---|---|
committer | Daniel Baumann <daniel.baumann@progress-linux.org> | 2024-04-10 21:30:40 +0000 |
commit | 133a45c109da5310add55824db21af5239951f93 (patch) | |
tree | ba6ac4c0a950a0dda56451944315d66409923918 /src/libutil | |
parent | Initial commit. (diff) | |
download | rspamd-133a45c109da5310add55824db21af5239951f93.tar.xz rspamd-133a45c109da5310add55824db21af5239951f93.zip |
Adding upstream version 3.8.1. (refs: upstream/3.8.1, upstream)
Signed-off-by: Daniel Baumann <daniel.baumann@progress-linux.org>
Diffstat (limited to 'src/libutil')
48 files changed, 28075 insertions, 0 deletions
diff --git a/src/libutil/CMakeLists.txt b/src/libutil/CMakeLists.txt new file mode 100644 index 0000000..67b7e94 --- /dev/null +++ b/src/libutil/CMakeLists.txt @@ -0,0 +1,24 @@ +# Librspamd-util +SET(LIBRSPAMDUTILSRC + ${CMAKE_CURRENT_SOURCE_DIR}/addr.c + ${CMAKE_CURRENT_SOURCE_DIR}/libev_helper.c + ${CMAKE_CURRENT_SOURCE_DIR}/expression.c + ${CMAKE_CURRENT_SOURCE_DIR}/fstring.c + ${CMAKE_CURRENT_SOURCE_DIR}/hash.c + ${CMAKE_CURRENT_SOURCE_DIR}/mem_pool.c + ${CMAKE_CURRENT_SOURCE_DIR}/printf.c + ${CMAKE_CURRENT_SOURCE_DIR}/radix.c + ${CMAKE_CURRENT_SOURCE_DIR}/regexp.c + ${CMAKE_CURRENT_SOURCE_DIR}/rrd.c + ${CMAKE_CURRENT_SOURCE_DIR}/shingles.c + ${CMAKE_CURRENT_SOURCE_DIR}/sqlite_utils.c + ${CMAKE_CURRENT_SOURCE_DIR}/str_util.c + ${CMAKE_CURRENT_SOURCE_DIR}/upstream.c + ${CMAKE_CURRENT_SOURCE_DIR}/util.c + ${CMAKE_CURRENT_SOURCE_DIR}/heap.c + ${CMAKE_CURRENT_SOURCE_DIR}/multipattern.c + ${CMAKE_CURRENT_SOURCE_DIR}/cxx/utf8_util.cxx + ${CMAKE_CURRENT_SOURCE_DIR}/cxx/util_tests.cxx + ${CMAKE_CURRENT_SOURCE_DIR}/cxx/file_util.cxx) +# Rspamdutil +SET(RSPAMD_UTIL ${LIBRSPAMDUTILSRC} PARENT_SCOPE)
\ No newline at end of file diff --git a/src/libutil/addr.c b/src/libutil/addr.c new file mode 100644 index 0000000..e011c99 --- /dev/null +++ b/src/libutil/addr.c @@ -0,0 +1,2049 @@ +/* + * Copyright 2023 Vsevolod Stakhov + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +#include "config.h" +#include "addr.h" +#include "util.h" +#include "logger.h" +#include "cryptobox.h" +#include "unix-std.h" +/* pwd and grp */ +#ifdef HAVE_PWD_H +#include <pwd.h> +#endif + +#ifdef HAVE_GRP_H +#include <grp.h> +#endif + +static void *local_addrs; + +enum { + RSPAMD_IPV6_UNDEFINED = 0, + RSPAMD_IPV6_SUPPORTED, + RSPAMD_IPV6_UNSUPPORTED +} ipv6_status = RSPAMD_IPV6_UNDEFINED; + +/** + * Union that is used for storing sockaddrs + */ +union sa_union { + struct sockaddr sa; + struct sockaddr_in s4; + struct sockaddr_in6 s6; + struct sockaddr_un su; + struct sockaddr_storage ss; +}; + +union sa_inet { + struct sockaddr sa; + struct sockaddr_in s4; + struct sockaddr_in6 s6; +}; + +struct rspamd_addr_unix { + struct sockaddr_un addr; + gint mode; + uid_t owner; + gid_t group; +}; + +struct rspamd_addr_inet { + union sa_inet addr; +}; + +struct rspamd_inet_addr_s { + union { + struct rspamd_addr_inet in; + struct rspamd_addr_unix *un; + } u; + gint af; + socklen_t slen; +}; + +static void +rspamd_ip_validate_af(rspamd_inet_addr_t *addr) +{ + if (addr->af != AF_UNIX) { + if (addr->u.in.addr.sa.sa_family != addr->af) { + addr->u.in.addr.sa.sa_family = addr->af; + } + } + else { 
+ addr->u.un->addr.sun_family = AF_UNIX; + } + + if (addr->af == AF_INET) { + addr->slen = sizeof(struct sockaddr_in); + } + else if (addr->af == AF_INET6) { + addr->slen = sizeof(struct sockaddr_in6); + } + else if (addr->af == AF_UNIX) { +#ifdef SUN_LEN + addr->slen = SUN_LEN(&addr->u.un->addr); +#else + addr->slen = sizeof(addr->u.un->addr); +#endif +#if defined(FREEBSD) || defined(__APPLE__) + addr->u.un->addr.sun_len = addr->slen; +#endif + } +} + +#define RSPAMD_MAYBE_ALLOC_POOL(pool, sz) \ + (pool != NULL) ? rspamd_mempool_alloc((pool), (sz)) : g_malloc(sz) +#define RSPAMD_MAYBE_ALLOC0_POOL(pool, sz) \ + (pool != NULL) ? rspamd_mempool_alloc0((pool), (sz)) : g_malloc0(sz) + +static rspamd_inet_addr_t * +rspamd_inet_addr_create(gint af, rspamd_mempool_t *pool) +{ + rspamd_inet_addr_t *addr; + + addr = RSPAMD_MAYBE_ALLOC0_POOL(pool, sizeof(*addr)); + + addr->af = af; + + if (af == AF_UNIX) { + addr->u.un = RSPAMD_MAYBE_ALLOC0_POOL(pool, sizeof(*addr->u.un)); + addr->slen = sizeof(addr->u.un->addr); + } + else { + rspamd_ip_validate_af(addr); + } + + return addr; +} + +void rspamd_inet_address_free(rspamd_inet_addr_t *addr) +{ + if (addr) { + if (addr->af == AF_UNIX) { + if (addr->u.un) { + g_free(addr->u.un); + } + } + g_free(addr); + } +} + +static void +rspamd_ip_check_ipv6(void) +{ + if (ipv6_status == RSPAMD_IPV6_UNDEFINED) { + gint s; + + s = socket(AF_INET6, SOCK_STREAM, 0); + + if (s == -1) { + ipv6_status = RSPAMD_IPV6_UNSUPPORTED; + } + else { + /* + * Try to check /proc if we are on Linux (the common case) + */ + struct stat st; + + close(s); + + if (stat("/proc/net/dev", &st) != -1) { + if (stat("/proc/net/if_inet6", &st) != -1) { + ipv6_status = RSPAMD_IPV6_SUPPORTED; + } + else { + ipv6_status = RSPAMD_IPV6_UNSUPPORTED; + } + } + else { + /* Not a Linux, so we assume it supports ipv6 somehow... 
*/ + ipv6_status = RSPAMD_IPV6_SUPPORTED; + } + } + } +} + +gboolean +rspamd_ip_is_valid(const rspamd_inet_addr_t *addr) +{ + const struct in_addr ip4_any = {INADDR_ANY}, ip4_none = {INADDR_NONE}; + const struct in6_addr ip6_any = IN6ADDR_ANY_INIT; + gboolean ret = FALSE; + + if (G_LIKELY(addr->af == AF_INET)) { + if (memcmp(&addr->u.in.addr.s4.sin_addr, &ip4_any, + sizeof(struct in_addr)) != 0 && + memcmp(&addr->u.in.addr.s4.sin_addr, &ip4_none, + sizeof(struct in_addr)) != 0) { + ret = TRUE; + } + } + else if (G_UNLIKELY(addr->af == AF_INET6)) { + if (memcmp(&addr->u.in.addr.s6.sin6_addr, &ip6_any, + sizeof(struct in6_addr)) != 0) { + ret = TRUE; + } + } + + return ret; +} + +gint rspamd_accept_from_socket(gint sock, rspamd_inet_addr_t **target, + rspamd_accept_throttling_handler hdl, + void *hdl_data) +{ + gint nfd, serrno; + union sa_union su; + socklen_t len = sizeof(su); + rspamd_inet_addr_t *addr = NULL; + + if ((nfd = accept(sock, &su.sa, &len)) == -1) { + if (target) { + *target = NULL; + } + + if (errno == EAGAIN || errno == EINTR || errno == EWOULDBLOCK) { + return 0; + } + else if (errno == EMFILE || errno == ENFILE) { + /* Temporary disable accept event */ + if (hdl) { + hdl(sock, hdl_data); + } + + return 0; + } + + return -1; + } + + if (su.sa.sa_family == AF_INET6) { + /* Deal with bloody v4 mapped to v6 addresses */ + + static const guint8 mask[] = { + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}; + const guint8 *p; + + if (memcmp((const guint8 *) &su.s6.sin6_addr, mask, sizeof(mask)) == 0) { + p = (const guint8 *) &su.s6.sin6_addr; + + if ((p[10] == 0xff && p[11] == 0xff)) { + addr = rspamd_inet_addr_create(AF_INET, NULL); + memcpy(&addr->u.in.addr.s4.sin_addr, &p[12], + sizeof(struct in_addr)); + addr->u.in.addr.s4.sin_port = su.s6.sin6_port; + } + else { + /* Something strange but not mapped v4 address */ + addr = rspamd_inet_addr_create(AF_INET6, NULL); + memcpy(&addr->u.in.addr.s6, &su.s6, + sizeof(struct sockaddr_in6)); + } + } + else { + addr = 
rspamd_inet_addr_create(AF_INET6, NULL); + memcpy(&addr->u.in.addr.s6, &su.s6, + sizeof(struct sockaddr_in6)); + } + } + else { + addr = rspamd_inet_addr_create(su.sa.sa_family, NULL); + addr->slen = len; + + if (addr->af == AF_UNIX) { + len = sizeof(su); + + if (getsockname(sock, &su.sa, &len) != -1) { + memcpy(&addr->u.un->addr, &su.su, MIN(len, sizeof(struct sockaddr_un))); + } + else { + /* Just copy socket address */ + memcpy(&addr->u.un->addr, &su.sa, sizeof(struct sockaddr)); + } + } + else { + memcpy(&addr->u.in.addr, &su, MIN(len, sizeof(addr->u.in.addr))); + } + } + + if (rspamd_socket_nonblocking(nfd) < 0) { + goto out; + } + + /* Set close on exec */ + if (fcntl(nfd, F_SETFD, FD_CLOEXEC) == -1) { + msg_warn("fcntl failed: %d, '%s'", errno, strerror(errno)); + goto out; + } + + if (target) { + *target = addr; + } + else { + /* Avoid leak */ + rspamd_inet_address_free(addr); + } + + return (nfd); + +out: + serrno = errno; + close(nfd); + errno = serrno; + rspamd_inet_address_free(addr); + + return (-1); +} + +static gboolean +rspamd_parse_unix_path(rspamd_inet_addr_t **target, + const char *src, gsize len, + rspamd_mempool_t *pool, + enum rspamd_inet_address_parse_flags how) +{ + gchar **tokens, **cur_tok, *p, *pwbuf; + glong pwlen; + struct passwd pw, *ppw; + struct group gr, *pgr; + rspamd_inet_addr_t *addr; + bool has_group = false; + + addr = rspamd_inet_addr_create(AF_UNIX, pool); + + addr->u.un->mode = 00644; + addr->u.un->owner = (uid_t) -1; + addr->u.un->group = (gid_t) -1; + + if (!(how & RSPAMD_INET_ADDRESS_PARSE_REMOTE)) { + tokens = rspamd_string_len_split(src, len, " ,", -1, pool); + + if (tokens[0] == NULL) { + + if (!pool) { + rspamd_inet_address_free(addr); + g_strfreev(tokens); + } + + return FALSE; + } + + rspamd_strlcpy(addr->u.un->addr.sun_path, tokens[0], + sizeof(addr->u.un->addr.sun_path)); +#if defined(FREEBSD) || defined(__APPLE__) + addr->u.un->addr.sun_len = SUN_LEN(&addr->u.un->addr); +#endif + } + else { + 
rspamd_strlcpy(addr->u.un->addr.sun_path, src, + MIN(len + 1, sizeof(addr->u.un->addr.sun_path))); +#if defined(FREEBSD) || defined(__APPLE__) + addr->u.un->addr.sun_len = SUN_LEN(&addr->u.un->addr); +#endif + + if (target) { + rspamd_ip_validate_af(addr); + *target = addr; + } + else { + if (!pool) { + rspamd_inet_address_free(addr); + } + } + + return TRUE; + } + + /* Skip for remote */ + cur_tok = &tokens[1]; +#ifdef _SC_GETPW_R_SIZE_MAX + pwlen = sysconf(_SC_GETPW_R_SIZE_MAX); + if (pwlen <= 0) { + pwlen = 8192; + } +#else + pwlen = 8192; +#endif + + pwbuf = g_malloc0(pwlen); + + while (*cur_tok) { + if (g_ascii_strncasecmp(*cur_tok, "mode=", sizeof("mode=") - 1) == 0) { + p = strchr(*cur_tok, '='); + /* XXX: add error check */ + addr->u.un->mode = strtoul(p + 1, NULL, 0); + + if (addr->u.un->mode == 0) { + msg_err("bad mode: %s", p + 1); + errno = EINVAL; + goto err; + } + } + else if (g_ascii_strncasecmp(*cur_tok, "owner=", + sizeof("owner=") - 1) == 0) { + p = strchr(*cur_tok, '='); + + if (getpwnam_r(p + 1, &pw, pwbuf, pwlen, &ppw) != 0 || ppw == NULL) { + msg_err("bad user: %s", p + 1); + if (ppw == NULL) { + errno = ENOENT; + } + goto err; + } + addr->u.un->owner = pw.pw_uid; + + if (!has_group) { + addr->u.un->group = pw.pw_gid; + } + } + else if (g_ascii_strncasecmp(*cur_tok, "group=", + sizeof("group=") - 1) == 0) { + p = strchr(*cur_tok, '='); + + if (getgrnam_r(p + 1, &gr, pwbuf, pwlen, &pgr) != 0 || pgr == NULL) { + msg_err("bad group: %s", p + 1); + if (pgr == NULL) { + errno = ENOENT; + } + goto err; + } + + has_group = true; + addr->u.un->group = gr.gr_gid; + } + cur_tok++; + } + + g_free(pwbuf); + + if (!pool) { + g_strfreev(tokens); + } + + if (target) { + rspamd_ip_validate_af(addr); + *target = addr; + } + else { + if (!pool) { + rspamd_inet_address_free(addr); + } + } + + return TRUE; + +err: + + g_free(pwbuf); + + if (!pool) { + g_strfreev(tokens); + rspamd_inet_address_free(addr); + } + + return FALSE; +} + +gboolean 
+rspamd_parse_inet_address_ip4(const guchar *text, gsize len, gpointer target) +{ + const guchar *p; + guchar c; + guint32 addr = 0, *addrptr = target; + guint octet = 0, n = 0; + + g_assert(text != NULL); + g_assert(target != NULL); + + if (len == 0) { + len = strlen(text); + } + + for (p = text; p < text + len; p++) { + c = *p; + + if (c >= '0' && c <= '9') { + octet = octet * 10 + (c - '0'); + + if (octet > 255) { + return FALSE; + } + + continue; + } + + if (c == '.') { + addr = (addr << 8) + octet; + octet = 0; + n++; + continue; + } + + return FALSE; + } + + if (n == 3) { + addr = (addr << 8) + octet; + *addrptr = ntohl(addr); + + return TRUE; + } + + return FALSE; +} + +gboolean +rspamd_parse_inet_address_ip6(const guchar *text, gsize len, gpointer target) +{ + guchar t, *zero = NULL, *s, *d, *addr = target; + const guchar *p, *digit = NULL, *percent; + gsize len4 = 0; + guint n = 8, nibbles = 0, word = 0; + + g_assert(text != NULL); + g_assert(target != NULL); + + p = text; + if (len == 0) { + len = strlen(text); + } + + /* Check IPv6 scope */ + if ((percent = memchr(p, '%', len)) != NULL && percent > p) { + len = percent - p; /* Ignore scope */ + } + + if (len > sizeof("IPv6:") - 1 && + g_ascii_strncasecmp(p, "IPv6:", sizeof("IPv6:") - 1) == 0) { + /* Special case, SMTP conformant IPv6 address */ + p += sizeof("IPv6:") - 1; + len -= sizeof("IPv6:") - 1; + } + + if (*p == '[' && len > 1 && p[len - 1] == ']') { + /* Strip [] as well */ + p++; + len -= 2; + } + + /* Ignore leading colon */ + if (len > 0 && *p == ':') { + p++; + len--; + } + + for (/* void */; len; len--) { + t = *p++; + + if (t == ':') { + if (nibbles) { + digit = p; + len4 = len; + *addr++ = (u_char) (word >> 8); + *addr++ = (u_char) (word & 0xff); + + if (--n) { + nibbles = 0; + word = 0; + continue; + } + } + else { + if (zero == NULL) { + digit = p; + len4 = len; + zero = addr; + continue; + } + } + + return FALSE; + } + + if (t == '.' 
&& nibbles) { + if (n < 2 || digit == NULL) { + return FALSE; + } + + /* IPv4 encoded in IPv6 */ + if (!rspamd_parse_inet_address_ip4(digit, len4 - 1, &word)) { + return FALSE; + } + + word = ntohl(word); + *addr++ = (guchar) ((word >> 24) & 0xff); + *addr++ = (guchar) ((word >> 16) & 0xff); + n--; + break; + } + + if (++nibbles > 4) { + /* Too many digits */ + return FALSE; + } + + /* Restore from hex */ + if (t >= '0' && t <= '9') { + word = word * 16 + (t - '0'); + continue; + } + + t |= 0x20; + + if (t >= 'a' && t <= 'f') { + word = word * 16 + (t - 'a') + 10; + continue; + } + + return FALSE; + } + + if (nibbles == 0 && zero == NULL) { + return FALSE; + } + + *addr++ = (guchar) (word >> 8); + *addr++ = (guchar) (word & 0xff); + + if (--n) { + if (zero) { + n *= 2; + s = addr - 1; + d = s + n; + while (s >= zero) { + *d-- = *s--; + } + memset(zero, 0, n); + + return TRUE; + } + } + else { + if (zero == NULL) { + return TRUE; + } + } + + return FALSE; +} + +/* Checks for ipv6 mapped address */ +static rspamd_inet_addr_t * +rspamd_inet_address_v6_maybe_map(const struct sockaddr_in6 *sin6, + rspamd_mempool_t *pool) +{ + rspamd_inet_addr_t *addr = NULL; + /* 10 zero bytes or 80 bits */ + static const guint8 mask[] = { + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}; + const guint8 *p; + + if (memcmp((const guint8 *) &sin6->sin6_addr, mask, sizeof(mask)) == 0) { + p = (const guint8 *) &sin6->sin6_addr; + + if ((p[10] == 0xff && p[11] == 0xff)) { + addr = rspamd_inet_addr_create(AF_INET, pool); + memcpy(&addr->u.in.addr.s4.sin_addr, &p[12], + sizeof(struct in_addr)); + } + else { + /* Something strange but not mapped v4 address */ + addr = rspamd_inet_addr_create(AF_INET6, pool); + memcpy(&addr->u.in.addr.s6.sin6_addr, &sin6->sin6_addr, + sizeof(struct in6_addr)); + } + } + else { + addr = rspamd_inet_addr_create(AF_INET6, pool); + memcpy(&addr->u.in.addr.s6.sin6_addr, &sin6->sin6_addr, + sizeof(struct in6_addr)); + } + + return addr; +} + +static void 
+rspamd_inet_address_v6_maybe_map_static(const struct sockaddr_in6 *sin6, + rspamd_inet_addr_t *addr) +{ + /* 10 zero bytes or 80 bits */ + static const guint8 mask[] = { + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}; + const guint8 *p; + + if (memcmp((const guint8 *) &sin6->sin6_addr, mask, sizeof(mask)) == 0) { + p = (const guint8 *) &sin6->sin6_addr; + + if ((p[10] == 0xff && p[11] == 0xff)) { + memcpy(&addr->u.in.addr.s4.sin_addr, &p[12], + sizeof(struct in_addr)); + addr->af = AF_INET; + addr->slen = sizeof(addr->u.in.addr.s4); + } + else { + /* Something strange but not mapped v4 address */ + memcpy(&addr->u.in.addr.s6.sin6_addr, &sin6->sin6_addr, + sizeof(struct in6_addr)); + addr->af = AF_INET6; + addr->slen = sizeof(addr->u.in.addr.s6); + } + } + else { + memcpy(&addr->u.in.addr.s6.sin6_addr, &sin6->sin6_addr, + sizeof(struct in6_addr)); + addr->af = AF_INET6; + addr->slen = sizeof(addr->u.in.addr.s6); + } +} + +static gboolean +rspamd_parse_inet_address_common(rspamd_inet_addr_t **target, + const char *src, + gsize srclen, + rspamd_mempool_t *pool, + enum rspamd_inet_address_parse_flags how) +{ + gboolean ret = FALSE; + rspamd_inet_addr_t *addr = NULL; + union sa_inet su; + const char *end = NULL; + char ipbuf[INET6_ADDRSTRLEN + 1]; + guint iplen; + gulong portnum; + + if (srclen == 0) { + return FALSE; + } + + g_assert(src != NULL); + g_assert(target != NULL); + + rspamd_ip_check_ipv6(); + + if (!(how & RSPAMD_INET_ADDRESS_PARSE_NO_UNIX) && + (src[0] == '/' || src[0] == '.')) { + return rspamd_parse_unix_path(target, src, srclen, pool, how); + } + + if (src[0] == '[') { + const gchar *ip_start; + /* Ipv6 address in format [::1]:port or just [::1] */ + end = memchr(src + 1, ']', srclen - 1); + + if (end == NULL) { + return FALSE; + } + + iplen = end - src - 1; + + if (iplen == 0 || iplen >= sizeof(ipbuf)) { + return FALSE; + } + + ip_start = src + 1; + rspamd_strlcpy(ipbuf, ip_start, iplen + 1); + + if (rspamd_parse_inet_address_ip6(ipbuf, iplen, + &su.s6.sin6_addr)) { 
+ addr = rspamd_inet_address_v6_maybe_map(&su.s6, pool); + ret = TRUE; + } + + if (!(how & RSPAMD_INET_ADDRESS_PARSE_NO_PORT) && ret && end[1] == ':') { + /* Port part */ + rspamd_strtoul(end + 1, srclen - iplen - 3, &portnum); + rspamd_inet_address_set_port(addr, portnum); + } + } + else { + + if (!(how & RSPAMD_INET_ADDRESS_PARSE_NO_PORT) && + (end = memchr(src, ':', srclen)) != NULL) { + /* This is either port number and ipv4 addr or ipv6 addr */ + /* Search for another semicolon */ + if (memchr(end + 1, ':', srclen - (end - src + 1)) && + rspamd_parse_inet_address_ip6(src, srclen, + &su.s6.sin6_addr)) { + addr = rspamd_inet_address_v6_maybe_map(&su.s6, pool); + ret = TRUE; + } + else { + /* Not ipv6, so try ip:port */ + iplen = end - src; + + if (iplen >= sizeof(ipbuf) || iplen <= 1) { + return FALSE; + } + else { + rspamd_strlcpy(ipbuf, src, iplen + 1); + } + + if (rspamd_parse_inet_address_ip4(ipbuf, iplen, + &su.s4.sin_addr)) { + addr = rspamd_inet_addr_create(AF_INET, pool); + memcpy(&addr->u.in.addr.s4.sin_addr, &su.s4.sin_addr, + sizeof(struct in_addr)); + rspamd_strtoul(end + 1, srclen - iplen - 1, &portnum); + rspamd_inet_address_set_port(addr, portnum); + ret = TRUE; + } + } + } + else { + if (rspamd_parse_inet_address_ip4(src, srclen, &su.s4.sin_addr)) { + addr = rspamd_inet_addr_create(AF_INET, pool); + memcpy(&addr->u.in.addr.s4.sin_addr, &su.s4.sin_addr, + sizeof(struct in_addr)); + ret = TRUE; + } + else if (rspamd_parse_inet_address_ip6(src, srclen, &su.s6.sin6_addr)) { + addr = rspamd_inet_address_v6_maybe_map(&su.s6, pool); + ret = TRUE; + } + } + } + + if (ret && target) { + *target = addr; + } + + return ret; +} + +gboolean +rspamd_parse_inet_address(rspamd_inet_addr_t **target, + const char *src, + gsize srclen, + enum rspamd_inet_address_parse_flags how) +{ + return rspamd_parse_inet_address_common(target, src, srclen, NULL, how); +} + +rspamd_inet_addr_t * +rspamd_parse_inet_address_pool(const char *src, + gsize srclen, + rspamd_mempool_t 
*pool, + enum rspamd_inet_address_parse_flags how) +{ + rspamd_inet_addr_t *ret = NULL; + + if (!rspamd_parse_inet_address_common(&ret, src, srclen, pool, how)) { + return NULL; + } + + return ret; +} + +gboolean +rspamd_parse_inet_address_ip(const char *src, gsize srclen, + rspamd_inet_addr_t *target) +{ + const char *end; + char ipbuf[INET6_ADDRSTRLEN + 1]; + guint iplen; + gulong portnum; + gboolean ret = FALSE; + union sa_inet su; + + g_assert(target != NULL); + g_assert(src != NULL); + + if (src[0] == '[') { + /* Ipv6 address in format [::1]:port or just [::1] */ + end = memchr(src + 1, ']', srclen - 1); + + if (end == NULL) { + return FALSE; + } + + iplen = end - src - 1; + + if (iplen == 0 || iplen >= sizeof(ipbuf)) { + return FALSE; + } + + rspamd_strlcpy(ipbuf, src + 1, iplen + 1); + + if (rspamd_parse_inet_address_ip6(ipbuf, iplen, + &su.s6.sin6_addr)) { + rspamd_inet_address_v6_maybe_map_static(&su.s6, target); + ret = TRUE; + } + + if (ret && end[1] == ':') { + /* Port part */ + rspamd_strtoul(end + 1, srclen - iplen - 3, &portnum); + rspamd_inet_address_set_port(target, portnum); + } + } + else { + + if ((end = memchr(src, ':', srclen)) != NULL) { + /* This is either port number and ipv4 addr or ipv6 addr */ + /* Search for another semicolon */ + if (memchr(end + 1, ':', srclen - (end - src + 1)) && + rspamd_parse_inet_address_ip6(src, srclen, &su.s6.sin6_addr)) { + rspamd_inet_address_v6_maybe_map_static(&su.s6, target); + ret = TRUE; + } + else { + /* Not ipv6, so try ip:port */ + iplen = end - src; + + if (iplen >= sizeof(ipbuf) || iplen <= 1) { + return FALSE; + } + else { + rspamd_strlcpy(ipbuf, src, iplen + 1); + } + + if (rspamd_parse_inet_address_ip4(ipbuf, iplen, + &su.s4.sin_addr)) { + memcpy(&target->u.in.addr.s4.sin_addr, &su.s4.sin_addr, + sizeof(struct in_addr)); + target->af = AF_INET; + target->slen = sizeof(target->u.in.addr.s4); + rspamd_strtoul(end + 1, srclen - iplen - 1, &portnum); + rspamd_inet_address_set_port(target, portnum); + 
ret = TRUE; + } + } + } + else { + if (rspamd_parse_inet_address_ip4(src, srclen, &su.s4.sin_addr)) { + memcpy(&target->u.in.addr.s4.sin_addr, &su.s4.sin_addr, + sizeof(struct in_addr)); + target->af = AF_INET; + target->slen = sizeof(target->u.in.addr.s4); + ret = TRUE; + } + else if (rspamd_parse_inet_address_ip6(src, srclen, + &su.s6.sin6_addr)) { + rspamd_inet_address_v6_maybe_map_static(&su.s6, target); + ret = TRUE; + } + } + } + + return ret; +} + +/* + * This is used to allow rspamd_inet_address_to_string to be used several times + * at the same function invocation, like printf("%s -> %s", f(ip1), f(ip2)); + * Yes, it is bad but it helps to utilise this function without temporary buffers + * for up to 5 simultaneous invocations. + */ +#define NADDR_BUFS 5 + +const char * +rspamd_inet_address_to_string(const rspamd_inet_addr_t *addr) +{ + static char addr_str[NADDR_BUFS][INET6_ADDRSTRLEN + 1]; + static guint cur_addr = 0; + char *addr_buf; + + if (addr == NULL) { + return "<empty inet address>"; + } + + addr_buf = addr_str[cur_addr++ % NADDR_BUFS]; + + switch (addr->af) { + case AF_INET: + return inet_ntop(addr->af, &addr->u.in.addr.s4.sin_addr, addr_buf, + INET6_ADDRSTRLEN + 1); + case AF_INET6: + return inet_ntop(addr->af, &addr->u.in.addr.s6.sin6_addr, addr_buf, + INET6_ADDRSTRLEN + 1); + case AF_UNIX: + return addr->u.un->addr.sun_path; + } + + return "undefined"; +} + +#define PRETTY_IP_BUFSIZE 128 + +const char * +rspamd_inet_address_to_string_pretty(const rspamd_inet_addr_t *addr) +{ + static char addr_str[NADDR_BUFS][PRETTY_IP_BUFSIZE]; + static guint cur_addr = 0; + char *addr_buf; + + if (addr == NULL) { + return "<empty inet address>"; + } + + addr_buf = addr_str[cur_addr++ % NADDR_BUFS]; + + switch (addr->af) { + case AF_INET: + rspamd_snprintf(addr_buf, PRETTY_IP_BUFSIZE, "%s:%d", + rspamd_inet_address_to_string(addr), + rspamd_inet_address_get_port(addr)); + break; + case AF_INET6: + rspamd_snprintf(addr_buf, PRETTY_IP_BUFSIZE, "[%s]:%d", + 
rspamd_inet_address_to_string(addr), + rspamd_inet_address_get_port(addr)); + break; + case AF_UNIX: + rspamd_snprintf(addr_buf, PRETTY_IP_BUFSIZE, "unix:%s", + rspamd_inet_address_to_string(addr)); + break; + } + + return addr_buf; +} + +uint16_t +rspamd_inet_address_get_port(const rspamd_inet_addr_t *addr) +{ + switch (addr->af) { + case AF_INET: + return ntohs(addr->u.in.addr.s4.sin_port); + case AF_INET6: + return ntohs(addr->u.in.addr.s6.sin6_port); + } + + return 0; +} + +void rspamd_inet_address_set_port(rspamd_inet_addr_t *addr, uint16_t port) +{ + switch (addr->af) { + case AF_INET: + addr->u.in.addr.s4.sin_port = htons(port); + break; + case AF_INET6: + addr->u.in.addr.s6.sin6_port = htons(port); + break; + } +} + +int rspamd_inet_address_connect(const rspamd_inet_addr_t *addr, gint type, + gboolean async) +{ + int fd, r; + const struct sockaddr *sa; + + if (addr == NULL) { + return -1; + } + + fd = rspamd_socket_create(addr->af, type, 0, async); + if (fd == -1) { + return -1; + } + + if (addr->af == AF_UNIX) { + sa = (const struct sockaddr *) &addr->u.un->addr; + + if (type == (int) SOCK_DGRAM) { + struct sockaddr ca; + + memset(&ca, 0, sizeof(ca)); + ca.sa_family = AF_UNIX; + + r = bind(fd, &ca, sizeof(sa_family_t)); + if (r == -1) { + msg_info("unix socket client autobind failed: %s, '%s'", + addr->u.un->addr.sun_path, strerror(errno)); + } + } + } + else { + sa = &addr->u.in.addr.sa; + } + + r = connect(fd, sa, addr->slen); + + if (r == -1) { + if (!async || errno != EINPROGRESS) { + close(fd); + msg_info("connect %s failed: %d, '%s'", + rspamd_inet_address_to_string_pretty(addr), + errno, strerror(errno)); + return -1; + } + } + + return fd; +} + +int rspamd_inet_address_listen(const rspamd_inet_addr_t *addr, gint type, + enum rspamd_inet_address_listen_opts opts, + gint listen_queue) +{ + gint fd, r; + gint on = 1, serrno; + const struct sockaddr *sa; + const char *path; + + if (addr == NULL) { + return -1; + } + + fd = 
rspamd_socket_create(addr->af, type, 0, + (opts & RSPAMD_INET_ADDRESS_LISTEN_ASYNC)); + if (fd == -1) { + return -1; + } + + if (addr->af == AF_UNIX && access(addr->u.un->addr.sun_path, W_OK) != -1) { + /* Unlink old socket */ + (void) unlink(addr->u.un->addr.sun_path); + } + + if (addr->af == AF_UNIX) { + sa = (const struct sockaddr *) &addr->u.un->addr; + } + else { + sa = &addr->u.in.addr.sa; + } + +#if defined(SO_REUSEADDR) + if (setsockopt(fd, SOL_SOCKET, SO_REUSEADDR, (const void *) &on, sizeof(gint)) == -1) { + msg_err("cannot set SO_REUSEADDR on %s (fd=%d): %s", + rspamd_inet_address_to_string_pretty(addr), + fd, strerror(errno)); + goto err; + } +#endif + +#if defined(SO_REUSEPORT) && defined(LINUX) + if (opts & RSPAMD_INET_ADDRESS_LISTEN_REUSEPORT) { + on = 1; + + if (setsockopt(fd, SOL_SOCKET, SO_REUSEPORT, (const void *) &on, sizeof(gint)) == -1) { + msg_err("cannot set SO_REUSEPORT on %s (fd=%d): %s", + rspamd_inet_address_to_string_pretty(addr), + fd, strerror(errno)); + goto err; + } + } +#endif + +#ifdef HAVE_IPV6_V6ONLY + if (addr->af == AF_INET6) { + /* We need to set this flag to avoid errors */ + on = 1; +#ifdef SOL_IPV6 + (void) setsockopt(fd, SOL_IPV6, IPV6_V6ONLY, (const void *) &on, sizeof(gint)); +#elif defined(IPPROTO_IPV6) + (void) setsockopt(fd, IPPROTO_IPV6, IPV6_V6ONLY, (const void *) &on, sizeof(gint)); +#endif + } +#endif + + r = bind(fd, sa, addr->slen); + if (r == -1) { + if (!(opts & RSPAMD_INET_ADDRESS_LISTEN_ASYNC) || errno != EINPROGRESS) { + msg_warn("bind %s failed: %d, '%s'", + rspamd_inet_address_to_string_pretty(addr), + errno, + strerror(errno)); + + goto err; + } + } + + if (addr->af == AF_UNIX) { + path = addr->u.un->addr.sun_path; + /* Try to set mode and owner */ + + if (addr->u.un->owner != (uid_t) -1 || addr->u.un->group != (gid_t) -1) { + if (chown(path, addr->u.un->owner, addr->u.un->group) == -1) { + msg_info("cannot change owner for %s to %d:%d: %s", + path, addr->u.un->owner, addr->u.un->group, + 
strerror(errno)); + } + } + + if (chmod(path, addr->u.un->mode) == -1) { + msg_info("cannot change mode for %s to %od %s", + path, addr->u.un->mode, strerror(errno)); + } + } + + if (type != (int) SOCK_DGRAM) { + + if (!(opts & RSPAMD_INET_ADDRESS_LISTEN_NOLISTEN)) { + r = listen(fd, listen_queue); + + if (r == -1) { + msg_warn("listen %s failed: %d, '%s'", + rspamd_inet_address_to_string_pretty(addr), + errno, strerror(errno)); + + goto err; + } + } + } + + return fd; + +err: + /* Error path */ + serrno = errno; + + if (fd != -1) { + close(fd); + } + + errno = serrno; + + return -1; +} + +gssize +rspamd_inet_address_recvfrom(gint fd, void *buf, gsize len, gint fl, + rspamd_inet_addr_t **target) +{ + gssize ret; + union sa_union su; + socklen_t slen = sizeof(su); + rspamd_inet_addr_t *addr = NULL; + + if ((ret = recvfrom(fd, buf, len, fl, &su.sa, &slen)) == -1) { + if (target) { + *target = NULL; + } + + return -1; + } + + if (target) { + addr = rspamd_inet_addr_create(su.sa.sa_family, NULL); + addr->slen = slen; + + if (addr->af == AF_UNIX) { + addr->u.un = g_malloc(sizeof(*addr->u.un)); + memcpy(&addr->u.un->addr, &su.su, sizeof(struct sockaddr_un)); + } + else { + memcpy(&addr->u.in.addr, &su.sa, MIN(slen, sizeof(addr->u.in.addr))); + } + + *target = addr; + } + + return (ret); +} + +gssize +rspamd_inet_address_sendto(gint fd, const void *buf, gsize len, gint fl, + const rspamd_inet_addr_t *addr) +{ + gssize r; + const struct sockaddr *sa; + + if (addr == NULL) { +#ifdef EADDRNOTAVAIL + errno = EADDRNOTAVAIL; +#endif + return -1; + } + + if (addr->af == AF_UNIX) { + sa = (struct sockaddr *) &addr->u.un->addr; + } + else { + sa = &addr->u.in.addr.sa; + } + + r = sendto(fd, buf, len, fl, sa, addr->slen); + + return r; +} + +static gboolean +rspamd_check_port_priority(const char *line, guint default_port, + guint *priority, gchar *out, + gsize outlen, rspamd_mempool_t *pool) +{ + guint real_port = default_port, real_priority = 0; + gchar *err_str, *err_str_prio; + 
+ if (line && line[0] == ':') { + errno = 0; + real_port = strtoul(line + 1, &err_str, 10); + + if (err_str && *err_str == ':') { + /* We have priority */ + real_priority = strtoul(err_str + 1, &err_str_prio, 10); + + if (err_str_prio && *err_str_prio != '\0') { + msg_err_pool_check( + "cannot parse priority: %s, at symbol %c, error: %s", + line, + *err_str_prio, + strerror(errno)); + + return FALSE; + } + } + else if (err_str && *err_str != '\0') { + msg_err_pool_check( + "cannot parse port: %s, at symbol %c, error: %s", + line, + *err_str, + strerror(errno)); + + return FALSE; + } + } + + if (priority) { + *priority = real_priority; + } + + rspamd_snprintf(out, outlen, "%ud", real_port); + + return TRUE; +} + +static enum rspamd_parse_host_port_result +rspamd_resolve_addrs(const char *begin, size_t len, GPtrArray **addrs, + const gchar *portbuf, gint flags, + rspamd_mempool_t *pool) +{ + struct addrinfo hints, *res, *cur; + rspamd_inet_addr_t *cur_addr = NULL; + gint r, addr_cnt; + gchar *addr_cpy = NULL; + enum rspamd_parse_host_port_result ret = RSPAMD_PARSE_ADDR_FAIL; + + rspamd_ip_check_ipv6(); + + if (rspamd_parse_inet_address(&cur_addr, + begin, len, RSPAMD_INET_ADDRESS_PARSE_DEFAULT) && + cur_addr != NULL) { + if (*addrs == NULL) { + *addrs = g_ptr_array_new_full(1, + (GDestroyNotify) rspamd_inet_address_free); + + if (pool != NULL) { + rspamd_mempool_add_destructor(pool, + rspamd_ptr_array_free_hard, *addrs); + } + } + + rspamd_inet_address_set_port(cur_addr, strtoul(portbuf, NULL, 10)); + g_ptr_array_add(*addrs, cur_addr); + ret = RSPAMD_PARSE_ADDR_NUMERIC; + } + else { + memset(&hints, 0, sizeof(hints)); + hints.ai_socktype = SOCK_STREAM; /* Type of the socket */ + hints.ai_flags = AI_NUMERICSERV | flags; + + if (len > 0) { + if (pool) { + addr_cpy = rspamd_mempool_alloc(pool, len + 1); + } + else { + addr_cpy = g_malloc(len + 1); + } + + rspamd_strlcpy(addr_cpy, begin, len + 1); + } + /* Otherwise it will be NULL */ + + if (ipv6_status == 
RSPAMD_IPV6_SUPPORTED) { + hints.ai_family = AF_UNSPEC; + } + else { + hints.ai_family = AF_INET; + } + + if ((r = getaddrinfo(addr_cpy, portbuf, &hints, &res)) == 0) { + /* Now copy up to max_addrs of addresses */ + addr_cnt = 0; + cur = res; + while (cur) { + cur = cur->ai_next; + addr_cnt++; + } + + if (*addrs == NULL) { + *addrs = g_ptr_array_new_full(addr_cnt, + (GDestroyNotify) rspamd_inet_address_free); + + if (pool != NULL) { + rspamd_mempool_add_destructor(pool, + rspamd_ptr_array_free_hard, *addrs); + } + } + + cur = res; + while (cur) { + cur_addr = rspamd_inet_address_from_sa(cur->ai_addr, + cur->ai_addrlen); + + if (cur_addr != NULL) { + g_ptr_array_add(*addrs, cur_addr); + } + cur = cur->ai_next; + } + + freeaddrinfo(res); + ret = RSPAMD_PARSE_ADDR_RESOLVED; + } + else if (addr_cpy) { + msg_err_pool_check("address resolution for %s failed: %s", + addr_cpy, + gai_strerror(r)); + + if (pool == NULL) { + g_free(addr_cpy); + } + + return RSPAMD_PARSE_ADDR_FAIL; + } + else { + /* Should never ever happen */ + g_assert(0); + } + } + + if (pool == NULL) { + g_free(addr_cpy); + } + + return ret; +} + +enum rspamd_parse_host_port_result +rspamd_parse_host_port_priority(const gchar *str, + GPtrArray **addrs, + guint *priority, + gchar **name_ptr, + guint default_port, + gboolean allow_listen, + rspamd_mempool_t *pool) +{ + gchar portbuf[8]; + const gchar *p, *name = NULL; + gsize namelen; + rspamd_inet_addr_t *cur_addr = NULL; + enum rspamd_parse_host_port_result ret = RSPAMD_PARSE_ADDR_FAIL; + union sa_union su; + + /* + * In this function, we can have several possibilities: + * 1) Unix socket: check for '.' 
or '/' at the begin of string + * 2) \[ipv6\]: check for '[' at the beginning + * 3) '*': means listening on any address + * 4) ip|host[:port[:priority]] + */ + + if (allow_listen && str[0] == '*') { + bool v4_any = true, v6_any = true; + + p = &str[1]; + + if (g_ascii_strncasecmp(p, "v4", 2) == 0) { + p += 2; + name = "*v4"; + v6_any = false; + } + else if (g_ascii_strncasecmp(p, "v6", 2) == 0) { + p += 2; + name = "*v6"; + v4_any = false; + } + else { + name = "*"; + } + + if (!rspamd_check_port_priority(p, default_port, priority, + portbuf, sizeof(portbuf), pool)) { + return ret; + } + + if (*addrs == NULL) { + *addrs = g_ptr_array_new_full(1, + pool == NULL ? NULL : (GDestroyNotify) rspamd_inet_address_free); + + if (pool != NULL) { + rspamd_mempool_add_destructor(pool, + rspamd_ptr_array_free_hard, *addrs); + } + } + + if (v4_any) { + cur_addr = rspamd_inet_addr_create(AF_INET, NULL); + rspamd_parse_inet_address_ip4("0.0.0.0", + sizeof("0.0.0.0") - 1, &su.s4.sin_addr); + memcpy(&cur_addr->u.in.addr.s4.sin_addr, &su.s4.sin_addr, + sizeof(struct in_addr)); + rspamd_inet_address_set_port(cur_addr, + strtoul(portbuf, NULL, 10)); + g_ptr_array_add(*addrs, cur_addr); + } + if (v6_any) { + cur_addr = rspamd_inet_addr_create(AF_INET6, NULL); + rspamd_parse_inet_address_ip6("::", + sizeof("::") - 1, &su.s6.sin6_addr); + memcpy(&cur_addr->u.in.addr.s6.sin6_addr, &su.s6.sin6_addr, + sizeof(struct in6_addr)); + rspamd_inet_address_set_port(cur_addr, + strtoul(portbuf, NULL, 10)); + g_ptr_array_add(*addrs, cur_addr); + } + + namelen = strlen(name); + ret = RSPAMD_PARSE_ADDR_NUMERIC; /* No resolution here */ + } + else if (str[0] == '[') { + /* This is braced IPv6 address */ + p = strchr(str, ']'); + + if (p == NULL) { + msg_err_pool_check("cannot parse address definition %s: %s", + str, + strerror(EINVAL)); + + return ret; + } + + name = str + 1; + namelen = p - str - 1; + + if (!rspamd_check_port_priority(p + 1, default_port, priority, portbuf, + sizeof(portbuf), pool)) { 
+ return ret; + } + + ret = rspamd_resolve_addrs(name, namelen, addrs, portbuf, 0, pool); + } + else if (str[0] == '/' || str[0] == '.') { + /* Special case of unix socket, as getaddrinfo cannot deal with them */ + if (*addrs == NULL) { + *addrs = g_ptr_array_new_full(1, + (GDestroyNotify) rspamd_inet_address_free); + + if (pool != NULL) { + rspamd_mempool_add_destructor(pool, + rspamd_ptr_array_free_hard, *addrs); + } + } + + if (!rspamd_parse_inet_address(&cur_addr, + str, strlen(str), RSPAMD_INET_ADDRESS_PARSE_DEFAULT)) { + msg_err_pool_check("cannot parse unix socket definition %s: %s", + str, + strerror(errno)); + + return ret; + } + + g_ptr_array_add(*addrs, cur_addr); + name = str; + namelen = strlen(str); + ret = RSPAMD_PARSE_ADDR_NUMERIC; /* No resolution here: unix socket */ + } + else { + p = strchr(str, ':'); + + if (p == NULL) { + /* Just address or IP */ + name = str; + namelen = strlen(str); + rspamd_check_port_priority("", default_port, priority, portbuf, + sizeof(portbuf), pool); + + ret = rspamd_resolve_addrs(name, namelen, addrs, + portbuf, 0, pool); + } + else { + const gchar *second_semicolon = strchr(p + 1, ':'); + + name = str; + + if (second_semicolon) { + /* name + port part excluding priority */ + namelen = second_semicolon - str; + } + else { + /* Full ip/name + port */ + namelen = strlen(str); + } + + if (!rspamd_check_port_priority(p, default_port, priority, portbuf, + sizeof(portbuf), pool)) { + return ret; + } + + ret = rspamd_resolve_addrs(str, p - str, addrs, + portbuf, 0, pool); + } + } + + if (name_ptr != NULL) { + if (pool) { + *name_ptr = rspamd_mempool_alloc(pool, namelen + 1); + } + else { + *name_ptr = g_malloc(namelen + 1); + } + + rspamd_strlcpy(*name_ptr, name, namelen + 1); + } + + return ret; +} + +guchar * +rspamd_inet_address_get_hash_key(const rspamd_inet_addr_t *addr, guint *klen) +{ + guchar *res = NULL; + static struct in_addr local = {INADDR_LOOPBACK}; + + g_assert(addr != NULL); + g_assert(klen != NULL); + + if 
(addr->af == AF_INET) { + *klen = sizeof(struct in_addr); + res = (guchar *) &addr->u.in.addr.s4.sin_addr; + } + else if (addr->af == AF_INET6) { + *klen = sizeof(struct in6_addr); + res = (guchar *) &addr->u.in.addr.s6.sin6_addr; + } + else if (addr->af == AF_UNIX) { + *klen = sizeof(struct in_addr); + res = (guchar *) &local; + } + else { + *klen = 0; + res = NULL; + } + + return res; +} + + +rspamd_inet_addr_t * +rspamd_inet_address_new(int af, const void *init) +{ + rspamd_inet_addr_t *addr; + + addr = rspamd_inet_addr_create(af, NULL); + + if (init != NULL) { + if (af == AF_UNIX) { + /* Init is a path */ + rspamd_strlcpy(addr->u.un->addr.sun_path, init, + sizeof(addr->u.un->addr.sun_path)); +#if defined(FREEBSD) || defined(__APPLE__) + addr->u.un->addr.sun_len = SUN_LEN(&addr->u.un->addr); +#endif + } + else if (af == AF_INET) { + memcpy(&addr->u.in.addr.s4.sin_addr, init, sizeof(struct in_addr)); + } + else if (af == AF_INET6) { + memcpy(&addr->u.in.addr.s6.sin6_addr, init, sizeof(struct in6_addr)); + } + } + + return addr; +} + +rspamd_inet_addr_t * +rspamd_inet_address_from_sa(const struct sockaddr *sa, socklen_t slen) +{ + rspamd_inet_addr_t *addr; + + g_assert(sa != NULL); + /* Address of an AF_UNIX socket can be tiny */ + g_assert(slen >= sizeof(sa_family_t) + 1); + + addr = rspamd_inet_addr_create(sa->sa_family, NULL); + + if (sa->sa_family == AF_UNIX) { + /* Init is a path */ + const struct sockaddr_un *un = (const struct sockaddr_un *) sa; + + g_assert(slen >= SUN_LEN(un)); + g_assert(slen <= sizeof(addr->u.un->addr)); + + /* sun_path can legally contain intermittent NULL bytes */ + memcpy(&addr->u.un->addr, un, slen); + + /* length of AF_UNIX addresses is variable */ + addr->slen = slen; + } + else if (sa->sa_family == AF_INET) { + g_assert(slen >= sizeof(struct sockaddr_in)); + memcpy(&addr->u.in.addr.s4, sa, sizeof(struct sockaddr_in)); + } + else if (sa->sa_family == AF_INET6) { + g_assert(slen >= sizeof(struct sockaddr_in6)); + 
memcpy(&addr->u.in.addr.s6, sa, sizeof(struct sockaddr_in6)); + } + else { + /* XXX: currently we cannot deal with other AF */ + g_assert(0); + } + + return addr; +} + +rspamd_inet_addr_t * +rspamd_inet_address_from_rnds(const struct rdns_reply_entry *rep) +{ + rspamd_inet_addr_t *addr = NULL; + + g_assert(rep != NULL); + + if (rep->type == RDNS_REQUEST_A) { + addr = rspamd_inet_addr_create(AF_INET, NULL); + memcpy(&addr->u.in.addr.s4.sin_addr, &rep->content.a.addr, + sizeof(struct in_addr)); + } + else if (rep->type == RDNS_REQUEST_AAAA) { + addr = rspamd_inet_addr_create(AF_INET6, NULL); + memcpy(&addr->u.in.addr.s6.sin6_addr, &rep->content.aaa.addr, + sizeof(struct in6_addr)); + } + + return addr; +} + +void rspamd_inet_address_apply_mask(rspamd_inet_addr_t *addr, guint mask) +{ + guint32 umsk, *p; + + if (mask > 0 && addr != NULL) { + if (addr->af == AF_INET && mask <= 32) { + umsk = htonl(G_MAXUINT32 << (32 - mask)); + addr->u.in.addr.s4.sin_addr.s_addr &= umsk; + } + else if (addr->af == AF_INET6 && mask <= 128) { + p = (uint32_t *) &addr->u.in.addr.s6.sin6_addr; + mask = 128 - mask; + p += 3; + + for (;;) { + if (mask >= 32) { + mask -= 32; + *p = 0; + } + else { + umsk = htonl(G_MAXUINT32 << mask); + *p &= umsk; + break; + } + + p--; + } + } + } +} + +static gint +rspamd_inet_address_af_order(const rspamd_inet_addr_t *addr) +{ + int ret; + + switch (addr->af) { + case AF_UNIX: + ret = 2; + break; + case AF_INET: + ret = 1; + break; + default: + ret = 0; + break; + } + + return ret; +} + +gint rspamd_inet_address_compare(const rspamd_inet_addr_t *a1, + const rspamd_inet_addr_t *a2, gboolean compare_ports) +{ + g_assert(a1 != NULL); + g_assert(a2 != NULL); + + if (a1->af != a2->af) { + return (rspamd_inet_address_af_order(a2) - + rspamd_inet_address_af_order(a1)); + } + else { + switch (a1->af) { + case AF_INET: + if (!compare_ports) { + return memcmp(&a1->u.in.addr.s4.sin_addr, + &a2->u.in.addr.s4.sin_addr, sizeof(struct in_addr)); + } + else { + if 
(a1->u.in.addr.s4.sin_port == a2->u.in.addr.s4.sin_port) { + return memcmp(&a1->u.in.addr.s4.sin_addr, + &a2->u.in.addr.s4.sin_addr, sizeof(struct in_addr)); + } + else { + return a1->u.in.addr.s4.sin_port - a2->u.in.addr.s4.sin_port; + } + } + case AF_INET6: + if (!compare_ports) { + return memcmp(&a1->u.in.addr.s6.sin6_addr, + &a2->u.in.addr.s6.sin6_addr, sizeof(struct in6_addr)); + } + else { + if (a1->u.in.addr.s6.sin6_port == a2->u.in.addr.s6.sin6_port) { + return memcmp(&a1->u.in.addr.s6.sin6_addr, + &a2->u.in.addr.s6.sin6_addr, sizeof(struct in6_addr)); + } + else { + return a1->u.in.addr.s6.sin6_port - a2->u.in.addr.s6.sin6_port; + } + } + case AF_UNIX: + return strncmp(a1->u.un->addr.sun_path, + a2->u.un->addr.sun_path, sizeof(a1->u.un->addr.sun_path)); + default: + return memcmp(&a1->u.in, &a2->u.in, sizeof(a1->u.in)); + } + } + + return 0; +} + +gint rspamd_inet_address_compare_ptr(gconstpointer a1, + gconstpointer a2) +{ + const rspamd_inet_addr_t **i1 = (const rspamd_inet_addr_t **) a1, + **i2 = (const rspamd_inet_addr_t **) a2; + + return rspamd_inet_address_compare(*i1, *i2, FALSE); +} + +rspamd_inet_addr_t * +rspamd_inet_address_copy(const rspamd_inet_addr_t *addr, rspamd_mempool_t *pool) +{ + rspamd_inet_addr_t *n; + + if (addr == NULL) { + return NULL; + } + + n = rspamd_inet_addr_create(addr->af, pool); + + if (n->af == AF_UNIX) { + memcpy(n->u.un, addr->u.un, sizeof(*addr->u.un)); + } + else { + memcpy(&n->u.in, &addr->u.in, sizeof(addr->u.in)); + } + + return n; +} + +gint rspamd_inet_address_get_af(const rspamd_inet_addr_t *addr) +{ + g_assert(addr != NULL); + + return addr->af; +} + +struct sockaddr * +rspamd_inet_address_get_sa(const rspamd_inet_addr_t *addr, + socklen_t *sz) +{ + g_assert(addr != NULL); + + if (addr->af == AF_UNIX) { + *sz = addr->slen; + return (struct sockaddr *) &addr->u.un->addr; + } + else { + *sz = addr->slen; + return (struct sockaddr *) &addr->u.in.addr.sa; + } +} + + +guint rspamd_inet_address_hash(gconstpointer a) 
+{ + const rspamd_inet_addr_t *addr = a; + struct { + gchar buf[sizeof(struct in6_addr)]; /* 16 bytes */ + int af; + } layout; + + gint32 k; + + if (addr->af == AF_UNIX && addr->u.un) { + rspamd_cryptobox_fast_hash_state_t st; + + rspamd_cryptobox_fast_hash_init(&st, rspamd_hash_seed()); + rspamd_cryptobox_fast_hash_update(&st, &addr->af, sizeof(addr->af)); + rspamd_cryptobox_fast_hash_update(&st, addr->u.un, sizeof(*addr->u.un)); + + return rspamd_cryptobox_fast_hash_final(&st); + } + else { + memset(&layout, 0, sizeof(layout)); + layout.af = addr->af; + + /* We ignore port part here */ + if (addr->af == AF_INET) { + memcpy(layout.buf, &addr->u.in.addr.s4.sin_addr, + sizeof(addr->u.in.addr.s4.sin_addr)); + } + else { + memcpy(layout.buf, &addr->u.in.addr.s6.sin6_addr, + sizeof(addr->u.in.addr.s6.sin6_addr)); + } + + k = rspamd_cryptobox_fast_hash(&layout, sizeof(layout), + rspamd_hash_seed()); + } + + return k; +} + +guint rspamd_inet_address_port_hash(gconstpointer a) +{ + const rspamd_inet_addr_t *addr = a; + struct { + gchar buf[sizeof(struct in6_addr)]; /* 16 bytes */ + int port; + int af; + } layout; + + gint32 k; + + if (addr->af == AF_UNIX && addr->u.un) { + rspamd_cryptobox_fast_hash_state_t st; + + rspamd_cryptobox_fast_hash_init(&st, rspamd_hash_seed()); + rspamd_cryptobox_fast_hash_update(&st, &addr->af, sizeof(addr->af)); + rspamd_cryptobox_fast_hash_update(&st, addr->u.un, sizeof(*addr->u.un)); + + return rspamd_cryptobox_fast_hash_final(&st); + } + else { + memset(&layout, 0, sizeof(layout)); + layout.af = addr->af; + + /* We consider port part here */ + if (addr->af == AF_INET) { + memcpy(layout.buf, &addr->u.in.addr.s4.sin_addr, + sizeof(addr->u.in.addr.s4.sin_addr)); + layout.port = addr->u.in.addr.s4.sin_port; + } + else { + memcpy(layout.buf, &addr->u.in.addr.s6.sin6_addr, + sizeof(addr->u.in.addr.s6.sin6_addr)); + layout.port = addr->u.in.addr.s6.sin6_port; + } + + k = rspamd_cryptobox_fast_hash(&layout, sizeof(layout), + rspamd_hash_seed()); + 
} + + return k; +} + +gboolean +rspamd_inet_address_equal(gconstpointer a, gconstpointer b) +{ + const rspamd_inet_addr_t *a1 = a, *a2 = b; + + return rspamd_inet_address_compare(a1, a2, FALSE) == 0; +} + +gboolean +rspamd_inet_address_port_equal(gconstpointer a, gconstpointer b) +{ + const rspamd_inet_addr_t *a1 = a, *a2 = b; + + return rspamd_inet_address_compare(a1, a2, TRUE) == 0; +} + +#ifndef IN6_IS_ADDR_LOOPBACK +#define IN6_IS_ADDR_LOOPBACK(a) \ + ((*(const __uint32_t *) (const void *) (&(a)->s6_addr[0]) == 0) && \ + (*(const __uint32_t *) (const void *) (&(a)->s6_addr[4]) == 0) && \ + (*(const __uint32_t *) (const void *) (&(a)->s6_addr[8]) == 0) && \ + (*(const __uint32_t *) (const void *) (&(a)->s6_addr[12]) == ntohl(1))) +#endif +#ifndef IN6_IS_ADDR_LINKLOCAL +#define IN6_IS_ADDR_LINKLOCAL(a) \ + (((a)->s6_addr[0] == 0xfe) && (((a)->s6_addr[1] & 0xc0) == 0x80)) +#endif +#ifndef IN6_IS_ADDR_SITELOCAL +#define IN6_IS_ADDR_SITELOCAL(a) \ + (((a)->s6_addr[0] == 0xfe) && (((a)->s6_addr[1] & 0xc0) == 0xc0)) +#endif + +gboolean +rspamd_inet_address_is_local(const rspamd_inet_addr_t *addr) +{ + if (addr == NULL) { + return FALSE; + } + + if (addr->af == AF_UNIX) { + /* Always true for unix sockets */ + return TRUE; + } + else { + if (addr->af == AF_INET) { + if ((ntohl(addr->u.in.addr.s4.sin_addr.s_addr) & 0xff000000) == 0x7f000000) { + return TRUE; + } + } + else if (addr->af == AF_INET6) { + if (IN6_IS_ADDR_LOOPBACK(&addr->u.in.addr.s6.sin6_addr) || + IN6_IS_ADDR_LINKLOCAL(&addr->u.in.addr.s6.sin6_addr) || + IN6_IS_ADDR_SITELOCAL(&addr->u.in.addr.s6.sin6_addr)) { + return TRUE; + } + } + } + + return FALSE; +} + +void ** +rspamd_inet_library_init(void) +{ + return &local_addrs; +} + +void * +rspamd_inet_library_get_lib_ctx(void) +{ + return local_addrs; +} + +void rspamd_inet_library_destroy(void) +{ + /* Ugly: local_addrs will actually be freed by config object */ +} + +gsize rspamd_inet_address_storage_size(void) +{ + return sizeof(rspamd_inet_addr_t); +} 
diff --git a/src/libutil/addr.h b/src/libutil/addr.h new file mode 100644 index 0000000..25a3641 --- /dev/null +++ b/src/libutil/addr.h @@ -0,0 +1,356 @@ +/*- + * Copyright 2016 Vsevolod Stakhov + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +#ifndef ADDR_H_ +#define ADDR_H_ + +#include "config.h" +#include "rdns.h" + +#ifdef HAVE_SYS_SOCKET_H +#include <sys/socket.h> +#endif +#ifdef HAVE_NETINET_IN_H +#include <netinet/in.h> +#endif +#ifdef HAVE_ARPA_INET_H +#include <arpa/inet.h> +#endif +/* unix sockets */ +#ifdef HAVE_SYS_UN_H +#include <sys/un.h> +#endif + +#include "mem_pool.h" + +#ifdef __cplusplus +extern "C" { +#endif + +/** + * Opaque structure + */ +typedef struct rspamd_inet_addr_s rspamd_inet_addr_t; + +/** + * Returns pointer storage for global singleton (map for local addresses) + * @return + */ +void **rspamd_inet_library_init(void); +/** + * Returns local addresses singleton + * @return + */ +void *rspamd_inet_library_get_lib_ctx(void); +/** + * Cleanup library (currently it does nothing) + */ +void rspamd_inet_library_destroy(void); + +/** + * Create new inet address structure based on the address family and opaque init pointer + * @param af + * @param init + * @return new inet addr + */ +rspamd_inet_addr_t *rspamd_inet_address_new(int af, const void *init); + +/** + * Create new inet address structure from struct sockaddr + * @param sa + * @param slen + * @return + */ +rspamd_inet_addr_t *rspamd_inet_address_from_sa(const struct 
sockaddr *sa, + socklen_t slen); + +/** + * Create new inet address from rdns reply + * @param rep reply element + * @return new ipv4 or ipv6 addr (port is NOT set) + */ +rspamd_inet_addr_t *rspamd_inet_address_from_rnds( + const struct rdns_reply_entry *rep); + +/** + * Parse string with ipv6 address of length `len` to `target` which should be + * at least sizeof (struct in6_addr) + * @param text input string + * @param len length of `text` (if 0, then `text` must be zero terminated) + * @param target target structure + * @return TRUE if the address has been parsed, otherwise `target` content is undefined + */ +gboolean rspamd_parse_inet_address_ip6(const guchar *text, gsize len, + gpointer target); + +enum rspamd_inet_address_parse_flags { + RSPAMD_INET_ADDRESS_PARSE_DEFAULT = 0, + RSPAMD_INET_ADDRESS_PARSE_REMOTE = 1u << 0u, + RSPAMD_INET_ADDRESS_PARSE_NO_UNIX = 1u << 1u, + RSPAMD_INET_ADDRESS_PARSE_NO_PORT = 1u << 2u, +}; + +/** + * Parse string with ipv4 address of length `len` to `target` which should be + * at least sizeof (in4_addr_t) + * @param text input string + * @param len length of `text` (if 0, then `text` must be zero terminated) + * @param target target structure + * @return TRUE if the address has been parsed, otherwise `target` content is undefined + */ +gboolean rspamd_parse_inet_address_ip4(const guchar *text, gsize len, + gpointer target); + +/** + * Parse ipv4 or ipv6 address to a static buffer `target`. 
Does not support Unix sockets + * @param src + * @param srclen + * @param target + * @return + */ +gboolean rspamd_parse_inet_address_ip(const char *src, + gsize srclen, + rspamd_inet_addr_t *target); + +/** + * Try to parse address from string + * @param target target to fill + * @param src IP string representation + * @return TRUE if addr has been parsed + */ +gboolean rspamd_parse_inet_address(rspamd_inet_addr_t **target, + const char *src, + gsize srclen, + enum rspamd_inet_address_parse_flags how); + +/** + * Use memory pool allocated inet address + * @param src + * @param srclen + * @param pool + * @return + */ +rspamd_inet_addr_t *rspamd_parse_inet_address_pool(const char *src, + gsize srclen, + rspamd_mempool_t *pool, + enum rspamd_inet_address_parse_flags how); + +/** + * Returns string representation of inet address + * @param addr + * @return statically allocated string pointer (not thread safe) + */ +const char *rspamd_inet_address_to_string(const rspamd_inet_addr_t *addr); + +/** + * Returns pretty string representation of inet address + * @param addr + * @return statically allocated string pointer (not thread safe) + */ +const char *rspamd_inet_address_to_string_pretty(const rspamd_inet_addr_t *addr); + +/** + * Returns port number for the specified inet address in host byte order + * @param addr + * @return + */ +uint16_t rspamd_inet_address_get_port(const rspamd_inet_addr_t *addr); + +/** + * Returns address family of inet address + * @param addr + * @return + */ +gint rspamd_inet_address_get_af(const rspamd_inet_addr_t *addr); + +/** + * Returns sockaddr and size for this address + * @param addr + * @param sz + * @return + */ +struct sockaddr *rspamd_inet_address_get_sa(const rspamd_inet_addr_t *addr, + socklen_t *sz); + +/** + * Makes a radix key from inet address + * @param addr + * @param klen + * @return + */ +guchar *rspamd_inet_address_get_hash_key(const rspamd_inet_addr_t *addr, guint *klen); + +/** + * Receive data from an unconnected 
socket and fill the inet_addr structure if needed + * @param fd + * @param buf + * @param len + * @param target + * @return same as recvfrom(2) + */ +gssize rspamd_inet_address_recvfrom(gint fd, void *buf, gsize len, gint fl, + rspamd_inet_addr_t **target); + +/** + * Send data via unconnected socket using the specified inet_addr structure + * @param fd + * @param buf + * @param len + * @param target + * @return + */ +gssize rspamd_inet_address_sendto(gint fd, const void *buf, gsize len, gint fl, + const rspamd_inet_addr_t *addr); + +/** + * Set port for inet address + */ +void rspamd_inet_address_set_port(rspamd_inet_addr_t *addr, uint16_t port); + +/** + * Connect to inet_addr address + * @param addr + * @param async perform operations asynchronously + * @return newly created and connected socket + */ +int rspamd_inet_address_connect(const rspamd_inet_addr_t *addr, gint type, + gboolean async); + +enum rspamd_inet_address_listen_opts { + RSPAMD_INET_ADDRESS_LISTEN_DEFAULT = 0, + RSPAMD_INET_ADDRESS_LISTEN_ASYNC = (1u << 0u), + RSPAMD_INET_ADDRESS_LISTEN_REUSEPORT = (1u << 1u), + RSPAMD_INET_ADDRESS_LISTEN_NOLISTEN = (1u << 2u), +}; +/** + * Listen on a specified inet address + * @param addr + * @param type + * @param opts + * @return + */ +int rspamd_inet_address_listen(const rspamd_inet_addr_t *addr, gint type, + enum rspamd_inet_address_listen_opts opts, + gint listen_queue); + +/** + * Check whether specified ip is valid (not INADDR_ANY or INADDR_NONE) for ipv4 or ipv6 + * @param ptr pointer to struct in_addr or struct in6_addr + * @param af address family (AF_INET or AF_INET6) + * @return TRUE if the address is valid + */ +gboolean rspamd_ip_is_valid(const rspamd_inet_addr_t *addr); + +typedef void (*rspamd_accept_throttling_handler)(gint, void *); + +/** + * Accept from listening socket filling addr structure + * @param sock listening socket + * @param target allocated inet addr structure + * @return + */ +gint rspamd_accept_from_socket(gint sock, + 
rspamd_inet_addr_t **target, + rspamd_accept_throttling_handler hdl, + void *hdl_data); + +enum rspamd_parse_host_port_result { + RSPAMD_PARSE_ADDR_FAIL = 0, + RSPAMD_PARSE_ADDR_RESOLVED = 1, + RSPAMD_PARSE_ADDR_NUMERIC = 2, +}; +/** + * Parse host[:port[:priority]] line + * @param ina host address + * @param port port + * @param priority priority + * @return RSPAMD_PARSE_ADDR_FAIL in case of error, RSPAMD_PARSE_ADDR_NUMERIC in case of pure ip/unix socket + */ +enum rspamd_parse_host_port_result +rspamd_parse_host_port_priority(const gchar *str, + GPtrArray **addrs, + guint *priority, gchar **name, + guint default_port, + gboolean allow_listen, + rspamd_mempool_t *pool); + +/** + * Destroy the specified IP address + * @param addr + */ +void rspamd_inet_address_free(rspamd_inet_addr_t *addr); + +/** + * Apply the specified mask to an address (ignored for AF_UNIX) + * @param addr + * @param mask + */ +void rspamd_inet_address_apply_mask(rspamd_inet_addr_t *addr, guint mask); + +/** + * Compare a1 and a2 and return value >0, ==0 and <0 if a1 is more, equal or less than a2 correspondingly + * @param a1 + * @param a2 + * @return + */ +gint rspamd_inet_address_compare(const rspamd_inet_addr_t *a1, + const rspamd_inet_addr_t *a2, gboolean compare_ports); + +/** + * Utility function to compare addresses by in g_ptr_array + * @param a1 + * @param a2 + * @return + */ +gint rspamd_inet_address_compare_ptr(gconstpointer a1, + gconstpointer a2); + +/** + * Performs deep copy of rspamd inet addr + * @param addr + * @return + */ +rspamd_inet_addr_t *rspamd_inet_address_copy(const rspamd_inet_addr_t *addr, rspamd_mempool_t *pool); + +/** + * Returns hash for inet address (ignoring port) + */ +guint rspamd_inet_address_hash(gconstpointer a); + +guint rspamd_inet_address_port_hash(gconstpointer a); + +/** + * Returns true if two address are equal + */ +gboolean rspamd_inet_address_equal(gconstpointer a, gconstpointer b); + +gboolean rspamd_inet_address_port_equal(gconstpointer a, 
gconstpointer b); + +/** + * Returns TRUE if an address belongs to some local address + */ +gboolean rspamd_inet_address_is_local(const rspamd_inet_addr_t *addr); + +/** + * Returns size of storage required to store a complete IP address + * @return + */ +gsize rspamd_inet_address_storage_size(void); + +#ifdef __cplusplus +} +#endif + +#endif /* ADDR_H_ */ diff --git a/src/libutil/cxx/error.hxx b/src/libutil/cxx/error.hxx new file mode 100644 index 0000000..4689d42 --- /dev/null +++ b/src/libutil/cxx/error.hxx @@ -0,0 +1,161 @@ +/* + * Copyright 2024 Vsevolod Stakhov + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#ifndef RSPAMD_ERROR_HXX +#define RSPAMD_ERROR_HXX +#pragma once + +#include "config.h" +#include <string> +#include <string_view> +#include <cstdint> +#include <optional> + +/*** + * This unit is used to represent Rspamd C++ errors in a way to interoperate + * with C code if needed and avoid allocations for static strings + */ +namespace rspamd::util { + +enum class error_category : std::uint8_t { + INFORMAL, + IMPORTANT, + CRITICAL +}; + +struct error { +public: + /** + * Construct from a static string, this string must live long enough to outlive this object + * @param msg + * @param code + * @param category + */ + error(const char *msg, int code, error_category category = error_category::INFORMAL) + : error_message(msg), error_code(code), category(category) + { + } + /** + * Construct error from a temporary string taking membership + * @param msg + * @param code + * @param category + */ + error(std::string &&msg, int code, error_category category = error_category::INFORMAL) + : error_code(code), category(category) + { + static_storage = std::move(msg); + error_message = static_storage.value(); + } + /** + * Construct error from another string copying it into own storage + * @param msg + * @param code + * @param category + */ + error(const std::string &msg, int code, error_category category = error_category::INFORMAL) + : error_code(code), category(category) + { + static_storage = msg; + error_message = static_storage.value(); + } + + error(const error &other) + : error_code(other.error_code), category(other.category) + { + if (other.static_storage) { + static_storage = other.static_storage; + error_message = static_storage.value(); + } + else { + error_message = other.error_message; + } + } + + error(error &&other) noexcept + { + *this = std::move(other); + } + + error &operator=(error &&other) noexcept + { + if (other.static_storage.has_value()) { + std::swap(static_storage, other.static_storage); + error_message = static_storage.value(); + } + else { 
+ std::swap(error_message, other.error_message); + } + std::swap(other.error_code, error_code); + std::swap(other.category, category); + + return *this; + } + + /** + * Convert into GError + * @return + */ + auto into_g_error() const -> GError * + { + return g_error_new(g_quark_from_static_string("rspamd"), error_code, "%s", + error_message.data()); + } + + /** + * Convenience alias for the `into_g_error` + * @param err + */ + auto into_g_error_set(GError **err) const -> void + { + if (err && *err == nullptr) { + *err = into_g_error(); + } + } + + /** + * Convert into GError + * @return + */ + auto into_g_error(GQuark quark) const -> GError * + { + return g_error_new(quark, error_code, "%s", + error_message.data()); + } + + /** + * Convenience alias for the `into_g_error` + * @param err + */ + auto into_g_error_set(GQuark quark, GError **err) const -> void + { + if (err && *err == nullptr) { + *err = into_g_error(quark); + } + } + +public: + std::string_view error_message; + int error_code; + error_category category; + +private: + std::optional<std::string> static_storage; +}; + +}// namespace rspamd::util + +#endif//RSPAMD_ERROR_HXX diff --git a/src/libutil/cxx/file_util.cxx b/src/libutil/cxx/file_util.cxx new file mode 100644 index 0000000..2f031f0 --- /dev/null +++ b/src/libutil/cxx/file_util.cxx @@ -0,0 +1,457 @@ +/* + * Copyright 2023 Vsevolod Stakhov + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +#include "file_util.hxx" +#include <fmt/core.h> +#include "libutil/util.h" +#include "libutil/unix-std.h" + +#define DOCTEST_CONFIG_IMPLEMENTATION_IN_DLL + +#include "doctest/doctest.h" + +namespace rspamd::util { + +auto raii_file::open(const char *fname, int flags) -> tl::expected<raii_file, error> +{ + int oflags = flags; +#ifdef O_CLOEXEC + oflags |= O_CLOEXEC; +#endif + + if (fname == nullptr) { + return tl::make_unexpected(error{"cannot open file; filename is nullptr", EINVAL, error_category::CRITICAL}); + } + + auto fd = ::open(fname, oflags); + + if (fd == -1) { + return tl::make_unexpected(error{fmt::format("cannot open file {}: {}", fname, ::strerror(errno)), errno}); + } + + auto ret = raii_file{fname, fd, false}; + + if (fstat(ret.fd, &ret.st) == -1) { + return tl::make_unexpected(error{fmt::format("cannot stat file {}: {}", fname, ::strerror(errno)), errno}); + } + + return ret; +} + +auto raii_file::create(const char *fname, int flags, int perms) -> tl::expected<raii_file, error> +{ + int oflags = flags | O_CREAT; +#ifdef O_CLOEXEC + oflags |= O_CLOEXEC; +#endif + + if (fname == nullptr) { + return tl::make_unexpected(error{"cannot create file; filename is nullptr", EINVAL, error_category::CRITICAL}); + } + + auto fd = ::open(fname, oflags, perms); + + if (fd == -1) { + return tl::make_unexpected(error{fmt::format("cannot create file {}: {}", fname, ::strerror(errno)), errno}); + } + + auto ret = raii_file{fname, fd, false}; + + if (fstat(ret.fd, &ret.st) == -1) { + return tl::make_unexpected(error{fmt::format("cannot stat file {}: {}", fname, ::strerror(errno)), errno}); + } + + return ret; +} + +auto raii_file::create_temp(const char *fname, int flags, int perms) -> tl::expected<raii_file, error> +{ + int oflags = flags; +#ifdef O_CLOEXEC + oflags |= O_CLOEXEC | O_CREAT | O_EXCL; +#endif + if (fname == nullptr) { + return tl::make_unexpected(error{"cannot open file; filename is nullptr", EINVAL, error_category::CRITICAL}); + } + + auto fd = 
::open(fname, oflags, perms); + + if (fd == -1) { + return tl::make_unexpected(error{fmt::format("cannot create file {}: {}", fname, ::strerror(errno)), errno}); + } + + auto ret = raii_file{fname, fd, true}; + + if (fstat(ret.fd, &ret.st) == -1) { + return tl::make_unexpected(error{fmt::format("cannot stat file {}: {}", fname, ::strerror(errno)), errno}); + } + + return ret; +} + +auto raii_file::mkstemp(const char *pattern, int flags, int perms) -> tl::expected<raii_file, error> +{ + int oflags = flags; +#ifdef O_CLOEXEC + oflags |= O_CLOEXEC | O_CREAT | O_EXCL; +#endif + if (pattern == nullptr) { + return tl::make_unexpected(error{"cannot open file; pattern is nullptr", EINVAL, error_category::CRITICAL}); + } + + std::string mutable_pattern = pattern; + + auto fd = g_mkstemp_full(mutable_pattern.data(), oflags, perms); + + if (fd == -1) { + return tl::make_unexpected(error{fmt::format("cannot create file {}: {}", pattern, ::strerror(errno)), errno}); + } + + auto ret = raii_file{mutable_pattern.c_str(), fd, true}; + + if (fstat(ret.fd, &ret.st) == -1) { + return tl::make_unexpected(error{fmt::format("cannot stat file {}: {}", + mutable_pattern, ::strerror(errno)), + errno}); + } + + return ret; +} + +raii_file::~raii_file() noexcept +{ + if (fd != -1) { + if (temp) { + (void) unlink(fname.c_str()); + } + close(fd); + } +} + +auto raii_file::update_stat() noexcept -> bool +{ + return fstat(fd, &st) != -1; +} + +raii_file::raii_file(const char *fname, int fd, bool temp) + : fd(fd), temp(temp) +{ + std::size_t nsz; + + /* Normalize path */ + this->fname = fname; + rspamd_normalize_path_inplace(this->fname.data(), this->fname.size(), &nsz); + this->fname.resize(nsz); +} + + +raii_locked_file::~raii_locked_file() noexcept +{ + if (fd != -1) { + (void) rspamd_file_unlock(fd, FALSE); + } +} + +auto raii_locked_file::lock_raii_file(raii_file &&unlocked) -> tl::expected<raii_locked_file, error> +{ + if (!rspamd_file_lock(unlocked.get_fd(), TRUE)) { + return 
tl::make_unexpected( + error{fmt::format("cannot lock file {}: {}", unlocked.get_name(), ::strerror(errno)), errno}); + } + + return raii_locked_file{std::move(unlocked)}; +} + +auto raii_locked_file::unlock() -> raii_file +{ + if (fd != -1) { + (void) rspamd_file_unlock(fd, FALSE); + } + + return raii_file{static_cast<raii_file &&>(std::move(*this))}; +} + +raii_mmaped_file::raii_mmaped_file(raii_file &&file, void *map, std::size_t sz) + : file(std::move(file)), map(map), map_size(sz) +{ +} + +auto raii_mmaped_file::mmap_shared(raii_file &&file, + int flags, std::int64_t offset) -> tl::expected<raii_mmaped_file, error> +{ + void *map; + + if (file.get_stat().st_size < offset || offset < 0) { + return tl::make_unexpected(error{ + fmt::format("cannot mmap file {} due to incorrect offset; offset={}, size={}", + file.get_name(), offset, file.get_size()), + EINVAL}); + } + /* Update stat on file to ensure it is up-to-date */ + file.update_stat(); + map = mmap(nullptr, (std::size_t)(file.get_size() - offset), flags, MAP_SHARED, file.get_fd(), offset); + + if (map == MAP_FAILED) { + return tl::make_unexpected(error{fmt::format("cannot mmap file {}: {}", + file.get_name(), ::strerror(errno)), + errno}); + } + + return raii_mmaped_file{std::move(file), map, (std::size_t)(file.get_size() - offset)}; +} + +auto raii_mmaped_file::mmap_shared(const char *fname, int open_flags, + int mmap_flags, std::int64_t offset) -> tl::expected<raii_mmaped_file, error> +{ + auto file = raii_file::open(fname, open_flags); + + if (!file.has_value()) { + return tl::make_unexpected(file.error()); + } + + return raii_mmaped_file::mmap_shared(std::move(file.value()), mmap_flags, offset); +} + +raii_mmaped_file::~raii_mmaped_file() +{ + if (map != nullptr) { + munmap(map, map_size); + } +} + +raii_mmaped_file::raii_mmaped_file(raii_mmaped_file &&other) noexcept + : file(std::move(other.file)) +{ + std::swap(map, other.map); + std::swap(map_size, other.map_size); +} + +auto 
raii_file_sink::create(const char *fname, int flags, int perms, + const char *suffix) -> tl::expected<raii_file_sink, error> +{ + if (!fname || !suffix) { + return tl::make_unexpected(error{"cannot create file; filename is nullptr", EINVAL, error_category::CRITICAL}); + } + + auto tmp_fname = fmt::format("{}.{}", fname, suffix); + auto file = raii_locked_file::create(tmp_fname.c_str(), flags, perms); + + if (!file.has_value()) { + return tl::make_unexpected(file.error()); + } + + return raii_file_sink{std::move(file.value()), fname, std::move(tmp_fname)}; +} + +auto raii_file_sink::write_output() -> bool +{ + if (success) { + /* We cannot write output twice */ + return false; + } + + if (rename(tmp_fname.c_str(), output_fname.c_str()) == -1) { + return false; + } + + success = true; + + return true; +} + +raii_file_sink::~raii_file_sink() +{ + if (!success) { + /* Unlink sink */ + unlink(tmp_fname.c_str()); + } +} + +raii_file_sink::raii_file_sink(raii_locked_file &&_file, const char *_output, std::string &&_tmp_fname) + : file(std::move(_file)), output_fname(_output), tmp_fname(std::move(_tmp_fname)), success(false) +{ +} + +raii_file_sink::raii_file_sink(raii_file_sink &&other) noexcept + : file(std::move(other.file)), + output_fname(std::move(other.output_fname)), + tmp_fname(std::move(other.tmp_fname)), + success(other.success) +{ +} + +namespace tests { +template<class T> +static auto test_read_file(const T &f) +{ + auto fd = f.get_fd(); + (void) ::lseek(fd, 0, SEEK_SET); + std::string buf('\0', (std::size_t) f.get_size()); + ::read(fd, buf.data(), buf.size()); + return buf; +} +template<class T> +static auto test_write_file(const T &f, const std::string_view &buf) +{ + auto fd = f.get_fd(); + (void) ::lseek(fd, 0, SEEK_SET); + return ::write(fd, buf.data(), buf.size()); +} +auto random_fname(std::string_view extension) +{ + const auto *tmpdir = getenv("TMPDIR"); + if (tmpdir == nullptr) { + tmpdir = G_DIR_SEPARATOR_S "tmp"; + } + + std::string 
out_fname{tmpdir}; + out_fname += G_DIR_SEPARATOR_S; + + char hexbuf[32]; + rspamd_random_hex(hexbuf, sizeof(hexbuf)); /* all sizeof(hexbuf) bytes are appended below with an explicit length */ + out_fname.append((const char *) hexbuf, sizeof(hexbuf)); + if (!extension.empty()) { + out_fname.append("."); + out_fname.append(extension); + } + + return out_fname; +} +TEST_SUITE("loked files utils") +{ + /* A temporary file must exist while its handle is alive and be gone once the handle is destroyed */ + TEST_CASE("create and delete file") + { + auto fname = random_fname("tmp"); + { + auto raii_locked_file = raii_locked_file::create_temp(fname.c_str(), O_RDONLY, 00600); + CHECK(raii_locked_file.has_value()); + CHECK(raii_locked_file.value().get_extension() == "tmp"); + CHECK(::access(fname.c_str(), R_OK) == 0); + } + // File must be deleted after this call + auto ret = ::access(fname.c_str(), R_OK); + auto serrno = errno; + CHECK(ret == -1); + CHECK(serrno == ENOENT); + // Create one more time + { + auto raii_locked_file = raii_locked_file::create_temp(fname.c_str(), O_RDONLY, 00600); + CHECK(raii_locked_file.has_value()); + CHECK(::access(fname.c_str(), R_OK) == 0); + } + ret = ::access(fname.c_str(), R_OK); + serrno = errno; + CHECK(ret == -1); + CHECK(serrno == ENOENT); + } + /* Opening the same file as a second locked handle must fail while the first handle exists */ + TEST_CASE("check lock") + { + auto fname = random_fname(""); + { + auto raii_locked_file = raii_locked_file::create_temp(fname.c_str(), O_RDONLY, 00600); + CHECK(raii_locked_file.has_value()); + CHECK(raii_locked_file.value().get_extension() == ""); + CHECK(::access(fname.c_str(), R_OK) == 0); + auto raii_locked_file2 = raii_locked_file::open(fname.c_str(), O_RDONLY); + CHECK(!raii_locked_file2.has_value()); + CHECK(::access(fname.c_str(), R_OK) == 0); + } + // File must be deleted after this call + auto ret = ::access(fname.c_str(), R_OK); + auto serrno = errno; + CHECK(ret == -1); + CHECK(serrno == ENOENT); + } + /* Returns the normalized TMPDIR (or /tmp) with a trailing directory separator */ + auto get_tmpdir()->std::string + { + const auto *tmpdir = getenv("TMPDIR"); + if (tmpdir == nullptr) { + tmpdir = G_DIR_SEPARATOR_S "tmp"; + } + + std::size_t sz; + std::string mut_fname = tmpdir; + rspamd_normalize_path_inplace(mut_fname.data(), 
mut_fname.size(), &sz); + mut_fname.resize(sz); + + if (!mut_fname.ends_with(G_DIR_SEPARATOR)) { + mut_fname += G_DIR_SEPARATOR; + } + + return mut_fname; + } + /* mkstemp-created files: directory part, locking and removal on destruction */ + TEST_CASE("tempfile") + { + std::string tmpname; + const std::string tmpdir{get_tmpdir()}; + { + auto raii_locked_file = raii_locked_file::mkstemp(std::string(tmpdir + G_DIR_SEPARATOR_S + "doctest-XXXXXXXX").c_str(), + O_RDONLY, 00600); + CHECK(raii_locked_file.has_value()); + CHECK(raii_locked_file.value().get_dir() == tmpdir); + CHECK(access(raii_locked_file.value().get_name().data(), R_OK) == 0); + auto raii_locked_file2 = raii_locked_file::open(raii_locked_file.value().get_name().data(), O_RDONLY); + CHECK(!raii_locked_file2.has_value()); + CHECK(access(raii_locked_file.value().get_name().data(), R_OK) == 0); + tmpname = raii_locked_file.value().get_name(); + } + // File must be deleted after this call + auto ret = ::access(tmpname.c_str(), R_OK); + auto serrno = errno; + CHECK(ret == -1); + CHECK(serrno == ENOENT); + } + /* Writes a payload, maps the file read-write, changes the first byte through the mapping and then maps the same file again by name */ + TEST_CASE("mmap") + { + std::string tmpname; + const std::string tmpdir{get_tmpdir()}; + { + auto raii_file = raii_file::mkstemp(std::string(tmpdir + G_DIR_SEPARATOR_S + "doctest-XXXXXXXX").c_str(), + O_RDWR | O_CREAT | O_EXCL, 00600); + CHECK(raii_file.has_value()); + CHECK(raii_file->get_dir() == tmpdir); + CHECK(access(raii_file->get_name().data(), R_OK) == 0); + tmpname = std::string{raii_file->get_name()}; + char payload[] = {'1', '2', '3'}; + CHECK(write(raii_file->get_fd(), payload, sizeof(payload)) == sizeof(payload)); + auto mmapped_file1 = raii_mmaped_file::mmap_shared(std::move(raii_file.value()), PROT_READ | PROT_WRITE); + CHECK(mmapped_file1.has_value()); + CHECK(!raii_file->is_valid()); /* the descriptor has been moved into the mapping object */ + CHECK(mmapped_file1->get_size() == sizeof(payload)); + CHECK(memcmp(mmapped_file1->get_map(), payload, sizeof(payload)) == 0); + *(char *) mmapped_file1->get_map() = '2'; + auto mmapped_file2 = raii_mmaped_file::mmap_shared(tmpname.c_str(), O_RDONLY, PROT_READ); + 
CHECK(mmapped_file2.has_value()); + CHECK(mmapped_file2->get_size() == sizeof(payload)); + CHECK(memcmp(mmapped_file2->get_map(), payload, sizeof(payload)) != 0); /* the second mapping sees the byte changed through the first one */ + CHECK(memcmp(mmapped_file2->get_map(), mmapped_file1->get_map(), sizeof(payload)) == 0); + } + // File must be deleted after this call + auto ret = ::access(tmpname.c_str(), R_OK); + auto serrno = errno; + CHECK(ret == -1); + CHECK(serrno == ENOENT); + } + +}// TEST_SUITE + +}// namespace tests + +}// namespace rspamd::util diff --git a/src/libutil/cxx/file_util.hxx b/src/libutil/cxx/file_util.hxx new file mode 100644 index 0000000..4528905 --- /dev/null +++ b/src/libutil/cxx/file_util.hxx @@ -0,0 +1,312 @@ +/* + * Copyright 2023 Vsevolod Stakhov + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +#ifndef RSPAMD_FILE_UTIL_HXX +#define RSPAMD_FILE_UTIL_HXX +#pragma once + +#include "config.h" +#include "contrib/expected/expected.hpp" +#include "libutil/cxx/error.hxx" +#include <string> +#include <sys/stat.h> + +namespace rspamd::util { +/** + * A simple RAII object to contain a move only file descriptor + * The file is closed (and unlinked first, if it is flagged as temporary) on destruction + */ +struct raii_file { +public: + virtual ~raii_file() noexcept; + + static auto open(const char *fname, int flags) -> tl::expected<raii_file, error>; + static auto open(const std::string &fname, int flags) -> tl::expected<raii_file, error> + { + return open(fname.c_str(), flags); + }; + static auto create(const char *fname, int flags, int perms) -> tl::expected<raii_file, error>; + static auto create(const std::string &fname, int flags, int perms) -> tl::expected<raii_file, error> + { + return create(fname.c_str(), flags, perms); + }; + + static auto create_temp(const char *fname, int flags, int perms) -> tl::expected<raii_file, error>; + static auto mkstemp(const char *pattern, int flags, int perms) -> tl::expected<raii_file, error>; + + auto get_fd() const -> int + { + return fd; + } + /* Returns the cached stat data (refreshed only by update_stat()) */ + auto get_stat() const -> const struct stat & + { + return st; + }; + /* Returns st_size from the cached stat data */ + auto get_size() const -> std::size_t + { + return st.st_size; + }; + + auto get_name() const -> std::string_view + { + return std::string_view{fname}; + } + /* Returns the directory part of the name including one trailing separator, or the whole name if it has no separator */ + auto get_dir() const -> std::string_view + { + auto sep_pos = fname.rfind(G_DIR_SEPARATOR); + + if (sep_pos == std::string::npos) { + return std::string_view{fname}; + } + /* Collapse a run of separators that ends at sep_pos */ + while (sep_pos >= 1 && fname[sep_pos - 1] == G_DIR_SEPARATOR) { + sep_pos--; + } + + return std::string_view{fname.c_str(), sep_pos + 1}; + } + /* Returns everything after the first '.' found past the last directory separator, or an empty view if there is no dot */ + auto get_extension() const -> std::string_view + { + auto sep_pos = fname.rfind(G_DIR_SEPARATOR); + + if (sep_pos == std::string::npos) { + sep_pos = 0; + } + + auto filename = std::string_view{fname.c_str() + sep_pos}; + auto dot_pos = filename.find('.'); + + if (dot_pos == 
std::string::npos) { + return std::string_view{}; + } + else { + return std::string_view{filename.data() + dot_pos + 1, filename.size() - dot_pos - 1}; + } + } + + raii_file &operator=(raii_file &&other) noexcept + { + std::swap(fd, other.fd); + std::swap(temp, other.temp); + std::swap(fname, other.fname); + std::swap(st, other.st); + + return *this; + } + + raii_file(raii_file &&other) noexcept + { + *this = std::move(other); + } + + /** + * Prevent file from being deleted + * @return + */ + auto make_immortal() noexcept + { + temp = false; + } + + /** + * Performs fstat on an opened file to refresh internal stat + * @return + */ + auto update_stat() noexcept -> bool; + + auto is_valid() noexcept -> bool + { + return fd != -1; + } + + /* Do not allow copy/default ctor */ + const raii_file &operator=(const raii_file &other) = delete; + raii_file() = delete; + raii_file(const raii_file &other) = delete; + +protected: + int fd = -1; + bool temp; + std::string fname; + struct stat st; + + explicit raii_file(const char *fname, int fd, bool temp); +}; +/** + * A simple RAII object to contain a file descriptor with an flock wrap + * A file is unlocked and closed when not needed + */ +struct raii_locked_file final : public raii_file { +public: + ~raii_locked_file() noexcept override; + + static auto open(const char *fname, int flags) -> tl::expected<raii_locked_file, error> + { + auto locked = raii_file::open(fname, flags).and_then([]<class T>(T &&file) { + return lock_raii_file(std::forward<T>(file)); + }); + + return locked; + } + static auto create(const char *fname, int flags, int perms) -> tl::expected<raii_locked_file, error> + { + auto locked = raii_file::create(fname, flags, perms).and_then([]<class T>(T &&file) { + return lock_raii_file(std::forward<T>(file)); + }); + + return locked; + } + static auto create_temp(const char *fname, int flags, int perms) -> tl::expected<raii_locked_file, error> + { + auto locked = raii_file::create_temp(fname, flags, 
perms).and_then([]<class T>(T &&file) { + return lock_raii_file(std::forward<T>(file)); + }); + + return locked; + } + static auto mkstemp(const char *pattern, int flags, int perms) -> tl::expected<raii_locked_file, error> + { + auto locked = raii_file::mkstemp(pattern, flags, perms).and_then([]<class T>(T &&file) { + return lock_raii_file(std::forward<T>(file)); + }); + + return locked; + } + /* Swaps the whole state with other; whatever this object owned before is released by other's destructor */ + raii_locked_file &operator=(raii_locked_file &&other) noexcept + { + std::swap(fd, other.fd); + std::swap(temp, other.temp); + std::swap(fname, other.fname); + std::swap(st, other.st); + + return *this; + } + + /** + * Unlock a locked file and return back unlocked file transferring ownership. + * A locked file cannot be used after this method. + */ + auto unlock() -> raii_file; + + raii_locked_file(raii_locked_file &&other) noexcept + : raii_file(static_cast<raii_file &&>(std::move(other))) + { + } + /* Do not allow copy/default ctor */ + const raii_locked_file &operator=(const raii_locked_file &other) = delete; + raii_locked_file() = delete; + raii_locked_file(const raii_locked_file &other) = delete; + +private: + /* Takes the lock on an opened file; used by all the static factories above */ + static auto lock_raii_file(raii_file &&unlocked) -> tl::expected<raii_locked_file, error>; + raii_locked_file(raii_file &&other) noexcept + : raii_file(std::move(other)) + { + } + explicit raii_locked_file(const char *fname, int fd, bool temp) + : raii_file(fname, fd, temp) + { + } +}; + +/** + * A mmap wrapper on top of a raii_file (the file itself is not locked by this class) + */ +struct raii_mmaped_file final { + ~raii_mmaped_file(); + static auto mmap_shared(raii_file &&file, int flags, std::int64_t offset = 0) -> tl::expected<raii_mmaped_file, error>; + static auto mmap_shared(const char *fname, int open_flags, int mmap_flags, std::int64_t offset = 0) -> tl::expected<raii_mmaped_file, error>; + // Returns a (non const-qualified) pointer to the underlying map + auto get_map() const -> void * + { + return map; + } + auto get_file() const -> const raii_file & + { + return file; + } + // Passes the ownership of the 
mmaped memory to the callee + auto steal_map() -> std::tuple<void *, std::size_t> + { + auto ret = std::make_tuple(this->map, map_size); + this->map = nullptr; + return ret; + } + + auto get_size() const -> std::size_t + { + return file.get_stat().st_size; + } + + raii_mmaped_file &operator=(raii_mmaped_file &&other) noexcept + { + std::swap(map, other.map); + std::swap(map_size, other.map_size); + file = std::move(other.file); + + return *this; + } + + raii_mmaped_file(raii_mmaped_file &&other) noexcept; + + /* Do not allow copy/default ctor */ + const raii_mmaped_file &operator=(const raii_mmaped_file &other) = delete; + raii_mmaped_file() = delete; + raii_mmaped_file(const raii_mmaped_file &other) = delete; + +private: + /* Is intended to be used with map_shared */ + explicit raii_mmaped_file(raii_file &&_file, void *_map, std::size_t sz); + raii_file file; + void *map = nullptr; + std::size_t map_size; +}; + +/** + * A helper to have a file to write that will be renamed to the + * target file if successful or deleted in the case of failure + */ +struct raii_file_sink final { + static auto create(const char *fname, int flags, int perms, const char *suffix = "new") + -> tl::expected<raii_file_sink, error>; + auto write_output() -> bool; + ~raii_file_sink(); + auto get_fd() const -> int + { + return file.get_fd(); + } + + raii_file_sink(raii_file_sink &&other) noexcept; + /* Do not allow copy/default ctor */ + const raii_file_sink &operator=(const raii_file_sink &other) = delete; + raii_file_sink() = delete; + raii_file_sink(const raii_file_sink &other) = delete; + +private: + explicit raii_file_sink(raii_locked_file &&_file, const char *_output, std::string &&_tmp_fname); + raii_locked_file file; + std::string output_fname; + std::string tmp_fname; + bool success; +}; + +}// namespace rspamd::util + +#endif//RSPAMD_FILE_UTIL_HXX diff --git a/src/libutil/cxx/hash_util.hxx b/src/libutil/cxx/hash_util.hxx new file mode 100644 index 0000000..05f3d97 --- /dev/null +++ 
b/src/libutil/cxx/hash_util.hxx @@ -0,0 +1,109 @@ +/* + * Copyright 2023 Vsevolod Stakhov + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +#ifndef RSPAMD_HASH_UTIL_HXX +#define RSPAMD_HASH_UTIL_HXX + +#pragma once + +#include <string_view> +#include <string> +#include "contrib/ankerl/unordered_dense.h" + + +namespace rspamd { +/* + * Transparent smart pointers hashing + * Both functors work on the pointed-to values and dereference without a null check + */ +template<typename T> +struct smart_ptr_equal { + using is_transparent = void; /* We want to find values in a set of shared_ptr by reference */ + auto operator()(const std::shared_ptr<T> &a, const std::shared_ptr<T> &b) const + { + return (*a) == (*b); + } + auto operator()(const std::shared_ptr<T> &a, const T &b) const + { + return (*a) == b; + } + auto operator()(const T &a, const std::shared_ptr<T> &b) const + { + return a == (*b); + } + auto operator()(const std::unique_ptr<T> &a, const std::unique_ptr<T> &b) const + { + return (*a) == (*b); + } + auto operator()(const std::unique_ptr<T> &a, const T &b) const + { + return (*a) == b; + } + auto operator()(const T &a, const std::unique_ptr<T> &b) const + { + return a == (*b); + } +}; + /* Hashes the pointee with std::hash<T>, so a pointer and a plain T reference produce the same hash */ +template<typename T> +struct smart_ptr_hash { + using is_transparent = void; /* We want to find values in a set of shared_ptr by reference */ + using is_avalanching = void; + auto operator()(const std::shared_ptr<T> &a) const + { + return std::hash<T>()(*a); + } + auto operator()(const std::unique_ptr<T> &a) const + { + return std::hash<T>()(*a); + 
} + auto operator()(const T &a) const + { + return std::hash<T>()(a); + } +}; + +/* Enable lookup by string view */ +struct smart_str_equal { + using is_transparent = void; + auto operator()(const std::string &a, const std::string &b) const + { + return a == b; + } + auto operator()(const std::string_view &a, const std::string &b) const + { + return a == b; + } + auto operator()(const std::string &a, const std::string_view &b) const + { + return a == b; + } +}; + /* Hash counterpart of smart_str_equal; presumably the two ankerl hashes agree for equal contents, which heterogeneous lookup needs - verify against ankerl */ +struct smart_str_hash { + using is_transparent = void; + using is_avalanching = void; + auto operator()(const std::string &a) const + { + return ankerl::unordered_dense::hash<std::string>()(a); + } + auto operator()(const std::string_view &a) const + { + return ankerl::unordered_dense::hash<std::string_view>()(a); + } +}; + +}// namespace rspamd + +#endif//RSPAMD_HASH_UTIL_HXX diff --git a/src/libutil/cxx/local_shared_ptr.hxx b/src/libutil/cxx/local_shared_ptr.hxx new file mode 100644 index 0000000..78ed5ba --- /dev/null +++ b/src/libutil/cxx/local_shared_ptr.hxx @@ -0,0 +1,440 @@ +/*- + * Copyright 2021 Vsevolod Stakhov + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#ifndef RSPAMD_LOCAL_SHARED_PTR_HXX +#define RSPAMD_LOCAL_SHARED_PTR_HXX + +#pragma once + +#include <memory> +#include <algorithm> // for std::swap +#include <cstddef> // for std::size_t +#include <functional>// for std::less + +/* + * Smart pointers with no atomic refcounts to speed up Rspamd which is + * apparently single threaded + */ +namespace rspamd { + +namespace detail { + /* Control block base: plain int counters for shared and weak owners plus a virtual dispose() that destroys the managed object */ +class ref_cnt { +public: + using refcount_t = int; + + constexpr auto add_shared() -> refcount_t + { + return ++ref_shared; + } + constexpr auto add_weak() -> refcount_t + { + return ++ref_weak; + } + constexpr auto release_shared() -> refcount_t + { + return --ref_shared; + } + constexpr auto release_weak() -> refcount_t + { + return --ref_weak; + } + constexpr auto shared_count() const -> refcount_t + { + return ref_shared; + } + constexpr auto weak_count() const -> refcount_t + { + return ref_weak; + } + virtual ~ref_cnt() + { + } + virtual void dispose() = 0; + +private: + refcount_t ref_weak = 0; + refcount_t ref_shared = 1; /* a new control block already counts its first shared owner */ +}; + /* Control block that stores the managed object inline (used by local_make_shared) */ +template<class T> +class obj_and_refcnt : public ref_cnt { +private: + typedef typename std::aligned_storage<sizeof(T), std::alignment_of<T>::value>::type storage_type; + storage_type storage; + bool initialized; + virtual void dispose() override + { + if (initialized) { + T *p = reinterpret_cast<T *>(&storage); + p->~T(); + initialized = false; + } + } + +public: + template<typename... 
Args> + explicit obj_and_refcnt(Args &&...args) + : initialized(true) + { + new (&storage) T(std::forward<Args>(args)...); + } + /* Returns the embedded object, or nullptr once it has been disposed */ + auto get(void) -> T * + { + if (initialized) { + return reinterpret_cast<T *>(&storage); + } + + return nullptr; + } + virtual ~obj_and_refcnt() = default; +}; + /* Control block for an externally allocated object that is released through the deleter D */ +template<class T, class D = typename std::default_delete<T>> +class ptr_and_refcnt : public ref_cnt { +private: + T *ptr; + D deleter; + virtual void dispose() override + { + deleter(ptr); + ptr = nullptr; + } + +public: + explicit ptr_and_refcnt(T *_ptr, D &&d = std::default_delete<T>()) + : ptr(_ptr), + deleter(std::move(d)) + { + } + virtual ~ptr_and_refcnt() = default; +}; + +}// namespace detail + +template<class T> +class local_weak_ptr; + +template<class T> +class local_shared_ptr { +public: + typedef T element_type; + typedef local_weak_ptr<T> weak_type; + + // Simplified comparing to libc++: no rebind here (a custom deleter constructor is provided below) + // constructors: + constexpr local_shared_ptr() noexcept + : px(nullptr), cnt(nullptr) + { + } + + template<class Y, typename std::enable_if< + std::is_convertible<Y *, element_type *>::value, bool>::type = true> + explicit local_shared_ptr(Y *p) + : px(p), cnt(new detail::ptr_and_refcnt(p)) + { + } + + // custom deleter + template<class Y, class D, typename std::enable_if<std::is_convertible<Y *, element_type *>::value, bool>::type = true> + explicit local_shared_ptr(Y *p, D &&d) + : px(p), cnt(new detail::ptr_and_refcnt<Y, D>(p, std::forward<D>(d))) + { + } + + local_shared_ptr(const local_shared_ptr &r) noexcept + : px(r.px), cnt(r.cnt) + { + if (cnt) { + cnt->add_shared(); + } + } + local_shared_ptr(local_shared_ptr &&r) noexcept + : px(r.px), cnt(r.cnt) + { + r.px = nullptr; + r.cnt = nullptr; + } + /* NOTE(review): the shared count is incremented even if r has already expired - confirm this is intended */ + template<class Y> + explicit local_shared_ptr(const local_weak_ptr<Y> &r) + : px(r.px), cnt(r.cnt) + { + if (cnt) { + cnt->add_shared(); + } + } + local_shared_ptr(std::nullptr_t) + : local_shared_ptr() + { + } + /* The last shared owner disposes the object; the control block is freed only when no weak owners remain */ + ~local_shared_ptr() + { + if 
(cnt) { + if (cnt->release_shared() <= 0) { + cnt->dispose(); + + if (cnt->weak_count() == 0) { + delete cnt; + } + } + } + } + + // assignment: (copy/move-and-swap; the temporary releases the previously held reference) + local_shared_ptr &operator=(const local_shared_ptr &r) noexcept + { + local_shared_ptr(r).swap(*this); + return *this; + } + local_shared_ptr &operator=(local_shared_ptr &&r) noexcept + { + local_shared_ptr(std::move(r)).swap(*this); + return *this; + } + + // Mutators + void swap(local_shared_ptr &r) noexcept + { + std::swap(this->cnt, r.cnt); + std::swap(this->px, r.px); + } + void reset() noexcept + { + local_shared_ptr().swap(*this); + } + + // Observers: + T *get() const noexcept + { + return px; + } + + T &operator*() const noexcept + { + return *px; + } + T *operator->() const noexcept + { + return px; + } + long use_count() const noexcept + { + if (cnt) { + return cnt->shared_count(); + } + + return 0; + } + bool unique() const noexcept + { + return use_count() == 1; + } + + explicit operator bool() const noexcept + { + return px != nullptr; + } + /* Equality compares the stored pointers */ + template<class Y, typename std::enable_if< + std::is_convertible<Y *, element_type *>::value, bool>::type = true> + auto operator==(const local_shared_ptr<Y> &other) const -> bool + { + return px == other.px; + } + /* Ordering compares the pointed-to values, not the pointers; both sides are dereferenced without a null check */ + template<class Y, typename std::enable_if< + std::is_convertible<Y *, element_type *>::value, bool>::type = true> + auto operator<(const local_shared_ptr<Y> &other) const -> auto + { + return *px < *other.px; + } + +private: + T *px;// contained pointer + detail::ref_cnt *cnt; + + template<class _T, class... Args> + friend local_shared_ptr<_T> local_make_shared(Args &&...args); + friend class local_weak_ptr<T>; +}; + +template<class T, class... 
Args> +local_shared_ptr<T> local_make_shared(Args &&...args) +{ + local_shared_ptr<T> ptr; + auto tmp_object = new detail::obj_and_refcnt<T>(std::forward<Args>(args)...); /* one allocation holds both the object and its counters */ + ptr.px = tmp_object->get(); + ptr.cnt = tmp_object; + + return ptr; +} + /* Non-owning observer of an object managed by local_shared_ptr; it only keeps the control block alive */ +template<class T> +class local_weak_ptr { +public: + typedef T element_type; + + // constructors + constexpr local_weak_ptr() noexcept + : px(nullptr), cnt(nullptr) + { + } + template<class Y, typename std::enable_if< + std::is_convertible<Y *, element_type *>::value, bool>::type = true> + local_weak_ptr(local_shared_ptr<Y> const &r) noexcept + : px(r.px), cnt(r.cnt) + { + if (cnt) { + cnt->add_weak(); + } + } + + local_weak_ptr(local_weak_ptr const &r) noexcept + : px(r.px), cnt(r.cnt) + { + if (cnt) { + cnt->add_weak(); + } + } + local_weak_ptr(local_weak_ptr &&r) noexcept + : px(r.px), cnt(r.cnt) + { + r.px = nullptr; + r.cnt = nullptr; + } + /* The control block is freed by the last weak owner only once there are no shared owners left */ + ~local_weak_ptr() + { + if (cnt) { + if (cnt->release_weak() <= 0 && cnt->shared_count() == 0) { + delete cnt; + } + } + } + + // assignment + local_weak_ptr &operator=(local_weak_ptr const &r) noexcept + { + local_weak_ptr(r).swap(*this); + return *this; + } + local_weak_ptr &operator=(local_shared_ptr<T> const &r) noexcept + { + local_weak_ptr(r).swap(*this); + return *this; + } + + template<class Y, typename std::enable_if< + std::is_convertible<Y *, element_type *>::value, bool>::type = true> + local_weak_ptr &operator=(local_weak_ptr<Y> const &r) noexcept + { + local_weak_ptr(r).swap(*this); + return *this; + } + local_weak_ptr &operator=(local_weak_ptr &&r) noexcept + { + local_weak_ptr(std::move(r)).swap(*this); + return *this; + } + + // modifiers + void swap(local_weak_ptr &r) noexcept + { + std::swap(this->cnt, r.cnt); + std::swap(this->px, r.px); + } + void reset() noexcept + { + local_weak_ptr().swap(*this); + } + + // observers + long use_count() const noexcept + { + if (cnt) { + return cnt->shared_count(); + } + return 0; + } + bool expired() const noexcept + { + if 
(cnt) { + return cnt->shared_count() == 0; + } + + return true; + } + /* Returns a shared pointer to the object, or an empty pointer if the object has already been disposed */ + local_shared_ptr<T> lock() const noexcept + { + local_shared_ptr<T> tmp; + + /* Never revive a control block whose object is gone: px would be dangling */ + if (cnt && cnt->shared_count() > 0) { + tmp.cnt = cnt; + cnt->add_shared(); + tmp.px = px; + } + + return tmp; + } + +private: + element_type *px; + detail::ref_cnt *cnt; +}; + + +}// namespace rspamd + +/* Hashing stuff */ +namespace std { + /* Hashes the pointed-to value; throws std::logic_error for an empty pointer */ +template<class T> +struct hash<rspamd::local_shared_ptr<T>> { + inline auto operator()(const rspamd::local_shared_ptr<T> &p) const -> auto + { + if (!p) { + throw std::logic_error("no hash for dangling pointer"); + } + return hash<T>()(*p.get()); + } +}; + /* Hashes the pointed-to value through a temporary shared pointer, as local_weak_ptr has neither operator bool nor get() */ +template<class T> +struct hash<rspamd::local_weak_ptr<T>> { + inline auto operator()(const rspamd::local_weak_ptr<T> &p) const -> auto + { + auto locked = p.lock(); + + if (!locked) { + throw std::logic_error("no hash for dangling pointer"); + } + return hash<T>()(*locked.get()); + } +}; + +template<class T> +inline void swap(rspamd::local_shared_ptr<T> &x, rspamd::local_shared_ptr<T> &y) noexcept +{ + x.swap(y); +} + +template<class T> +inline void swap(rspamd::local_weak_ptr<T> &x, rspamd::local_weak_ptr<T> &y) noexcept +{ + x.swap(y); +} + +}// namespace std + +#endif//RSPAMD_LOCAL_SHARED_PTR_HXX diff --git a/src/libutil/cxx/utf8_util.cxx b/src/libutil/cxx/utf8_util.cxx new file mode 100644 index 0000000..5fc83ca --- /dev/null +++ b/src/libutil/cxx/utf8_util.cxx @@ -0,0 +1,421 @@ +/*- + * Copyright 2021 Vsevolod Stakhov + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#define U_CHARSET_IS_UTF8 1 +#include <unicode/utypes.h> +#include <unicode/utf8.h> +#include <unicode/uchar.h> +#include <unicode/normalizer2.h> +#include <unicode/schriter.h> +#include <unicode/coll.h> +#include <unicode/translit.h> +#include <utility> +#include <tuple> +#include <string> +#include <limits> +#include <memory> + +#include "utf8_util.h" +#include "str_util.h" + +#define DOCTEST_CONFIG_IMPLEMENTATION_IN_DLL +#include "doctest/doctest.h" + /* Skips leading and trailing Unicode whitespace and zero width characters: returns a pointer inside str and stores the trimmed length in *len; str itself is not modified */ +const char * +rspamd_string_unicode_trim_inplace(const char *str, size_t *len) +{ + const auto *p = str, *end = str + *len; + auto i = 0; /* NOTE(review): i is deduced as int while *len is size_t - confirm inputs longer than INT_MAX cannot reach this function */ + + while (i < *len) { + UChar32 uc; + auto prev_i = i; + + U8_NEXT(p, i, *len, uc); + + if (!u_isUWhiteSpace(uc) && !IS_ZERO_WIDTH_SPACE(uc)) { + i = prev_i; /* step back to the start of the first character that is kept */ + break; + } + } + + p += i; + (*len) -= i; + i = end - p; + auto *ret = p; + + if (i > 0) { + /* Walk backwards from the end while the characters are still trimmable */ + while (i > 0) { + UChar32 uc; + auto prev_i = i; + + U8_PREV(p, 0, i, uc); + + if (!u_isUWhiteSpace(uc) && !IS_ZERO_WIDTH_SPACE(uc)) { + i = prev_i; + break; + } + } + + *len = i; + } + + return ret; +} + /* Applies NFKC normalisation and removes the zero width characters listed below, rewriting the buffer in place and updating *len */ +enum rspamd_utf8_normalise_result +rspamd_normalise_unicode_inplace(char *start, size_t *len) +{ + UErrorCode uc_err = U_ZERO_ERROR; + const auto *nfkc_norm = icu::Normalizer2::getNFKCInstance(uc_err); + static icu::UnicodeSet zw_spaces{}; + + if (!zw_spaces.isFrozen()) { + /* Add zw spaces to the set */ + zw_spaces.add(0x200B); + /* TODO: ZW non joiner, it might be used for ligatures, so it should possibly be excluded as well */ + zw_spaces.add(0x200C); + /* See github issue #4290 for explanation. 
It seems that the ZWJ has many legit use cases */ + //zw_spaces.add(0x200D); + /* U+FEFF ZERO WIDTH NO-BREAK SPACE (0xFEF is an unrelated code point) */ + zw_spaces.add(0xFEFF); + zw_spaces.add(0x00AD); + zw_spaces.freeze(); + } + + int ret = RSPAMD_UNICODE_NORM_NORMAL; + + g_assert(U_SUCCESS(uc_err)); + + auto uc_string = icu::UnicodeString::fromUTF8(icu::StringPiece(start, *len)); + auto is_normal = nfkc_norm->quickCheck(uc_string, uc_err); + + if (!U_SUCCESS(uc_err)) { + return RSPAMD_UNICODE_NORM_ERROR; + } + + /* Filter zero width spaces and push resulting string back */ + const auto filter_zw_spaces_and_push_back = [&](const icu::UnicodeString &input) -> size_t { + icu::StringCharacterIterator it{input}; + size_t i = 0; + + while (it.hasNext()) { + /* libicu is very 'special' if it comes to 'safe' macro */ + if (i >= *len) { + ret |= RSPAMD_UNICODE_NORM_ERROR; + break; + } + + auto uc = it.next32PostInc(); + + if (zw_spaces.contains(uc)) { + ret |= RSPAMD_UNICODE_NORM_ZERO_SPACES; + } + else { + UBool err = 0; + /* U+FFFD in the decoded input is reported as an unnormal string */ + if (uc == 0xFFFD) { + ret |= RSPAMD_UNICODE_NORM_UNNORMAL; + } + U8_APPEND((uint8_t *) start, i, *len, uc, err); + + if (err) { + ret |= RSPAMD_UNICODE_NORM_ERROR; + break; + } + } + } + + return i; + }; + + if (is_normal != UNORM_YES) { + /* Need to normalise */ + ret |= RSPAMD_UNICODE_NORM_UNNORMAL; + + auto normalised = nfkc_norm->normalize(uc_string, uc_err); + + if (!U_SUCCESS(uc_err)) { + return RSPAMD_UNICODE_NORM_ERROR; + } + + *len = filter_zw_spaces_and_push_back(normalised); + } + else { + *len = filter_zw_spaces_and_push_back(uc_string); + } + + return static_cast<enum rspamd_utf8_normalise_result>(ret); +} + +gchar * +rspamd_utf8_transliterate(const gchar *start, gsize len, gsize *target_len) +{ + UErrorCode uc_err = U_ZERO_ERROR; + + static std::unique_ptr<icu::Transliterator> transliterator; + + if (!transliterator) { + UParseError parse_err; + static const auto rules = icu::UnicodeString{":: Any-Latin;" + ":: [:Nonspacing Mark:] Remove;" + ":: [:Punctuation:] Remove;" + ":: [:Symbol:] Remove;" + ":: 
[:Format:] Remove;" + ":: Latin-ASCII;" + ":: Lower();" + ":: NULL;" + "[:Space Separator:] > ' '"}; + transliterator = std::unique_ptr<icu::Transliterator>( + icu::Transliterator::createFromRules("RspamdTranslit", rules, UTRANS_FORWARD, parse_err, uc_err)); + + if (U_FAILURE(uc_err) || !transliterator) { + auto context = icu::UnicodeString(parse_err.postContext, sizeof(parse_err.preContext) / sizeof(UChar)); + g_error("fatal error: cannot init libicu transliteration engine: %s, line: %d, offset: %d", + u_errorName(uc_err), parse_err.line, parse_err.offset); + abort(); + } + } + + auto uc_string = icu::UnicodeString::fromUTF8(icu::StringPiece(start, len)); + transliterator->transliterate(uc_string); + + // We assume that all characters are now ascii + auto dest_len = uc_string.length(); + gchar *dest = (gchar *) g_malloc(dest_len + 1); + auto sink = icu::CheckedArrayByteSink(dest, dest_len); + uc_string.toUTF8(sink); + + *target_len = sink.NumberOfBytesWritten(); + dest[*target_len] = '\0'; + + return dest; +} + +struct rspamd_icu_collate_storage { + icu::Collator *collator = nullptr; + rspamd_icu_collate_storage() + { + UErrorCode uc_err = U_ZERO_ERROR; + collator = icu::Collator::createInstance(icu::Locale::getEnglish(), uc_err); + + if (U_FAILURE(uc_err) || collator == nullptr) { + g_error("fatal error: cannot init libicu collation engine: %s", + u_errorName(uc_err)); + abort(); + } + /* Ignore all difference except functional */ + collator->setStrength(icu::Collator::PRIMARY); + } + + ~rspamd_icu_collate_storage() + { + if (collator) { + delete collator; + } + } +}; + +static rspamd_icu_collate_storage collate_storage; + +int rspamd_utf8_strcmp_sizes(const char *s1, gsize n1, const char *s2, gsize n2) +{ + if (n1 >= std::numeric_limits<int>::max() || n2 >= std::numeric_limits<int>::max()) { + /* + * It's hard to say what to do here... 
But libicu wants int, so we fall + * back to g_ascii_strcasecmp which can deal with size_t + */ + if (n1 == n2) { + return g_ascii_strncasecmp(s1, s2, n1); + } + else { + return n1 - n2; + } + } + + UErrorCode success = U_ZERO_ERROR; + auto res = collate_storage.collator->compareUTF8({s1, (int) n1}, {s2, (int) n2}, + success); + + switch (res) { + case UCOL_EQUAL: + return 0; + case UCOL_GREATER: + return 1; + case UCOL_LESS: + default: + return -1; + } +} + +int rspamd_utf8_strcmp(const char *s1, const char *s2, gsize n) +{ + return rspamd_utf8_strcmp_sizes(s1, n, s2, n); +} + +TEST_SUITE("utf8 utils") +{ + TEST_CASE("utf8 normalise") + { + std::tuple<const char *, const char *, int> cases[] = { + {"abc", "abc", RSPAMD_UNICODE_NORM_NORMAL}, + {"тест", "тест", RSPAMD_UNICODE_NORM_NORMAL}, + /* Zero width spaces */ + {"\xE2\x80\x8B" + "те" + "\xE2\x80\x8B" + "ст", + "тест", RSPAMD_UNICODE_NORM_ZERO_SPACES}, + /* Special case of diacritic */ + {"13_\u0020\u0308\u0301\u038e\u03ab", "13_ ̈́ΎΫ", RSPAMD_UNICODE_NORM_UNNORMAL}, + // String containing a non-joiner character + {"س\u200Cت", "ست", RSPAMD_UNICODE_NORM_ZERO_SPACES}, + // String containing a soft hyphen + {"in\u00ADter\u00ADest\u00ADing", "interesting", RSPAMD_UNICODE_NORM_ZERO_SPACES}, + // String with ligature + {"fish", "fish", RSPAMD_UNICODE_NORM_UNNORMAL}, + // String with accented characters and zero-width spaces + {"café\u200Blatté\u200C", "cafélatté", RSPAMD_UNICODE_NORM_ZERO_SPACES}, + /* Same with zw spaces */ + {"13\u200C_\u0020\u0308\u0301\u038e\u03ab", "13_ ̈́ΎΫ", + RSPAMD_UNICODE_NORM_UNNORMAL | RSPAMD_UNICODE_NORM_ZERO_SPACES}, + /* Buffer overflow case */ + {"u\xC2\xC2\xC2\xC2\xC2\xC2" + "abcdef" + "abcdef", + "u\uFFFD\uFFFD\uFFFD\uFFFD\uFFFD\uFFFD", + RSPAMD_UNICODE_NORM_UNNORMAL | RSPAMD_UNICODE_NORM_ERROR}, + // String with a mix of special characters, ligatures, and zero-width spaces + {"fish\u200Bcafé\u200C\u200Dlatté\u200D\u00AD", "fishcafé\u200Dlatté\u200D", RSPAMD_UNICODE_NORM_UNNORMAL | 
RSPAMD_UNICODE_NORM_ZERO_SPACES}, + // Empty string + {"", "", RSPAMD_UNICODE_NORM_NORMAL}, + }; + + for (const auto &c: cases) { + std::string cpy{std::get<0>(c)}; + auto ns = cpy.size(); + auto res = rspamd_normalise_unicode_inplace(cpy.data(), &ns); + cpy.resize(ns); + CHECK(cpy == std::string(std::get<1>(c))); + CHECK(res == std::get<2>(c)); + } + } + + TEST_CASE("utf8 trim") + { + std::pair<const char *, const char *> cases[] = { + {" \u200B" + "abc ", + "abc"}, + {" ", ""}, + {" a", "a"}, + {"a ", "a"}, + {"a a", "a a"}, + {"abc", "abc"}, + {"a ", "a"}, + {" abc ", "abc"}, + {" abc ", "abc"}, + {" \xE2\x80\x8B" + "a\xE2\x80\x8B" + "bc ", + "a\xE2\x80\x8B" + "bc"}, + {" \xE2\x80\x8B" + "abc\xE2\x80\x8B ", + "abc"}, + {" \xE2\x80\x8B" + "abc \xE2\x80\x8B ", + "abc"}, + }; + + for (const auto &c: cases) { + std::string cpy{c.first}; + auto ns = cpy.size(); + auto *nstart = rspamd_string_unicode_trim_inplace(cpy.data(), &ns); + std::string res{nstart, ns}; + CHECK(res == std::string{c.second}); + } + } + + + TEST_CASE("utf8 strcmp") + { + std::tuple<const char *, const char *, int, int> cases[] = { + {"abc", "abc", -1, 0}, + {"", "", -1, 0}, + {"aBc", "AbC", -1, 0}, + {"abc", "ab", 2, 0}, + {"теСт", "ТесТ", -1, 0}, + {"теСт", "Тезт", 4, 0}, + {"теСт", "Тезт", -1, 1}, + {"abc", "ABD", -1, -1}, + {"\0a\0", "\0a\1", 2, 0}, + {"\0a\0", "\0b\1", 3, -1}, + }; + + for (const auto &c: cases) { + auto [s1, s2, n, expected] = c; + if (n == -1) { + n = MIN(strlen(s1), strlen(s2)); + } + SUBCASE((std::string("test case: ") + s1 + " <=> " + s2).c_str()) + { + auto ret = rspamd_utf8_strcmp(s1, s2, n); + CHECK(ret == expected); + } + } + } + + TEST_CASE("transliterate") + { + using namespace std::literals; + std::tuple<std::string_view, const char *> cases[] = { + {"abc"sv, "abc"}, + {""sv, ""}, + {"тест"sv, "test"}, + // Diacritic to ascii + {"Ύ"sv, "y"}, + // Chinese to pinyin + {"你好"sv, "ni hao"}, + // Japanese to romaji + {"こんにちは"sv, "konnichiha"}, + // Devanagari to latin 
+ {"नमस्ते"sv, "namaste"}, + // Arabic to latin + {"مرحبا"sv, "mrhba"}, + // Remove of punctuation + {"a.b.c"sv, "abc"}, + // Lowercase + {"ABC"sv, "abc"}, + // Remove zero-width spaces + {"\xE2\x80\x8B" + "abc\xE2\x80\x8B" + "def"sv, + "abcdef"}, + }; + + for (const auto &c: cases) { + auto [s1, s2] = c; + SUBCASE((std::string("test case: ") + std::string(s1) + " => " + s2).c_str()) + { + gsize tlen; + auto *ret = rspamd_utf8_transliterate(s1.data(), s1.length(), &tlen); + CHECK(tlen == strlen(s2)); + CHECK(strcmp(s2, ret) == 0); + } + } + } +}
\ No newline at end of file diff --git a/src/libutil/cxx/utf8_util.h b/src/libutil/cxx/utf8_util.h new file mode 100644 index 0000000..044beae --- /dev/null +++ b/src/libutil/cxx/utf8_util.h @@ -0,0 +1,85 @@ +/*- + * Copyright 2021 Vsevolod Stakhov + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#pragma once + +#ifndef RSPAMD_UTF8_UTIL_H +#define RSPAMD_UTF8_UTIL_H + +#include "config.h" +#include "mem_pool.h" + +#ifdef __cplusplus +extern "C" { +#endif + +/** + * Trims leading and trailing unicode whitespace and zero-width spaces from a string + * @param str start of the string (the buffer itself is not modified) + * @param len in: length of the string; out: length of the trimmed string + * @return pointer to the start of the trimmed string inside `str` + */ +const char *rspamd_string_unicode_trim_inplace(const char *str, size_t *len); + +/* Bit flags, rspamd_normalise_unicode_inplace returns a bitwise OR of them */ +enum rspamd_utf8_normalise_result { + RSPAMD_UNICODE_NORM_NORMAL = 0, + RSPAMD_UNICODE_NORM_UNNORMAL = (1 << 0), + RSPAMD_UNICODE_NORM_ZERO_SPACES = (1 << 1), + RSPAMD_UNICODE_NORM_ERROR = (1 << 2), + RSPAMD_UNICODE_NORM_OVERFLOW = (1 << 3) +}; + +/** + * Gets a string in UTF8, normalises it to the NFKC form and removes zero-width + * spaces; the result is written back to the same buffer + * @param start string to normalise (overwritten with the result) + * @param len in: length of the string; out: length of the resulting string + * @return bitwise OR of RSPAMD_UNICODE_NORM_* flags describing what has been found + */ +enum rspamd_utf8_normalise_result rspamd_normalise_unicode_inplace(gchar *start, gsize *len); + +/** + * Transliterate a string to ASCII + * @param start start of the UTF8 string + * @param len length of the string in bytes + * @param target_len output: length of the returned string (without the trailing zero) + * @return a new zero terminated string that should be freed with g_free + */ +gchar *rspamd_utf8_transliterate(const gchar 
*start, gsize len, gsize *target_len); + +/** + * Compare two strings using libicu collator + * @param s1 + * @param s2 + * @param n + * @return an integer greater than, equal to, or less than 0, according as the string s1 is greater than, equal to, or less than the string s2. + */ +int rspamd_utf8_strcmp(const char *s1, const char *s2, gsize n); +/** + * Similar to rspamd_utf8_strcmp but accepts two sizes + * @param s1 + * @param n1 + * @param s2 + * @param n2 + * @return + */ +int rspamd_utf8_strcmp_sizes(const char *s1, gsize n1, const char *s2, gsize n2); + +#ifdef __cplusplus +} +#endif + +#endif//RSPAMD_UTF8_UTIL_H diff --git a/src/libutil/cxx/util.hxx b/src/libutil/cxx/util.hxx new file mode 100644 index 0000000..32ec0b5 --- /dev/null +++ b/src/libutil/cxx/util.hxx @@ -0,0 +1,238 @@ +/* + * Copyright 2023 Vsevolod Stakhov + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +#ifndef RSPAMD_UTIL_HXX +#define RSPAMD_UTIL_HXX + +#pragma once + +#include <memory> +#include <array> +#include <string_view> +#include <optional> +#include <tuple> +#include <algorithm> + +/* + * Common C++ utilities + */ + +namespace rspamd { +/* + * Creates std::array from a standard C style array with automatic size calculation + */ +template<typename... 
Ts> +constexpr auto array_of(Ts &&...t) -> std::array<typename std::decay_t<typename std::common_type_t<Ts...>>, sizeof...(Ts)> +{ + using T = typename std::decay_t<typename std::common_type_t<Ts...>>; + return {{std::forward<T>(t)...}}; +} + +/** + * Find a value in a map + * @tparam C Map type + * @tparam K Key type + * @tparam V Value type + * @param c Map to search + * @param k Key to search + * @return Value if found or std::nullopt otherwise + */ +template<class C, class K, class V = typename C::mapped_type, typename std::enable_if_t<std::is_constructible_v<typename C::key_type, K> && std::is_constructible_v<typename C::mapped_type, V>, bool> = false> +constexpr auto find_map(const C &c, const K &k) -> std::optional<std::reference_wrapper<const V>> +{ + auto f = c.find(k); + + if (f != c.end()) { + return std::cref<V>(f->second); + } + + return std::nullopt; +} + + +template<typename It> +inline constexpr auto make_string_view_from_it(It begin, It end) +{ + using result_type = std::string_view; + + return result_type{((begin != end) ? 
&*begin : nullptr), + (typename result_type::size_type) std::max(std::distance(begin, end), + (typename result_type::difference_type) 0)}; +} + +/** + * Iterate over lines in a string, newline characters are dropped + * @tparam S string-like type (must be convertible to std::string_view) + * @tparam F functor type invocable with std::string_view + * @param input string to split into lines + * @param functor called once per line, without the terminating '\n' and any '\r' preceding it + * @return nothing + */ +template<class S, class F, typename std::enable_if_t<std::is_invocable_v<F, std::string_view> && std::is_constructible_v<std::string_view, S>, bool> = true> +inline auto string_foreach_line(const S &input, const F &functor) +{ + auto it = input.begin(); + auto end = input.end(); + + while (it != end) { + /* End of the current line: either the next '\n' or the end of input */ + auto eol = std::find(it, end, '\n'); + auto line_end = eol; + + /* Strip carriage returns preceding the newline (CRLF), never moving before `it` */ + while (line_end != it && *(line_end - 1) == '\r') { + --line_end; + } + + functor(make_string_view_from_it(it, line_end)); + /* Continue from the newline itself, not from the trimmed position */ + it = eol; + + if (it != end) { + ++it; + } + } +} + +/** + * Iterate over elements in a string + * @tparam S string type + * @tparam D delimiter type + * @tparam F functor type + * @param input string to iterate + * @param delim delimiter to use + * @param functor functor to call + * @param ignore_empty ignore empty elements + * @return nothing + */ +template<class S, class D, class F, + typename std::enable_if_t<std::is_invocable_v<F, std::string_view> && std::is_constructible_v<std::string_view, S> && std::is_constructible_v<std::string_view, D>, bool> = true> +inline auto string_foreach_delim(const S &input, const D &delim, const F &functor, const bool ignore_empty = true) -> void +{ + size_t first = 0; + auto sv_input = std::string_view{input}; + auto sv_delim = std::string_view{delim}; + + while (first < sv_input.size()) { + const auto second = sv_input.find_first_of(sv_delim, first); + + if (first != second || !ignore_empty) { + functor(sv_input.substr(first, second - first)); + } + + if (second == std::string_view::npos) { + break; + } + + first = second + 1; + } +} + +/** + * Split string on a character + * @tparam S string type + * @param input string to split + * @param chr character to split on + * 
@return pair of strings + */ +template<class S, typename std::enable_if_t<std::is_constructible_v<std::string_view, S>, bool> = true> +inline auto string_split_on(const S &input, std::string_view::value_type chr) -> std::pair<std::string_view, std::string_view> +{ + auto pos = std::find(std::begin(input), std::end(input), chr); + + if (pos != input.end()) { + auto first = std::string_view{std::begin(input), static_cast<std::size_t>(std::distance(std::begin(input), pos))}; + while (*pos == chr && pos != input.end()) { + ++pos; + } + auto last = std::string_view{pos, static_cast<std::size_t>(std::distance(pos, std::end(input)))}; + + return {first, last}; + } + + return {std::string_view{input}, std::string_view{}}; +} + +/** + * Enumerate for range loop + * @tparam T iterable type + * @tparam TIter iterator type + * @param iterable iterable object + * @return iterator object + */ +template<typename T, + typename TIter = decltype(std::begin(std::declval<T>())), + typename = decltype(std::end(std::declval<T>()))> +constexpr auto enumerate(T &&iterable) +{ + struct iterator { + size_t i; + TIter iter; + bool operator!=(const iterator &other) const + { + return iter != other.iter; + } + void operator++() + { + ++i; + ++iter; + } + auto operator*() const + { + return std::tie(i, *iter); + } + }; + struct iterable_wrapper { + T iterable; + auto begin() + { + return iterator{0, std::begin(iterable)}; + } + auto end() + { + return iterator{0, std::end(iterable)}; + } + }; + return iterable_wrapper{std::forward<T>(iterable)}; +} + +/** + * Allocator that cleans up memory in a secure way on destruction + * @tparam T + */ +template<class T> +class secure_mem_allocator : public std::allocator<T> { +public: + using value_type = typename std::allocator<T>::value_type; + using size_type = typename std::allocator<T>::size_type; + template<class U> + struct rebind { + typedef secure_mem_allocator<U> other; + }; + secure_mem_allocator() noexcept = default; + 
secure_mem_allocator(const secure_mem_allocator &_) noexcept + : std::allocator<T>(_) + { + } + template<class U> + explicit secure_mem_allocator(const secure_mem_allocator<U> &) noexcept + { + } + + /* Wipes the storage before returning it to the underlying std::allocator */ + void deallocate(value_type *p, size_type num) noexcept + { + /* `num` is a number of elements, whilst the wipe length is assumed to be in bytes */ + rspamd_explicit_memzero((void *) p, num * sizeof(value_type)); + std::allocator<T>::deallocate(p, num); + } +}; + + +}// namespace rspamd + +#endif//RSPAMD_UTIL_HXX diff --git a/src/libutil/cxx/util_tests.cxx b/src/libutil/cxx/util_tests.cxx new file mode 100644 index 0000000..6c3c177 --- /dev/null +++ b/src/libutil/cxx/util_tests.cxx @@ -0,0 +1,82 @@ +/* + * Copyright 2023 Vsevolod Stakhov + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include "util.hxx" + +#define DOCTEST_CONFIG_IMPLEMENTATION_IN_DLL +#include "doctest/doctest.h" +#include <vector> + +using namespace rspamd; +using namespace std::literals::string_view_literals; + +TEST_SUITE("cxx utils") +{ + TEST_CASE("string_split_on") + { + std::tuple<std::string_view, char, std::pair<std::string_view, std::string_view>> cases[] = { + {"test test"sv, ' ', std::pair{"test"sv, "test"sv}}, + {"test test"sv, ' ', std::pair{"test"sv, "test"sv}}, + {"test test "sv, ' ', std::pair{"test"sv, "test "sv}}, + {"testtest "sv, ' ', std::pair{"testtest"sv, ""sv}}, + {" testtest "sv, ' ', std::pair{""sv, "testtest "sv}}, + {"testtest"sv, ' ', std::pair{"testtest"sv, ""sv}}, + {""sv, ' ', std::pair{""sv, ""sv}}, + }; + + for (const auto &c: cases) { + auto res = string_split_on(std::get<0>(c), std::get<1>(c)); + auto expected = std::get<2>(c); + CHECK(res.first == expected.first); + CHECK(res.second == expected.second); + } + } + + TEST_CASE("string_foreach_delim") + { + std::tuple<std::string_view, std::string_view, std::pair<std::vector<std::string_view>, std::vector<std::string_view>>> cases[] = { + {"test"sv, ","sv, {{"test"}, {"test"}}}, + {"test,test"sv, ","sv, {{"test", "test"}, {"test", "test"}}}, + {"test, test"sv, ", "sv, {{"test", "test"}, {"test", "", "test"}}}, + {"test, test,,"sv, ", "sv, {{"test", "test"}, {"test", "", "test", ""}}}, + }; + + for (const auto &c: cases) { + auto res = std::vector<std::string_view>(); + string_foreach_delim(std::get<0>(c), std::get<1>(c), [&](const auto &v) { + res.push_back(v); + }); + + auto compare_vec = []<class T>(const std::vector<T> &v1, const std::vector<T> &v2) { + CHECK(v1.size() == v2.size()); + for (size_t i = 0; i < v1.size(); ++i) { + CHECK(v1[i] == v2[i]); + } + }; + + compare_vec(res, std::get<2>(c).first); + + res.clear(); + // Perform the same test but with no skip empty + string_foreach_delim( + std::get<0>(c), std::get<1>(c), [&](const auto &v) { + res.push_back(v); + }, + false); + 
compare_vec(res, std::get<2>(c).second); + } + } +}
\ No newline at end of file diff --git a/src/libutil/expression.c b/src/libutil/expression.c new file mode 100644 index 0000000..957c47f --- /dev/null +++ b/src/libutil/expression.c @@ -0,0 +1,1635 @@ +/*- + * Copyright 2016 Vsevolod Stakhov + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +#include "config.h" +#include "expression.h" +#include "printf.h" +#include "regexp.h" +#include "util.h" +#include "utlist.h" +#include "ottery.h" +#include "libserver/logger.h" +#include "libcryptobox/cryptobox.h" +#include <math.h> + +#define RSPAMD_EXPR_FLAG_NEGATE (1 << 0) +#define RSPAMD_EXPR_FLAG_PROCESSED (1 << 1) + +#define MIN_RESORT_EVALS 50 +#define MAX_RESORT_EVALS 150 + +enum rspamd_expression_elt_type { + ELT_OP = 0, + ELT_ATOM, + ELT_LIMIT +}; + +enum rspamd_expression_op_flag { + RSPAMD_EXPRESSION_UNARY = 1u << 0u, + RSPAMD_EXPRESSION_BINARY = 1u << 1u, + RSPAMD_EXPRESSION_NARY = 1u << 2u, + RSPAMD_EXPRESSION_ARITHMETIC = 1u << 3u, + RSPAMD_EXPRESSION_LOGICAL = 1u << 4u, + RSPAMD_EXPRESSION_COMPARISON = 1u << 5u, +}; + +struct rspamd_expression_operation { + enum rspamd_expression_op op; + guint logical_priority; + guint op_flags; +}; + +struct rspamd_expression_elt { + enum rspamd_expression_elt_type type; + union { + rspamd_expression_atom_t *atom; + struct rspamd_expression_operation op; + gdouble lim; + } p; + + gint flags; + gint priority; + gdouble value; +}; + +struct rspamd_expression { + const struct rspamd_atom_subr *subr; + GArray 
*expressions; + GPtrArray *expression_stack; + GNode *ast; + gchar *log_id; + guint next_resort; + guint evals; +}; + +struct rspamd_expr_process_data { + gpointer *ud; + gint flags; + /* != NULL if trace is collected */ + GPtrArray *trace; + rspamd_expression_process_cb process_closure; +}; + +#define msg_debug_expression(...) rspamd_conditional_debug_fast(NULL, NULL, \ + rspamd_expression_log_id, "expression", e->log_id, \ + RSPAMD_LOG_FUNC, \ + __VA_ARGS__) + +#ifdef DEBUG_EXPRESSIONS +#define msg_debug_expression_verbose(...) rspamd_conditional_debug_fast(NULL, NULL, \ + rspamd_expression_log_id, "expression", e->log_id, \ + RSPAMD_LOG_FUNC, \ + __VA_ARGS__) +#else +#define msg_debug_expression_verbose(...) \ + do { \ + } while (0) +#endif + +INIT_LOG_MODULE(expression) + +static GQuark +rspamd_expr_quark(void) +{ + return g_quark_from_static_string("rspamd-expression"); +} + +static const gchar *RSPAMD_CONST_FUNCTION +rspamd_expr_op_to_str(enum rspamd_expression_op op); +static const gchar * +rspamd_expr_op_to_str(enum rspamd_expression_op op) +{ + const gchar *op_str = NULL; + + switch (op) { + case OP_AND: + op_str = "&"; + break; + case OP_OR: + op_str = "|"; + break; + case OP_MULT: + op_str = "*"; + break; + case OP_PLUS: + op_str = "+"; + break; + case OP_MINUS: + op_str = "-"; + break; + case OP_DIVIDE: + op_str = "/"; + break; + case OP_NOT: + op_str = "!"; + break; + case OP_GE: + op_str = ">="; + break; + case OP_GT: + op_str = ">"; + break; + case OP_LE: + op_str = "<="; + break; + case OP_LT: + op_str = "<"; + break; + case OP_EQ: + op_str = "=="; + break; + case OP_NE: + op_str = "!="; + break; + case OP_OBRACE: + op_str = "("; + break; + case OP_CBRACE: + op_str = ")"; + break; + default: + op_str = "???"; + break; + } + + return op_str; +} + +#define G_ARRAY_LAST(ar, type) (&g_array_index((ar), type, (ar)->len - 1)) + +static void +rspamd_expr_stack_elt_push(GPtrArray *stack, + gpointer elt) +{ + g_ptr_array_add(stack, elt); +} + + +static 
gpointer +rspamd_expr_stack_elt_pop(GPtrArray *stack) +{ + gpointer e; + gint idx; + + if (stack->len == 0) { + return NULL; + } + + idx = stack->len - 1; + e = g_ptr_array_index(stack, idx); + g_ptr_array_remove_index_fast(stack, idx); + + return e; +} + + +static void +rspamd_expr_stack_push(struct rspamd_expression *expr, + gpointer elt) +{ + rspamd_expr_stack_elt_push(expr->expression_stack, elt); +} + +static gpointer +rspamd_expr_stack_pop(struct rspamd_expression *expr) +{ + return rspamd_expr_stack_elt_pop(expr->expression_stack); +} + +static gpointer +rspamd_expr_stack_peek(struct rspamd_expression *expr) +{ + gpointer e; + gint idx; + GPtrArray *stack = expr->expression_stack; + + if (stack->len == 0) { + return NULL; + } + + idx = stack->len - 1; + e = g_ptr_array_index(stack, idx); + + return e; +} + +/* + * Return operation priority + */ +static gint RSPAMD_CONST_FUNCTION +rspamd_expr_logic_priority(enum rspamd_expression_op op); +static gint +rspamd_expr_logic_priority(enum rspamd_expression_op op) +{ + gint ret = 0; + + switch (op) { + case OP_NOT: + ret = 7; + break; + case OP_MULT: + case OP_DIVIDE: + ret = 6; + break; + case OP_PLUS: + case OP_MINUS: + ret = 5; + break; + case OP_GE: + case OP_GT: + case OP_LE: + case OP_LT: + case OP_EQ: + case OP_NE: + ret = 4; + break; + case OP_AND: + ret = 3; + break; + case OP_OR: + ret = 2; + break; + case OP_OBRACE: + case OP_CBRACE: + ret = 1; + break; + case OP_INVALID: + ret = -1; + break; + } + + return ret; +} + +static guint RSPAMD_CONST_FUNCTION +rspamd_expr_op_flags(enum rspamd_expression_op op); + +static guint +rspamd_expr_op_flags(enum rspamd_expression_op op) +{ + guint ret = 0; + + switch (op) { + case OP_NOT: + ret |= RSPAMD_EXPRESSION_UNARY | RSPAMD_EXPRESSION_LOGICAL; + break; + case OP_MULT: + ret |= RSPAMD_EXPRESSION_NARY | RSPAMD_EXPRESSION_ARITHMETIC; + break; + case OP_DIVIDE: + ret |= RSPAMD_EXPRESSION_BINARY | RSPAMD_EXPRESSION_ARITHMETIC; + break; + case OP_PLUS: + ret |= 
RSPAMD_EXPRESSION_NARY | RSPAMD_EXPRESSION_ARITHMETIC; + break; + case OP_MINUS: + ret |= RSPAMD_EXPRESSION_BINARY | RSPAMD_EXPRESSION_ARITHMETIC; + break; + case OP_GE: + case OP_GT: + case OP_LE: + case OP_LT: + case OP_EQ: + case OP_NE: + ret |= RSPAMD_EXPRESSION_BINARY | RSPAMD_EXPRESSION_COMPARISON; + break; + case OP_AND: + case OP_OR: + ret |= RSPAMD_EXPRESSION_NARY | RSPAMD_EXPRESSION_LOGICAL; + break; + case OP_OBRACE: + case OP_CBRACE: + case OP_INVALID: + break; + } + + return ret; +} + +/* + * Return FALSE if symbol is not operation symbol (operand) + * Return TRUE if symbol is operation symbol + */ +static gboolean RSPAMD_CONST_FUNCTION +rspamd_expr_is_operation_symbol(gchar a); +static gboolean +rspamd_expr_is_operation_symbol(gchar a) +{ + switch (a) { + case '!': + case '&': + case '|': + case '(': + case ')': + case '>': + case '<': + case '+': + case '*': + case '-': + case '/': + case '=': + return TRUE; + } + + return FALSE; +} + +static gboolean +rspamd_expr_is_operation(struct rspamd_expression *e, + const gchar *p, const gchar *end, rspamd_regexp_t *num_re) +{ + if (rspamd_expr_is_operation_symbol(*p)) { + if (p + 1 < end) { + gchar t = *(p + 1); + + if (t == ':') { + /* Special case, treat it as an atom */ + } + else if (*p == '/') { + /* Lookahead for division operation to distinguish from regexp */ + const gchar *track = p + 1; + + /* Skip spaces */ + while (track < end && g_ascii_isspace(*track)) { + track++; + } + + /* Check for a number */ + if (rspamd_regexp_search(num_re, + track, + end - track, + NULL, + NULL, + FALSE, + NULL)) { + msg_debug_expression_verbose("found divide operation"); + return TRUE; + } + + msg_debug_expression_verbose("false divide operation"); + /* Fallback to PARSE_ATOM state */ + } + else if (*p == '-') { + /* - is used in composites, so we need to distinguish - from + * 1) unary minus of a limit! 
+ * 2) -BLAH in composites + * Decision is simple: require a space after binary `-` op + */ + if (g_ascii_isspace(t)) { + return TRUE; + } + /* Fallback to PARSE_ATOM state */ + msg_debug_expression_verbose("false minus operation"); + } + else { + /* Generic operation */ + return TRUE; + } + } + else { + /* Last op */ + return TRUE; + } + } + + return FALSE; +} + +/* Return character representation of operation */ +static enum rspamd_expression_op +rspamd_expr_str_to_op(const gchar *a, const gchar *end, const gchar **next) +{ + enum rspamd_expression_op op = OP_INVALID; + + g_assert(a < end); + + switch (*a) { + case '!': + case '&': + case '|': + case '+': + case '*': + case '/': + case '-': + case '(': + case ')': + case '=': { + if (a < end - 1) { + if ((a[0] == '&' && a[1] == '&') || + (a[0] == '|' && a[1] == '|') || + (a[0] == '!' && a[1] == '=') || + (a[0] == '=' && a[1] == '=')) { + *next = a + 2; + } + else { + *next = a + 1; + } + } + else { + *next = end; + } + /* XXX: not especially effective */ + switch (*a) { + case '!': + if (a < end - 1 && a[1] == '=') { + op = OP_NE; + } + else { + op = OP_NOT; + } + break; + case '&': + op = OP_AND; + break; + case '*': + op = OP_MULT; + break; + case '|': + op = OP_OR; + break; + case '+': + op = OP_PLUS; + break; + case '/': + op = OP_DIVIDE; + break; + case '-': + op = OP_MINUS; + break; + case '=': + op = OP_EQ; + break; + case ')': + op = OP_CBRACE; + break; + case '(': + op = OP_OBRACE; + break; + default: + op = OP_INVALID; + break; + } + break; + } + case 'O': + case 'o': + if ((gulong) (end - a) >= sizeof("or") && + g_ascii_strncasecmp(a, "or", sizeof("or") - 1) == 0) { + *next = a + sizeof("or") - 1; + op = OP_OR; + } + break; + case 'A': + case 'a': + if ((gulong) (end - a) >= sizeof("and") && + g_ascii_strncasecmp(a, "and", sizeof("and") - 1) == 0) { + *next = a + sizeof("and") - 1; + op = OP_AND; + } + break; + case 'N': + case 'n': + if ((gulong) (end - a) >= sizeof("not") && + g_ascii_strncasecmp(a, 
"not", sizeof("not") - 1) == 0) { + *next = a + sizeof("not") - 1; + op = OP_NOT; + } + break; + case '>': + if (a < end - 1 && a[1] == '=') { + *next = a + 2; + op = OP_GE; + } + else { + *next = a + 1; + op = OP_GT; + } + break; + case '<': + if (a < end - 1 && a[1] == '=') { + *next = a + 2; + op = OP_LE; + } + else { + *next = a + 1; + op = OP_LT; + } + break; + default: + op = OP_INVALID; + break; + } + + return op; +} + +static void +rspamd_expression_destroy(struct rspamd_expression *expr) +{ + guint i; + struct rspamd_expression_elt *elt; + + if (expr != NULL) { + + if (expr->subr->destroy) { + /* Free atoms */ + for (i = 0; i < expr->expressions->len; i++) { + elt = &g_array_index(expr->expressions, + struct rspamd_expression_elt, i); + + if (elt->type == ELT_ATOM) { + expr->subr->destroy(elt->p.atom); + } + } + } + + if (expr->expressions) { + g_array_free(expr->expressions, TRUE); + } + if (expr->expression_stack) { + g_ptr_array_free(expr->expression_stack, TRUE); + } + if (expr->ast) { + g_node_destroy(expr->ast); + } + if (expr->log_id) { + g_free(expr->log_id); + } + + g_free(expr); + } +} + +static gboolean +rspamd_ast_add_node(struct rspamd_expression *e, + GPtrArray *operands, + struct rspamd_expression_elt *op, + GError **err) +{ + + GNode *res, *a1, *a2, *test; + + g_assert(op->type == ELT_OP); + + if (op->p.op.op_flags & RSPAMD_EXPRESSION_UNARY) { + /* Unary operator */ + struct rspamd_expression_elt *test_elt; + + res = g_node_new(op); + a1 = rspamd_expr_stack_elt_pop(operands); + + if (a1 == NULL) { + g_set_error(err, rspamd_expr_quark(), EINVAL, "no operand to " + "unary '%s' operation", + rspamd_expr_op_to_str(op->p.op.op)); + g_node_destroy(res); + + return FALSE; + } + + g_node_append(res, a1); + test_elt = a1->data; + + if (test_elt->type == ELT_ATOM) { + test_elt->p.atom->parent = res; + msg_debug_expression("added unary op %s to AST; operand: %*s", + rspamd_expr_op_to_str(op->p.op.op), + (int) test_elt->p.atom->len, 
test_elt->p.atom->str); + } + else { + msg_debug_expression("added unary op %s to AST; operand type: %d", + rspamd_expr_op_to_str(op->p.op.op), + test_elt->type); + } + } + else { + struct rspamd_expression_elt *e1, *e2; + /* For binary/nary operators we might want to examine chains */ + a2 = rspamd_expr_stack_elt_pop(operands); + a1 = rspamd_expr_stack_elt_pop(operands); + + if (a2 == NULL) { + g_set_error(err, rspamd_expr_quark(), EINVAL, "no left operand to " + "'%s' operation", + rspamd_expr_op_to_str(op->p.op.op)); + return FALSE; + } + + if (a1 == NULL) { + g_set_error(err, rspamd_expr_quark(), EINVAL, "no right operand to " + "'%s' operation", + rspamd_expr_op_to_str(op->p.op.op)); + return FALSE; + } + + /* Nary stuff */ + if (op->p.op.op_flags & RSPAMD_EXPRESSION_NARY) { + /* + * We convert a set of ops like X + Y + Z to a nary tree like + * X Y Z + + * for the longest possible prefix of atoms/limits + */ + + /* First try with a1 */ + test = a1; + e1 = test->data; + + if (e1->type == ELT_OP && e1->p.op.op == op->p.op.op) { + /* Add children */ + g_node_append(test, a2); + rspamd_expr_stack_elt_push(operands, a1); + + msg_debug_expression("added nary op %s to AST merged with the first operand", + rspamd_expr_op_to_str(op->p.op.op)); + + return TRUE; + } + + /* Now test a2 */ + test = a2; + e2 = test->data; + + if (e2->type == ELT_OP && e2->p.op.op == op->p.op.op) { + /* Add children */ + g_node_prepend(test, a1); + rspamd_expr_stack_elt_push(operands, a2); + + msg_debug_expression("added nary op %s to AST merged with the second operand", + rspamd_expr_op_to_str(op->p.op.op)); + + return TRUE; + } + } + + /* No optimizations possible, so create a new level */ + res = g_node_new(op); + g_node_append(res, a1); + g_node_append(res, a2); + + e1 = a1->data; + e2 = a2->data; + + if (e1->type == ELT_ATOM) { + e1->p.atom->parent = res; + } + + if (e2->type == ELT_ATOM) { + e2->p.atom->parent = res; + } + + if (e1->type == ELT_ATOM && e2->type == ELT_ATOM) { + 
msg_debug_expression("added binary op %s to AST; operands: (%*s; %*s)", + rspamd_expr_op_to_str(op->p.op.op), + (int) e1->p.atom->len, e1->p.atom->str, + (int) e2->p.atom->len, e2->p.atom->str); + } + else { + msg_debug_expression("added binary op %s to AST; operands (types): (%d; %d)", + rspamd_expr_op_to_str(op->p.op.op), + e1->type, + e2->type); + } + } + + /* Push back resulting node to the stack */ + rspamd_expr_stack_elt_push(operands, res); + + return TRUE; +} + +static gboolean +rspamd_ast_priority_traverse(GNode *node, gpointer d) +{ + struct rspamd_expression_elt *elt = node->data, *cur_elt; + struct rspamd_expression *expr = d; + gint cnt = 0; + GNode *cur; + + if (node->children) { + cur = node->children; + while (cur) { + cur_elt = cur->data; + cnt += cur_elt->priority; + cur = cur->next; + } + elt->priority = cnt; + } + else { + /* It is atom or limit */ + g_assert(elt->type != ELT_OP); + + if (elt->type == ELT_LIMIT) { + /* Always push limit first */ + elt->priority = 0; + } + else { + elt->priority = RSPAMD_EXPRESSION_MAX_PRIORITY; + + if (expr->subr->priority != NULL) { + elt->priority = RSPAMD_EXPRESSION_MAX_PRIORITY - + expr->subr->priority(elt->p.atom); + } + elt->p.atom->hits = 0; + } + } + + return FALSE; +} + +#define ATOM_PRIORITY(a) ((a)->p.atom->hits / ((a)->p.atom->exec_time.mean > 0 ? 
(a)->p.atom->exec_time.mean * 10000000 : 1.0)) + +static gint +rspamd_ast_priority_cmp(GNode *a, GNode *b) +{ + struct rspamd_expression_elt *ea = a->data, *eb = b->data; + gdouble w1, w2; + + if (ea->type == ELT_LIMIT) { + return 1; + } + else if (eb->type == ELT_LIMIT) { + return -1; + } + + /* Special logic for atoms */ + if (ea->type == ELT_ATOM && eb->type == ELT_ATOM && + ea->priority == eb->priority) { + w1 = ATOM_PRIORITY(ea); + w2 = ATOM_PRIORITY(eb); + + ea->p.atom->hits = 0; + + return w1 - w2; + } + else { + return ea->priority - eb->priority; + } +} + +static gboolean +rspamd_ast_resort_traverse(GNode *node, gpointer unused) +{ + GNode *children, *last; + struct rspamd_expression_elt *elt; + + elt = (struct rspamd_expression_elt *) node->data; + + /* + * We sort merely logical operations, everything else is dangerous + */ + if (elt->type == ELT_OP && elt->p.op.op_flags & RSPAMD_EXPRESSION_LOGICAL) { + + if (node->children) { + + children = node->children; + last = g_node_last_sibling(children); + /* Needed for utlist compatibility */ + children->prev = last; + DL_SORT(node->children, rspamd_ast_priority_cmp); + /* Restore GLIB compatibility */ + children = node->children; + children->prev = NULL; + } + } + + return FALSE; +} + +static struct rspamd_expression_elt * +rspamd_expr_dup_elt(rspamd_mempool_t *pool, struct rspamd_expression_elt *elt) +{ + struct rspamd_expression_elt *n; + + n = rspamd_mempool_alloc(pool, sizeof(*n)); + memcpy(n, elt, sizeof(*n)); + + return n; +} + +gboolean +rspamd_parse_expression(const gchar *line, gsize len, + const struct rspamd_atom_subr *subr, gpointer subr_data, + rspamd_mempool_t *pool, GError **err, + struct rspamd_expression **target) +{ + struct rspamd_expression *e; + struct rspamd_expression_elt elt; + rspamd_expression_atom_t *atom; + rspamd_regexp_t *num_re; + enum rspamd_expression_op op, op_stack; + const gchar *p, *c, *end; + GPtrArray *operand_stack; + GNode *tmp; + + enum { + PARSE_ATOM = 0, + PARSE_OP, 
+ PARSE_LIM, + SKIP_SPACES + } state = PARSE_ATOM; + + g_assert(line != NULL); + g_assert(subr != NULL && subr->parse != NULL); + + if (len == 0) { + len = strlen(line); + } + + memset(&elt, 0, sizeof(elt)); + num_re = rspamd_regexp_cache_create(NULL, + "/^(?:[+-]?([0-9]*[.])?[0-9]+)(?:\\s+|[)]|$)/", NULL, NULL); + + p = line; + c = line; + end = line + len; + e = g_malloc0(sizeof(*e)); + e->expressions = g_array_new(FALSE, FALSE, + sizeof(struct rspamd_expression_elt)); + operand_stack = g_ptr_array_sized_new(32); + e->ast = NULL; + e->expression_stack = g_ptr_array_sized_new(32); + e->subr = subr; + e->evals = 0; + e->next_resort = ottery_rand_range(MAX_RESORT_EVALS) + MIN_RESORT_EVALS; + e->log_id = g_malloc0(RSPAMD_LOG_ID_LEN + 1); + guint64 h = rspamd_cryptobox_fast_hash(line, len, 0xdeadbabe); + rspamd_snprintf(e->log_id, RSPAMD_LOG_ID_LEN + 1, "%xL", h); + msg_debug_expression("start to parse expression '%*s'", (int) len, line); + + /* Shunting-yard algorithm */ + while (p < end) { + switch (state) { + case PARSE_ATOM: + if (g_ascii_isspace(*p)) { + state = SKIP_SPACES; + continue; + } + else if (rspamd_expr_is_operation(e, p, end, num_re)) { + /* Lookahead */ + state = PARSE_OP; + continue; + } + + /* + * First of all, we check some pre-conditions: + * 1) if we have 'and ' or 'or ' or 'not ' strings, they are op + * 2) if we have full numeric string, then we check for + * the following expression: + * ^\d+\s*[><]$ + * and check the operation on stack + */ + if ((gulong) (end - p) > sizeof("and ") && + (g_ascii_strncasecmp(p, "and ", sizeof("and ") - 1) == 0 || + g_ascii_strncasecmp(p, "not ", sizeof("not ") - 1) == 0)) { + state = PARSE_OP; + } + else if ((gulong) (end - p) > sizeof("or ") && + g_ascii_strncasecmp(p, "or ", sizeof("or ") - 1) == 0) { + state = PARSE_OP; + } + else { + /* + * If we have any comparison or arithmetic operator in the stack, then try + * to parse limit + */ + op = GPOINTER_TO_INT(rspamd_expr_stack_peek(e)); + + if (op == OP_MULT 
|| op == OP_MINUS || op == OP_DIVIDE || + op == OP_PLUS || (op >= OP_LT && op <= OP_NE)) { + if (rspamd_regexp_search(num_re, + p, + end - p, + NULL, + NULL, + FALSE, + NULL)) { + c = p; + state = PARSE_LIM; + continue; + } + /* Fallback to atom parsing */ + } + + /* Try to parse atom */ + atom = subr->parse(p, end - p, pool, subr_data, err); + if (atom == NULL || atom->len == 0) { + /* We couldn't parse the atom, so go out */ + if (err != NULL && *err == NULL) { + g_set_error(err, + rspamd_expr_quark(), + 500, + "Cannot parse atom: callback function failed" + " to parse '%.*s'", + (int) (end - p), + p); + } + goto error_label; + } + + if (atom->str == NULL) { + atom->str = p; + } + + p = p + atom->len; + + /* Push to output */ + elt.type = ELT_ATOM; + elt.p.atom = atom; + g_array_append_val(e->expressions, elt); + rspamd_expr_stack_elt_push(operand_stack, + g_node_new(rspamd_expr_dup_elt(pool, &elt))); + msg_debug_expression("found atom: %*s; pushed onto operand stack (%d size)", + (int) atom->len, atom->str, operand_stack->len); + } + break; + case PARSE_LIM: + if ((g_ascii_isdigit(*p) || *p == '-' || *p == '.') && p < end - 1) { + p++; + } + else { + if (p == end - 1 && g_ascii_isdigit(*p)) { + p++; + } + + if (p - c > 0) { + elt.type = ELT_LIMIT; + elt.p.lim = strtod(c, NULL); + g_array_append_val(e->expressions, elt); + rspamd_expr_stack_elt_push(operand_stack, + g_node_new(rspamd_expr_dup_elt(pool, &elt))); + msg_debug_expression("found limit: %.1f; pushed onto operand stack (%d size)", + elt.p.lim, operand_stack->len); + c = p; + state = SKIP_SPACES; + } + else { + g_set_error(err, rspamd_expr_quark(), 400, "Empty number"); + goto error_label; + } + } + break; + case PARSE_OP: + op = rspamd_expr_str_to_op(p, end, &p); + if (op == OP_INVALID) { + g_set_error(err, rspamd_expr_quark(), 500, "Bad operator %c", + *p); + goto error_label; + } + else if (op == OP_OBRACE) { + /* + * If the token is a left parenthesis, then push it onto + * the stack. 
+ */ + rspamd_expr_stack_push(e, GINT_TO_POINTER(op)); + msg_debug_expression("found obrace, pushed to operators stack (%d size)", + e->expression_stack->len); + } + else if (op == OP_CBRACE) { + /* + * Until the token at the top of the stack is a left + * parenthesis, pop operators off the stack onto the + * output queue. + * + * Pop the left parenthesis from the stack, + * but not onto the output queue. + * + * If the stack runs out without finding a left parenthesis, + * then there are mismatched parentheses. + */ + msg_debug_expression("found cbrace, rewind operators stack (%d size)", + e->expression_stack->len); + + do { + op = GPOINTER_TO_INT(rspamd_expr_stack_pop(e)); + + if (op == OP_INVALID) { + g_set_error(err, rspamd_expr_quark(), 600, + "Braces mismatch"); + goto error_label; + } + + guint op_priority = rspamd_expr_logic_priority(op); + msg_debug_expression("found op: %s; priority = %d", + rspamd_expr_op_to_str(op), op_priority); + + if (op != OP_OBRACE) { + elt.type = ELT_OP; + elt.p.op.op = op; + elt.p.op.op_flags = rspamd_expr_op_flags(op); + elt.p.op.logical_priority = op_priority; + g_array_append_val(e->expressions, elt); + + if (!rspamd_ast_add_node(e, operand_stack, + rspamd_expr_dup_elt(pool, &elt), err)) { + goto error_label; + } + } + + } while (op != OP_OBRACE); + } + else { + /* + * While there is an operator token, o2, at the top of + * the operator stack, and either: + * + * - o1 is left-associative and its precedence is less than + * or equal to that of o2, or + * - o1 is right associative, and has precedence less than + * that of o2, + * + * then pop o2 off the operator stack, onto the output queue; + * + * push o1 onto the operator stack. 
+ */ + + for (;;) { + op_stack = GPOINTER_TO_INT(rspamd_expr_stack_pop(e)); + + if (op_stack == OP_INVALID) { + /* Stack is empty */ + msg_debug_expression("no operations in operators stack"); + break; + } + + /* We ignore associativity for now */ + guint op_priority = rspamd_expr_logic_priority(op), + stack_op_priority = rspamd_expr_logic_priority(op_stack); + + msg_debug_expression("operators stack %d; operands stack: %d; " + "process operation '%s'(%d); pop operation '%s'(%d)", + e->expression_stack->len, + operand_stack->len, + rspamd_expr_op_to_str(op), op_priority, + rspamd_expr_op_to_str(op_stack), stack_op_priority); + + if (op_stack != OP_OBRACE && + op_priority < stack_op_priority) { + elt.type = ELT_OP; + elt.p.op.op = op_stack; + elt.p.op.op_flags = rspamd_expr_op_flags(op_stack); + elt.p.op.logical_priority = op_priority; + + g_array_append_val(e->expressions, elt); + + if (!rspamd_ast_add_node(e, operand_stack, + rspamd_expr_dup_elt(pool, &elt), err)) { + goto error_label; + } + } + else { + /* Push op_stack back */ + msg_debug_expression("operators stack %d; operands stack: %d; " + "process operation '%s'(%d); push back to stack '%s'(%d)", + e->expression_stack->len, + operand_stack->len, + rspamd_expr_op_to_str(op), op_priority, + rspamd_expr_op_to_str(op_stack), stack_op_priority); + rspamd_expr_stack_push(e, GINT_TO_POINTER(op_stack)); + break; + } + } + + /* Push new operator itself */ + msg_debug_expression("operators stack %d; operands stack: %d; " + "process operation '%s'; push to stack", + e->expression_stack->len, + operand_stack->len, + rspamd_expr_op_to_str(op)); + rspamd_expr_stack_push(e, GINT_TO_POINTER(op)); + } + + state = SKIP_SPACES; + break; + case SKIP_SPACES: + if (g_ascii_isspace(*p)) { + p++; + } + if (rspamd_expr_is_operation(e, p, end, num_re)) { + /* Lookahead */ + state = PARSE_OP; + } + else { + state = PARSE_ATOM; + } + break; + } + } + + /* Now we process the stack and push operators to the output */ + while ((op_stack 
= GPOINTER_TO_INT(rspamd_expr_stack_pop(e))) != OP_INVALID) { + msg_debug_expression("operators stack %d; operands stack: %d; " + "rewind stack; op: %s", + e->expression_stack->len, + operand_stack->len, + rspamd_expr_op_to_str(op_stack)); + + if (op_stack != OP_OBRACE) { + elt.type = ELT_OP; + elt.p.op.op = op_stack; + elt.p.op.op_flags = rspamd_expr_op_flags(op_stack); + elt.p.op.logical_priority = rspamd_expr_logic_priority(op_stack); + + g_array_append_val(e->expressions, elt); + if (!rspamd_ast_add_node(e, operand_stack, + rspamd_expr_dup_elt(pool, &elt), err)) { + goto error_label; + } + } + else { + g_set_error(err, rspamd_expr_quark(), 600, + "Braces mismatch"); + goto error_label; + } + } + + if (operand_stack->len != 1) { + g_set_error(err, rspamd_expr_quark(), 601, + "Operators mismatch: %d elts in stack", operand_stack->len); + goto error_label; + } + + e->ast = rspamd_expr_stack_elt_pop(operand_stack); + g_ptr_array_free(operand_stack, TRUE); + + /* Set priorities for branches */ + g_node_traverse(e->ast, G_POST_ORDER, G_TRAVERSE_ALL, -1, + rspamd_ast_priority_traverse, e); + + /* Now set less expensive branches to be evaluated first */ + g_node_traverse(e->ast, G_POST_ORDER, G_TRAVERSE_NON_LEAVES, -1, + rspamd_ast_resort_traverse, NULL); + + if (target) { + *target = e; + rspamd_mempool_add_destructor(pool, + (rspamd_mempool_destruct_t) rspamd_expression_destroy, e); + } + else { + rspamd_expression_destroy(e); + } + + return TRUE; + +error_label: + if (err && *err) { + msg_debug_expression("fatal expression parse error: %e", *err); + } + + while ((tmp = rspamd_expr_stack_elt_pop(operand_stack)) != NULL) { + g_node_destroy(tmp); + } + + g_ptr_array_free(operand_stack, TRUE); + rspamd_expression_destroy(e); + + return FALSE; +} + +/* + * Node optimizer function: skip nodes that are not relevant + */ +static gboolean +rspamd_ast_node_done(struct rspamd_expression_elt *elt, gdouble acc) +{ + gboolean ret = FALSE; + + g_assert(elt->type == ELT_OP); + + 
switch (elt->p.op.op) { + case OP_NOT: + ret = TRUE; + break; + case OP_AND: + ret = acc == 0; + break; + case OP_OR: + ret = acc != 0; + break; + default: + break; + } + + return ret; +} + + +static gdouble +rspamd_ast_do_unary_op(struct rspamd_expression_elt *elt, gdouble operand) +{ + gdouble ret; + g_assert(elt->type == ELT_OP); + + switch (elt->p.op.op) { + case OP_NOT: + ret = fabs(operand) > DBL_EPSILON ? 0.0 : 1.0; + break; + default: + g_assert_not_reached(); + } + + return ret; +} + +static gdouble +rspamd_ast_do_binary_op(struct rspamd_expression_elt *elt, gdouble op1, gdouble op2) +{ + gdouble ret; + + g_assert(elt->type == ELT_OP); + + switch (elt->p.op.op) { + case OP_MINUS: + ret = op1 - op2; + break; + case OP_DIVIDE: + ret = op1 / op2; + break; + case OP_GE: + ret = op1 >= op2; + break; + case OP_GT: + ret = op1 > op2; + break; + case OP_LE: + ret = op1 <= op2; + break; + case OP_LT: + ret = op1 < op2; + break; + case OP_EQ: + ret = op1 == op2; + break; + case OP_NE: + ret = op1 != op2; + break; + + case OP_NOT: + case OP_PLUS: + case OP_MULT: + case OP_AND: + case OP_OR: + default: + g_assert_not_reached(); + break; + } + + return ret; +} + +static gdouble +rspamd_ast_do_nary_op(struct rspamd_expression_elt *elt, gdouble val, gdouble acc) +{ + gdouble ret; + + g_assert(elt->type == ELT_OP); + + if (isnan(acc)) { + return val; + } + + switch (elt->p.op.op) { + case OP_PLUS: + ret = acc + val; + break; + case OP_MULT: + ret = acc * val; + break; + case OP_AND: + ret = (fabs(acc) > DBL_EPSILON) && (fabs(val) > DBL_EPSILON); + break; + case OP_OR: + ret = (fabs(acc) > DBL_EPSILON) || (fabs(val) > DBL_EPSILON); + break; + default: + case OP_NOT: + case OP_MINUS: + case OP_DIVIDE: + case OP_GE: + case OP_GT: + case OP_LE: + case OP_LT: + case OP_EQ: + case OP_NE: + g_assert_not_reached(); + break; + } + + return ret; +} + +static gdouble +rspamd_ast_process_node(struct rspamd_expression *e, GNode *node, + struct rspamd_expr_process_data *process_data) 
+{ + struct rspamd_expression_elt *elt; + GNode *cld; + gdouble acc = NAN; + float t1, t2; + gdouble val; + gboolean calc_ticks = FALSE; + __attribute__((unused)) const gchar *op_name = NULL; + + elt = node->data; + + switch (elt->type) { + case ELT_ATOM: + if (!(elt->flags & RSPAMD_EXPR_FLAG_PROCESSED)) { + /* + * Check once per 256 evaluations approx + */ + calc_ticks = (rspamd_random_uint64_fast() & 0xff) == 0xff; + if (calc_ticks) { + t1 = rspamd_get_ticks(TRUE); + } + + elt->value = process_data->process_closure(process_data->ud, elt->p.atom); + + if (fabs(elt->value) > DBL_EPSILON) { + elt->p.atom->hits++; + + if (process_data->trace) { + g_ptr_array_add(process_data->trace, elt->p.atom); + } + } + + if (calc_ticks) { + t2 = rspamd_get_ticks(TRUE); + rspamd_set_counter_ema(&elt->p.atom->exec_time, (t2 - t1), 0.5f); + } + + elt->flags |= RSPAMD_EXPR_FLAG_PROCESSED; + } + + acc = elt->value; + msg_debug_expression_verbose("atom: elt=%s; acc=%.1f", elt->p.atom->str, acc); + break; + case ELT_LIMIT: + + acc = elt->p.lim; + msg_debug_expression_verbose("limit: lim=%.1f; acc=%.1f;", elt->p.lim, acc); + break; + case ELT_OP: + g_assert(node->children != NULL); +#ifdef DEBUG_EXPRESSIONS + op_name = rspamd_expr_op_to_str(elt->p.op.op); +#endif + + if (elt->p.op.op_flags & RSPAMD_EXPRESSION_NARY) { + msg_debug_expression_verbose("proceed nary operation %s", op_name); + /* Proceed all ops in chain */ + DL_FOREACH(node->children, cld) + { + val = rspamd_ast_process_node(e, cld, process_data); + msg_debug_expression_verbose("before op: op=%s; acc=%.1f; val = %.2f", op_name, + acc, val); + acc = rspamd_ast_do_nary_op(elt, val, acc); + msg_debug_expression_verbose("after op: op=%s; acc=%.1f; val = %.2f", op_name, + acc, val); + + /* Check if we need to process further */ + if (!(process_data->flags & RSPAMD_EXPRESSION_FLAG_NOOPT)) { + if (rspamd_ast_node_done(elt, acc)) { + msg_debug_expression_verbose("optimizer: done"); + return acc; + } + } + } + } + else if 
(elt->p.op.op_flags & RSPAMD_EXPRESSION_BINARY) { + GNode *c1 = node->children, *c2; + + c2 = c1->next; + g_assert(c2->next == NULL); + gdouble val1, val2; + + msg_debug_expression_verbose("proceed binary operation %s", + op_name); + val1 = rspamd_ast_process_node(e, c1, process_data); + val2 = rspamd_ast_process_node(e, c2, process_data); + + msg_debug_expression_verbose("before op: op=%s; op1 = %.1f, op2 = %.1f", + op_name, val1, val2); + acc = rspamd_ast_do_binary_op(elt, val1, val2); + msg_debug_expression_verbose("after op: op=%s; res=%.1f", + op_name, acc); + } + else if (elt->p.op.op_flags & RSPAMD_EXPRESSION_UNARY) { + GNode *c1 = node->children; + + g_assert(c1->next == NULL); + + msg_debug_expression_verbose("proceed unary operation %s", + op_name); + val = rspamd_ast_process_node(e, c1, process_data); + + msg_debug_expression_verbose("before op: op=%s; op1 = %.1f", + op_name, val); + acc = rspamd_ast_do_unary_op(elt, val); + msg_debug_expression_verbose("after op: op=%s; res=%.1f", + op_name, acc); + } + break; + } + + return acc; +} + +static gboolean +rspamd_ast_cleanup_traverse(GNode *n, gpointer d) +{ + struct rspamd_expression_elt *elt = n->data; + + elt->value = 0; + elt->flags = 0; + + return FALSE; +} + +gdouble +rspamd_process_expression_closure(struct rspamd_expression *expr, + rspamd_expression_process_cb cb, + gint flags, + gpointer runtime_ud, + GPtrArray **track) +{ + struct rspamd_expr_process_data pd; + gdouble ret = 0; + + g_assert(expr != NULL); + /* Ensure that stack is empty at this point */ + g_assert(expr->expression_stack->len == 0); + + expr->evals++; + + memset(&pd, 0, sizeof(pd)); + pd.process_closure = cb; + pd.flags = flags; + pd.ud = runtime_ud; + + if (track) { + pd.trace = g_ptr_array_sized_new(32); + *track = pd.trace; + } + + ret = rspamd_ast_process_node(expr, expr->ast, &pd); + + /* Cleanup */ + g_node_traverse(expr->ast, G_IN_ORDER, G_TRAVERSE_ALL, -1, + rspamd_ast_cleanup_traverse, NULL); + + /* Check if we need to 
resort */ + if (expr->evals % expr->next_resort == 0) { + expr->next_resort = ottery_rand_range(MAX_RESORT_EVALS) + + MIN_RESORT_EVALS; + /* Set priorities for branches */ + g_node_traverse(expr->ast, G_POST_ORDER, G_TRAVERSE_ALL, -1, + rspamd_ast_priority_traverse, expr); + + /* Now set less expensive branches to be evaluated first */ + g_node_traverse(expr->ast, G_POST_ORDER, G_TRAVERSE_NON_LEAVES, -1, + rspamd_ast_resort_traverse, NULL); + } + + return ret; +} + +gdouble +rspamd_process_expression_track(struct rspamd_expression *expr, + gint flags, + gpointer runtime_ud, + GPtrArray **track) +{ + return rspamd_process_expression_closure(expr, + expr->subr->process, flags, runtime_ud, track); +} + +gdouble +rspamd_process_expression(struct rspamd_expression *expr, + gint flags, + gpointer runtime_ud) +{ + return rspamd_process_expression_closure(expr, + expr->subr->process, flags, runtime_ud, NULL); +} + +static gboolean +rspamd_ast_string_traverse(GNode *n, gpointer d) +{ + GString *res = d; + gint cnt; + GNode *cur; + struct rspamd_expression_elt *elt = n->data; + const char *op_str = NULL; + + if (elt->type == ELT_ATOM) { + rspamd_printf_gstring(res, "(%*s)", + (int) elt->p.atom->len, elt->p.atom->str); + } + else if (elt->type == ELT_LIMIT) { + if (elt->p.lim == (double) (gint64) elt->p.lim) { + rspamd_printf_gstring(res, "%L", (gint64) elt->p.lim); + } + else { + rspamd_printf_gstring(res, "%f", elt->p.lim); + } + } + else { + op_str = rspamd_expr_op_to_str(elt->p.op.op); + g_string_append(res, op_str); + + if (n->children) { + LL_COUNT(n->children, cur, cnt); + + if (cnt > 2) { + /* Print n-ary of the operator */ + g_string_append_printf(res, "(%d)", cnt); + } + } + } + + g_string_append_c(res, ' '); + + return FALSE; +} + +GString * +rspamd_expression_tostring(struct rspamd_expression *expr) +{ + GString *res; + + g_assert(expr != NULL); + + res = g_string_new(NULL); + g_node_traverse(expr->ast, G_POST_ORDER, G_TRAVERSE_ALL, -1, + 
rspamd_ast_string_traverse, res); + + /* Last space */ + if (res->len > 0) { + g_string_erase(res, res->len - 1, 1); + } + + return res; +} + +struct atom_foreach_cbdata { + rspamd_expression_atom_foreach_cb cb; + gpointer cbdata; +}; + +static gboolean +rspamd_ast_atom_traverse(GNode *n, gpointer d) +{ + struct atom_foreach_cbdata *data = d; + struct rspamd_expression_elt *elt = n->data; + rspamd_ftok_t tok; + + if (elt->type == ELT_ATOM) { + tok.begin = elt->p.atom->str; + tok.len = elt->p.atom->len; + + data->cb(&tok, data->cbdata); + } + + return FALSE; +} + +void rspamd_expression_atom_foreach(struct rspamd_expression *expr, + rspamd_expression_atom_foreach_cb cb, gpointer cbdata) +{ + struct atom_foreach_cbdata data; + + g_assert(expr != NULL); + + data.cb = cb; + data.cbdata = cbdata; + g_node_traverse(expr->ast, G_POST_ORDER, G_TRAVERSE_ALL, -1, + rspamd_ast_atom_traverse, &data); +} + +gboolean +rspamd_expression_node_is_op(GNode *node, enum rspamd_expression_op op) +{ + struct rspamd_expression_elt *elt; + + g_assert(node != NULL); + + elt = node->data; + + if (elt->type == ELT_OP && elt->p.op.op == op) { + return TRUE; + } + + return FALSE; +} diff --git a/src/libutil/expression.h b/src/libutil/expression.h new file mode 100644 index 0000000..ea4e102 --- /dev/null +++ b/src/libutil/expression.h @@ -0,0 +1,173 @@ +/*- + * Copyright 2016 Vsevolod Stakhov + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +#ifndef SRC_LIBUTIL_EXPRESSION_H_ +#define SRC_LIBUTIL_EXPRESSION_H_ + +#include "config.h" +#include "mem_pool.h" +#include "fstring.h" +#include "util.h" + +#ifdef __cplusplus +extern "C" { +#endif + +#define RSPAMD_EXPRESSION_MAX_PRIORITY 1024 + +#define RSPAMD_EXPRESSION_FLAG_NOOPT (1 << 0) + +enum rspamd_expression_op { + OP_INVALID = 0, + OP_PLUS, /* + */ + OP_MULT, /* * */ + OP_MINUS, /* - */ + OP_DIVIDE, /* / */ + OP_OR, /* || or | */ + OP_AND, /* && or & */ + OP_NOT, /* ! */ + OP_LT, /* < */ + OP_GT, /* > */ + OP_LE, /* <= */ + OP_GE, /* >= */ + OP_EQ, /* == */ + OP_NE, /* != */ + OP_OBRACE, /* ( */ + OP_CBRACE /* ) */ +}; + +typedef struct rspamd_expression_atom_s { + /* Parent node */ + GNode *parent; + /* Opaque userdata */ + gpointer data; + /* String representation of atom */ + const gchar *str; + /* Length of the string representation of atom */ + guint len; + /* Relative priority */ + gint priority; + guint hits; + struct rspamd_counter_data exec_time; +} rspamd_expression_atom_t; + +typedef gdouble (*rspamd_expression_process_cb)(gpointer runtime_data, + rspamd_expression_atom_t *atom); + +struct rspamd_atom_subr { + /* Parses atom from string and returns atom structure */ + rspamd_expression_atom_t *(*parse)(const gchar *line, gsize len, + rspamd_mempool_t *pool, gpointer ud, GError **err); + + /* Process atom via the opaque pointer (e.g. 
struct rspamd_task *) */ + rspamd_expression_process_cb process; + + /* Calculates the relative priority of the expression */ + gint (*priority)(rspamd_expression_atom_t *atom); + + void (*destroy)(rspamd_expression_atom_t *atom); +}; + +/* Opaque structure */ +struct rspamd_expression; + +/** + * Parse symbolic expression and create the expression using the specified subroutines for atoms processing + * @param line line to parse + * @param len length of the line (if 0 then line should be NULL terminated) + * @param subr subroutines for atoms parsing + * @param subr_data opaque dat pointer + * @param pool pool to use for memory allocations + * @param err error pointer + * @param target the target expression + * @return TRUE if an expression have been parsed + */ +gboolean rspamd_parse_expression(const gchar *line, gsize len, + const struct rspamd_atom_subr *subr, gpointer subr_data, + rspamd_mempool_t *pool, GError **err, + struct rspamd_expression **target); + +/** + * Process the expression and return its value using atom 'process' functions with the specified data pointer + * @param expr expression to process + * @param data opaque data pointer for all the atoms + * @return the value of expression + */ +gdouble rspamd_process_expression(struct rspamd_expression *expr, + gint flags, + gpointer runtime_ud); + +/** + * Process the expression and return its value using atom 'process' functions with the specified data pointer. 
+ * This function also accepts `track` argument where it writes matched atoms (those whose value is more than 0) + * @param expr expression to process + * @param data opaque data pointer for all the atoms + * @param track pointer array to atoms tracking + * @return the value of expression + */ +gdouble rspamd_process_expression_track(struct rspamd_expression *expr, + gint flags, + gpointer runtime_ud, + GPtrArray **track); + +/** + * Process the expression with the custom processor + * @param expr + * @param cb + * @param process_data + * @return + */ +gdouble rspamd_process_expression_closure(struct rspamd_expression *expr, + rspamd_expression_process_cb cb, + gint flags, + gpointer runtime_ud, + GPtrArray **track); + +/** + * Shows string representation of an expression + * @param expr expression to show + * @return freshly allocated string with expression + */ +GString *rspamd_expression_tostring(struct rspamd_expression *expr); + +/** + * Callback that is called on @see rspamd_expression_atom_foreach, atom is ephemeral + * and should not be modified within callback + */ +typedef void (*rspamd_expression_atom_foreach_cb)(const rspamd_ftok_t *atom, + gpointer ud); + +/** + * Traverse over all atoms in the expression + * @param expr expression + * @param cb callback to be called + * @param ud opaque data passed to `cb` + */ +void rspamd_expression_atom_foreach(struct rspamd_expression *expr, + rspamd_expression_atom_foreach_cb cb, gpointer cbdata); + +/** + * Checks if a specified node in AST is the specified operation + * @param node AST node packed in GNode container + * @param op operation to check + * @return TRUE if node is operation node and is exactly the specified option + */ +gboolean rspamd_expression_node_is_op(GNode *node, enum rspamd_expression_op op); + +#ifdef __cplusplus +} +#endif + +#endif /* SRC_LIBUTIL_EXPRESSION_H_ */ diff --git a/src/libutil/fstring.c b/src/libutil/fstring.c new file mode 100644 index 0000000..a921f32 --- /dev/null +++ 
b/src/libutil/fstring.c @@ -0,0 +1,482 @@ +/*- + * Copyright 2016 Vsevolod Stakhov + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +#include "fstring.h" +#include "str_util.h" +#include "contrib/fastutf8/fastutf8.h" +#include "contrib/mumhash/mum.h" + + +#ifdef WITH_JEMALLOC +#include <jemalloc/jemalloc.h> +#if (JEMALLOC_VERSION_MAJOR == 3 && JEMALLOC_VERSION_MINOR >= 6) || (JEMALLOC_VERSION_MAJOR > 3) +#define HAVE_MALLOC_SIZE 1 +#define sys_alloc_size(sz) nallocx(sz, 0) +#endif +#elif defined(__APPLE__) +#include <malloc/malloc.h> +#define HAVE_MALLOC_SIZE 1 +#define sys_alloc_size(sz) malloc_good_size(sz) +#endif + +static const gsize default_initial_size = 16; + +#define fstravail(s) ((s)->allocated - (s)->len) + +rspamd_fstring_t * +rspamd_fstring_new(void) +{ + rspamd_fstring_t *s; + + if ((s = malloc(default_initial_size + sizeof(*s))) == NULL) { + g_error("%s: failed to allocate %" G_GSIZE_FORMAT " bytes", + G_STRLOC, default_initial_size + sizeof(*s)); + + return NULL; + } + + s->len = 0; + s->allocated = default_initial_size; + + return s; +} + +rspamd_fstring_t * +rspamd_fstring_sized_new(gsize initial_size) +{ + rspamd_fstring_t *s; + gsize real_size = MAX(default_initial_size, initial_size); + + if ((s = malloc(real_size + sizeof(*s))) == NULL) { + g_error("%s: failed to allocate %" G_GSIZE_FORMAT " bytes", + G_STRLOC, real_size + sizeof(*s)); + + return NULL; + } + s->len = 0; + s->allocated = real_size; + + return s; +} + 
+rspamd_fstring_t * +rspamd_fstring_new_init(const gchar *init, gsize len) +{ + rspamd_fstring_t *s; + gsize real_size = MAX(default_initial_size, len); + + if ((s = malloc(real_size + sizeof(*s))) == NULL) { + g_error("%s: failed to allocate %" G_GSIZE_FORMAT " bytes", + G_STRLOC, real_size + sizeof(*s)); + + abort(); + } + + s->len = len; + s->allocated = real_size; + memcpy(s->str, init, len); + + return s; +} + +rspamd_fstring_t * +rspamd_fstring_assign(rspamd_fstring_t *str, const gchar *init, gsize len) +{ + gsize avail; + + if (str == NULL) { + return rspamd_fstring_new_init(init, len); + } + + avail = fstravail(str); + + if (avail < len) { + str = rspamd_fstring_grow(str, len); + } + + if (len > 0) { + memcpy(str->str, init, len); + } + + str->len = len; + + return str; +} + +void rspamd_fstring_free(rspamd_fstring_t *str) +{ + free(str); +} + +inline gsize +rspamd_fstring_suggest_size(gsize len, gsize allocated, gsize needed_len) +{ + gsize newlen, optlen = 0; + + if (allocated < 4096) { + newlen = MAX(len + needed_len, allocated * 2); + } + else { + newlen = MAX(len + needed_len, 1 + allocated * 3 / 2); + } + +#ifdef HAVE_MALLOC_SIZE + optlen = sys_alloc_size(newlen + sizeof(rspamd_fstring_t)); +#endif + + return MAX(newlen, optlen); +} + +rspamd_fstring_t * +rspamd_fstring_grow(rspamd_fstring_t *str, gsize needed_len) +{ + gsize newlen; + gpointer nptr; + + newlen = rspamd_fstring_suggest_size(str->len, str->allocated, needed_len); + + nptr = realloc(str, newlen + sizeof(*str)); + + if (nptr == NULL) { + /* Avoid memory leak */ + free(str); + g_error("%s: failed to re-allocate %" G_GSIZE_FORMAT " bytes", + G_STRLOC, newlen + sizeof(*str)); + abort(); + } + + str = nptr; + str->allocated = newlen; + + return str; +} + +rspamd_fstring_t * +rspamd_fstring_append(rspamd_fstring_t *str, const char *in, gsize len) +{ + if (str == NULL) { + str = rspamd_fstring_new_init(in, len); + } + else { + gsize avail = fstravail(str); + + if (avail < len) { + str = 
rspamd_fstring_grow(str, len); + } + + memcpy(str->str + str->len, in, len); + str->len += len; + } + + return str; +} + +rspamd_fstring_t * +rspamd_fstring_append_chars(rspamd_fstring_t *str, + char c, gsize len) +{ + if (str == NULL) { + str = rspamd_fstring_sized_new(len); + + memset(str->str + str->len, c, len); + str->len += len; + } + else { + gsize avail = fstravail(str); + + if (avail < len) { + str = rspamd_fstring_grow(str, len); + } + + memset(str->str + str->len, c, len); + str->len += len; + } + + return str; +} + +void rspamd_fstring_erase(rspamd_fstring_t *str, gsize pos, gsize len) +{ + if (pos < str->len) { + if (pos + len > str->len) { + len = str->len - pos; + } + + if (len == str->len - pos) { + /* Fast path */ + str->len = pos; + } + else { + memmove(str->str + pos, str->str + pos + len, str->len - pos); + str->len -= pos; + } + } + else { + /* Do nothing */ + } +} + +/* Compat code */ +static guint64 +fstrhash_c(guint64 c, guint64 hval) +{ + return mum_hash_step(hval, c); +} + + +/* + * Return hash value for a string + */ +guint32 +rspamd_fstrhash_lc(const rspamd_ftok_t *str, gboolean is_utf) +{ + gsize i; + guint64 hval; + const gchar *p, *end = NULL; + gunichar uc; + + if (str == NULL) { + return 0; + } + + p = str->begin; + hval = str->len; + end = p + str->len; + + if (is_utf) { + if (rspamd_fast_utf8_validate(p, str->len) != 0) { + return rspamd_fstrhash_lc(str, FALSE); + } + while (p < end) { + uc = g_unichar_tolower(g_utf8_get_char(p)); + hval = fstrhash_c(uc, hval); + p = g_utf8_next_char(p); + } + } + else { + gsize large_steps = str->len / sizeof(guint64); + for (i = 0; i < large_steps; i++, p += sizeof(guint64)) { + /* Copy to the uint64 lowercasing each byte */ + union { + char c[sizeof(guint64)]; + guint64 iu64; + } t; + for (int j = 0; j < sizeof(guint64); j++) { + t.c[j] = g_ascii_tolower(p[j]); + } + hval = fstrhash_c(t.iu64, hval); + } + + gsize remain = str->len % sizeof(guint64); + for (i = 0; i < remain; i++, p++) { + hval 
= fstrhash_c(g_ascii_tolower(*p), hval); + } + } + + return hval; +} + +gboolean +rspamd_fstring_equal(const rspamd_fstring_t *s1, + const rspamd_fstring_t *s2) +{ + g_assert(s1 != NULL && s2 != NULL); + + if (s1->len == s2->len) { + return (memcmp(s1->str, s2->str, s1->len) == 0); + } + + return FALSE; +} + +gint rspamd_fstring_casecmp(const rspamd_fstring_t *s1, + const rspamd_fstring_t *s2) +{ + gint ret = 0; + + g_assert(s1 != NULL && s2 != NULL); + + if (s1->len == s2->len) { + ret = rspamd_lc_cmp(s1->str, s2->str, s1->len); + } + else { + ret = s1->len - s2->len; + } + + return ret; +} + +gint rspamd_fstring_cmp(const rspamd_fstring_t *s1, + const rspamd_fstring_t *s2) +{ + g_assert(s1 != NULL && s2 != NULL); + + if (s1->len == s2->len) { + return memcmp(s1->str, s2->str, s1->len); + } + + return s1->len - s2->len; +} + +gint rspamd_ftok_casecmp(const rspamd_ftok_t *s1, + const rspamd_ftok_t *s2) +{ + gint ret = 0; + + g_assert(s1 != NULL && s2 != NULL); + + if (s1->len == s2->len) { + ret = rspamd_lc_cmp(s1->begin, s2->begin, s1->len); + } + else { + ret = s1->len - s2->len; + } + + return ret; +} + +gint rspamd_ftok_cmp(const rspamd_ftok_t *s1, + const rspamd_ftok_t *s2) +{ + g_assert(s1 != NULL && s2 != NULL); + + if (s1->len == s2->len) { + return memcmp(s1->begin, s2->begin, s1->len); + } + + return s1->len - s2->len; +} + +gboolean +rspamd_ftok_starts_with(const rspamd_ftok_t *s1, + const rspamd_ftok_t *s2) +{ + g_assert(s1 != NULL && s2 != NULL); + + if (s1->len >= s2->len) { + return !!(memcmp(s1->begin, s2->begin, s2->len) == 0); + } + + return FALSE; +} + +void rspamd_fstring_mapped_ftok_free(gpointer p) +{ + rspamd_ftok_t *tok = p; + rspamd_fstring_t *storage; + + storage = (rspamd_fstring_t *) (tok->begin - 2 * sizeof(gsize)); + rspamd_fstring_free(storage); + g_free(tok); +} + +rspamd_ftok_t * +rspamd_ftok_map(const rspamd_fstring_t *s) +{ + rspamd_ftok_t *tok; + + g_assert(s != NULL); + + tok = g_malloc(sizeof(*tok)); + tok->begin = s->str; + 
tok->len = s->len; + + return tok; +} + +char * +rspamd_fstring_cstr(const rspamd_fstring_t *s) +{ + char *result; + + if (s == NULL) { + return NULL; + } + + result = g_malloc(s->len + 1); + memcpy(result, s->str, s->len); + result[s->len] = '\0'; + + return result; +} + +char * +rspamd_ftok_cstr(const rspamd_ftok_t *s) +{ + char *result; + + if (s == NULL) { + return NULL; + } + + result = g_malloc(s->len + 1); + memcpy(result, s->begin, s->len); + result[s->len] = '\0'; + + return result; +} + +gboolean +rspamd_ftok_cstr_equal(const rspamd_ftok_t *s, const gchar *pat, + gboolean icase) +{ + gsize slen; + rspamd_ftok_t srch; + + g_assert(s != NULL); + g_assert(pat != NULL); + + slen = strlen(pat); + srch.begin = pat; + srch.len = slen; + + if (icase) { + return (rspamd_ftok_casecmp(s, &srch) == 0); + } + + return (rspamd_ftok_cmp(s, &srch) == 0); +} + +gchar * +rspamd_ftokdup(const rspamd_ftok_t *src) +{ + gchar *newstr; + + if (src == NULL) { + return NULL; + } + + newstr = g_malloc(src->len + 1); + memcpy(newstr, src->begin, src->len); + newstr[src->len] = '\0'; + + return newstr; +} + +gchar * +rspamd_fstringdup(const rspamd_fstring_t *src) +{ + gchar *newstr; + + if (src == NULL) { + return NULL; + } + + newstr = g_malloc(src->len + 1); + memcpy(newstr, src->str, src->len); + newstr[src->len] = '\0'; + + return newstr; +} diff --git a/src/libutil/fstring.h b/src/libutil/fstring.h new file mode 100644 index 0000000..9eacf21 --- /dev/null +++ b/src/libutil/fstring.h @@ -0,0 +1,231 @@ +/*- + * Copyright 2016 Vsevolod Stakhov + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +#ifndef FSTRING_H +#define FSTRING_H + +#include "config.h" +#include "mem_pool.h" +#include <unicode/uchar.h> + +#ifdef __cplusplus +extern "C" { +#endif + +/** + * Fixed strings library + * These strings are NOT null-terminated for speed + */ + +typedef struct f_str_s { + gsize len; + gsize allocated; + gchar str[]; +} rspamd_fstring_t; + +#define RSPAMD_FSTRING_DATA(s) ((s)->str) +#define RSPAMD_FSTRING_LEN(s) ((s)->len) +#define RSPAMD_FSTRING_LIT(lit) rspamd_fstring_new_init((lit), sizeof(lit) - 1) + +typedef struct f_str_tok { + gsize len; + const gchar *begin; +} rspamd_ftok_t; + +typedef struct f_str_unicode_tok { + gsize len; /* in UChar32 */ + const UChar32 *begin; +} rspamd_ftok_unicode_t; + +/** + * Create new fixed length string + */ +rspamd_fstring_t *rspamd_fstring_new(void) + G_GNUC_WARN_UNUSED_RESULT; + +/** + * Create new fixed length string with preallocated size + */ +rspamd_fstring_t *rspamd_fstring_sized_new(gsize initial_size) + G_GNUC_WARN_UNUSED_RESULT; + +/** + * Create new fixed length string and initialize it with the initial data + */ +rspamd_fstring_t *rspamd_fstring_new_init(const gchar *init, gsize len) + G_GNUC_WARN_UNUSED_RESULT; + +/** + * Assign new value to fixed string + */ +rspamd_fstring_t *rspamd_fstring_assign(rspamd_fstring_t *str, + const gchar *init, gsize len) G_GNUC_WARN_UNUSED_RESULT; + +/** + * Free fixed length string + */ +void rspamd_fstring_free(rspamd_fstring_t *str); + +/** + * Append data to a fixed length string + */ +rspamd_fstring_t *rspamd_fstring_append(rspamd_fstring_t *str, + 
const char *in, gsize len) G_GNUC_WARN_UNUSED_RESULT; + +/** + * Append `len` repeated chars `c` to string `str` + */ +rspamd_fstring_t *rspamd_fstring_append_chars(rspamd_fstring_t *str, + char c, gsize len) G_GNUC_WARN_UNUSED_RESULT; + +/** + * Erase `len` characters at position `pos` + */ +void rspamd_fstring_erase(rspamd_fstring_t *str, gsize pos, gsize len); + +#define rspamd_fstring_clear(s) rspamd_fstring_erase(s, 0, s->len) + +/** + * Convert fixed string to a zero terminated string. This string must be + * freed by a caller + */ +char *rspamd_fstring_cstr(const rspamd_fstring_t *str) + G_GNUC_WARN_UNUSED_RESULT; + +/** + * Convert fixed string using ftok_t to a zero terminated string. This string must be + * freed by a caller + */ +char *rspamd_ftok_cstr(const rspamd_ftok_t *str) + G_GNUC_WARN_UNUSED_RESULT; + +/* + * Return fast hash value for fixed string converted to lowercase + */ +guint32 rspamd_fstrhash_lc(const rspamd_ftok_t *str, gboolean is_utf); + +/** + * Return true if two strings are equal + */ +gboolean rspamd_fstring_equal(const rspamd_fstring_t *s1, + const rspamd_fstring_t *s2); + +/** + * Compare two fixed strings ignoring case + */ +gint rspamd_fstring_casecmp(const rspamd_fstring_t *s1, + const rspamd_fstring_t *s2); + +/** + * Compare two fixed strings + */ +gint rspamd_fstring_cmp(const rspamd_fstring_t *s1, + const rspamd_fstring_t *s2); + +/** + * Compare two fixed tokens ignoring case + */ +gint rspamd_ftok_casecmp(const rspamd_ftok_t *s1, + const rspamd_ftok_t *s2); + +/** + * Compare two fixed tokens + */ +gint rspamd_ftok_cmp(const rspamd_ftok_t *s1, + const rspamd_ftok_t *s2); + +/** + * Returns true if `s1` starts with `s2` + * @param s1 + * @param s2 + * @return + */ +gboolean rspamd_ftok_starts_with(const rspamd_ftok_t *s1, + const rspamd_ftok_t *s2); + +/** + * Return TRUE if ftok is equal to specified C string + */ +gboolean rspamd_ftok_cstr_equal(const rspamd_ftok_t *s, + const gchar *pat, gboolean icase); + +/** + * Free
fstring_t that is mapped to ftok_t + * + * | len | allocated | <data> -- fstring_t + * <begin> -- tok + * + * tok is expected to be allocated with g_malloc + */ +void rspamd_fstring_mapped_ftok_free(gpointer p); + +/** + * Map token to a specified string. Token must be freed using g_free + */ +rspamd_ftok_t *rspamd_ftok_map(const rspamd_fstring_t *s); + +/** + * Suggest suitable size to grow fstring + * @param len + * @param allocated + * @param needed_len + * @return + */ +gsize rspamd_fstring_suggest_size(gsize len, gsize allocated, gsize needed_len); + +/** + * Grow the specified fixed string + * @param str + * @param needed_len + * @return + */ +rspamd_fstring_t *rspamd_fstring_grow(rspamd_fstring_t *str, + gsize needed_len) G_GNUC_WARN_UNUSED_RESULT; + +/** + * Copies ftok to zero terminated string (must be freed using g_free) + * @param src + * @return + */ +gchar *rspamd_ftokdup(const rspamd_ftok_t *src) G_GNUC_WARN_UNUSED_RESULT; + +/** + * Copies fstring to zero terminated string (must be freed using g_free) + * @param src + * @return + */ +gchar *rspamd_fstringdup(const rspamd_fstring_t *src) G_GNUC_WARN_UNUSED_RESULT; + +#define RSPAMD_FTOK_ASSIGN(t, lit) \ + do { \ + (t)->begin = (lit); \ + (t)->len = sizeof(lit) - 1; \ + } while (0) +#define RSPAMD_FTOK_FROM_STR(t, str) \ + do { \ + if (G_LIKELY(str)) { \ + (t)->begin = (const char *) (str); \ + (t)->len = strlen(str); \ + } \ + else { \ + (t)->begin = NULL; \ + (t)->len = 0; \ + } \ + } while (0) + +#ifdef __cplusplus +} +#endif +#endif diff --git a/src/libutil/hash.c b/src/libutil/hash.c new file mode 100644 index 0000000..d2af88c --- /dev/null +++ b/src/libutil/hash.c @@ -0,0 +1,716 @@ +/*- + * Copyright 2016 Vsevolod Stakhov + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +#include "config.h" +#include "hash.h" +#include "util.h" +#include "khash.h" + +/** + * LRU hashing + */ + +static const guint log_base = 10; +static const guint eviction_candidates = 16; +static const gdouble lfu_base_value = 5.0; + +struct rspamd_lru_volatile_element_s; + +struct rspamd_lru_hash_s { + guint maxsize; + guint eviction_min_prio; + guint eviction_used; + struct rspamd_lru_element_s **eviction_pool; + + GDestroyNotify value_destroy; + GDestroyNotify key_destroy; + GHashFunc hfunc; + GEqualFunc eqfunc; + + khint_t n_buckets, size, n_occupied, upper_bound; + khint32_t *flags; + gpointer *keys; + struct rspamd_lru_volatile_element_s *vals; +}; + +enum rspamd_lru_element_flags { + RSPAMD_LRU_ELEMENT_NORMAL = 0, + RSPAMD_LRU_ELEMENT_VOLATILE = (1 << 0), + RSPAMD_LRU_ELEMENT_IMMORTAL = (1 << 1), +}; + +struct rspamd_lru_element_s { + guint16 last; + guint8 lg_usages; + guint8 eviction_pos; + guint8 flags; + gpointer data; +}; + +struct rspamd_lru_volatile_element_s { + struct rspamd_lru_element_s e; + time_t creation_time; + time_t ttl; +}; +typedef struct rspamd_lru_volatile_element_s rspamd_lru_vol_element_t; + +#define TIME_TO_TS(t) ((guint16) (((t) / 60) & 0xFFFFU)) + +static rspamd_lru_vol_element_t * +rspamd_lru_hash_get(const rspamd_lru_hash_t *h, gconstpointer key) +{ + if (h->n_buckets) { + khint_t k, i, last, mask, step = 0; + mask = h->n_buckets - 1; + k = h->hfunc(key); + i = k & mask; + last = i; + + while (!__ac_isempty(h->flags, i) && + (__ac_isdel(h->flags, i) || !h->eqfunc(h->keys[i], key))) { + i = (i + (++step)) & 
mask; + if (i == last) { + return NULL; + } + } + + return __ac_iseither(h->flags, i) ? NULL : &h->vals[i]; + } + + return NULL; +} + +static int +rspamd_lru_hash_resize(rspamd_lru_hash_t *h, + khint_t new_n_buckets) +{ + /* This function uses 0.25*n_buckets bytes of working space instead of [sizeof(key_t+val_t)+.25]*n_buckets. */ + khint32_t *new_flags = 0; + khint_t j = 1; + + kroundup32(new_n_buckets); + if (new_n_buckets < 4) { + new_n_buckets = 4; + } + + if (h->size >= (khint_t) (new_n_buckets * __ac_HASH_UPPER + 0.5)) { + j = 0; + /* requested size is too small */ + } + else { + /* hash table size to be changed (shrink or expand); rehash */ + new_flags = (khint32_t *) g_malloc(__ac_fsize(new_n_buckets) * sizeof(khint32_t)); + + if (!new_flags) { + return -1; + } + + memset(new_flags, 0xaa, __ac_fsize(new_n_buckets) * sizeof(khint32_t)); + if (h->n_buckets < new_n_buckets) { + /* expand */ + gpointer *new_keys = (gpointer *) g_realloc((void *) h->keys, + new_n_buckets * sizeof(gpointer)); + + if (!new_keys) { + g_free(new_flags); + return -1; + } + + h->keys = new_keys; + rspamd_lru_vol_element_t *new_vals = + (rspamd_lru_vol_element_t *) g_realloc((void *) h->vals, + new_n_buckets * sizeof(rspamd_lru_vol_element_t)); + if (!new_vals) { + g_free(new_flags); + return -1; + } + + h->vals = new_vals; + } + /* Shrink */ + } + + if (j) { + /* rehashing is needed */ + h->eviction_used = 0; + + for (j = 0; j != h->n_buckets; ++j) { + if (__ac_iseither(h->flags, j) == 0) { + gpointer key = h->keys[j]; + rspamd_lru_vol_element_t val; + khint_t new_mask; + new_mask = new_n_buckets - 1; + val = h->vals[j]; + val.e.eviction_pos = (guint8) -1; + __ac_set_isdel_true(h->flags, j); + + while (1) { /* kick-out process; sort of like in Cuckoo hashing */ + khint_t k, i, step = 0; + k = h->hfunc(key); + i = k & new_mask; + + while (!__ac_isempty(new_flags, i)) { + i = (i + (++step)) & new_mask; + } + + __ac_set_isempty_false(new_flags, i); + + if (i < h->n_buckets && 
/*
 * Reserve the table slot for `key` and report what was found through `ret`:
 *   1  - the key was stored in a previously empty slot
 *   2  - the key was stored in a slot previously marked as deleted
 *   0  - the key is already present; h->keys[] is left untouched
 *  -1  - the table could not be resized; NULL is returned
 *
 * The caller is expected to fill the returned value slot.
 */
static rspamd_lru_vol_element_t *
rspamd_lru_hash_put(rspamd_lru_hash_t *h, gpointer key, int *ret)
{
	khint_t x;

	if (h->n_occupied >= h->upper_bound) {
		/* update the hash table */
		if (h->n_buckets > (h->size << 1)) {
			/*
			 * Less than half of the buckets hold live elements: rehash
			 * to clear "deleted" elements instead of growing
			 */
			if (rspamd_lru_hash_resize(h, h->n_buckets - 1) < 0) {
				*ret = -1;
				return NULL;
			}
		}
		else if (rspamd_lru_hash_resize(h, h->n_buckets + 1) < 0) {
			/* expanding the hash table has failed */
			*ret = -1;
			return NULL;
		}
	}

	khint_t k, i, site, last, mask = h->n_buckets - 1, step = 0;
	/* n_buckets is used as the "not found yet" marker for both x and site */
	x = site = h->n_buckets;
	k = h->hfunc(key);
	i = k & mask;

	if (__ac_isempty(h->flags, i)) {
		x = i; /* for speed up */
	}
	else {
		last = i;
		/* Probe until the key itself or an empty slot is met */
		while (!__ac_isempty(h->flags, i) &&
			   (__ac_isdel(h->flags, i) ||
				!h->eqfunc(h->keys[i], key))) {
			if (__ac_isdel(h->flags, i)) {
				/* Remember a deleted slot that could be reused */
				site = i;
			}

			i = (i + (++step)) & mask;

			if (i == last) {
				/* Wrapped around: fall back to the remembered deleted slot */
				x = site;
				break;
			}
		}

		if (x == h->n_buckets) {
			/* Prefer a deleted slot seen on the way over a fresh empty one */
			if (__ac_isempty(h->flags, i) && site != h->n_buckets) {
				x = site;
			}
			else {
				x = i;
			}
		}
	}

	if (__ac_isempty(h->flags, x)) { /* not present at all */
		h->keys[x] = key;
		__ac_set_isboth_false(h->flags, x);
		++h->size;
		++h->n_occupied;
		*ret = 1;
	}
	else if (__ac_isdel(h->flags, x)) { /* deleted */
		h->keys[x] = key;
		__ac_set_isboth_false(h->flags, x);
		++h->size;
		*ret = 2;
	}
	else {
		/* Don't touch h->keys[x] if present and not deleted */
		*ret = 0;
	}

	return &h->vals[x];
}

/*
 * Mark the slot of `elt` as deleted and run the key/value destructors
 * (when set). Slots that are already empty or deleted are left alone.
 */
static void
rspamd_lru_hash_del(rspamd_lru_hash_t *h, rspamd_lru_vol_element_t *elt)
{
	/* Slot index is recovered from the element position in h->vals */
	khint_t x = elt - h->vals;

	if (x != h->n_buckets && !__ac_iseither(h->flags, x)) {
		__ac_set_isdel_true(h->flags, x);
		--h->size;

		if (h->key_destroy) {
			h->key_destroy(h->keys[x]);
		}

		if (h->value_destroy) {
			h->value_destroy(elt->e.data);
		}
	}
}

/*
 * Remove `elt` from the eviction pool, close the gap, renumber the
 * remaining entries and recompute eviction_min_prio.
 *
 * `elt` must currently be in the pool (asserted).
 * NOTE(review): elt->eviction_pos itself is not reset here.
 */
static void
rspamd_lru_hash_remove_evicted(rspamd_lru_hash_t *hash,
							   rspamd_lru_element_t *elt)
{
	guint i;
	rspamd_lru_element_t *cur;

	g_assert(hash->eviction_used > 0);
	g_assert(elt->eviction_pos < hash->eviction_used);

	/* Shift everything after the removed slot one position to the left */
	memmove(&hash->eviction_pool[elt->eviction_pos],
			&hash->eviction_pool[elt->eviction_pos + 1],
			sizeof(rspamd_lru_element_t *) *
				(eviction_candidates - elt->eviction_pos - 1));

	hash->eviction_used--;

	if (hash->eviction_used > 0) {
		/* We also need to update min_prio and renumber eviction list */
		hash->eviction_min_prio = G_MAXUINT;

		for (i = 0; i < hash->eviction_used; i++) {
			cur = hash->eviction_pool[i];

			if (hash->eviction_min_prio > cur->lg_usages) {
				hash->eviction_min_prio = cur->lg_usages;
			}

			cur->eviction_pos = i;
		}
	}
	else {
		/* Empty pool: no minimum priority is known */
		hash->eviction_min_prio = G_MAXUINT;
	}
}
(baseval * log_base + 1); + + if (r < p) { + elt->lg_usages++; + } + } +} + +static inline void +rspamd_lru_hash_decrease_counter(rspamd_lru_element_t *elt, time_t now) +{ + if (now - elt->last > lfu_base_value) { + /* Penalise counters for outdated records */ + elt->lg_usages /= 2; + } +} + +static gboolean +rspamd_lru_hash_maybe_evict(rspamd_lru_hash_t *hash, + rspamd_lru_element_t *elt) +{ + guint i; + rspamd_lru_element_t *cur; + + if (elt->eviction_pos == (guint8) -1) { + if (hash->eviction_used < eviction_candidates) { + /* There are free places in eviction pool */ + hash->eviction_pool[hash->eviction_used] = elt; + elt->eviction_pos = hash->eviction_used; + hash->eviction_used++; + + if (hash->eviction_min_prio > elt->lg_usages) { + hash->eviction_min_prio = elt->lg_usages; + } + + return TRUE; + } + else { + /* Find any candidate that has higher usage count */ + for (i = 0; i < hash->eviction_used; i++) { + cur = hash->eviction_pool[i]; + + if (cur->lg_usages > elt->lg_usages) { + cur->eviction_pos = -1; + elt->eviction_pos = i; + hash->eviction_pool[i] = elt; + + if (hash->eviction_min_prio > elt->lg_usages) { + hash->eviction_min_prio = elt->lg_usages; + } + + return TRUE; + } + } + } + } + else { + /* Already in the eviction list */ + return TRUE; + } + + return FALSE; +} + +static void +rspamd_lru_hash_remove_node(rspamd_lru_hash_t *hash, rspamd_lru_element_t *elt) +{ + if (elt->eviction_pos != (guint8) -1) { + rspamd_lru_hash_remove_evicted(hash, elt); + } + + rspamd_lru_hash_del(hash, (rspamd_lru_vol_element_t *) elt); +} + +static void +rspamd_lru_hash_evict(rspamd_lru_hash_t *hash, time_t now) +{ + double r; + guint i; + rspamd_lru_element_t *elt = NULL; + guint nexpired = 0; + + /* + * We either evict one node from the eviction list + * or, at some probability scan all table and update eviction + * list first + */ + r = rspamd_random_double_fast(); + + if (r < ((double) eviction_candidates) / hash->maxsize) { + /* Full hash scan */ + 
rspamd_lru_vol_element_t *cur; + rspamd_lru_element_t *selected = NULL; + + kh_foreach_value_ptr(hash, cur, { + rspamd_lru_element_t *node = &cur->e; + + if (node->flags & RSPAMD_LRU_ELEMENT_IMMORTAL) { + continue; + } + + if (node->flags & RSPAMD_LRU_ELEMENT_VOLATILE) { + /* If element is expired, just remove it */ + if (now - cur->creation_time > cur->ttl) { + rspamd_lru_hash_remove_node(hash, node); + + nexpired++; + continue; + } + } + else { + rspamd_lru_hash_decrease_counter(node, now); + + if (rspamd_lru_hash_maybe_evict(hash, node)) { + if (selected && node->lg_usages < selected->lg_usages) { + selected = node; + } + else if (selected == NULL) { + selected = node; + } + } + } + }); + + if (selected) { + elt = selected; + } + } + else { + /* Fast random eviction */ + for (i = 0; i < hash->eviction_used; i++) { + elt = hash->eviction_pool[i]; + + if (elt->lg_usages <= hash->eviction_min_prio) { + break; + } + } + } + + /* Evict if nothing else has been cleaned */ + if (elt && nexpired == 0) { + rspamd_lru_hash_remove_node(hash, elt); + } +} + +rspamd_lru_hash_t * +rspamd_lru_hash_new_full(gint maxsize, + GDestroyNotify key_destroy, + GDestroyNotify value_destroy, + GHashFunc hf, + GEqualFunc cmpf) +{ + rspamd_lru_hash_t *h; + + if (maxsize < eviction_candidates * 2) { + maxsize = eviction_candidates * 2; + } + + h = g_malloc0(sizeof(rspamd_lru_hash_t)); + h->hfunc = hf; + h->eqfunc = cmpf; + h->eviction_pool = g_malloc0(sizeof(rspamd_lru_element_t *) * + eviction_candidates); + h->maxsize = maxsize; + h->value_destroy = value_destroy; + h->key_destroy = key_destroy; + h->eviction_min_prio = G_MAXUINT; + + /* Preallocate some elements */ + rspamd_lru_hash_resize(h, MIN(h->maxsize, 128)); + + return h; +} + +rspamd_lru_hash_t * +rspamd_lru_hash_new(gint maxsize, + GDestroyNotify key_destroy, + GDestroyNotify value_destroy) +{ + return rspamd_lru_hash_new_full(maxsize, + key_destroy, value_destroy, + rspamd_strcase_hash, rspamd_strcase_equal); +} + +gpointer 
+rspamd_lru_hash_lookup(rspamd_lru_hash_t *hash, gconstpointer key, time_t now) +{ + rspamd_lru_element_t *res; + rspamd_lru_vol_element_t *vnode; + + vnode = rspamd_lru_hash_get(hash, (gpointer) key); + if (vnode != NULL) { + res = &vnode->e; + + if (res->flags & RSPAMD_LRU_ELEMENT_VOLATILE) { + /* Check ttl */ + + if (now - vnode->creation_time > vnode->ttl) { + rspamd_lru_hash_remove_node(hash, res); + + return NULL; + } + } + + now = TIME_TO_TS(now); + res->last = MAX(res->last, now); + rspamd_lru_hash_update_counter(res); + rspamd_lru_hash_maybe_evict(hash, res); + + return res->data; + } + + return NULL; +} + +gboolean +rspamd_lru_hash_remove(rspamd_lru_hash_t *hash, + gconstpointer key) +{ + rspamd_lru_vol_element_t *res; + + res = rspamd_lru_hash_get(hash, key); + + if (res != NULL) { + rspamd_lru_hash_remove_node(hash, &res->e); + + return TRUE; + } + + return FALSE; +} + +void rspamd_lru_hash_insert(rspamd_lru_hash_t *hash, + gpointer key, + gpointer value, + time_t now, + guint ttl) +{ + rspamd_lru_element_t *node; + rspamd_lru_vol_element_t *vnode; + gint ret; + + vnode = rspamd_lru_hash_put(hash, key, &ret); + node = &vnode->e; + + if (ret == 0) { + /* Existing element, be careful about destructors */ + if (hash->value_destroy) { + /* Remove old data */ + hash->value_destroy(vnode->e.data); + } + + if (hash->key_destroy) { + /* Here are dragons! 
*/ + goffset off = vnode - hash->vals; + + hash->key_destroy(hash->keys[off]); + hash->keys[off] = key; + } + } + + + if (ttl == 0) { + node->flags = RSPAMD_LRU_ELEMENT_NORMAL; + } + else { + vnode->creation_time = now; + vnode->ttl = ttl; + node->flags = RSPAMD_LRU_ELEMENT_VOLATILE; + } + + node->data = value; + node->lg_usages = (guint8) lfu_base_value; + node->last = TIME_TO_TS(now); + node->eviction_pos = (guint8) -1; + + if (ret != 0) { + /* Also need to check maxsize */ + if (kh_size(hash) >= hash->maxsize) { + node->flags |= RSPAMD_LRU_ELEMENT_IMMORTAL; + rspamd_lru_hash_evict(hash, now); + node->flags &= ~RSPAMD_LRU_ELEMENT_IMMORTAL; + } + } + + rspamd_lru_hash_maybe_evict(hash, node); +} + +void rspamd_lru_hash_destroy(rspamd_lru_hash_t *hash) +{ + if (hash) { + if (hash->key_destroy || hash->value_destroy) { + gpointer k; + rspamd_lru_vol_element_t cur; + + kh_foreach(hash, k, cur, { + if (hash->key_destroy) { + hash->key_destroy(k); + } + if (hash->value_destroy) { + hash->value_destroy(cur.e.data); + } + }); + } + + g_free(hash->keys); + g_free(hash->vals); + g_free(hash->flags); + g_free(hash->eviction_pool); + g_free(hash); + } +} + +gpointer +rspamd_lru_hash_element_data(rspamd_lru_element_t *elt) +{ + return elt->data; +} + +int rspamd_lru_hash_foreach(rspamd_lru_hash_t *h, int it, gpointer *k, + gpointer *v) +{ + gint i; + g_assert(it >= 0); + + for (i = it; i != kh_end(h); ++i) { + if (!kh_exist(h, i)) { + continue; + } + + *k = h->keys[i]; + *v = h->vals[i].e.data; + + break; + } + + if (i == kh_end(h)) { + return -1; + } + + return i + 1; +} + + +guint rspamd_lru_hash_size(rspamd_lru_hash_t *hash) +{ + return kh_size(hash); +} + +/** + * Returns hash capacity + * @param hash hash object + */ +guint rspamd_lru_hash_capacity(rspamd_lru_hash_t *hash) +{ + return hash->maxsize; +}
\ No newline at end of file diff --git a/src/libutil/hash.h b/src/libutil/hash.h new file mode 100644 index 0000000..3882ce5 --- /dev/null +++ b/src/libutil/hash.h @@ -0,0 +1,115 @@ +/** + * @file hash.h + * Hash table implementation that allows using memory pools for storage as well as using + * shared memory for this purpose + */ + +#ifndef RSPAMD_HASH_H +#define RSPAMD_HASH_H + +#include "config.h" + +#ifdef __cplusplus +extern "C" { +#endif + +struct rspamd_lru_hash_s; +typedef struct rspamd_lru_hash_s rspamd_lru_hash_t; +struct rspamd_lru_element_s; +typedef struct rspamd_lru_element_s rspamd_lru_element_t; + + +/** + * Create new lru hash + * @param maxsize maximum elements in a hash + * @param key_destroy destructor for keys (may be NULL) + * @param value_destroy destructor for values (may be NULL) + * @return new rspamd_hash object + */ +rspamd_lru_hash_t *rspamd_lru_hash_new(gint maxsize, + GDestroyNotify key_destroy, + GDestroyNotify value_destroy); + + +/** + * Create new lru hash + * @param maxsize maximum elements in a hash + * @param key_destroy destructor for keys (may be NULL) + * @param value_destroy destructor for values (may be NULL) + * @param hfunc pointer to hash function + * @param eqfunc pointer to function for comparing keys + * @return new rspamd_hash object + */ +rspamd_lru_hash_t *rspamd_lru_hash_new_full(gint maxsize, + GDestroyNotify key_destroy, + GDestroyNotify value_destroy, + GHashFunc hfunc, + GEqualFunc eqfunc); + +/** + * Lookup item from hash + * @param hash hash object + * @param key key to find + * @param now current time, used to expire elements and to update the access time + * @return value of key or NULL if key is not found + */ +gpointer rspamd_lru_hash_lookup(rspamd_lru_hash_t *hash, + gconstpointer key, + time_t now); + +/** + * Removes key from LRU cache + * @param hash + * @param key + * @return TRUE if key has been found and removed + */ +gboolean rspamd_lru_hash_remove(rspamd_lru_hash_t *hash, + gconstpointer key); + +/** + * Insert item in hash + * @param hash hash object + * @param key key to insert + * @param value value of
key + */ +void rspamd_lru_hash_insert(rspamd_lru_hash_t *hash, + gpointer key, + gpointer value, + time_t now, + guint ttl); + +/** + * Remove lru hash + * @param hash hash object + */ + +void rspamd_lru_hash_destroy(rspamd_lru_hash_t *hash); + +/** + * Iterate over lru hash. Iterations must start from it=0 and are done when it==-1 + * @param hash + * @param it + * @param k + * @param v + * @return new it or -1 if iteration has been reached over + */ +int rspamd_lru_hash_foreach(rspamd_lru_hash_t *hash, int it, gpointer *k, + gpointer *v); + +/** + * Returns number of elements in a hash + * @param hash hash object + */ +guint rspamd_lru_hash_size(rspamd_lru_hash_t *hash); + +/** + * Returns hash capacity + * @param hash hash object + */ +guint rspamd_lru_hash_capacity(rspamd_lru_hash_t *hash); + +#ifdef __cplusplus +} +#endif + +#endif diff --git a/src/libutil/heap.c b/src/libutil/heap.c new file mode 100644 index 0000000..8ce70cf --- /dev/null +++ b/src/libutil/heap.c @@ -0,0 +1,197 @@ +/*- + * Copyright 2016 Vsevolod Stakhov + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include "config.h" +#include "libutil/heap.h" + +struct rspamd_min_heap { + GPtrArray *ar; +}; + +#define __SWAP(a, b) \ + do { \ + __typeof__(a) _a = (a); \ + __typeof__(b) _b = (b); \ + a = _b; \ + b = _a; \ + } while (0) +#define heap_swap(h, e1, e2) \ + do { \ + __SWAP((h)->ar->pdata[(e1)->idx - 1], (h)->ar->pdata[(e2)->idx - 1]); \ + __SWAP((e1)->idx, (e2)->idx); \ + } while (0) + +#define min_elt(e1, e2) ((e1)->pri <= (e2)->pri ? (e1) : (e2)) + +/* + * Swims element added (or changed) to preserve heap's invariant + */ +static void +rspamd_min_heap_swim(struct rspamd_min_heap *heap, + struct rspamd_min_heap_elt *elt) +{ + struct rspamd_min_heap_elt *parent; + + while (elt->idx > 1) { + parent = g_ptr_array_index(heap->ar, elt->idx / 2 - 1); + + if (parent->pri > elt->pri) { + heap_swap(heap, elt, parent); + } + else { + break; + } + } +} + +/* + * Sinks the element popped (or changed) to preserve heap's invariant + */ +static void +rspamd_min_heap_sink(struct rspamd_min_heap *heap, + struct rspamd_min_heap_elt *elt) +{ + struct rspamd_min_heap_elt *c1, *c2, *m; + + while (elt->idx * 2 < heap->ar->len) { + c1 = g_ptr_array_index(heap->ar, elt->idx * 2 - 1); + c2 = g_ptr_array_index(heap->ar, elt->idx * 2); + m = min_elt(c1, c2); + + if (elt->pri > m->pri) { + heap_swap(heap, elt, m); + } + else { + break; + } + } + + if (elt->idx * 2 - 1 < heap->ar->len) { + m = g_ptr_array_index(heap->ar, elt->idx * 2 - 1); + if (elt->pri > m->pri) { + heap_swap(heap, elt, m); + } + } +} + +struct rspamd_min_heap * +rspamd_min_heap_create(gsize reserved_size) +{ + struct rspamd_min_heap *heap; + + heap = g_malloc(sizeof(*heap)); + heap->ar = g_ptr_array_sized_new(reserved_size); + + return heap; +} + +void rspamd_min_heap_push(struct rspamd_min_heap *heap, + struct rspamd_min_heap_elt *elt) +{ + g_assert(heap != NULL); + g_assert(elt != NULL); + + /* Add to the end */ + elt->idx = heap->ar->len + 1; + g_ptr_array_add(heap->ar, elt); + /* Now swim it up */ + 
rspamd_min_heap_swim(heap, elt); +} + +struct rspamd_min_heap_elt * +rspamd_min_heap_pop(struct rspamd_min_heap *heap) +{ + struct rspamd_min_heap_elt *elt, *last; + + g_assert(heap != NULL); + + if (heap->ar->len == 0) { + return NULL; + } + + elt = g_ptr_array_index(heap->ar, 0); + last = g_ptr_array_index(heap->ar, heap->ar->len - 1); + + if (elt != last) { + /* Now replace elt with the last element and sink it if needed */ + heap_swap(heap, elt, last); + g_ptr_array_remove_index_fast(heap->ar, heap->ar->len - 1); + rspamd_min_heap_sink(heap, last); + } + else { + g_ptr_array_remove_index_fast(heap->ar, heap->ar->len - 1); + } + + + return elt; +} + +void rspamd_min_heap_update_elt(struct rspamd_min_heap *heap, + struct rspamd_min_heap_elt *elt, guint npri) +{ + guint oldpri; + + g_assert(heap != NULL); + g_assert(elt->idx > 0 && elt->idx <= heap->ar->len); + + oldpri = elt->pri; + elt->pri = npri; + + if (npri > oldpri) { + /* We might need to sink */ + rspamd_min_heap_sink(heap, elt); + } + else if (npri < oldpri) { + /* We might need to swim */ + rspamd_min_heap_swim(heap, elt); + } +} + +void rspamd_min_heap_remove_elt(struct rspamd_min_heap *heap, + struct rspamd_min_heap_elt *elt) +{ + struct rspamd_min_heap_elt *first; + + g_assert(heap != NULL); + g_assert(elt->idx > 0 && elt->idx <= heap->ar->len); + + first = g_ptr_array_index(heap->ar, 0); + + if (elt != first) { + elt->pri = first->pri - 1; + rspamd_min_heap_swim(heap, elt); + } + + /* Now the desired element is on the top of queue */ + (void) rspamd_min_heap_pop(heap); +} + +void rspamd_min_heap_destroy(struct rspamd_min_heap *heap) +{ + if (heap) { + g_ptr_array_free(heap->ar, TRUE); + g_free(heap); + } +} + +struct rspamd_min_heap_elt * +rspamd_min_heap_index(struct rspamd_min_heap *heap, guint idx) +{ + g_assert(heap != NULL); + g_assert(idx < heap->ar->len); + + return g_ptr_array_index(heap->ar, idx); +} diff --git a/src/libutil/heap.h b/src/libutil/heap.h new file mode 100644 index 
0000000..805f817 --- /dev/null +++ b/src/libutil/heap.h @@ -0,0 +1,97 @@
/*-
 * Copyright 2016 Vsevolod Stakhov
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
#ifndef SRC_LIBUTIL_HEAP_H_
#define SRC_LIBUTIL_HEAP_H_

#include "config.h"

#ifdef __cplusplus
extern "C" {
#endif

/**
 * Binary minimal heap interface based on glib
 */

/**
 * Heap element. The heap stores pointers to these structures, it does not
 * copy them.
 */
struct rspamd_min_heap_elt {
	gpointer data; /**< opaque user payload, not interpreted by the heap */
	guint pri;     /**< priority: the smallest value is popped first */
	guint idx;     /**< 1-based position inside the heap, maintained by the heap */
};

struct rspamd_min_heap;

/**
 * Creates min heap with the specified reserved size
 * @param reserved_size reserved size in elements
 * @return opaque minimal heap
 */
struct rspamd_min_heap *rspamd_min_heap_create(gsize reserved_size);

/**
 * Pushes an element to the heap. `pri` should be initialized to use this function,
 * `idx` is used internally by heap interface
 * @param heap heap structure
 * @param elt element to push
 */
void rspamd_min_heap_push(struct rspamd_min_heap *heap,
						  struct rspamd_min_heap_elt *elt);

/**
 * Pops the minimum element from the heap and reorders the queue
 * @param heap heap structure
 * @return minimum element or NULL if the heap is empty
 */
struct rspamd_min_heap_elt *rspamd_min_heap_pop(struct rspamd_min_heap *heap);

/**
 * Updates priority for the element.
 * It must be in queue (so `idx` should be sane)
 * @param heap heap structure
 * @param elt element to update
 * @param npri new priority
 */
void rspamd_min_heap_update_elt(struct rspamd_min_heap *heap,
								struct rspamd_min_heap_elt *elt, guint npri);


/**
 * Removes element from the heap. The element must be in queue
 * (so `idx` should be sane)
 * @param heap heap structure
 * @param elt element to remove
 */
void rspamd_min_heap_remove_elt(struct rspamd_min_heap *heap,
								struct rspamd_min_heap_elt *elt);

/**
 * Destroys heap (elements are not destroyed themselves)
 * @param heap heap structure, NULL is ignored
 */
void rspamd_min_heap_destroy(struct rspamd_min_heap *heap);

/**
 * Returns element from the heap with the specified index
 * @param heap heap structure
 * @param idx zero-based position in the underlying array, must be less than
 * the number of elements (0 is the minimum element)
 * @return element stored at this position
 */
struct rspamd_min_heap_elt *rspamd_min_heap_index(struct rspamd_min_heap *heap,
												  guint idx);

#ifdef __cplusplus
}
#endif

#endif /* SRC_LIBUTIL_HEAP_H_ */
diff --git a/src/libutil/libev_helper.c b/src/libutil/libev_helper.c new file mode 100644 index 0000000..770964b --- /dev/null +++ b/src/libutil/libev_helper.c @@ -0,0 +1,111 @@
/*-
 * Copyright 2019 Vsevolod Stakhov
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
+ */ + +#include "libev_helper.h" + +static void +rspamd_ev_watcher_io_cb(EV_P_ struct ev_io *w, int revents) +{ + struct rspamd_io_ev *ev = (struct rspamd_io_ev *) w->data; + + ev->cb(ev->io.fd, revents, ev->ud); +} + +static void +rspamd_ev_watcher_timer_cb(EV_P_ struct ev_timer *w, int revents) +{ + struct rspamd_io_ev *ev = (struct rspamd_io_ev *) w->data; + + /* + * We now call timeout callback in all the cases, as we assume that all + * timeouts are final + */ + ev->cb(ev->io.fd, EV_TIMER, ev->ud); +} + + +void rspamd_ev_watcher_init(struct rspamd_io_ev *ev, + int fd, + short what, + rspamd_ev_cb cb, + void *ud) +{ + ev_io_init(&ev->io, rspamd_ev_watcher_io_cb, fd, what); + ev->io.data = ev; + ev_init(&ev->tm, rspamd_ev_watcher_timer_cb); + ev->tm.data = ev; + ev->ud = ud; + ev->cb = cb; +} + +void rspamd_ev_watcher_start(struct ev_loop *loop, + struct rspamd_io_ev *ev, + ev_tstamp timeout) +{ + g_assert(ev->cb != NULL); + + ev_io_start(EV_A_ & ev->io); + + if (timeout > 0) { + /* Update timestamp to avoid timers running early */ + ev_now_update_if_cheap(loop); + + ev->timeout = timeout; + ev_timer_set(&ev->tm, timeout, 0.0); + ev_timer_start(EV_A_ & ev->tm); + } +} + +void rspamd_ev_watcher_stop(struct ev_loop *loop, + struct rspamd_io_ev *ev) +{ + if (ev_can_stop(&ev->io)) { + ev_io_stop(EV_A_ & ev->io); + } + + if (ev->timeout > 0) { + ev_timer_stop(EV_A_ & ev->tm); + } +} + +void rspamd_ev_watcher_reschedule(struct ev_loop *loop, + struct rspamd_io_ev *ev, + short what) +{ + g_assert(ev->cb != NULL); + + if (ev_can_stop(&ev->io)) { + ev_io_stop(EV_A_ & ev->io); + ev_io_set(&ev->io, ev->io.fd, what); + ev_io_start(EV_A_ & ev->io); + } + else { + ev->io.data = ev; + ev_io_init(&ev->io, rspamd_ev_watcher_io_cb, ev->io.fd, what); + ev_io_start(EV_A_ & ev->io); + } + + if (ev->timeout > 0) { + if (!(ev_can_stop(&ev->tm))) { + /* Update timestamp to avoid timers running early */ + ev_now_update_if_cheap(loop); + + ev->tm.data = ev; + ev_timer_init(&ev->tm, 
rspamd_ev_watcher_timer_cb, ev->timeout, 0.0); + ev_timer_start(EV_A_ & ev->tm); + } + } +}
\ No newline at end of file diff --git a/src/libutil/libev_helper.h b/src/libutil/libev_helper.h new file mode 100644 index 0000000..44d1604 --- /dev/null +++ b/src/libutil/libev_helper.h @@ -0,0 +1,86 @@
/*-
 * Copyright 2019 Vsevolod Stakhov
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

#ifndef RSPAMD_LIBEV_HELPER_H
#define RSPAMD_LIBEV_HELPER_H

#include "config.h"
#include "contrib/libev/ev.h"


#ifdef __cplusplus
extern "C" {
#endif

/*
 * This module is a little helper to simplify the libevent->libev transition.
 * It allows creating timed IO watchers that combine an IO watcher and a timer
 */

/**
 * Callback invoked for both IO events and timeouts
 * @param fd descriptor of the IO watcher
 * @param what libev event mask (EV_TIMER when the timeout has fired)
 * @param ud user data passed to rspamd_ev_watcher_init
 */
typedef void (*rspamd_ev_cb)(int fd, short what, void *ud);

struct rspamd_io_ev {
	ev_io io;          /* IO watcher, its `data` points back to this structure */
	ev_timer tm;       /* timeout timer, its `data` points back to this structure */
	rspamd_ev_cb cb;   /* user callback */
	void *ud;          /* user data for the callback */
	ev_tstamp timeout; /* timeout stored by rspamd_ev_watcher_start when it is positive */
};

/**
 * Initialize watcher similar to event_init
 * @param ev watcher to initialize
 * @param fd descriptor to watch
 * @param what libev event mask for the IO watcher
 * @param cb callback to call on events and timeouts
 * @param ud user data passed to the callback
 */
void rspamd_ev_watcher_init(struct rspamd_io_ev *ev,
							int fd, short what, rspamd_ev_cb cb, void *ud);

/**
 * Start watcher with the specific timeout
 * @param loop event loop
 * @param ev initialized watcher (its callback must be set)
 * @param timeout timeout value; the timer is started only if it is positive
 */
void rspamd_ev_watcher_start(struct ev_loop *loop,
							 struct rspamd_io_ev *ev,
							 ev_tstamp timeout);

/**
 * Stops watcher and cleans it up
 * @param loop event loop
 * @param ev watcher to stop
 */
void rspamd_ev_watcher_stop(struct ev_loop *loop,
							struct rspamd_io_ev *ev);

/**
 * Convenience function to reschedule watcher with different events
 * @param loop
 * @param ev
 * @param what
 */
void rspamd_ev_watcher_reschedule(struct ev_loop *loop,
								  struct rspamd_io_ev *ev,
								  short what);

#ifdef __cplusplus
}
#endif

#endif
diff --git a/src/libutil/mem_pool.c b/src/libutil/mem_pool.c new file mode 100644 index 0000000..119ade3 --- /dev/null +++ b/src/libutil/mem_pool.c @@ -0,0 +1,1327 @@
/*
 * Copyright 2023 Vsevolod Stakhov
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
#include "config.h"
#include "mem_pool.h"
#include "fstring.h"
#include "logger.h"
#include "ottery.h"
#include "unix-std.h"
#include "khash.h"
#include "cryptobox.h"
#include "contrib/uthash/utlist.h"
#include "mem_pool_internal.h"

/*
 * sys_alloc_size(sz) is defined only where the allocator can be asked for
 * the size it would really hand out for a request of `sz` bytes
 */
#ifdef WITH_JEMALLOC
#include <jemalloc/jemalloc.h>
#if (JEMALLOC_VERSION_MAJOR == 3 && JEMALLOC_VERSION_MINOR >= 6) || (JEMALLOC_VERSION_MAJOR > 3)
#define HAVE_MALLOC_SIZE 1
#define sys_alloc_size(sz) nallocx(sz, 0)
#endif
#elif defined(__APPLE__)
#include <malloc/malloc.h>
#define HAVE_MALLOC_SIZE 1
#define sys_alloc_size(sz) malloc_good_size(sz)
#endif

#ifdef HAVE_SCHED_YIELD
#include <sched.h>
#endif

/* Sleep time for spin lock in nanoseconds */
#define MUTEX_SLEEP_TIME 10000000L
#define MUTEX_SPIN_COUNT 100

/* Pool locking hooks: both expand to no-ops in this build */
#define POOL_MTX_LOCK() \
	do { \
	} while (0)
#define POOL_MTX_UNLOCK() \
	do { \
	} while (0)

/*
 * This define specifies whether we should check all pools for free space for new object
 * or just begin scan from current (recently attached) pool
 * If
MEMORY_GREEDY is defined, then we scan all pools to find free space (more CPU usage, slower + * but requires less memory). If it is not defined check only current pool and if object is too large + * to place in it allocate new one (this may cause huge CPU usage in some cases too, but generally faster than + * greedy method) + */ +#undef MEMORY_GREEDY + + +static inline uint32_t +rspamd_entry_hash(const char *str) +{ + return (guint) rspamd_cryptobox_fast_hash(str, strlen(str), rspamd_hash_seed()); +} + +static inline int +rspamd_entry_equal(const char *k1, const char *k2) +{ + return strcmp(k1, k2) == 0; +} + + +KHASH_INIT(mempool_entry, const gchar *, struct rspamd_mempool_entry_point *, + 1, rspamd_entry_hash, rspamd_entry_equal) + +static khash_t(mempool_entry) *mempool_entries = NULL; + + +/* Internal statistic */ +static rspamd_mempool_stat_t *mem_pool_stat = NULL; +/* Environment variable */ +static gboolean env_checked = FALSE; +static gboolean always_malloc = FALSE; + +/** + * Function that return free space in pool page + * @param x pool page struct + */ +static gsize +pool_chain_free(struct _pool_chain *chain) +{ + gint64 occupied = chain->pos - chain->begin + MIN_MEM_ALIGNMENT; + + return (occupied < (gint64) chain->slice_size ? 
chain->slice_size - occupied : 0); +} + +/* By default allocate 4Kb chunks of memory */ +#define FIXED_POOL_SIZE 4096 + +static inline struct rspamd_mempool_entry_point * +rspamd_mempool_entry_new(const gchar *loc) +{ + struct rspamd_mempool_entry_point **pentry, *entry; + gint r; + khiter_t k; + + k = kh_put(mempool_entry, mempool_entries, loc, &r); + + if (r >= 0) { + pentry = &kh_value(mempool_entries, k); + entry = g_malloc0(sizeof(*entry)); + *pentry = entry; + memset(entry, 0, sizeof(*entry)); + rspamd_strlcpy(entry->src, loc, sizeof(entry->src)); +#ifdef HAVE_GETPAGESIZE + entry->cur_suggestion = MAX(getpagesize(), FIXED_POOL_SIZE); +#else + entry->cur_suggestion = MAX(sysconf(_SC_PAGESIZE), FIXED_POOL_SIZE); +#endif + } + else { + g_assert_not_reached(); + } + + return entry; +} + +RSPAMD_CONSTRUCTOR(rspamd_mempool_entries_ctor) +{ + if (mempool_entries == NULL) { + mempool_entries = kh_init(mempool_entry); + } +} + +RSPAMD_DESTRUCTOR(rspamd_mempool_entries_dtor) +{ + struct rspamd_mempool_entry_point *elt; + + kh_foreach_value(mempool_entries, elt, { + g_free(elt); + }); + + kh_destroy(mempool_entry, mempool_entries); + mempool_entries = NULL; +} + +static inline struct rspamd_mempool_entry_point * +rspamd_mempool_get_entry(const gchar *loc) +{ + khiter_t k; + struct rspamd_mempool_entry_point *elt; + + if (G_UNLIKELY(!mempool_entries)) { + rspamd_mempool_entries_ctor(); + } + + k = kh_get(mempool_entry, mempool_entries, loc); + + if (k != kh_end(mempool_entries)) { + elt = kh_value(mempool_entries, k); + + return elt; + } + + return rspamd_mempool_entry_new(loc); +} + +static struct _pool_chain * +rspamd_mempool_chain_new(gsize size, gsize alignment, enum rspamd_mempool_chain_type pool_type) +{ + struct _pool_chain *chain; + gsize total_size = size + sizeof(struct _pool_chain) + alignment, + optimal_size = 0; + gpointer map; + + g_assert(size > 0); + + if (pool_type == RSPAMD_MEMPOOL_SHARED) { +#if defined(HAVE_MMAP_ANON) + map = mmap(NULL, + total_size, 
+ PROT_READ | PROT_WRITE, + MAP_ANON | MAP_SHARED, + -1, + 0); + if (map == MAP_FAILED) { + g_error("%s: failed to allocate %" G_GSIZE_FORMAT " bytes", + G_STRLOC, total_size); + abort(); + } + chain = map; + chain->begin = ((guint8 *) chain) + sizeof(struct _pool_chain); +#elif defined(HAVE_MMAP_ZERO) + gint fd; + + fd = open("/dev/zero", O_RDWR); + if (fd == -1) { + return NULL; + } + map = mmap(NULL, + size + sizeof(struct _pool_chain), + PROT_READ | PROT_WRITE, + MAP_SHARED, + fd, + 0); + if (map == MAP_FAILED) { + msg_err("cannot allocate %z bytes, aborting", size + + sizeof(struct _pool_chain)); + abort(); + } + chain = map; + chain->begin = ((guint8 *) chain) + sizeof(struct _pool_chain); +#else +#error No mmap methods are defined +#endif + g_atomic_int_inc(&mem_pool_stat->shared_chunks_allocated); + g_atomic_int_add(&mem_pool_stat->bytes_allocated, total_size); + } + else { +#ifdef HAVE_MALLOC_SIZE + optimal_size = sys_alloc_size(total_size); +#endif + total_size = MAX(total_size, optimal_size); + gint ret = posix_memalign(&map, alignment, total_size); + + if (ret != 0 || map == NULL) { + g_error("%s: failed to allocate %" G_GSIZE_FORMAT " bytes: %d - %s", + G_STRLOC, total_size, ret, strerror(errno)); + abort(); + } + + chain = map; + chain->begin = ((guint8 *) chain) + sizeof(struct _pool_chain); + g_atomic_int_add(&mem_pool_stat->bytes_allocated, total_size); + g_atomic_int_inc(&mem_pool_stat->chunks_allocated); + } + + chain->pos = align_ptr(chain->begin, alignment); + chain->slice_size = total_size - sizeof(struct _pool_chain); + + return chain; +} + + +/** + * Get the current pool of the specified type, creating the corresponding + * array if it's absent + * @param pool + * @param pool_type + * @return + */ +static struct _pool_chain * +rspamd_mempool_get_chain(rspamd_mempool_t *pool, + enum rspamd_mempool_chain_type pool_type) +{ + g_assert(pool_type >= 0 && pool_type < RSPAMD_MEMPOOL_MAX); + + return pool->priv->pools[pool_type]; +} + +static void 
+rspamd_mempool_append_chain(rspamd_mempool_t *pool, + struct _pool_chain *chain, + enum rspamd_mempool_chain_type pool_type) +{ + g_assert(pool_type >= 0 && pool_type < RSPAMD_MEMPOOL_MAX); + g_assert(chain != NULL); + + LL_PREPEND(pool->priv->pools[pool_type], chain); +} + +/** + * Allocate new memory poll + * @param size size of pool's page + * @return new memory pool object + */ +rspamd_mempool_t * +rspamd_mempool_new_(gsize size, const gchar *tag, gint flags, const gchar *loc) +{ + rspamd_mempool_t *new_pool; + gpointer map; + + /* Allocate statistic structure if it is not allocated before */ + if (mem_pool_stat == NULL) { +#if defined(HAVE_MMAP_ANON) + map = mmap(NULL, + sizeof(rspamd_mempool_stat_t), + PROT_READ | PROT_WRITE, + MAP_ANON | MAP_SHARED, + -1, + 0); + if (map == MAP_FAILED) { + msg_err("cannot allocate %z bytes, aborting", + sizeof(rspamd_mempool_stat_t)); + abort(); + } + mem_pool_stat = (rspamd_mempool_stat_t *) map; +#elif defined(HAVE_MMAP_ZERO) + gint fd; + + fd = open("/dev/zero", O_RDWR); + g_assert(fd != -1); + map = mmap(NULL, + sizeof(rspamd_mempool_stat_t), + PROT_READ | PROT_WRITE, + MAP_SHARED, + fd, + 0); + if (map == MAP_FAILED) { + msg_err("cannot allocate %z bytes, aborting", + sizeof(rspamd_mempool_stat_t)); + abort(); + } + mem_pool_stat = (rspamd_mempool_stat_t *) map; +#else +#error No mmap methods are defined +#endif + memset(map, 0, sizeof(rspamd_mempool_stat_t)); + } + + if (!env_checked) { + /* Check G_SLICE=always-malloc to allow memory pool debug */ + const char *g_slice; + + g_slice = getenv("VALGRIND"); + if (g_slice != NULL) { + always_malloc = TRUE; + } + env_checked = TRUE; + } + + struct rspamd_mempool_entry_point *entry = rspamd_mempool_get_entry(loc); + gsize total_size; + + if (size == 0 && entry) { + size = entry->cur_suggestion; + } + + total_size = sizeof(rspamd_mempool_t) + + sizeof(struct rspamd_mempool_specific) + + MIN_MEM_ALIGNMENT + + sizeof(struct _pool_chain) + + size; + + if (G_UNLIKELY(flags & 
RSPAMD_MEMPOOL_DEBUG)) { + total_size += sizeof(GHashTable *); + } + /* + * Memory layout: + * struct rspamd_mempool_t + * <optional debug hash table> + * struct rspamd_mempool_specific + * struct _pool_chain + * alignment (if needed) + * memory chunk + */ + guchar *mem_chunk; + gint ret = posix_memalign((void **) &mem_chunk, MIN_MEM_ALIGNMENT, + total_size); + gsize priv_offset; + + if (ret != 0 || mem_chunk == NULL) { + g_error("%s: failed to allocate %" G_GSIZE_FORMAT " bytes: %d - %s", + G_STRLOC, total_size, ret, strerror(errno)); + abort(); + } + + /* Set memory layout */ + new_pool = (rspamd_mempool_t *) mem_chunk; + if (G_UNLIKELY(flags & RSPAMD_MEMPOOL_DEBUG)) { + /* Allocate debug table */ + GHashTable *debug_tbl; + + debug_tbl = g_hash_table_new(rspamd_str_hash, rspamd_str_equal); + memcpy(mem_chunk + sizeof(rspamd_mempool_t), &debug_tbl, + sizeof(GHashTable *)); + priv_offset = sizeof(rspamd_mempool_t) + sizeof(GHashTable *); + } + else { + priv_offset = sizeof(rspamd_mempool_t); + } + + new_pool->priv = (struct rspamd_mempool_specific *) (mem_chunk + + priv_offset); + /* Zero memory for specific and for the first chain */ + memset(new_pool->priv, 0, sizeof(struct rspamd_mempool_specific) + sizeof(struct _pool_chain)); + + new_pool->priv->entry = entry; + new_pool->priv->elt_len = size; + new_pool->priv->flags = flags; + + if (tag) { + rspamd_strlcpy(new_pool->tag.tagname, tag, sizeof(new_pool->tag.tagname)); + } + else { + new_pool->tag.tagname[0] = '\0'; + } + + /* Generate new uid */ + uint64_t uid = rspamd_random_uint64_fast(); + rspamd_encode_hex_buf((unsigned char *) &uid, sizeof(uid), + new_pool->tag.uid, sizeof(new_pool->tag.uid) - 1); + new_pool->tag.uid[sizeof(new_pool->tag.uid) - 1] = '\0'; + + mem_pool_stat->pools_allocated++; + + /* Now we can attach one chunk to speed up simple allocations */ + struct _pool_chain *nchain; + + nchain = (struct _pool_chain *) (mem_chunk + + priv_offset + + sizeof(struct rspamd_mempool_specific)); + + guchar 
*unaligned = mem_chunk + + priv_offset + + sizeof(struct rspamd_mempool_specific) + + sizeof(struct _pool_chain); + + nchain->slice_size = size; + nchain->begin = unaligned; + nchain->slice_size = size; + nchain->pos = align_ptr(unaligned, MIN_MEM_ALIGNMENT); + new_pool->priv->pools[RSPAMD_MEMPOOL_NORMAL] = nchain; + new_pool->priv->used_memory = size; + + /* Adjust stats */ + g_atomic_int_add(&mem_pool_stat->bytes_allocated, + (gint) size); + g_atomic_int_add(&mem_pool_stat->chunks_allocated, 1); + + return new_pool; +} + +static void * +memory_pool_alloc_common(rspamd_mempool_t *pool, gsize size, gsize alignment, + enum rspamd_mempool_chain_type pool_type, + const gchar *loc) + RSPAMD_ATTR_ALLOC_SIZE(2) RSPAMD_ATTR_ALLOC_ALIGN(MIN_MEM_ALIGNMENT) RSPAMD_ATTR_RETURNS_NONNUL; + + +void rspamd_mempool_notify_alloc_(rspamd_mempool_t *pool, gsize size, const gchar *loc) +{ + if (pool && G_UNLIKELY(pool->priv->flags & RSPAMD_MEMPOOL_DEBUG)) { + GHashTable *debug_tbl = *(GHashTable **) (((guchar *) pool + sizeof(*pool))); + gpointer ptr; + + ptr = g_hash_table_lookup(debug_tbl, loc); + + if (ptr) { + ptr = GSIZE_TO_POINTER(GPOINTER_TO_SIZE(ptr) + size); + } + else { + ptr = GSIZE_TO_POINTER(size); + } + + g_hash_table_insert(debug_tbl, (gpointer) loc, ptr); + } +} + +static void * +memory_pool_alloc_common(rspamd_mempool_t *pool, gsize size, gsize alignment, + enum rspamd_mempool_chain_type pool_type, const gchar *loc) +{ + guint8 *tmp; + struct _pool_chain *new, *cur; + gsize free = 0; + + if (pool) { + POOL_MTX_LOCK(); + pool->priv->used_memory += size; + + if (G_UNLIKELY(pool->priv->flags & RSPAMD_MEMPOOL_DEBUG)) { + rspamd_mempool_notify_alloc_(pool, size, loc); + } + + if (always_malloc && pool_type != RSPAMD_MEMPOOL_SHARED) { + void *ptr; + + if (alignment <= G_MEM_ALIGN) { + ptr = g_malloc(size); + } + else { + ptr = g_malloc(size + alignment); + ptr = align_ptr(ptr, alignment); + } + POOL_MTX_UNLOCK(); + + if (pool->priv->trash_stack == NULL) { + 
pool->priv->trash_stack = g_ptr_array_sized_new(128); + } + + g_ptr_array_add(pool->priv->trash_stack, ptr); + + return ptr; + } + + cur = rspamd_mempool_get_chain(pool, pool_type); + + /* Find free space in pool chain */ + if (cur) { + free = pool_chain_free(cur); + } + + if (cur == NULL || free < size + alignment) { + if (free < size) { + pool->priv->wasted_memory += free; + } + + /* Allocate new chain element */ + if (pool->priv->elt_len >= size + alignment) { + pool->priv->entry->elts[pool->priv->entry->cur_elts].fragmentation += size; + new = rspamd_mempool_chain_new(pool->priv->elt_len, alignment, + pool_type); + } + else { + mem_pool_stat->oversized_chunks++; + g_atomic_int_add(&mem_pool_stat->fragmented_size, + free); + pool->priv->entry->elts[pool->priv->entry->cur_elts].fragmentation += free; + new = rspamd_mempool_chain_new(size + pool->priv->elt_len, alignment, + pool_type); + } + + /* Connect to pool subsystem */ + rspamd_mempool_append_chain(pool, new, pool_type); + /* No need to align again, aligned by rspamd_mempool_chain_new */ + tmp = new->pos; + new->pos = tmp + size; + POOL_MTX_UNLOCK(); + + return tmp; + } + + /* No need to allocate page */ + tmp = align_ptr(cur->pos, alignment); + cur->pos = tmp + size; + POOL_MTX_UNLOCK(); + + return tmp; + } + + abort(); +} + + +void * +rspamd_mempool_alloc_(rspamd_mempool_t *pool, gsize size, gsize alignment, const gchar *loc) +{ + return memory_pool_alloc_common(pool, size, alignment, RSPAMD_MEMPOOL_NORMAL, loc); +} + +/* + * This is sqrt(SIZE_MAX+1), as s1*s2 <= SIZE_MAX + * if both s1 < MUL_NO_OVERFLOW and s2 < MUL_NO_OVERFLOW + */ +#define MUL_NO_OVERFLOW (1UL << (sizeof(gsize) * 4)) + +void * +rspamd_mempool_alloc_array_(rspamd_mempool_t *pool, gsize nmemb, gsize size, gsize alignment, const gchar *loc) +{ + if ((nmemb >= MUL_NO_OVERFLOW || size >= MUL_NO_OVERFLOW) && + nmemb > 0 && G_MAXSIZE / nmemb < size) { + + g_error("alloc_array: overflow %" G_GSIZE_FORMAT " * %" G_GSIZE_FORMAT "", + nmemb, 
size); + g_abort(); + } + return memory_pool_alloc_common(pool, size * nmemb, alignment, RSPAMD_MEMPOOL_NORMAL, loc); +} + +void * +rspamd_mempool_alloc0_(rspamd_mempool_t *pool, gsize size, gsize alignment, const gchar *loc) +{ + void *pointer = rspamd_mempool_alloc_(pool, size, alignment, loc); + memset(pointer, 0, size); + + return pointer; +} +void * +rspamd_mempool_alloc0_shared_(rspamd_mempool_t *pool, gsize size, gsize alignment, const gchar *loc) +{ + void *pointer = rspamd_mempool_alloc_shared_(pool, size, alignment, loc); + + memset(pointer, 0, size); + return pointer; +} + +void * +rspamd_mempool_alloc_shared_(rspamd_mempool_t *pool, gsize size, gsize alignment, const gchar *loc) +{ + return memory_pool_alloc_common(pool, size, alignment, RSPAMD_MEMPOOL_SHARED, loc); +} + + +gchar * +rspamd_mempool_strdup_(rspamd_mempool_t *pool, const gchar *src, const gchar *loc) +{ + if (src == NULL) { + return NULL; + } + return rspamd_mempool_strdup_len_(pool, src, strlen(src), loc); +} + +gchar * +rspamd_mempool_strdup_len_(rspamd_mempool_t *pool, const gchar *src, gsize len, const gchar *loc) +{ + gchar *newstr; + + if (src == NULL) { + return NULL; + } + + newstr = rspamd_mempool_alloc_(pool, len + 1, MIN_MEM_ALIGNMENT, loc); + memcpy(newstr, src, len); + newstr[len] = '\0'; + + return newstr; +} + +gchar * +rspamd_mempool_ftokdup_(rspamd_mempool_t *pool, const rspamd_ftok_t *src, + const gchar *loc) +{ + gchar *newstr; + + if (src == NULL) { + return NULL; + } + + newstr = rspamd_mempool_alloc_(pool, src->len + 1, MIN_MEM_ALIGNMENT, loc); + memcpy(newstr, src->begin, src->len); + newstr[src->len] = '\0'; + + return newstr; +} + +void rspamd_mempool_add_destructor_full(rspamd_mempool_t *pool, + rspamd_mempool_destruct_t func, + void *data, + const gchar *function, + const gchar *line) +{ + struct _pool_destructors *cur; + + POOL_MTX_LOCK(); + cur = rspamd_mempool_alloc_(pool, sizeof(*cur), + RSPAMD_ALIGNOF(struct _pool_destructors), line); + cur->func = func; + 
cur->data = data; + cur->function = function; + cur->loc = line; + cur->next = NULL; + + if (pool->priv->dtors_tail) { + pool->priv->dtors_tail->next = cur; + pool->priv->dtors_tail = cur; + } + else { + pool->priv->dtors_head = cur; + pool->priv->dtors_tail = cur; + } + + POOL_MTX_UNLOCK(); +} + +void rspamd_mempool_replace_destructor(rspamd_mempool_t *pool, + rspamd_mempool_destruct_t func, + void *old_data, + void *new_data) +{ + struct _pool_destructors *tmp; + + LL_FOREACH(pool->priv->dtors_head, tmp) + { + if (tmp->func == func && tmp->data == old_data) { + tmp->func = func; + tmp->data = new_data; + break; + } + } +} + +static gint +cmp_int(gconstpointer a, gconstpointer b) +{ + gint i1 = *(const gint *) a, i2 = *(const gint *) b; + + return i1 - i2; +} + +static void +rspamd_mempool_adjust_entry(struct rspamd_mempool_entry_point *e) +{ + gint sz[G_N_ELEMENTS(e->elts)], sel_pos, sel_neg; + guint i, jitter; + + for (i = 0; i < G_N_ELEMENTS(sz); i++) { + sz[i] = e->elts[i].fragmentation - (gint) e->elts[i].leftover; + } + + qsort(sz, G_N_ELEMENTS(sz), sizeof(gint), cmp_int); + jitter = rspamd_random_uint64_fast() % 10; + /* + * Take stochastic quantiles + */ + sel_pos = sz[50 + jitter]; + sel_neg = sz[4 + jitter]; + + if (-sel_neg > sel_pos) { + /* We need to reduce current suggestion */ + e->cur_suggestion /= (1 + (((double) -sel_neg) / e->cur_suggestion)) * 1.5; + } + else { + /* We still want to grow */ + e->cur_suggestion *= (1 + (((double) sel_pos) / e->cur_suggestion)) * 1.5; + } + + /* Some sane limits counting mempool architecture */ + if (e->cur_suggestion < 1024) { + e->cur_suggestion = 1024; + } + else if (e->cur_suggestion > 1024 * 1024 * 10) { + e->cur_suggestion = 1024 * 1024 * 10; + } + + memset(e->elts, 0, sizeof(e->elts)); +} + +static void +rspamd_mempool_variables_cleanup(rspamd_mempool_t *pool) +{ + if (pool->priv->variables) { + struct rspamd_mempool_variable *var; + kh_foreach_value_ptr(pool->priv->variables, var, { + if (var->dtor) { + 
var->dtor(var->data); + } + }); + + if (pool->priv->entry && pool->priv->entry->cur_vars < + kh_size(pool->priv->variables)) { + /* + * Increase preallocated size in two cases: + * 1) Our previous guess was zero + * 2) Our new variables count is not more than twice larger than + * previous count + * 3) Our variables count is less than some hard limit + */ + static const guint max_preallocated_vars = 512; + + guint cur_size = kh_size(pool->priv->variables); + guint old_guess = pool->priv->entry->cur_vars; + guint new_guess; + + if (old_guess == 0) { + new_guess = MIN(cur_size, max_preallocated_vars); + } + else { + if (old_guess * 2 < cur_size) { + new_guess = MIN(cur_size, max_preallocated_vars); + } + else { + /* Too large step */ + new_guess = MIN(old_guess * 2, max_preallocated_vars); + } + } + + pool->priv->entry->cur_vars = new_guess; + } + + kh_destroy(rspamd_mempool_vars_hash, pool->priv->variables); + pool->priv->variables = NULL; + } +} + +void rspamd_mempool_destructors_enforce(rspamd_mempool_t *pool) +{ + struct _pool_destructors *destructor; + + POOL_MTX_LOCK(); + + LL_FOREACH(pool->priv->dtors_head, destructor) + { + /* Avoid calling destructors for NULL pointers */ + if (destructor->data != NULL) { + destructor->func(destructor->data); + } + } + + pool->priv->dtors_head = pool->priv->dtors_tail = NULL; + + rspamd_mempool_variables_cleanup(pool); + + POOL_MTX_UNLOCK(); +} + +struct mempool_debug_elt { + gsize sz; + const gchar *loc; +}; + +static gint +rspamd_mempool_debug_elt_cmp(const void *a, const void *b) +{ + const struct mempool_debug_elt *e1 = a, *e2 = b; + + /* Inverse order */ + return (gint) ((gssize) e2->sz) - ((gssize) e1->sz); +} + +void rspamd_mempool_delete(rspamd_mempool_t *pool) +{ + struct _pool_chain *cur, *tmp; + struct _pool_destructors *destructor; + gpointer ptr; + guint i; + gsize len; + + POOL_MTX_LOCK(); + + cur = pool->priv->pools[RSPAMD_MEMPOOL_NORMAL]; + + if (G_UNLIKELY(pool->priv->flags & RSPAMD_MEMPOOL_DEBUG)) { + 
GHashTable *debug_tbl = *(GHashTable **) (((guchar *) pool) + sizeof(*pool)); + /* Show debug info */ + gsize ndtor = 0; + LL_COUNT(pool->priv->dtors_head, destructor, ndtor); + msg_info_pool("destructing of the memory pool %p; elt size = %z; " + "used memory = %Hz; wasted memory = %Hd; " + "vars = %z; destructors = %z", + pool, + pool->priv->elt_len, + pool->priv->used_memory, + pool->priv->wasted_memory, + pool->priv->variables ? (gsize) kh_size(pool->priv->variables) : (gsize) 0, + ndtor); + + GHashTableIter it; + gpointer k, v; + GArray *sorted_debug_size = g_array_sized_new(FALSE, FALSE, + sizeof(struct mempool_debug_elt), + g_hash_table_size(debug_tbl)); + + g_hash_table_iter_init(&it, debug_tbl); + + while (g_hash_table_iter_next(&it, &k, &v)) { + struct mempool_debug_elt e; + e.loc = (const gchar *) k; + e.sz = GPOINTER_TO_SIZE(v); + g_array_append_val(sorted_debug_size, e); + } + + g_array_sort(sorted_debug_size, rspamd_mempool_debug_elt_cmp); + + for (guint _i = 0; _i < sorted_debug_size->len; _i++) { + struct mempool_debug_elt *e; + + e = &g_array_index(sorted_debug_size, struct mempool_debug_elt, _i); + msg_info_pool("allocated %Hz from %s", e->sz, e->loc); + } + + g_array_free(sorted_debug_size, TRUE); + g_hash_table_unref(debug_tbl); + } + + if (cur && mempool_entries) { + pool->priv->entry->elts[pool->priv->entry->cur_elts].leftover = + pool_chain_free(cur); + + pool->priv->entry->cur_elts = (pool->priv->entry->cur_elts + 1) % + G_N_ELEMENTS(pool->priv->entry->elts); + + if (pool->priv->entry->cur_elts == 0) { + rspamd_mempool_adjust_entry(pool->priv->entry); + } + } + + /* Call all pool destructors */ + LL_FOREACH(pool->priv->dtors_head, destructor) + { + /* Avoid calling destructors for NULL pointers */ + if (destructor->data != NULL) { + destructor->func(destructor->data); + } + } + + rspamd_mempool_variables_cleanup(pool); + + if (pool->priv->trash_stack) { + for (i = 0; i < pool->priv->trash_stack->len; i++) { + ptr = 
g_ptr_array_index(pool->priv->trash_stack, i); + g_free(ptr); + } + + g_ptr_array_free(pool->priv->trash_stack, TRUE); + } + + for (i = 0; i < G_N_ELEMENTS(pool->priv->pools); i++) { + if (pool->priv->pools[i]) { + LL_FOREACH_SAFE(pool->priv->pools[i], cur, tmp) + { + g_atomic_int_add(&mem_pool_stat->bytes_allocated, + -((gint) cur->slice_size)); + g_atomic_int_add(&mem_pool_stat->chunks_allocated, -1); + + len = cur->slice_size + sizeof(struct _pool_chain); + + if (i == RSPAMD_MEMPOOL_SHARED) { + munmap((void *) cur, len); + } + else { + /* The last pool is special, it is a part of the initial chunk */ + if (cur->next != NULL) { + free(cur); /* Not g_free as we use system allocator */ + } + } + } + } + } + + g_atomic_int_inc(&mem_pool_stat->pools_freed); + POOL_MTX_UNLOCK(); + free(pool); /* allocated by posix_memalign */ +} + +void rspamd_mempool_stat(rspamd_mempool_stat_t *st) +{ + if (mem_pool_stat != NULL) { + st->pools_allocated = mem_pool_stat->pools_allocated; + st->pools_freed = mem_pool_stat->pools_freed; + st->shared_chunks_allocated = mem_pool_stat->shared_chunks_allocated; + st->bytes_allocated = mem_pool_stat->bytes_allocated; + st->chunks_allocated = mem_pool_stat->chunks_allocated; + st->chunks_freed = mem_pool_stat->chunks_freed; + st->oversized_chunks = mem_pool_stat->oversized_chunks; + } +} + +void rspamd_mempool_stat_reset(void) +{ + if (mem_pool_stat != NULL) { + memset(mem_pool_stat, 0, sizeof(rspamd_mempool_stat_t)); + } +} + +gsize rspamd_mempool_suggest_size_(const char *loc) +{ + return 0; +} + +#if !defined(HAVE_PTHREAD_PROCESS_SHARED) || defined(DISABLE_PTHREAD_MUTEX) +/* + * Own emulation + */ +static inline gint +__mutex_spin(rspamd_mempool_mutex_t *mutex) +{ + /* check spin count */ + if (g_atomic_int_dec_and_test(&mutex->spin)) { + /* This may be deadlock, so check owner of this lock */ + if (mutex->owner == getpid()) { + /* This mutex was locked by calling process, so it is just double lock and we can easily unlock it */ + 
g_atomic_int_set(&mutex->spin, MUTEX_SPIN_COUNT); + return 0; + } + else if (kill(mutex->owner, 0) == -1) { + /* Owner process was not found, so release lock */ + g_atomic_int_set(&mutex->spin, MUTEX_SPIN_COUNT); + return 0; + } + /* Spin again */ + g_atomic_int_set(&mutex->spin, MUTEX_SPIN_COUNT); + } + +#ifdef HAVE_SCHED_YIELD + (void) sched_yield(); +#elif defined(HAVE_NANOSLEEP) + struct timespec ts; + ts.tv_sec = 0; + ts.tv_nsec = MUTEX_SLEEP_TIME; + /* Spin */ + while (nanosleep(&ts, &ts) == -1 && errno == EINTR) + ; +#else +#error No methods to spin are defined +#endif + return 1; +} + +static void +memory_pool_mutex_spin(rspamd_mempool_mutex_t *mutex) +{ + while (!g_atomic_int_compare_and_exchange(&mutex->lock, 0, 1)) { + if (!__mutex_spin(mutex)) { + return; + } + } +} + +rspamd_mempool_mutex_t * +rspamd_mempool_get_mutex(rspamd_mempool_t *pool) +{ + rspamd_mempool_mutex_t *res; + if (pool != NULL) { + res = + rspamd_mempool_alloc_shared(pool, sizeof(rspamd_mempool_mutex_t)); + res->lock = 0; + res->owner = 0; + res->spin = MUTEX_SPIN_COUNT; + return res; + } + return NULL; +} + +void rspamd_mempool_lock_mutex(rspamd_mempool_mutex_t *mutex) +{ + memory_pool_mutex_spin(mutex); + mutex->owner = getpid(); +} + +void rspamd_mempool_unlock_mutex(rspamd_mempool_mutex_t *mutex) +{ + mutex->owner = 0; + (void) g_atomic_int_compare_and_exchange(&mutex->lock, 1, 0); +} + +rspamd_mempool_rwlock_t * +rspamd_mempool_get_rwlock(rspamd_mempool_t *pool) +{ + rspamd_mempool_rwlock_t *lock; + + lock = rspamd_mempool_alloc_shared(pool, sizeof(rspamd_mempool_rwlock_t)); + lock->__r_lock = rspamd_mempool_get_mutex(pool); + lock->__w_lock = rspamd_mempool_get_mutex(pool); + + return lock; +} + +void rspamd_mempool_rlock_rwlock(rspamd_mempool_rwlock_t *lock) +{ + /* Spin on write lock */ + while (g_atomic_int_get(&lock->__w_lock->lock)) { + if (!__mutex_spin(lock->__w_lock)) { + break; + } + } + + g_atomic_int_inc(&lock->__r_lock->lock); + lock->__r_lock->owner = getpid(); +} + 
+void rspamd_mempool_wlock_rwlock(rspamd_mempool_rwlock_t *lock) +{ + /* Spin on write lock first */ + rspamd_mempool_lock_mutex(lock->__w_lock); + /* Now we have write lock set up */ + /* Wait all readers */ + while (g_atomic_int_get(&lock->__r_lock->lock)) { + __mutex_spin(lock->__r_lock); + } +} + +void rspamd_mempool_runlock_rwlock(rspamd_mempool_rwlock_t *lock) +{ + if (g_atomic_int_get(&lock->__r_lock->lock)) { + (void) g_atomic_int_dec_and_test(&lock->__r_lock->lock); + } +} + +void rspamd_mempool_wunlock_rwlock(rspamd_mempool_rwlock_t *lock) +{ + rspamd_mempool_unlock_mutex(lock->__w_lock); +} +#else + +/* + * Pthread bases shared mutexes + */ +rspamd_mempool_mutex_t * +rspamd_mempool_get_mutex(rspamd_mempool_t *pool) +{ + rspamd_mempool_mutex_t *res; + pthread_mutexattr_t mattr; + + if (pool != NULL) { + res = + rspamd_mempool_alloc_shared(pool, sizeof(rspamd_mempool_mutex_t)); + + pthread_mutexattr_init(&mattr); + pthread_mutexattr_setpshared(&mattr, PTHREAD_PROCESS_SHARED); + pthread_mutexattr_setrobust(&mattr, PTHREAD_MUTEX_ROBUST); + pthread_mutex_init(res, &mattr); + rspamd_mempool_add_destructor(pool, + (rspamd_mempool_destruct_t) pthread_mutex_destroy, res); + pthread_mutexattr_destroy(&mattr); + + return res; + } + return NULL; +} + +void rspamd_mempool_lock_mutex(rspamd_mempool_mutex_t *mutex) +{ + pthread_mutex_lock(mutex); +} + +void rspamd_mempool_unlock_mutex(rspamd_mempool_mutex_t *mutex) +{ + pthread_mutex_unlock(mutex); +} + +rspamd_mempool_rwlock_t * +rspamd_mempool_get_rwlock(rspamd_mempool_t *pool) +{ + rspamd_mempool_rwlock_t *res; + pthread_rwlockattr_t mattr; + + if (pool != NULL) { + res = + rspamd_mempool_alloc_shared(pool, sizeof(rspamd_mempool_rwlock_t)); + + pthread_rwlockattr_init(&mattr); + pthread_rwlockattr_setpshared(&mattr, PTHREAD_PROCESS_SHARED); + pthread_rwlock_init(res, &mattr); + rspamd_mempool_add_destructor(pool, + (rspamd_mempool_destruct_t) pthread_rwlock_destroy, res); + pthread_rwlockattr_destroy(&mattr); + + 
return res; + } + return NULL; +} + +void rspamd_mempool_rlock_rwlock(rspamd_mempool_rwlock_t *lock) +{ + pthread_rwlock_rdlock(lock); +} + +void rspamd_mempool_wlock_rwlock(rspamd_mempool_rwlock_t *lock) +{ + pthread_rwlock_wrlock(lock); +} + +void rspamd_mempool_runlock_rwlock(rspamd_mempool_rwlock_t *lock) +{ + pthread_rwlock_unlock(lock); +} + +void rspamd_mempool_wunlock_rwlock(rspamd_mempool_rwlock_t *lock) +{ + pthread_rwlock_unlock(lock); +} +#endif + +#define RSPAMD_MEMPOOL_VARS_HASH_SEED 0xb32ad7c55eb2e647ULL +void rspamd_mempool_set_variable(rspamd_mempool_t *pool, + const gchar *name, + gpointer value, + rspamd_mempool_destruct_t destructor) +{ + if (pool->priv->variables == NULL) { + + pool->priv->variables = kh_init(rspamd_mempool_vars_hash); + + if (pool->priv->entry->cur_vars > 0) { + /* Preallocate */ + kh_resize(rspamd_mempool_vars_hash, + pool->priv->variables, + pool->priv->entry->cur_vars); + } + } + + gint hv = rspamd_cryptobox_fast_hash(name, strlen(name), + RSPAMD_MEMPOOL_VARS_HASH_SEED); + khiter_t it; + gint r; + + it = kh_put(rspamd_mempool_vars_hash, pool->priv->variables, hv, &r); + + if (it == kh_end(pool->priv->variables)) { + g_assert_not_reached(); + } + else { + struct rspamd_mempool_variable *pvar; + + if (r == 0) { + /* Existing entry, maybe need cleanup */ + pvar = &kh_val(pool->priv->variables, it); + + if (pvar->dtor) { + pvar->dtor(pvar->data); + } + } + + pvar = &kh_val(pool->priv->variables, it); + pvar->data = value; + pvar->dtor = destructor; + } +} + +gpointer +rspamd_mempool_get_variable(rspamd_mempool_t *pool, const gchar *name) +{ + if (pool->priv->variables == NULL) { + return NULL; + } + + khiter_t it; + gint hv = rspamd_cryptobox_fast_hash(name, strlen(name), + RSPAMD_MEMPOOL_VARS_HASH_SEED); + + it = kh_get(rspamd_mempool_vars_hash, pool->priv->variables, hv); + + if (it != kh_end(pool->priv->variables)) { + struct rspamd_mempool_variable *pvar; + + pvar = &kh_val(pool->priv->variables, it); + return pvar->data; + 
} + + return NULL; +} + +gpointer +rspamd_mempool_steal_variable(rspamd_mempool_t *pool, const gchar *name) +{ + if (pool->priv->variables == NULL) { + return NULL; + } + + khiter_t it; + gint hv = rspamd_cryptobox_fast_hash(name, strlen(name), + RSPAMD_MEMPOOL_VARS_HASH_SEED); + + it = kh_get(rspamd_mempool_vars_hash, pool->priv->variables, hv); + + if (it != kh_end(pool->priv->variables)) { + struct rspamd_mempool_variable *pvar; + + pvar = &kh_val(pool->priv->variables, it); + kh_del(rspamd_mempool_vars_hash, pool->priv->variables, it); + + return pvar->data; + } + + return NULL; +} + +void rspamd_mempool_remove_variable(rspamd_mempool_t *pool, const gchar *name) +{ + if (pool->priv->variables != NULL) { + khiter_t it; + gint hv = rspamd_cryptobox_fast_hash(name, strlen(name), + RSPAMD_MEMPOOL_VARS_HASH_SEED); + + it = kh_get(rspamd_mempool_vars_hash, pool->priv->variables, hv); + + if (it != kh_end(pool->priv->variables)) { + struct rspamd_mempool_variable *pvar; + + pvar = &kh_val(pool->priv->variables, it); + + if (pvar->dtor) { + pvar->dtor(pvar->data); + } + + kh_del(rspamd_mempool_vars_hash, pool->priv->variables, it); + } + } +} + +GList * +rspamd_mempool_glist_prepend(rspamd_mempool_t *pool, GList *l, gpointer p) +{ + GList *cell; + + cell = rspamd_mempool_alloc(pool, sizeof(*cell)); + cell->prev = NULL; + cell->data = p; + + if (l == NULL) { + cell->next = NULL; + } + else { + cell->next = l; + l->prev = cell; + } + + return cell; +} + +GList * +rspamd_mempool_glist_append(rspamd_mempool_t *pool, GList *l, gpointer p) +{ + GList *cell, *cur; + + cell = rspamd_mempool_alloc(pool, sizeof(*cell)); + cell->next = NULL; + cell->data = p; + + if (l) { + for (cur = l; cur->next != NULL; cur = cur->next) {} + cur->next = cell; + cell->prev = cur; + } + else { + l = cell; + l->prev = NULL; + } + + return l; +} + +gsize rspamd_mempool_get_used_size(rspamd_mempool_t *pool) +{ + return pool->priv->used_memory; +} + +gsize 
rspamd_mempool_get_wasted_size(rspamd_mempool_t *pool) +{ + return pool->priv->wasted_memory; +} diff --git a/src/libutil/mem_pool.h b/src/libutil/mem_pool.h new file mode 100644 index 0000000..de0fea1 --- /dev/null +++ b/src/libutil/mem_pool.h @@ -0,0 +1,470 @@ +/* + * Copyright 2023 Vsevolod Stakhov + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +/** + * @file mem_pool.h + * \brief Memory pools library. + * + * Memory pools library. Library is designed to implement efficient way to + * store data in memory avoiding calling of many malloc/free. 
It has overhead + * because of fact that objects live in pool for rather long time and are not freed + * immediately after use, but if we know certainly when these objects can be used, we + * can use pool for them + */ + +#ifndef RSPAMD_MEM_POOL_H +#define RSPAMD_MEM_POOL_H + +#include "config.h" + + +#if defined(HAVE_PTHREAD_PROCESS_SHARED) && !defined(DISABLE_PTHREAD_MUTEX) +#include <pthread.h> +#endif + +#ifdef __cplusplus +#define MEMPOOL_STR_FUNC __FUNCTION__ +#else +#define MEMPOOL_STR_FUNC G_STRFUNC +#endif + +#ifdef __cplusplus +extern "C" { +#endif + +struct f_str_s; + +#ifdef __has_attribute +#if __has_attribute(alloc_size) +#define RSPAMD_ATTR_ALLOC_SIZE(pos) __attribute__((alloc_size(pos))) +#else +#define RSPAMD_ATTR_ALLOC_SIZE(pos) +#endif + +#if __has_attribute(assume_aligned) +#define RSPAMD_ATTR_ALLOC_ALIGN(al) __attribute__((assume_aligned(al))) +#else +#define RSPAMD_ATTR_ALLOC_ALIGN(al) +#endif +#if __has_attribute(returns_nonnull) +#define RSPAMD_ATTR_RETURNS_NONNUL __attribute__((returns_nonnull)) +#else +#define RSPAMD_ATTR_RETURNS_NONNUL +#endif +#else +#define RSPAMD_ATTR_ALLOC_SIZE(pos) +#define RSPAMD_ATTR_ALLOC_ALIGN(al) +#define RSPAMD_ATTR_RETURNS_NONNUL +#endif + +#define MEMPOOL_TAG_LEN 16 +#define MEMPOOL_UID_LEN 16 +/* All pointers are aligned as this variable */ +#define MIN_MEM_ALIGNMENT G_MEM_ALIGN + +/** + * Destructor type definition + */ +typedef void (*rspamd_mempool_destruct_t)(void *ptr); + +/** + * Pool mutex structure + */ +#if !defined(HAVE_PTHREAD_PROCESS_SHARED) || defined(DISABLE_PTHREAD_MUTEX) +typedef struct memory_pool_mutex_s { + gint lock; + pid_t owner; + guint spin; +} rspamd_mempool_mutex_t; +/** + * Rwlock for locking shared memory regions + */ +typedef struct memory_pool_rwlock_s { + rspamd_mempool_mutex_t *__r_lock; /**< read mutex (private) */ + rspamd_mempool_mutex_t *__w_lock; /**< write mutex (private) */ +} rspamd_mempool_rwlock_t; +#else +typedef pthread_mutex_t rspamd_mempool_mutex_t; +typedef 
pthread_rwlock_t rspamd_mempool_rwlock_t; +#endif + +/** + * Tag to use for logging purposes + */ +struct rspamd_mempool_tag { + gchar tagname[MEMPOOL_TAG_LEN]; /**< readable name */ + gchar uid[MEMPOOL_UID_LEN]; /**< unique id */ +}; + +enum rspamd_mempool_flags { + RSPAMD_MEMPOOL_DEBUG = (1u << 0u), +}; + +/** + * Memory pool type + */ +struct rspamd_mempool_entry_point; +struct rspamd_mutex_s; +struct rspamd_mempool_specific; +typedef struct memory_pool_s { + struct rspamd_mempool_specific *priv; + struct rspamd_mempool_tag tag; /**< memory pool tag */ +} rspamd_mempool_t; + +/** + * Statistics structure + */ +typedef struct memory_pool_stat_s { + guint pools_allocated; /**< total number of allocated pools */ + guint pools_freed; /**< number of freed pools */ + guint bytes_allocated; /**< bytes that are allocated with pool allocator */ + guint chunks_allocated; /**< number of chunks that are allocated */ + guint shared_chunks_allocated; /**< shared chunks allocated */ + guint chunks_freed; /**< chunks freed */ + guint oversized_chunks; /**< oversized chunks */ + guint fragmented_size; /**< fragmentation size */ +} rspamd_mempool_stat_t; + + +/** + * Allocate new memory poll + * @param size size of pool's page + * @return new memory pool object + */ +rspamd_mempool_t *rspamd_mempool_new_(gsize size, const gchar *tag, gint flags, + const gchar *loc); + +#define rspamd_mempool_new(size, tag, flags) \ + rspamd_mempool_new_((size), (tag), (flags), G_STRLOC) +#define rspamd_mempool_new_default(tag, flags) \ + rspamd_mempool_new_(rspamd_mempool_suggest_size_(G_STRLOC), (tag), (flags), G_STRLOC) + +/** + * Get memory from pool + * @param pool memory pool object + * @param size bytes to allocate + * @return pointer to allocated object + */ +void *rspamd_mempool_alloc_(rspamd_mempool_t *pool, gsize size, gsize alignment, const gchar *loc) + RSPAMD_ATTR_ALLOC_SIZE(2) RSPAMD_ATTR_ALLOC_ALIGN(MIN_MEM_ALIGNMENT) RSPAMD_ATTR_RETURNS_NONNUL; +/** + * Allocates array handling 
potential integer overflow + * @param pool + * @param nmemb + * @param size + * @param alignment + * @param loc + * @return pointer to storage for nmemb elements of size bytes each + */ +void *rspamd_mempool_alloc_array_(rspamd_mempool_t *pool, gsize nmemb, gsize size, gsize alignment, const gchar *loc) + RSPAMD_ATTR_ALLOC_ALIGN(MIN_MEM_ALIGNMENT) RSPAMD_ATTR_RETURNS_NONNUL; /* no alloc_size: a single position would claim only nmemb bytes */ +#define rspamd_mempool_alloc_array(pool, nmemb, size) \ + rspamd_mempool_alloc_array_((pool), (nmemb), (size), MIN_MEM_ALIGNMENT, (G_STRLOC)) +#define rspamd_mempool_alloc_array_type(pool, nmemb, type) \ + (type *) rspamd_mempool_alloc_array_((pool), (nmemb), sizeof(type), MIN_MEM_ALIGNMENT, (G_STRLOC)) +#define rspamd_mempool_alloc_type(pool, type) \ + (type *) (rspamd_mempool_alloc_((pool), sizeof(type), \ + MAX(MIN_MEM_ALIGNMENT, RSPAMD_ALIGNOF(type)), (G_STRLOC))) +#define rspamd_mempool_alloc_buffer(pool, buflen) \ + (char *) (rspamd_mempool_alloc_((pool), sizeof(char) * (buflen), MIN_MEM_ALIGNMENT, (G_STRLOC))) +/** + * Notify external memory usage for memory pool + * @param pool + * @param size + * @param loc + */ +void rspamd_mempool_notify_alloc_(rspamd_mempool_t *pool, gsize size, const gchar *loc); +#define rspamd_mempool_notify_alloc(pool, size) \ + rspamd_mempool_notify_alloc_((pool), (size), (G_STRLOC)) + +/** + * Get memory and set it to zero + * @param pool memory pool object + * @param size bytes to allocate + * @return pointer to allocated object + */ +void *rspamd_mempool_alloc0_(rspamd_mempool_t *pool, gsize size, gsize alignment, const gchar *loc) + RSPAMD_ATTR_ALLOC_SIZE(2) RSPAMD_ATTR_ALLOC_ALIGN(MIN_MEM_ALIGNMENT) RSPAMD_ATTR_RETURNS_NONNUL; +#define rspamd_mempool_alloc0(pool, size) \ + rspamd_mempool_alloc0_((pool), (size), MIN_MEM_ALIGNMENT, (G_STRLOC)) +#define rspamd_mempool_alloc0_type(pool, type) \ + (type *) (rspamd_mempool_alloc0_((pool), sizeof(type), \ + MAX(MIN_MEM_ALIGNMENT, 
RSPAMD_ALIGNOF(type)), (G_STRLOC))) + +/** + * Make a copy of string in pool + * @param pool memory pool object + * @param src source string + * @return pointer to newly created string that is copy of src + */ +gchar *rspamd_mempool_strdup_(rspamd_mempool_t *pool, const gchar *src, const gchar *loc) + RSPAMD_ATTR_ALLOC_ALIGN(MIN_MEM_ALIGNMENT); +#define rspamd_mempool_strdup(pool, src) \ + rspamd_mempool_strdup_((pool), (src), (G_STRLOC)) +gchar *rspamd_mempool_strdup_len_(rspamd_mempool_t *pool, const gchar *src, gsize len, const gchar *loc) + RSPAMD_ATTR_ALLOC_ALIGN(MIN_MEM_ALIGNMENT); +#define rspamd_mempool_strdup_len(pool, src, len) \ + rspamd_mempool_strdup_len_((pool), (src), (len), (G_STRLOC)) + +struct f_str_tok; + +/** + * Make a copy of fixed string token in pool as null terminated string + * @param pool memory pool object + * @param src source string + * @return pointer to newly created string that is copy of src + */ +gchar *rspamd_mempool_ftokdup_(rspamd_mempool_t *pool, + const struct f_str_tok *src, + const gchar *loc) + RSPAMD_ATTR_ALLOC_ALIGN(MIN_MEM_ALIGNMENT); +#define rspamd_mempool_ftokdup(pool, src) \ + rspamd_mempool_ftokdup_((pool), (src), (G_STRLOC)) + +/** + * Allocate piece of shared memory + * @param pool memory pool object + * @param size bytes to allocate + */ +void *rspamd_mempool_alloc_shared_(rspamd_mempool_t *pool, gsize size, gsize alignment, const gchar *loc) + RSPAMD_ATTR_ALLOC_SIZE(2) RSPAMD_ATTR_ALLOC_ALIGN(MIN_MEM_ALIGNMENT) RSPAMD_ATTR_RETURNS_NONNUL; +#define rspamd_mempool_alloc_shared(pool, size) \ + rspamd_mempool_alloc_shared_((pool), (size), MIN_MEM_ALIGNMENT, (G_STRLOC)) +#define rspamd_mempool_alloc_shared_type(pool, type) \ + (type *) (rspamd_mempool_alloc_shared_((pool), sizeof(type), \ + MAX(MIN_MEM_ALIGNMENT, RSPAMD_ALIGNOF(type)), (G_STRLOC))) + +void *rspamd_mempool_alloc0_shared_(rspamd_mempool_t *pool, gsize size, gsize alignment, const gchar *loc) + RSPAMD_ATTR_ALLOC_SIZE(2) 
RSPAMD_ATTR_ALLOC_ALIGN(MIN_MEM_ALIGNMENT) RSPAMD_ATTR_RETURNS_NONNUL; +#define rspamd_mempool_alloc0_shared(pool, size) \ + rspamd_mempool_alloc0_shared_((pool), (size), MIN_MEM_ALIGNMENT, (G_STRLOC)) +#define rspamd_mempool_alloc0_shared_type(pool, type) \ + (type *) (rspamd_mempool_alloc0_shared_((pool), sizeof(type), \ + MAX(MIN_MEM_ALIGNMENT, RSPAMD_ALIGNOF(type)), (G_STRLOC))) + +/** + * Add destructor callback to pool + * @param pool memory pool object + * @param func pointer to function-destructor + * @param data pointer to data that would be passed to destructor + */ +void rspamd_mempool_add_destructor_full(rspamd_mempool_t *pool, + rspamd_mempool_destruct_t func, + void *data, + const gchar *function, + const gchar *line); + +/* Macros for common usage */ +#define rspamd_mempool_add_destructor(pool, func, data) \ + rspamd_mempool_add_destructor_full(pool, func, data, (MEMPOOL_STR_FUNC), (G_STRLOC)) + +/** + * Replace destructor callback to pool for specified pointer + * @param pool memory pool object + * @param func pointer to function-destructor + * @param old_data pointer to old data + * @param new_data pointer to data that would be passed to destructor + */ +void rspamd_mempool_replace_destructor(rspamd_mempool_t *pool, + rspamd_mempool_destruct_t func, + void *old_data, void *new_data); + +/** + * Calls all destructors associated with the specific memory pool without removing + * of the pool itself + * @param pool + */ +void rspamd_mempool_destructors_enforce(rspamd_mempool_t *pool); + +/** + * Delete pool, free all its chunks and call destructors chain + * @param pool memory pool object + */ +void rspamd_mempool_delete(rspamd_mempool_t *pool); + +/** + * Get new mutex from pool (allocated in shared memory) + * @param pool memory pool object + * @return mutex object + */ +rspamd_mempool_mutex_t *rspamd_mempool_get_mutex(rspamd_mempool_t *pool); + +/** + * Lock mutex + * @param mutex mutex to lock + */ +void 
rspamd_mempool_lock_mutex(rspamd_mempool_mutex_t *mutex); + +/** + * Unlock mutex + * @param mutex mutex to unlock + */ +void rspamd_mempool_unlock_mutex(rspamd_mempool_mutex_t *mutex); + +/** + * Create new rwlock and place it in shared memory + * @param pool memory pool object + * @return rwlock object + */ +rspamd_mempool_rwlock_t *rspamd_mempool_get_rwlock(rspamd_mempool_t *pool); + +/** + * Acquire read lock + * @param lock rwlock object + */ +void rspamd_mempool_rlock_rwlock(rspamd_mempool_rwlock_t *lock); + +/** + * Acquire write lock + * @param lock rwlock object + */ +void rspamd_mempool_wlock_rwlock(rspamd_mempool_rwlock_t *lock); + +/** + * Release read lock + * @param lock rwlock object + */ +void rspamd_mempool_runlock_rwlock(rspamd_mempool_rwlock_t *lock); + +/** + * Release write lock + * @param lock rwlock object + */ +void rspamd_mempool_wunlock_rwlock(rspamd_mempool_rwlock_t *lock); + +/** + * Get pool allocator statistics + * @param st stat pool struct + */ +void rspamd_mempool_stat(rspamd_mempool_stat_t *st); + +/** + * Reset memory pool stat + */ +void rspamd_mempool_stat_reset(void); + +/** + * Get optimal pool size based on page size for this system + * @return size of memory page in system + */ +#define rspamd_mempool_suggest_size() rspamd_mempool_suggest_size_(G_STRLOC) + +gsize rspamd_mempool_suggest_size_(const char *loc); + +gsize rspamd_mempool_get_used_size(rspamd_mempool_t *pool); +gsize rspamd_mempool_get_wasted_size(rspamd_mempool_t *pool); + +/** + * Set memory pool variable + * @param pool memory pool object + * @param name name of variable + * @param gpointer value of variable + * @param destructor pointer to function-destructor + */ +void rspamd_mempool_set_variable(rspamd_mempool_t *pool, + const gchar *name, + gpointer value, + rspamd_mempool_destruct_t destructor); + +/** + * Get memory pool variable + * @param pool memory pool object + * @param name name of variable + * @return NULL or pointer to variable data + */ +gpointer 
rspamd_mempool_get_variable(rspamd_mempool_t *pool, + const gchar *name); +/** + * Steal memory pool variable + * @param pool + * @param name + * @return + */ +gpointer rspamd_mempool_steal_variable(rspamd_mempool_t *pool, + const gchar *name); + +/** + * Removes variable from memory pool + * @param pool memory pool object + * @param name name of variable + */ +void rspamd_mempool_remove_variable(rspamd_mempool_t *pool, + const gchar *name); + +/** + * Prepend element to a list creating it in the memory pool + * @param l + * @param p + * @return + */ +GList *rspamd_mempool_glist_prepend(rspamd_mempool_t *pool, + GList *l, gpointer p) G_GNUC_WARN_UNUSED_RESULT; + +/** + * Append element to a list creating it in the memory pool + * @param l + * @param p + * @return + */ +GList *rspamd_mempool_glist_append(rspamd_mempool_t *pool, + GList *l, gpointer p) G_GNUC_WARN_UNUSED_RESULT; + +#ifdef __cplusplus +} +#endif + +#ifdef __cplusplus +#include <stdexcept> /* For std::runtime_error */ + +namespace rspamd { + +template<class T> +class mempool_allocator { +public: + typedef T value_type; + + mempool_allocator() = delete; + template<class U> + mempool_allocator(const mempool_allocator<U> &other) + : pool(other.pool) + { + } + mempool_allocator(rspamd_mempool_t *_pool) + : pool(_pool) + { + } + [[nodiscard]] constexpr T *allocate(std::size_t n) + { + if (n > G_MAXSIZE / 2 / sizeof(T)) { /* n * sizeof(T) would exceed the allowed size */ + throw std::runtime_error("integer overflow"); + } + return reinterpret_cast<T *>(rspamd_mempool_alloc(pool, n * sizeof(T))); + } + constexpr void deallocate(T *p, std::size_t n) + { + /* Do nothing */ + } + +private: + rspamd_mempool_t *pool; +}; + +}// namespace rspamd +#endif + +#endif diff --git a/src/libutil/mem_pool_internal.h b/src/libutil/mem_pool_internal.h new file mode 100644 index 0000000..4fea839 --- /dev/null +++ b/src/libutil/mem_pool_internal.h @@ -0,0 +1,92 @@ +/*- + * Copyright 2019 Vsevolod Stakhov + * + * Licensed under the Apache License, Version 2.0 (the "License"); + 
* you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef RSPAMD_MEM_POOL_INTERNAL_H +#define RSPAMD_MEM_POOL_INTERNAL_H + +/* + * Internal memory pool stuff + */ + +#define align_ptr(p, a) \ + ((guint8 *) ((uintptr_t) (p) + ((-(intptr_t) (p)) & ((a) -1)))) + +enum rspamd_mempool_chain_type { + RSPAMD_MEMPOOL_NORMAL = 0, + RSPAMD_MEMPOOL_SHARED, + RSPAMD_MEMPOOL_MAX +}; +#define ENTRY_LEN 128 +#define ENTRY_NELTS 64 + +struct entry_elt { + guint32 fragmentation; + guint32 leftover; +}; + +struct rspamd_mempool_entry_point { + gchar src[ENTRY_LEN]; + guint32 cur_suggestion; + guint32 cur_elts; + guint32 cur_vars; + struct entry_elt elts[ENTRY_NELTS]; +}; + +/** + * Destructors list item structure + */ +struct _pool_destructors { + rspamd_mempool_destruct_t func; /**< pointer to destructor */ + void *data; /**< data to free */ + const gchar *function; /**< function from which this destructor was added */ + const gchar *loc; /**< line number */ + struct _pool_destructors *next; +}; + + +struct rspamd_mempool_variable { + gpointer data; + rspamd_mempool_destruct_t dtor; +}; + +KHASH_INIT(rspamd_mempool_vars_hash, + guint32, struct rspamd_mempool_variable, 1, + kh_int_hash_func, kh_int_hash_equal); + +struct rspamd_mempool_specific { + struct _pool_chain *pools[RSPAMD_MEMPOOL_MAX]; + struct _pool_destructors *dtors_head, *dtors_tail; + GPtrArray *trash_stack; + khash_t(rspamd_mempool_vars_hash) * variables; + struct rspamd_mempool_entry_point *entry; + gsize elt_len; /**< size of an element */ + gsize used_memory; + guint 
wasted_memory; + gint flags; +}; + +/** + * Pool page structure + */ +struct _pool_chain { + guint8 *begin; /**< begin of pool chain block */ + guint8 *pos; /**< current start of free space in block */ + gsize slice_size; /**< length of block */ + struct _pool_chain *next; +}; + + +#endif diff --git a/src/libutil/multipattern.c b/src/libutil/multipattern.c new file mode 100644 index 0000000..630b1f9 --- /dev/null +++ b/src/libutil/multipattern.c @@ -0,0 +1,821 @@ +/*- + * Copyright 2016 Vsevolod Stakhov + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include "config.h" +#include "libutil/multipattern.h" +#include "libutil/str_util.h" +#include "libcryptobox/cryptobox.h" + +#ifdef WITH_HYPERSCAN +#include "logger.h" +#include "unix-std.h" +#include "hs.h" +#include "libserver/hyperscan_tools.h" +#endif +#include "acism.h" +#include "libutil/regexp.h" +#include <stdalign.h> + +#define MAX_SCRATCH 4 + +enum rspamd_hs_check_state { + RSPAMD_HS_UNCHECKED = 0, + RSPAMD_HS_SUPPORTED, + RSPAMD_HS_UNSUPPORTED +}; + +static const char *hs_cache_dir = NULL; +static enum rspamd_hs_check_state hs_suitable_cpu = RSPAMD_HS_UNCHECKED; + + +struct RSPAMD_ALIGNED(64) rspamd_multipattern { +#ifdef WITH_HYPERSCAN + rspamd_cryptobox_hash_state_t hash_state; + rspamd_hyperscan_t *hs_db; + hs_scratch_t *scratch[MAX_SCRATCH]; + GArray *hs_pats; + GArray *hs_ids; + GArray *hs_flags; + guint scratch_used; +#endif + ac_trie_t *t; + GArray *pats; + GArray *res; + + gboolean compiled; + guint cnt; + enum rspamd_multipattern_flags flags; +}; + +static GQuark +rspamd_multipattern_quark(void) +{ + return g_quark_from_static_string("multipattern"); +} + +static inline gboolean +rspamd_hs_check(void) +{ +#ifdef WITH_HYPERSCAN + if (G_UNLIKELY(hs_suitable_cpu == RSPAMD_HS_UNCHECKED)) { + if (hs_valid_platform() == HS_SUCCESS) { + hs_suitable_cpu = RSPAMD_HS_SUPPORTED; + } + else { + hs_suitable_cpu = RSPAMD_HS_UNSUPPORTED; + } + } +#endif + + return hs_suitable_cpu == RSPAMD_HS_SUPPORTED; +} + +void rspamd_multipattern_library_init(const gchar *cache_dir) +{ + hs_cache_dir = cache_dir; +#ifdef WITH_HYPERSCAN + rspamd_hs_check(); +#endif +} + +#ifdef WITH_HYPERSCAN +static gchar * +rspamd_multipattern_escape_tld_hyperscan(const gchar *pattern, gsize slen, + gsize *dst_len) +{ + gsize len; + const gchar *p, *prefix, *suffix; + gchar *res; + + /* + * We understand the following cases + * 1) blah -> .blah\b + * 2) *.blah -> ..*\\.blah\b|$ + * 3) ??? 
+ */ + + if (pattern[0] == '*') { + p = strchr(pattern, '.'); + + if (p == NULL) { + /* XXX: bad */ + p = pattern; + } + else { + p++; + } + + prefix = "\\."; + len = slen + strlen(prefix); + } + else { + prefix = "\\."; + p = pattern; + len = slen + strlen(prefix); + } + + suffix = "(?:\\b|$)"; /* non-capturing group: word boundary or end of input */ + len += strlen(suffix); + + res = g_malloc(len + 1); + slen = rspamd_strlcpy(res, prefix, len + 1); + slen += rspamd_strlcpy(res + slen, p, len + 1 - slen); + slen += rspamd_strlcpy(res + slen, suffix, len + 1 - slen); + + *dst_len = slen; + + return res; +} + +#endif +static gchar * +rspamd_multipattern_escape_tld_acism(const gchar *pattern, gsize len, + gsize *dst_len) +{ + gsize dlen, slen; + const gchar *p, *prefix; + gchar *res; + + /* + * We understand the following cases + * 1) blah -> \\.blah + * 2) *.blah -> \\..*\\.blah + * 3) ??? + */ + slen = len; + + if (pattern[0] == '*') { + dlen = slen; + p = memchr(pattern, '.', len); + + if (p == NULL) { + /* XXX: bad */ + p = pattern; + } + else { + p++; + } + + dlen -= p - pattern; + prefix = "."; + dlen++; + } + else { + dlen = slen + 1; + prefix = "."; + p = pattern; + } + + res = g_malloc(dlen + 1); + slen = strlen(prefix); + memcpy(res, prefix, slen); + rspamd_strlcpy(res + slen, p, dlen - slen + 1); + + *dst_len = dlen; + + return res; +} + +/* + * Escapes special characters from specific pattern + */ +static gchar * +rspamd_multipattern_pattern_filter(const gchar *pattern, gsize len, + enum rspamd_multipattern_flags flags, + gsize *dst_len) +{ + gchar *ret = NULL; + gint gl_flags = RSPAMD_REGEXP_ESCAPE_ASCII; + + if (flags & RSPAMD_MULTIPATTERN_UTF8) { + gl_flags |= RSPAMD_REGEXP_ESCAPE_UTF; + } + +#ifdef WITH_HYPERSCAN + if (rspamd_hs_check()) { + if (flags & RSPAMD_MULTIPATTERN_TLD) { + gchar *tmp; + gsize tlen; + tmp = rspamd_multipattern_escape_tld_hyperscan(pattern, len, &tlen); + + ret = rspamd_str_regexp_escape(tmp, tlen, dst_len, + gl_flags | RSPAMD_REGEXP_ESCAPE_RE); + g_free(tmp); + } + else if (flags & 
RSPAMD_MULTIPATTERN_RE) { + ret = rspamd_str_regexp_escape(pattern, len, dst_len, gl_flags | RSPAMD_REGEXP_ESCAPE_RE); + } + else if (flags & RSPAMD_MULTIPATTERN_GLOB) { + ret = rspamd_str_regexp_escape(pattern, len, dst_len, + gl_flags | RSPAMD_REGEXP_ESCAPE_GLOB); + } + else { + ret = rspamd_str_regexp_escape(pattern, len, dst_len, gl_flags); + } + + return ret; + } +#endif + + if (flags & RSPAMD_MULTIPATTERN_TLD) { + ret = rspamd_multipattern_escape_tld_acism(pattern, len, dst_len); + } + else if (flags & RSPAMD_MULTIPATTERN_RE) { + ret = rspamd_str_regexp_escape(pattern, len, dst_len, gl_flags | RSPAMD_REGEXP_ESCAPE_RE); + } + else if (flags & RSPAMD_MULTIPATTERN_GLOB) { + ret = rspamd_str_regexp_escape(pattern, len, dst_len, + gl_flags | RSPAMD_REGEXP_ESCAPE_GLOB); + } + else { + ret = g_malloc(len + 1); /* GLib allocator, like every other branch */ + *dst_len = rspamd_strlcpy(ret, pattern, len + 1); + } + + return ret; +} + +struct rspamd_multipattern * +rspamd_multipattern_create(enum rspamd_multipattern_flags flags) +{ + struct rspamd_multipattern *mp = NULL; /* so the assert below tests a defined value */ + + /* Align due to blake2b state */ + (void) !posix_memalign((void **) &mp, RSPAMD_ALIGNOF(struct rspamd_multipattern), + sizeof(*mp)); + g_assert(mp != NULL); + memset(mp, 0, sizeof(*mp)); + mp->flags = flags; + +#ifdef WITH_HYPERSCAN + if (rspamd_hs_check()) { + mp->hs_pats = g_array_new(FALSE, TRUE, sizeof(gchar *)); + mp->hs_flags = g_array_new(FALSE, TRUE, sizeof(gint)); + mp->hs_ids = g_array_new(FALSE, TRUE, sizeof(gint)); + rspamd_cryptobox_hash_init(&mp->hash_state, NULL, 0); + + return mp; + } +#endif + + mp->pats = g_array_new(FALSE, TRUE, sizeof(ac_trie_pat_t)); + + return mp; +} + +struct rspamd_multipattern * +rspamd_multipattern_create_sized(guint npatterns, + enum rspamd_multipattern_flags flags) +{ + struct rspamd_multipattern *mp = NULL; /* so the assert below tests a defined value */ + + /* Align due to blake2b state */ + (void) !posix_memalign((void **) &mp, RSPAMD_ALIGNOF(struct rspamd_multipattern), sizeof(*mp)); + g_assert(mp != NULL); + memset(mp, 0, sizeof(*mp)); + mp->flags = flags; 
+ +#ifdef WITH_HYPERSCAN + if (rspamd_hs_check()) { + mp->hs_pats = g_array_sized_new(FALSE, TRUE, sizeof(gchar *), npatterns); + mp->hs_flags = g_array_sized_new(FALSE, TRUE, sizeof(gint), npatterns); + mp->hs_ids = g_array_sized_new(FALSE, TRUE, sizeof(gint), npatterns); + rspamd_cryptobox_hash_init(&mp->hash_state, NULL, 0); + + return mp; + } +#endif + + mp->pats = g_array_sized_new(FALSE, TRUE, sizeof(ac_trie_pat_t), npatterns); + + return mp; +} + +void rspamd_multipattern_add_pattern(struct rspamd_multipattern *mp, + const gchar *pattern, gint flags) +{ + g_assert(pattern != NULL); + + rspamd_multipattern_add_pattern_len(mp, pattern, strlen(pattern), flags); +} + +void rspamd_multipattern_add_pattern_len(struct rspamd_multipattern *mp, + const gchar *pattern, gsize patlen, gint flags) +{ + gsize dlen; + + g_assert(pattern != NULL); + g_assert(mp != NULL); + g_assert(!mp->compiled); + +#ifdef WITH_HYPERSCAN + if (rspamd_hs_check()) { + gchar *np; + gint fl = HS_FLAG_SOM_LEFTMOST; + gint adjusted_flags = mp->flags | flags; + + if (adjusted_flags & RSPAMD_MULTIPATTERN_ICASE) { + fl |= HS_FLAG_CASELESS; + } + if (adjusted_flags & RSPAMD_MULTIPATTERN_UTF8) { + if (adjusted_flags & RSPAMD_MULTIPATTERN_TLD) { + fl |= HS_FLAG_UTF8; + } + else { + fl |= HS_FLAG_UTF8 | HS_FLAG_UCP; + } + } + if (adjusted_flags & RSPAMD_MULTIPATTERN_DOTALL) { + fl |= HS_FLAG_DOTALL; + } + if (adjusted_flags & RSPAMD_MULTIPATTERN_SINGLEMATCH) { + fl |= HS_FLAG_SINGLEMATCH; + fl &= ~HS_FLAG_SOM_LEFTMOST; /* According to hyperscan docs */ + } + if (adjusted_flags & RSPAMD_MULTIPATTERN_NO_START) { + fl &= ~HS_FLAG_SOM_LEFTMOST; + } + + g_array_append_val(mp->hs_flags, fl); + np = rspamd_multipattern_pattern_filter(pattern, patlen, flags, &dlen); + g_array_append_val(mp->hs_pats, np); + fl = mp->cnt; + g_array_append_val(mp->hs_ids, fl); + rspamd_cryptobox_hash_update(&mp->hash_state, np, dlen); + + mp->cnt++; + + return; + } +#endif + ac_trie_pat_t pat; + + pat.ptr = 
rspamd_multipattern_pattern_filter(pattern, patlen, flags, &dlen); + pat.len = dlen; + + g_array_append_val(mp->pats, pat); + + mp->cnt++; +} + +struct rspamd_multipattern * +rspamd_multipattern_create_full(const gchar **patterns, + guint npatterns, enum rspamd_multipattern_flags flags) +{ + struct rspamd_multipattern *mp; + guint i; + + g_assert(npatterns > 0); + g_assert(patterns != NULL); + + mp = rspamd_multipattern_create_sized(npatterns, flags); + + for (i = 0; i < npatterns; i++) { + rspamd_multipattern_add_pattern(mp, patterns[i], flags); + } + + return mp; +} + +#ifdef WITH_HYPERSCAN +static gboolean +rspamd_multipattern_try_load_hs(struct rspamd_multipattern *mp, + const guchar *hash) +{ + gchar fp[PATH_MAX]; + + if (hs_cache_dir == NULL) { + return FALSE; + } + + rspamd_snprintf(fp, sizeof(fp), "%s/%*xs.hsmp", hs_cache_dir, + (gint) rspamd_cryptobox_HASHBYTES / 2, hash); + mp->hs_db = rspamd_hyperscan_maybe_load(fp, 0); + + return mp->hs_db != NULL; +} + +static void +rspamd_multipattern_try_save_hs(struct rspamd_multipattern *mp, + const guchar *hash) +{ + gchar fp[PATH_MAX], np[PATH_MAX]; + char *bytes = NULL; + gsize len; + gint fd; + + if (hs_cache_dir == NULL) { + return; + } + + rspamd_snprintf(fp, sizeof(fp), "%s%shsmp-XXXXXXXXXXXXX", G_DIR_SEPARATOR_S, + hs_cache_dir); + + if ((fd = g_mkstemp_full(fp, O_CREAT | O_EXCL | O_WRONLY, 00644)) != -1) { + int ret; + if ((ret = hs_serialize_database(rspamd_hyperscan_get_database(mp->hs_db), &bytes, &len)) == HS_SUCCESS) { + if (write(fd, bytes, len) == -1) { + msg_warn("cannot write hyperscan cache to %s: %s", + fp, strerror(errno)); + unlink(fp); + free(bytes); + } + else { + free(bytes); + fsync(fd); + + rspamd_snprintf(np, sizeof(np), "%s/%*xs.hsmp", hs_cache_dir, + (gint) rspamd_cryptobox_HASHBYTES / 2, hash); + + if (rename(fp, np) == -1) { + msg_warn("cannot rename hyperscan cache from %s to %s: %s", + fp, np, strerror(errno)); + unlink(fp); + } + else { + rspamd_hyperscan_notice_known(np); + } + } 
+ } + else { + msg_warn("cannot serialize hyperscan cache to %s: error code %d", + fp, ret); + unlink(fp); + } + + + close(fd); + } + else { + msg_warn("cannot open a temp file %s to write hyperscan cache: %s", fp, strerror(errno)); + } +} +#endif + +gboolean +rspamd_multipattern_compile(struct rspamd_multipattern *mp, GError **err) +{ + g_assert(mp != NULL); + g_assert(!mp->compiled); + +#ifdef WITH_HYPERSCAN + if (rspamd_hs_check()) { + guint i; + hs_platform_info_t plt; + hs_compile_error_t *hs_errors; + guchar hash[rspamd_cryptobox_HASHBYTES]; + + if (mp->cnt > 0) { + g_assert(hs_populate_platform(&plt) == HS_SUCCESS); + rspamd_cryptobox_hash_update(&mp->hash_state, (void *) &plt, sizeof(plt)); + rspamd_cryptobox_hash_final(&mp->hash_state, hash); + + if (!rspamd_multipattern_try_load_hs(mp, hash)) { + hs_database_t *db = NULL; + + if (hs_compile_multi((const char *const *) mp->hs_pats->data, + (const unsigned int *) mp->hs_flags->data, + (const unsigned int *) mp->hs_ids->data, + mp->cnt, + HS_MODE_BLOCK, + &plt, + &db, + &hs_errors) != HS_SUCCESS) { + + g_set_error(err, rspamd_multipattern_quark(), EINVAL, + "cannot create tree of regexp when processing '%s': %s", + g_array_index(mp->hs_pats, char *, hs_errors->expression), + hs_errors->message); + hs_free_compile_error(hs_errors); + + return FALSE; + } + + if (hs_cache_dir != NULL) { + char fpath[PATH_MAX]; + rspamd_snprintf(fpath, sizeof(fpath), "%s/%*xs.hsmp", hs_cache_dir, + (gint) rspamd_cryptobox_HASHBYTES / 2, hash); + mp->hs_db = rspamd_hyperscan_from_raw_db(db, fpath); + } + else { + /* Should not happen in the real life */ + mp->hs_db = rspamd_hyperscan_from_raw_db(db, NULL); + } + + rspamd_multipattern_try_save_hs(mp, hash); + } + + for (i = 0; i < MAX_SCRATCH; i++) { + mp->scratch[i] = NULL; + } + + for (i = 0; i < MAX_SCRATCH; i++) { + int ret; + + if ((ret = hs_alloc_scratch(rspamd_hyperscan_get_database(mp->hs_db), &mp->scratch[i])) != HS_SUCCESS) { + msg_err("cannot allocate scratch space for 
hyperscan: error code %d", ret); + + /* Clean all scratches that are non-NULL */ + for (int ii = 0; ii < MAX_SCRATCH; ii++) { + if (mp->scratch[ii] != NULL) { + hs_free_scratch(mp->scratch[ii]); + } + } + g_set_error(err, rspamd_multipattern_quark(), EINVAL, + "cannot allocate scratch space for hyperscan: error code %d", ret); + + rspamd_hyperscan_free(mp->hs_db, true); + mp->hs_db = NULL; + + return FALSE; + } + } + } + + mp->compiled = TRUE; + + return TRUE; + } +#endif + + if (mp->cnt > 0) { + + if (mp->flags & (RSPAMD_MULTIPATTERN_GLOB | RSPAMD_MULTIPATTERN_RE)) { + /* Fallback to pcre... */ + rspamd_regexp_t *re; + mp->res = g_array_sized_new(FALSE, TRUE, + sizeof(rspamd_regexp_t *), mp->cnt); + + for (guint i = 0; i < mp->cnt; i++) { + const ac_trie_pat_t *pat; + const gchar *pat_flags = NULL; + + if (mp->flags & RSPAMD_MULTIPATTERN_UTF8) { + pat_flags = "u"; + } + + pat = &g_array_index(mp->pats, ac_trie_pat_t, i); + re = rspamd_regexp_new(pat->ptr, pat_flags, err); + + if (re == NULL) { + return FALSE; + } + + g_array_append_val(mp->res, re); + } + } + else { + mp->t = acism_create((const ac_trie_pat_t *) mp->pats->data, mp->cnt); + } + } + + mp->compiled = TRUE; + + return TRUE; +} + +struct rspamd_multipattern_cbdata { + struct rspamd_multipattern *mp; + const gchar *in; + gsize len; + rspamd_multipattern_cb_t cb; + gpointer ud; + guint nfound; + gint ret; +}; + +#ifdef WITH_HYPERSCAN +static gint +rspamd_multipattern_hs_cb(unsigned int id, + unsigned long long from, + unsigned long long to, + unsigned int flags, + void *ud) +{ + struct rspamd_multipattern_cbdata *cbd = ud; + gint ret = 0; + + if (to > 0) { + + if (from == HS_OFFSET_PAST_HORIZON) { + from = 0; + } + + ret = cbd->cb(cbd->mp, id, from, to, cbd->in, cbd->len, cbd->ud); + + cbd->nfound++; + cbd->ret = ret; + } + + return ret; +} +#endif + +static gint +rspamd_multipattern_acism_cb(int strnum, int textpos, void *context) +{ + struct rspamd_multipattern_cbdata *cbd = context; + gint ret; + 
ac_trie_pat_t pat; + + pat = g_array_index(cbd->mp->pats, ac_trie_pat_t, strnum); + ret = cbd->cb(cbd->mp, strnum, textpos - pat.len, + textpos, cbd->in, cbd->len, cbd->ud); + + cbd->nfound++; + cbd->ret = ret; + + return ret; +} + +gint rspamd_multipattern_lookup(struct rspamd_multipattern *mp, + const gchar *in, gsize len, rspamd_multipattern_cb_t cb, + gpointer ud, guint *pnfound) +{ + struct rspamd_multipattern_cbdata cbd; + gint ret = 0; + + g_assert(mp != NULL); + + if (mp->cnt == 0 || !mp->compiled || len == 0) { + return 0; + } + + cbd.mp = mp; + cbd.in = in; + cbd.len = len; + cbd.cb = cb; + cbd.ud = ud; + cbd.nfound = 0; + cbd.ret = 0; + +#ifdef WITH_HYPERSCAN + if (rspamd_hs_check()) { + hs_scratch_t *scr = NULL; + guint i; + + for (i = 0; i < MAX_SCRATCH; i++) { + if (!(mp->scratch_used & (1 << i))) { + mp->scratch_used |= (1 << i); + scr = mp->scratch[i]; + break; + } + } + + g_assert(scr != NULL); + + ret = hs_scan(rspamd_hyperscan_get_database(mp->hs_db), in, len, 0, scr, + rspamd_multipattern_hs_cb, &cbd); + + mp->scratch_used &= ~(1 << i); + + if (ret == HS_SUCCESS) { + ret = 0; + } + else if (ret == HS_SCAN_TERMINATED) { + ret = cbd.ret; + } + + if (pnfound) { + *pnfound = cbd.nfound; + } + + return ret; + } +#endif + + gint state = 0; + + if (mp->flags & (RSPAMD_MULTIPATTERN_GLOB | RSPAMD_MULTIPATTERN_RE)) { + /* Terribly inefficient, but who cares - just use hyperscan */ + for (guint i = 0; i < mp->cnt; i++) { + rspamd_regexp_t *re = g_array_index(mp->res, rspamd_regexp_t *, i); + const gchar *start = NULL, *end = NULL; + + while (rspamd_regexp_search(re, + in, + len, + &start, + &end, + TRUE, + NULL)) { + if (rspamd_multipattern_acism_cb(i, end - in, &cbd)) { + goto out; + } + } + } + out: + ret = cbd.ret; + + if (pnfound) { + *pnfound = cbd.nfound; + } + } + else { + /* Plain trie */ + ret = acism_lookup(mp->t, in, len, rspamd_multipattern_acism_cb, &cbd, + &state, mp->flags & RSPAMD_MULTIPATTERN_ICASE); + + if (pnfound) { + *pnfound = 
cbd.nfound; + } + } + + return ret; +} + + +void rspamd_multipattern_destroy(struct rspamd_multipattern *mp) +{ + guint i; + + if (mp) { +#ifdef WITH_HYPERSCAN + if (rspamd_hs_check()) { + gchar *p; + + if (mp->compiled && mp->cnt > 0) { + for (i = 0; i < MAX_SCRATCH; i++) { + hs_free_scratch(mp->scratch[i]); + } + + if (mp->hs_db) { + rspamd_hyperscan_free(mp->hs_db, false); + } + } + + for (i = 0; i < mp->cnt; i++) { + p = g_array_index(mp->hs_pats, gchar *, i); + g_free(p); + } + + g_array_free(mp->hs_pats, TRUE); + g_array_free(mp->hs_ids, TRUE); + g_array_free(mp->hs_flags, TRUE); + free(mp); /* Due to posix_memalign */ + + return; + } +#endif + ac_trie_pat_t pat; + + if (mp->compiled && mp->cnt > 0) { + acism_destroy(mp->t); + } + + for (i = 0; i < mp->cnt; i++) { + pat = g_array_index(mp->pats, ac_trie_pat_t, i); + g_free((gchar *) pat.ptr); + } + + g_array_free(mp->pats, TRUE); + + g_free(mp); + } +} + +const gchar * +rspamd_multipattern_get_pattern(struct rspamd_multipattern *mp, + guint index) +{ + g_assert(mp != NULL); + g_assert(index < mp->cnt); + +#ifdef WITH_HYPERSCAN + if (rspamd_hs_check()) { + return g_array_index(mp->hs_pats, gchar *, index); + } +#endif + + ac_trie_pat_t pat; + + pat = g_array_index(mp->pats, ac_trie_pat_t, index); + + return pat.ptr; +} + +guint rspamd_multipattern_get_npatterns(struct rspamd_multipattern *mp) +{ + g_assert(mp != NULL); + + return mp->cnt; +} + +gboolean +rspamd_multipattern_has_hyperscan(void) +{ + return rspamd_hs_check(); +} diff --git a/src/libutil/multipattern.h b/src/libutil/multipattern.h new file mode 100644 index 0000000..9302766 --- /dev/null +++ b/src/libutil/multipattern.h @@ -0,0 +1,173 @@ +/*- + * Copyright 2016 Vsevolod Stakhov + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef SRC_LIBUTIL_MULTIPATTERN_H_ +#define SRC_LIBUTIL_MULTIPATTERN_H_ + +#include "config.h" + +/** + * @file multipattern.h + * + * This file defines a structure that acts like a transparent bridge between + * hyperscan and ac-trie + */ + +#ifdef __cplusplus +extern "C" { +#endif + +enum rspamd_multipattern_flags { + RSPAMD_MULTIPATTERN_DEFAULT = 0, + RSPAMD_MULTIPATTERN_ICASE = (1 << 0), + RSPAMD_MULTIPATTERN_UTF8 = (1 << 1), + RSPAMD_MULTIPATTERN_TLD = (1 << 2), + /* Not supported by acism */ + RSPAMD_MULTIPATTERN_GLOB = (1 << 3), + RSPAMD_MULTIPATTERN_RE = (1 << 4), + RSPAMD_MULTIPATTERN_DOTALL = (1 << 5), + RSPAMD_MULTIPATTERN_SINGLEMATCH = (1 << 6), + RSPAMD_MULTIPATTERN_NO_START = (1 << 7), +}; + +struct rspamd_multipattern; +struct rspamd_cryptobox_library_ctx; + +/** + * Called on pattern match + * @param mp multipattern structure + * @param strnum index of the matched pattern + * @param match_start offset in the text where the match starts + * @param match_pos offset in the text where the match ends + * @param text input text + * @param len length of input text + * @param context userdata + * @return if 0 then search for another pattern, otherwise return this value to caller + */ +typedef gint (*rspamd_multipattern_cb_t)(struct rspamd_multipattern *mp, + guint strnum, + gint match_start, + gint match_pos, + const gchar *text, + gsize len, + void *context); + +/** + * Init multipattern library and set the appropriate cache dir + * @param cache_dir + */ +void rspamd_multipattern_library_init(const gchar *cache_dir); + +/** + * Creates empty multipattern structure + * @param flags + * @return + */ +struct 
rspamd_multipattern *rspamd_multipattern_create( + enum rspamd_multipattern_flags flags); + +/** + * Creates multipattern with preallocated number of patterns to speed up loading + * @param reserved number of patterns to preallocate space for + * @param flags + * @return + */ +struct rspamd_multipattern *rspamd_multipattern_create_sized(guint reserved, + enum rspamd_multipattern_flags flags); + +/** + * Creates new multipattern structure + * @param patterns vector of null terminated strings + * @param npatterns number of patterns + * @param flags flags applied to all patterns + * @return new multipattern structure + */ +struct rspamd_multipattern *rspamd_multipattern_create_full( + const gchar **patterns, + guint npatterns, + enum rspamd_multipattern_flags flags); + +/** + * Adds new pattern to match engine from zero-terminated string + * @param mp + * @param pattern + * @param flags + */ +void rspamd_multipattern_add_pattern(struct rspamd_multipattern *mp, + const gchar *pattern, gint flags); + +/** + * Adds new pattern from arbitrary string + * @param mp + * @param pattern + * @param patlen + * @param flags + */ +void rspamd_multipattern_add_pattern_len(struct rspamd_multipattern *mp, + const gchar *pattern, gsize patlen, gint flags); + +/** + * Compiles multipattern structure + * @param mp + * @param err receives the error on failure + * @return TRUE on success, FALSE on failure + */ +gboolean rspamd_multipattern_compile(struct rspamd_multipattern *mp, + GError **err); + +/** + * Looks up patterns in a text using the specified callback function + * @param mp + * @param in + * @param len + * @param cb if callback returns non-zero, then search is terminated and that value is returned + * @param ud callback data + * @param pnfound if not NULL, set to the number of matches found (not written when no search is performed) + * @return + */ +gint rspamd_multipattern_lookup(struct rspamd_multipattern *mp, + const gchar *in, gsize len, rspamd_multipattern_cb_t cb, + gpointer ud, guint *pnfound); + +/** + * Get pattern string from multipattern identified by index + * @param mp + * @param index + * @return + */ +const gchar *rspamd_multipattern_get_pattern(struct rspamd_multipattern *mp, + guint index); + +/** + * 
Returns number of patterns in a multipattern matcher + * @param mp + * @return + */ +guint rspamd_multipattern_get_npatterns(struct rspamd_multipattern *mp); + +/** + * Destroys multipattern structure + * @param mp + */ +void rspamd_multipattern_destroy(struct rspamd_multipattern *mp); + +/** + * Returns TRUE if hyperscan is supported + * @return + */ +gboolean rspamd_multipattern_has_hyperscan(void); + +#ifdef __cplusplus +} +#endif + +#endif /* SRC_LIBUTIL_MULTIPATTERN_H_ */ diff --git a/src/libutil/printf.c b/src/libutil/printf.c new file mode 100644 index 0000000..ba53b56 --- /dev/null +++ b/src/libutil/printf.c @@ -0,0 +1,1097 @@ +/*- + * Copyright 2016 Vsevolod Stakhov + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +/* Copyright (C) 2002-2015 Igor Sysoev + * Copyright (C) 2011-2015 Nginx, Inc. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. 
+ * + * THIS SOFTWARE IS PROVIDED ''AS IS'' AND ANY + * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED + * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE + * DISCLAIMED. IN NO EVENT SHALL AUTHOR BE LIABLE FOR ANY + * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES + * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; + * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND + * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS + * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +#include "printf.h" +#include "str_util.h" +#include "contrib/fpconv/fpconv.h" + +/** + * From FreeBSD libutil code + */ +static const int maxscale = 6; +static const gchar _hex[] = "0123456789abcdef"; +static const gchar _HEX[] = "0123456789ABCDEF"; + +static gchar * +rspamd_humanize_number(gchar *buf, gchar *last, gint64 num, gboolean bytes) +{ + const gchar *prefixes; + int i, r, remainder, sign; + gint64 divisor; + gsize len = last - buf; + + remainder = 0; + + if (!bytes) { + divisor = 1000; + prefixes = "\0\0\0\0k\0\0\0M\0\0\0G\0\0\0T\0\0\0P\0\0\0E"; + } + else { + divisor = 1024; + prefixes = "B\0\0\0KiB\0MiB\0GiB\0TiB\0PiB\0EiB"; + } + +#define SCALE2PREFIX(scale) (&prefixes[(scale) *4]) + + if (num < 0) { + sign = -1; + num = -num; + } + else { + sign = 1; + } + + /* + * Divide the number until it fits the given column. + * If there will be an overflow by the rounding below, + * divide once more. 
+ */ + for (i = 0; i < maxscale && num > divisor; i++) { + remainder = num % divisor; + num /= divisor; + } + + if (remainder == 0 || num > divisor / 2) { + r = rspamd_snprintf(buf, len, "%L%s", + sign * (num + (remainder + 50) / divisor), + SCALE2PREFIX(i)); + } + else { + /* Floating point version */ + r = rspamd_snprintf(buf, len, "%.2f%s", + sign * (num + remainder / (gdouble) divisor), + SCALE2PREFIX(i)); + } + +#undef SCALE2PREFIX + + return buf + r; +} + + +static inline unsigned +rspamd_decimal_digits32(guint32 val) +{ + static const guint32 powers_of_10[] = { + 0, + 10, + 100, + 1000, + 10000, + 100000, + 1000000, + 10000000, + 100000000, + 1000000000}; + unsigned tmp; + +#if defined(_MSC_VER) + unsigned long r = 0; + _BitScanReverse(&r, val | 1); + tmp = (r + 1) * 1233 >> 12; +#elif defined(__GNUC__) && (__GNUC__ >= 3) + tmp = (32 - __builtin_clz(val | 1U)) * 1233 >> 12; + +#else /* Software version */ + static const unsigned debruijn_tbl[32] = {0, 9, 1, 10, 13, 21, 2, 29, + 11, 14, 16, 18, 22, 25, 3, 30, + 8, 12, 20, 28, 15, 17, 24, 7, + 19, 27, 23, 6, 26, 5, 4, 31}; + guint32 v = val | 1; + + v |= v >> 1; + v |= v >> 2; + v |= v >> 4; + v |= v >> 8; + v |= v >> 16; + tmp = (1 + debruijn_tbl[(v * 0x07C4ACDDU) >> 27]) * 1233 >> 12; +#endif + return tmp - (val < powers_of_10[tmp]) + 1; +} + +static inline unsigned +rspamd_decimal_digits64(guint64 val) +{ + static const guint64 powers_of_10[] = { + 0, + 10ULL, + 100ULL, + 1000ULL, + 10000ULL, + 100000ULL, + 1000000ULL, + 10000000ULL, + 100000000ULL, + 1000000000ULL, + 10000000000ULL, + 100000000000ULL, + 1000000000000ULL, + 10000000000000ULL, + 100000000000000ULL, + 1000000000000000ULL, + 10000000000000000ULL, + 100000000000000000ULL, + 1000000000000000000ULL, + 10000000000000000000ULL}; + unsigned tmp; + +#if defined(_MSC_VER) +#if _M_IX86 + unsigned long r = 0; + guint64 m = val | 1; + if (_BitScanReverse(&r, m >> 32)) { + r += 32; + } + else { + _BitScanReverse(&r, m & 0xFFFFFFFF); + } + tmp = (r + 1) * 
1233 >> 12; +#else + unsigned long r = 0; + _BitScanReverse64(&r, val | 1); + tmp = (r + 1) * 1233 >> 12; +#endif +#elif defined(__GNUC__) && (__GNUC__ >= 3) + tmp = (64 - __builtin_clzll(val | 1ULL)) * 1233 >> 12; +#else /* Software version */ + static const unsigned debruijn_tbl[32] = {0, 9, 1, 10, 13, 21, 2, 29, + 11, 14, 16, 18, 22, 25, 3, 30, + 8, 12, 20, 28, 15, 17, 24, 7, + 19, 27, 23, 6, 26, 5, 4, 31}; + guint32 v = val >> 32; + + if (v) { + v |= 1; + v |= v >> 1; + v |= v >> 2; + v |= v >> 4; + v |= v >> 8; + v |= v >> 16; + tmp = 32 + debruijn_tbl[(v * 0x07C4ACDDU) >> 27]; + } + else { + v = val & 0xFFFFFFFF; + v |= 1; + v |= v >> 1; + v |= v >> 2; + v |= v >> 4; + v |= v >> 8; + v |= v >> 16; + + tmp = debruijn_tbl[(v * 0x07C4ACDDU) >> 27]; + } + + + tmp = (tmp + 1) * 1233 >> 12; +#endif + + return tmp - (val < powers_of_10[tmp]) + 1; +} + +/* + * Idea from https://github.com/miloyip/itoa-benchmark: + * Uses lookup table (LUT) of digit pairs for division/modulo of 100. + * + * Mentioned in: + * https://www.slideshare.net/andreialexandrescu1/three-optimization-tips-for-c-15708507 + */ + +static const char int_lookup_table[200] = { + '0', '0', '0', '1', '0', '2', '0', '3', '0', '4', + '0', '5', '0', '6', '0', '7', '0', '8', '0', '9', + '1', '0', '1', '1', '1', '2', '1', '3', '1', '4', + '1', '5', '1', '6', '1', '7', '1', '8', '1', '9', + '2', '0', '2', '1', '2', '2', '2', '3', '2', '4', + '2', '5', '2', '6', '2', '7', '2', '8', '2', '9', + '3', '0', '3', '1', '3', '2', '3', '3', '3', '4', + '3', '5', '3', '6', '3', '7', '3', '8', '3', '9', + '4', '0', '4', '1', '4', '2', '4', '3', '4', '4', + '4', '5', '4', '6', '4', '7', '4', '8', '4', '9', + '5', '0', '5', '1', '5', '2', '5', '3', '5', '4', + '5', '5', '5', '6', '5', '7', '5', '8', '5', '9', + '6', '0', '6', '1', '6', '2', '6', '3', '6', '4', + '6', '5', '6', '6', '6', '7', '6', '8', '6', '9', + '7', '0', '7', '1', '7', '2', '7', '3', '7', '4', + '7', '5', '7', '6', '7', '7', '7', '8', '7', '9', + '8', 
'0', '8', '1', '8', '2', '8', '3', '8', '4', + '8', '5', '8', '6', '8', '7', '8', '8', '8', '9', + '9', '0', '9', '1', '9', '2', '9', '3', '9', '4', + '9', '5', '9', '6', '9', '7', '9', '8', '9', '9'}; + +static inline guint +rspamd_uint32_print(guint32 in, gchar *out) +{ + guint ndigits = rspamd_decimal_digits32(in); + gchar *p; + + p = out + ndigits - 1; + + while (in >= 100) { + unsigned idx = (in % 100) * 2; + + /* Do two digits at once */ + *p-- = int_lookup_table[idx + 1]; + *p-- = int_lookup_table[idx]; + + in /= 100; + } + + if (in < 10) { + *p = ((char) in) + '0'; + } + else { + unsigned idx = in * 2; + + *p-- = int_lookup_table[idx + 1]; + *p = int_lookup_table[idx]; + } + + return ndigits; +} + +static inline guint +rspamd_uint64_print(guint64 in, gchar *out) +{ + guint ndigits = rspamd_decimal_digits64(in); + guint32 v32; + gchar *p; + + p = out + ndigits - 1; + + while (in >= 100000000) { + v32 = (guint32) (in % 100000000); + guint32 a, b, a1, a2, b1, b2; + + /* Initial spill */ + a = v32 / 10000; + b = v32 % 10000; + a1 = (a / 100) * 2; + a2 = (a % 100) * 2; + b1 = (b / 100) * 2; + b2 = (b % 100) * 2; + + /* Fill 8 digits at once */ + *p-- = int_lookup_table[b2 + 1]; + *p-- = int_lookup_table[b2]; + *p-- = int_lookup_table[b1 + 1]; + *p-- = int_lookup_table[b1]; + *p-- = int_lookup_table[a2 + 1]; + *p-- = int_lookup_table[a2]; + *p-- = int_lookup_table[a1 + 1]; + *p-- = int_lookup_table[a1]; + + in /= 100000000; + } + + /* Remaining 32 bit */ + v32 = (guint32) in; + + while (v32 >= 100) { + unsigned idx = (v32 % 100) << 1; + + /* Do 2 digits at once */ + *p-- = int_lookup_table[idx + 1]; + *p-- = int_lookup_table[idx]; + + v32 /= 100; + } + + if (v32 < 10) { + *p = ((char) v32) + '0'; + } + else { + unsigned idx = v32 * 2; + + *p-- = int_lookup_table[idx + 1]; + *p = int_lookup_table[idx]; + } + + return ndigits; +} + +static inline int +rspamd_ffsll(long long n) +{ +#ifdef __has_builtin +#if __has_builtin(__builtin_ffsll) + return __builtin_ffsll(n); 
+#elif __has_builtin(__builtin_ctzll) + if (n == 0) { + return 0; + } + + return __builtin_ctzll(n) + 1; +#endif +#endif /* __has_builtin */ + +#ifdef HAVE_FFSL + return ffsl(n); +#else + if (n == 0) { + return 0; + } + + int bit; + for (bit = 1; !(n & 1); bit++) { + n = ((unsigned long long) n) >> 1; + } + return bit; +#endif +} + +static gchar * +rspamd_sprintf_num(gchar *buf, gchar *last, guint64 ui64, gchar zero, + guint hexadecimal, guint binary, guint width) +{ + gchar *p, temp[64]; + size_t len = 0; + + if (G_LIKELY(hexadecimal == 0 && binary == 0)) { + p = temp; + + if (ui64 < G_MAXUINT32) { + len = rspamd_uint32_print((guint32) ui64, temp); + } + else { + len = rspamd_uint64_print(ui64, temp); + } + } + else if (hexadecimal == 1) { + p = temp + sizeof(temp); + do { + *--p = _hex[(guint32) (ui64 & 0xf)]; + } while (ui64 >>= 4); + + len = (temp + sizeof(temp)) - p; + } + else if (hexadecimal == 2) { /* hexadecimal == 2 */ + p = temp + sizeof(temp); + do { + *--p = _HEX[(guint32) (ui64 & 0xf)]; + } while (ui64 >>= 4); + + len = (temp + sizeof(temp)) - p; + } + else if (binary > 0) { + int first_bit = MIN(sizeof(temp), rspamd_ffsll(ui64)); + + p = temp + sizeof(temp); + for (int i = 0; i <= first_bit; i++, ui64 >>= 1) { + *--p = '0' + (ui64 & 0x1); + } + + len = (temp + sizeof(temp)) - p; + } + + /* zero or space padding */ + + if (len < width) { + width -= len; + + while (width-- > 0 && buf < last) { + *buf++ = zero; + } + } + + /* number safe copy */ + + if (buf + len > last) { + len = last - buf; + } + + return ((gchar *) memcpy(buf, p, len)) + len; +} + +struct rspamd_printf_char_buf { + char *begin; + char *pos; + glong remain; +}; + +static glong +rspamd_printf_append_char(const gchar *buf, glong buflen, gpointer ud) +{ + struct rspamd_printf_char_buf *dst = (struct rspamd_printf_char_buf *) ud; + glong wr; + + if (dst->remain <= 0) { + return dst->remain; + } + + wr = MIN(dst->remain, buflen); + memcpy(dst->pos, buf, wr); + dst->remain -= wr; + dst->pos 
+= wr; + + return wr; +} + +static glong +rspamd_printf_append_file(const gchar *buf, glong buflen, gpointer ud) +{ + FILE *dst = (FILE *) ud; + if (buflen > 0) { + return fwrite(buf, 1, buflen, dst); + } + else { + return 0; + } +} + +static glong +rspamd_printf_append_gstring(const gchar *buf, glong buflen, gpointer ud) +{ + GString *dst = (GString *) ud; + + if (buflen > 0) { + g_string_append_len(dst, buf, buflen); + } + + return buflen; +} + +static glong +rspamd_printf_append_fstring(const gchar *buf, glong buflen, gpointer ud) +{ + rspamd_fstring_t **dst = ud; + + if (buflen > 0) { + *dst = rspamd_fstring_append(*dst, buf, buflen); + } + + return buflen; +} + +glong rspamd_fprintf(FILE *f, const gchar *fmt, ...) +{ + va_list args; + glong r; + + va_start(args, fmt); + r = rspamd_vprintf_common(rspamd_printf_append_file, f, fmt, args); + va_end(args); + + return r; +} + +glong rspamd_printf(const gchar *fmt, ...) +{ + va_list args; + glong r; + + va_start(args, fmt); + r = rspamd_vprintf_common(rspamd_printf_append_file, stdout, fmt, args); + va_end(args); + + return r; +} + +glong rspamd_log_fprintf(FILE *f, const gchar *fmt, ...) +{ + va_list args; + glong r; + + va_start(args, fmt); + r = rspamd_vprintf_common(rspamd_printf_append_file, f, fmt, args); + va_end(args); + + fflush(f); + + return r; +} + + +glong rspamd_snprintf(gchar *buf, glong max, const gchar *fmt, ...) +{ + gchar *r; + va_list args; + + va_start(args, fmt); + r = rspamd_vsnprintf(buf, max, fmt, args); + va_end(args); + + return (r - buf); +} + +gchar * +rspamd_vsnprintf(gchar *buf, glong max, const gchar *fmt, va_list args) +{ + struct rspamd_printf_char_buf dst; + + dst.begin = buf; + dst.pos = dst.begin; + dst.remain = max - 1; + (void) rspamd_vprintf_common(rspamd_printf_append_char, &dst, fmt, args); + *dst.pos = '\0'; + + return dst.pos; +} + +glong rspamd_printf_gstring(GString *s, const gchar *fmt, ...) 
+{ + va_list args; + glong r; + + va_start(args, fmt); + r = rspamd_vprintf_gstring(s, fmt, args); + va_end(args); + + return r; +} + +glong rspamd_vprintf_gstring(GString *s, const gchar *fmt, va_list args) +{ + return rspamd_vprintf_common(rspamd_printf_append_gstring, s, fmt, args); +} + +glong rspamd_printf_fstring(rspamd_fstring_t **s, const gchar *fmt, ...) +{ + va_list args; + glong r; + + va_start(args, fmt); + r = rspamd_vprintf_fstring(s, fmt, args); + va_end(args); + + return r; +} + +glong rspamd_vprintf_fstring(rspamd_fstring_t **s, const gchar *fmt, va_list args) +{ + return rspamd_vprintf_common(rspamd_printf_append_fstring, s, fmt, args); +} + +#define RSPAMD_PRINTF_APPEND(buf, len) \ + do { \ + RSPAMD_PRINTF_APPEND_BUF(buf, len); \ + fmt++; \ + buf_start = fmt; \ + } while (0) + +#define RSPAMD_PRINTF_APPEND_BUF(buf, len) \ + do { \ + wr = func((buf), (len), apd); \ + if (wr < (__typeof(wr)) (len)) { \ + goto oob; \ + } \ + written += wr; \ + } while (0) + +glong rspamd_vprintf_common(rspamd_printf_append_func func, + gpointer apd, + const gchar *fmt, + va_list args) +{ + gchar zero, numbuf[G_ASCII_DTOSTR_BUF_SIZE], dtoabuf[32], *p, *last; + guchar c; + const gchar *buf_start = fmt; + gint d; + gdouble f; + glong written = 0, wr, slen; + gint64 i64; + guint64 ui64; + guint width, sign, hex, humanize, bytes, frac_width, b32, b64; + rspamd_fstring_t *v; + rspamd_ftok_t *tok; + GString *gs; + GError *err; + + while (*fmt) { + + /* + * "buf < last" means that we could copy at least one character: + * the plain character, "%%", "%c", and minus without the checking + */ + + if (*fmt == '%') { + + /* Append what we have in buf */ + if (fmt > buf_start) { + wr = func(buf_start, fmt - buf_start, apd); + if (wr <= 0) { + goto oob; + } + written += wr; + } + + i64 = 0; + ui64 = 0; + + zero = (gchar) ((*++fmt == '0') ? 
'0' : ' '); + width = 0; + sign = 1; + hex = 0; + b32 = 0; + b64 = 0; + bytes = 0; + humanize = 0; + frac_width = 0; + slen = -1; + + while (*fmt >= '0' && *fmt <= '9') { + width = width * 10 + *fmt++ - '0'; + } + + + for (;;) { + switch (*fmt) { + + case 'u': + sign = 0; + fmt++; + continue; + + case 'm': + fmt++; + continue; + + case 'X': + hex = 2; + sign = 0; + fmt++; + continue; + + case 'x': + hex = 1; + sign = 0; + fmt++; + continue; + case 'b': + b32 = 1; + sign = 0; + fmt++; + continue; + case 'B': + b64 = 1; + sign = 0; + fmt++; + continue; + case 'H': + humanize = 1; + bytes = 1; + sign = 0; + fmt++; + continue; + case 'h': + humanize = 1; + sign = 0; + fmt++; + continue; + case '.': + fmt++; + + if (*fmt == '*') { + d = (gint) va_arg(args, gint); + if (G_UNLIKELY(d < 0)) { + return 0; + } + frac_width = (guint) d; + fmt++; + } + else { + while (*fmt >= '0' && *fmt <= '9') { + frac_width = frac_width * 10 + *fmt++ - '0'; + } + } + + break; + + case '*': + d = (gint) va_arg(args, gint); + if (G_UNLIKELY(d < 0)) { + return 0; + } + slen = (glong) d; + fmt++; + continue; + + default: + break; + } + + break; + } + + + switch (*fmt) { + + case 'V': + v = va_arg(args, rspamd_fstring_t *); + + if (v) { + slen = v->len; + + if (G_UNLIKELY(width != 0)) { + slen = MIN(v->len, width); + } + + RSPAMD_PRINTF_APPEND(v->str, slen); + } + else { + RSPAMD_PRINTF_APPEND("(NULL)", 6); + } + + continue; + + case 'T': + tok = va_arg(args, rspamd_ftok_t *); + + if (tok) { + slen = tok->len; + + if (G_UNLIKELY(width != 0)) { + slen = MIN(tok->len, width); + } + RSPAMD_PRINTF_APPEND(tok->begin, slen); + } + else { + RSPAMD_PRINTF_APPEND("(NULL)", 6); + } + continue; + + case 'v': + gs = va_arg(args, GString *); + + if (gs) { + slen = gs->len; + + if (G_UNLIKELY(width != 0)) { + slen = MIN(gs->len, width); + } + + RSPAMD_PRINTF_APPEND(gs->str, slen); + } + else { + RSPAMD_PRINTF_APPEND("(NULL)", 6); + } + + continue; + + case 'e': + err = va_arg(args, GError *); + + if (err) { + 
p = err->message; + + if (p == NULL) { + p = "(NULL)"; + } + } + else { + p = "unknown error"; + } + + slen = strlen(p); + RSPAMD_PRINTF_APPEND(p, slen); + + continue; + + case 's': + p = va_arg(args, gchar *); + if (p == NULL) { + p = "(NULL)"; + slen = sizeof("(NULL)") - 1; + } + + if (G_UNLIKELY(b32)) { + gchar *b32buf; + + if (G_UNLIKELY(slen == -1)) { + if (G_LIKELY(width != 0)) { + slen = width; + } + else { + /* NULL terminated string */ + slen = strlen(p); + } + } + + b32buf = rspamd_encode_base32(p, slen, RSPAMD_BASE32_DEFAULT); + + if (b32buf) { + RSPAMD_PRINTF_APPEND(b32buf, strlen(b32buf)); + g_free(b32buf); + } + else { + RSPAMD_PRINTF_APPEND("(NULL)", sizeof("(NULL)") - 1); + } + } + else if (G_UNLIKELY(hex)) { + gchar hexbuf[2]; + + if (G_UNLIKELY(slen == -1)) { + if (G_LIKELY(width != 0)) { + slen = width; + } + else { + /* NULL terminated string */ + slen = strlen(p); + } + } + + while (slen) { + hexbuf[0] = hex == 2 ? _HEX[(*p >> 4u) & 0xfu] : _hex[(*p >> 4u) & 0xfu]; + hexbuf[1] = hex == 2 ? 
_HEX[*p & 0xfu] : _hex[*p & 0xfu]; + RSPAMD_PRINTF_APPEND_BUF(hexbuf, 2); + p++; + slen--; + } + + fmt++; + buf_start = fmt; + } + else if (G_UNLIKELY(b64)) { + gchar *b64buf; + gsize olen = 0; + + if (G_UNLIKELY(slen == -1)) { + if (G_LIKELY(width != 0)) { + slen = width; + } + else { + /* NULL terminated string */ + slen = strlen(p); + } + } + + b64buf = rspamd_encode_base64(p, slen, 0, &olen); + + if (b64buf) { + RSPAMD_PRINTF_APPEND(b64buf, olen); + g_free(b64buf); + } + else { + RSPAMD_PRINTF_APPEND("(NULL)", sizeof("(NULL)") - 1); + } + } + else { + if (slen == -1) { + /* NULL terminated string */ + slen = strlen(p); + } + + if (G_UNLIKELY(width != 0)) { + slen = MIN(slen, width); + } + + RSPAMD_PRINTF_APPEND(p, slen); + } + + continue; + + case 'O': + i64 = (gint64) va_arg(args, off_t); + sign = 1; + break; + + case 'P': + i64 = (gint64) va_arg(args, pid_t); + sign = 1; + break; + + case 't': + i64 = (gint64) va_arg(args, time_t); + sign = 1; + break; + + case 'z': + if (sign) { + i64 = (gint64) va_arg(args, ssize_t); + } + else { + ui64 = (guint64) va_arg(args, size_t); + } + break; + + case 'd': + if (sign) { + i64 = (gint64) va_arg(args, gint); + } + else { + ui64 = (guint64) va_arg(args, guint); + } + break; + + case 'l': + if (sign) { + i64 = (gint64) va_arg(args, glong); + } + else { + ui64 = (guint64) va_arg(args, gulong); + } + break; + + case 'D': + if (sign) { + i64 = (gint64) va_arg(args, gint32); + } + else { + ui64 = (guint64) va_arg(args, guint32); + } + break; + + case 'L': + if (sign) { + i64 = va_arg(args, gint64); + } + else { + ui64 = va_arg(args, guint64); + } + break; + + + case 'f': + f = (gdouble) va_arg(args, double); + slen = fpconv_dtoa(f, dtoabuf, frac_width, false); + + RSPAMD_PRINTF_APPEND(dtoabuf, slen); + + continue; + + case 'g': + f = (gdouble) va_arg(args, double); + slen = fpconv_dtoa(f, dtoabuf, 0, true); + RSPAMD_PRINTF_APPEND(dtoabuf, slen); + + continue; + + case 'F': + f = (gdouble) va_arg(args, long double); + slen = 
fpconv_dtoa(f, dtoabuf, frac_width, false); + + RSPAMD_PRINTF_APPEND(dtoabuf, slen); + + continue; + + case 'G': + f = (gdouble) va_arg(args, long double); + slen = fpconv_dtoa(f, dtoabuf, 0, true); + RSPAMD_PRINTF_APPEND(dtoabuf, slen); + + continue; + + case 'p': + ui64 = (uintptr_t) va_arg(args, void *); + hex = 2; + sign = 0; + zero = '0'; + width = sizeof(void *) * 2; + break; + + case 'c': + c = va_arg(args, gint); + c &= 0xffu; + if (G_UNLIKELY(hex)) { + gchar hexbuf[2]; + hexbuf[0] = hex == 2 ? _HEX[(c >> 4u) & 0xfu] : _hex[(c >> 4u) & 0xfu]; + hexbuf[1] = hex == 2 ? _HEX[c & 0xfu] : _hex[c & 0xfu]; + + RSPAMD_PRINTF_APPEND(hexbuf, 2); + } + else { + RSPAMD_PRINTF_APPEND(&c, 1); + } + + continue; + + case 'Z': + c = '\0'; + RSPAMD_PRINTF_APPEND(&c, 1); + + continue; + + case 'N': + c = '\n'; + RSPAMD_PRINTF_APPEND(&c, 1); + + continue; + + case '%': + c = '%'; + RSPAMD_PRINTF_APPEND(&c, 1); + + continue; + + default: + c = *fmt; + RSPAMD_PRINTF_APPEND(&c, 1); + + continue; + } + + /* Print number */ + p = numbuf; + last = p + sizeof(numbuf); + if (sign) { + if (i64 < 0) { + *p++ = '-'; + ui64 = (guint64) -i64; + } + else { + ui64 = (guint64) i64; + } + } + + if (!humanize) { + p = rspamd_sprintf_num(p, last, ui64, zero, hex, b64 + b32, width); + } + else { + p = rspamd_humanize_number(p, last, ui64, bytes); + } + slen = p - numbuf; + RSPAMD_PRINTF_APPEND(numbuf, slen); + } + else { + fmt++; + } + } + + /* Finish buffer */ + if (fmt > buf_start) { + wr = func(buf_start, fmt - buf_start, apd); + if (wr <= 0) { + goto oob; + } + written += wr; + } + +oob: + return written; +} diff --git a/src/libutil/printf.h b/src/libutil/printf.h new file mode 100644 index 0000000..a9420b2 --- /dev/null +++ b/src/libutil/printf.h @@ -0,0 +1,96 @@ +/*- + * Copyright 2016 Vsevolod Stakhov + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
/*-
 * Copyright 2016 Vsevolod Stakhov
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *   http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
#ifndef PRINTF_H_
#define PRINTF_H_

#include "config.h"
#include "fstring.h"

#ifdef __cplusplus
extern "C" {
#endif
/*
 * supported formats:
 *	%[0][width][x][X]O               off_t
 *	%[0][width]t                     time_t
 *	%[0][width][u][x|X|h|H|b|B]z     ssize_t/size_t
 *	%[0][width][u][x|X|h|H|b|B]d     gint/guint
 *	%[0][width][u][x|X|h|H|b|B]l     glong/gulong
 *	%[0][width][u][x|X|h|H|b|B]D     gint32/guint32
 *	%[0][width][u][x|X|h|H|b|B]L     gint64/guint64
 *	%[0][width][.width]f             double
 *	%[0][width][.width]F             long double
 *	%[0][width][.width]g             double
 *	%[0][width][.width]G             long double
 *	%P                               pid_t
 *	%r                               rlim_t
 *	%p                               void *
 *	%V                               rspamd_fstring_t *
 *	%T                               rspamd_ftok_t *
 *	%v                               GString *
 *	%s                               null-terminated string
 *	%xs                              hex encoded string
 *	%bs                              base32 encoded string
 *	%Bs                              base64 encoded string
 *	%*s                              length (gint) and string
 *	%Z                               '\0'
 *	%N                               '\n'
 *	%c                               gchar (passed as gint, low 8 bits are used)
 *	%e                               GError * (its message is printed)
 *	%%                               %
 *
 * Notes:
 *  - a NULL argument for %V, %T, %v and %s is printed as "(NULL)";
 *    a NULL GError for %e is printed as "unknown error";
 *  - `h` humanizes a number, `H` humanizes it as a byte count;
 *  - `.*` takes the fractional width from a gint argument;
 *  - %g and %G do not use the fractional width;
 *  - NOTE(review): no `r` conversion is handled by the conversion switch of
 *    rspamd_vprintf_common in printf.c; an unknown conversion character is
 *    emitted literally there - confirm before relying on %r.
 */

/**
 * Callback used for common printf operations
 * @param buf buffer to append
 * @param buflen length of the buffer
 * @param ud opaque pointer
 * @return number of characters written
 */
typedef glong (*rspamd_printf_append_func)(const gchar *buf, glong buflen,
										   gpointer ud);

glong rspamd_fprintf(FILE *f, const gchar *fmt, ...);

glong rspamd_printf(const gchar *fmt, ...);

glong rspamd_log_fprintf(FILE *f, const gchar *fmt, ...);

glong rspamd_snprintf(gchar *buf, glong max, const gchar *fmt, ...);

gchar *rspamd_vsnprintf(gchar *buf, glong max, const gchar *fmt,
						va_list args);

glong rspamd_printf_gstring(GString *s, const gchar *fmt, ...);

glong rspamd_vprintf_gstring(GString *s, const gchar *fmt, va_list args);

glong rspamd_printf_fstring(rspamd_fstring_t **s, const gchar *fmt, ...);

glong rspamd_vprintf_fstring(rspamd_fstring_t **s, const gchar *fmt, va_list args);

/**
 * Generic formatter: output is passed in pieces to `func` together with `apd`.
 * @return total number of characters reported as written by `func`
 */
glong rspamd_vprintf_common(rspamd_printf_append_func func,
							gpointer apd,
							const gchar *fmt,
							va_list args);

#ifdef __cplusplus
}
#endif

#endif /* PRINTF_H_ */
gchar *fmt, ...); + +glong rspamd_vprintf_gstring(GString *s, const gchar *fmt, va_list args); + +glong rspamd_printf_fstring(rspamd_fstring_t **s, const gchar *fmt, ...); + +glong rspamd_vprintf_fstring(rspamd_fstring_t **s, const gchar *fmt, va_list args); + +glong rspamd_vprintf_common(rspamd_printf_append_func func, + gpointer apd, + const gchar *fmt, + va_list args); + +#ifdef __cplusplus +} +#endif + +#endif /* PRINTF_H_ */ diff --git a/src/libutil/radix.c b/src/libutil/radix.c new file mode 100644 index 0000000..93c728c --- /dev/null +++ b/src/libutil/radix.c @@ -0,0 +1,434 @@ +/*- + * Copyright 2016 Vsevolod Stakhov + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +#include "config.h" +#include "radix.h" +#include "rspamd.h" +#include "mem_pool.h" +#include "btrie.h" + +#define msg_err_radix(...) rspamd_default_log_function(G_LOG_LEVEL_CRITICAL, \ + "radix", tree->pool->tag.uid, \ + G_STRFUNC, \ + __VA_ARGS__) +#define msg_warn_radix(...) rspamd_default_log_function(G_LOG_LEVEL_WARNING, \ + "radix", tree->pool->tag.uid, \ + G_STRFUNC, \ + __VA_ARGS__) +#define msg_info_radix(...) rspamd_default_log_function(G_LOG_LEVEL_INFO, \ + "radix", tree->pool->tag.uid, \ + G_STRFUNC, \ + __VA_ARGS__) +#define msg_debug_radix(...) 
rspamd_conditional_debug_fast(NULL, NULL, \ + rspamd_radix_log_id, "radix", tree->pool->tag.uid, \ + G_STRFUNC, \ + __VA_ARGS__) + +INIT_LOG_MODULE(radix) + +struct radix_tree_compressed { + rspamd_mempool_t *pool; + struct btrie *tree; + const gchar *name; + size_t size; + guint duplicates; + gboolean own_pool; +}; + +uintptr_t +radix_find_compressed(radix_compressed_t *tree, const guint8 *key, gsize keylen) +{ + gconstpointer ret; + + g_assert(tree != NULL); + + ret = btrie_lookup(tree->tree, key, keylen * NBBY); + + if (ret == NULL) { + return RADIX_NO_VALUE; + } + + return (uintptr_t) ret; +} + + +uintptr_t +radix_insert_compressed(radix_compressed_t *tree, + guint8 *key, gsize keylen, + gsize masklen, + uintptr_t value) +{ + static const guint max_duplicates = 32; + guint keybits = keylen * NBBY; + uintptr_t old; + gchar ip_str[INET6_ADDRSTRLEN + 1]; + int ret; + + g_assert(tree != NULL); + g_assert(keybits >= masklen); + + msg_debug_radix("%s: want insert value %p with mask %z, key: %*xs", + tree->name, (gpointer) value, keybits - masklen, (int) keylen, key); + + old = radix_find_compressed(tree, key, keylen); + + ret = btrie_add_prefix(tree->tree, key, keybits - masklen, + (gconstpointer) value); + + if (ret != BTRIE_OKAY) { + tree->duplicates++; + + if (tree->duplicates == max_duplicates) { + msg_err_radix("%s: maximum duplicates limit reached: %d, " + "suppress further errors", + tree->name, max_duplicates); + } + else if (tree->duplicates < max_duplicates) { + memset(ip_str, 0, sizeof(ip_str)); + + if (keybits == 32) { + msg_err_radix("%s: cannot insert %p, key: %s/%d, duplicate value", + tree->name, + (gpointer) value, + inet_ntop(AF_INET, key, ip_str, sizeof(ip_str) - 1), + (gint) (keybits - masklen)); + } + else if (keybits == 128) { + msg_err_radix("%s: cannot insert %p, key: [%s]/%d, duplicate value", + tree->name, + (gpointer) value, + inet_ntop(AF_INET6, key, ip_str, sizeof(ip_str) - 1), + (gint) (keybits - masklen)); + } + else { + 
msg_err_radix("%s: cannot insert %p with mask %z, key: %*xs, duplicate value", + tree->name, + (gpointer) value, + keybits - masklen, + (int) keylen, key); + } + } + } + else { + tree->size++; + } + + return old; +} + + +radix_compressed_t * +radix_create_compressed(const gchar *tree_name) +{ + radix_compressed_t *tree; + + tree = g_malloc(sizeof(*tree)); + if (tree == NULL) { + return NULL; + } + + tree->pool = rspamd_mempool_new(rspamd_mempool_suggest_size(), NULL, 0); + tree->size = 0; + tree->duplicates = 0; + tree->tree = btrie_init(tree->pool); + tree->own_pool = TRUE; + tree->name = tree_name; + + return tree; +} + +radix_compressed_t * +radix_create_compressed_with_pool(rspamd_mempool_t *pool, const gchar *tree_name) +{ + radix_compressed_t *tree; + + tree = rspamd_mempool_alloc(pool, sizeof(*tree)); + tree->pool = pool; + tree->size = 0; + tree->duplicates = 0; + tree->tree = btrie_init(tree->pool); + tree->own_pool = FALSE; + tree->name = tree_name; + + return tree; +} + +void radix_destroy_compressed(radix_compressed_t *tree) +{ + if (tree) { + if (tree->own_pool) { + rspamd_mempool_delete(tree->pool); + g_free(tree); + } + } +} + +uintptr_t +radix_find_compressed_addr(radix_compressed_t *tree, + const rspamd_inet_addr_t *addr) +{ + const guchar *key; + guint klen = 0; + guchar buf[16]; + + if (addr == NULL) { + return RADIX_NO_VALUE; + } + + key = rspamd_inet_address_get_hash_key(addr, &klen); + + if (key && klen) { + if (klen == 4) { + /* Map to ipv6 */ + memset(buf, 0, 10); + buf[10] = 0xffu; + buf[11] = 0xffu; + memcpy(buf + 12, key, klen); + + key = buf; + klen = sizeof(buf); + } + + return radix_find_compressed(tree, key, klen); + } + + return RADIX_NO_VALUE; +} + +gint rspamd_radix_add_iplist(const gchar *list, const gchar *separators, + radix_compressed_t *tree, gconstpointer value, + gboolean resolve, const gchar *tree_name) +{ + gchar *token, *ipnet, *err_str, **strv, **cur, *brace; + union { + struct in_addr ina; + struct in6_addr ina6; + 
guchar buf[16]; + } addr_buf; + guint k = G_MAXINT; + gint af; + gint res = 0, r; + struct addrinfo hints, *ai_res, *cur_ai; + + /* Split string if there are multiple items inside a single string */ + strv = g_strsplit_set(list, separators, 0); + cur = strv; + while (*cur) { + af = AF_UNSPEC; + if (**cur == '\0') { + cur++; + continue; + } + + /* Extract ipnet */ + ipnet = g_strstrip(*cur); + token = strsep(&ipnet, "/"); + + if (ipnet != NULL) { + errno = 0; + /* Get mask */ + k = strtoul(ipnet, &err_str, 10); + if (errno != 0) { + msg_warn_radix( + "%s: invalid netmask, error detected on symbol: %s, error: %s", + tree_name, + err_str, + strerror(errno)); + k = G_MAXINT; + } + } + + /* Check IP */ + if (token[0] == '[') { + /* Braced IPv6 */ + brace = strrchr(token, ']'); + + if (brace != NULL) { + token++; + *brace = '\0'; + + if (inet_pton(AF_INET6, token, &addr_buf.ina6) == 1) { + af = AF_INET6; + } + else { + msg_warn_radix("invalid IP address: %s", token); + + cur++; + continue; + } + } + else { + msg_warn_radix("invalid IP address: %s", token); + + cur++; + continue; + } + } + else { + if (inet_pton(AF_INET, token, &addr_buf.ina) == 1) { + af = AF_INET; + } + else if (inet_pton(AF_INET6, token, &addr_buf.ina6) == 1) { + af = AF_INET6; + } + else { + + if (resolve) { + memset(&hints, 0, sizeof(hints)); + hints.ai_socktype = SOCK_STREAM; /* Type of the socket */ + hints.ai_flags = AI_NUMERICSERV; + hints.ai_family = AF_UNSPEC; + + if ((r = getaddrinfo(token, NULL, &hints, &ai_res)) == 0) { + for (cur_ai = ai_res; cur_ai != NULL; + cur_ai = cur_ai->ai_next) { + + if (cur_ai->ai_family == AF_INET) { + struct sockaddr_in *sin; + + sin = (struct sockaddr_in *) cur_ai->ai_addr; + if (k > 32) { + k = 32; + } + + /* Convert to IPv4 mapped IPv6 */ + memset(addr_buf.buf, 0, 10); + addr_buf.buf[10] = 0xffu; + addr_buf.buf[11] = 0xffu; + memcpy(addr_buf.buf + 12, + &sin->sin_addr, 4); + + k += 96; + + radix_insert_compressed(tree, + addr_buf.buf, + sizeof(addr_buf.buf), + 
128 - k, (uintptr_t) value); + res++; + } + else if (cur_ai->ai_family == AF_INET6) { + struct sockaddr_in6 *sin6; + + sin6 = (struct sockaddr_in6 *) cur_ai->ai_addr; + if (k > 128) { + k = 128; + } + + memcpy(addr_buf.buf, &sin6->sin6_addr, + sizeof(sin6->sin6_addr)); + radix_insert_compressed(tree, + addr_buf.buf, + sizeof(addr_buf.buf), + 128 - k, (uintptr_t) value); + res++; + } + } + + freeaddrinfo(ai_res); + } + else { + msg_warn_radix("getaddrinfo failed for %s: %s", token, + gai_strerror(r)); + } + + cur++; + continue; + } + else { + msg_warn_radix("invalid IP address: %s", token); + + cur++; + continue; + } + } + } + + if (af == AF_INET) { + if (k > 32) { + k = 32; + } + + /* Move to the last part of the address */ + memmove(addr_buf.buf + 12, &addr_buf.ina, 4); + memset(addr_buf.buf, 0, 10); + addr_buf.buf[10] = 0xffu; + addr_buf.buf[11] = 0xffu; + k += 96; + radix_insert_compressed(tree, addr_buf.buf, sizeof(addr_buf.buf), + 128 - k, (uintptr_t) value); + res++; + } + else if (af == AF_INET6) { + if (k > 128) { + k = 128; + } + + radix_insert_compressed(tree, addr_buf.buf, sizeof(addr_buf), + 128 - k, (uintptr_t) value); + res++; + } + cur++; + } + + g_strfreev(strv); + + return res; +} + +gboolean +radix_add_generic_iplist(const gchar *ip_list, radix_compressed_t **tree, + gboolean resolve, const gchar *tree_name) +{ + static const char fill_ptr[] = "1"; + + if (*tree == NULL) { + *tree = radix_create_compressed(tree_name); + } + + return (rspamd_radix_add_iplist(ip_list, ",; ", *tree, + fill_ptr, resolve, tree_name) > 0); +} + + +gsize radix_get_size(radix_compressed_t *tree) +{ + if (tree != NULL) { + return tree->size; + } + + return 0; +} + + +rspamd_mempool_t * +radix_get_pool(radix_compressed_t *tree) +{ + + if (tree != NULL) { + return tree->pool; + } + + return NULL; +} + +const gchar * +radix_get_info(radix_compressed_t *tree) +{ + if (tree == NULL) { + return NULL; + } + + return btrie_stats(tree->tree, tree->duplicates); +} diff --git 
a/src/libutil/radix.h b/src/libutil/radix.h new file mode 100644 index 0000000..a85da5b --- /dev/null +++ b/src/libutil/radix.h @@ -0,0 +1,123 @@ +/*- + * Copyright 2016 Vsevolod Stakhov + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +#ifndef RADIX_H +#define RADIX_H + +#include "config.h" +#include "mem_pool.h" +#include "util.h" + +#define RADIX_NO_VALUE (uintptr_t) - 1 + +#ifdef __cplusplus +extern "C" { +#endif + +typedef struct radix_tree_compressed radix_compressed_t; + +/** + * Insert new key to the radix trie + * @param tree radix trie + * @param key key to insert (bitstring) + * @param keylen length of the key (in bytes) + * @param masklen length of mask that should be applied to the key (in bits) + * @param value opaque value pointer + * @return previous value of the key or `RADIX_NO_VALUE` + */ +uintptr_t +radix_insert_compressed(radix_compressed_t *tree, + guint8 *key, gsize keylen, + gsize masklen, + uintptr_t value); + +/** + * Find a key in a radix trie + * @param tree radix trie + * @param key key to find (bitstring) + * @param keylen length of a key + * @return opaque pointer or `RADIX_NO_VALUE` if no value has been found + */ +uintptr_t radix_find_compressed(radix_compressed_t *tree, const guint8 *key, + gsize keylen); + +/** + * Find specified address in tree (works for IPv4 or IPv6 addresses) + * @param tree + * @param addr + * @return + */ +uintptr_t radix_find_compressed_addr(radix_compressed_t *tree, + const rspamd_inet_addr_t 
*addr); + +/** + * Destroy the complete radix trie + * @param tree + */ +void radix_destroy_compressed(radix_compressed_t *tree); + +/** + * Create new radix trie + * @return + */ +radix_compressed_t *radix_create_compressed(const gchar *tree_name); + +radix_compressed_t *radix_create_compressed_with_pool(rspamd_mempool_t *pool, const gchar *tree_name); + +/** + * Insert list of ip addresses and masks to the radix tree + * @param list string line of addresses + * @param separators string of characters used as separators + * @param tree target tree + * @return number of elements inserted + */ +gint rspamd_radix_add_iplist(const gchar *list, const gchar *separators, + radix_compressed_t *tree, gconstpointer value, + gboolean resolve, const gchar *tree_name); + +/** + * Generic version of @see rspamd_radix_add_iplist. This function creates tree + * if `tree` is NULL. + */ +gboolean +radix_add_generic_iplist(const gchar *ip_list, + radix_compressed_t **tree, + gboolean resolve, + const gchar *tree_name); + +/** + * Returns number of elements in the tree + * @param tree + * @return + */ +gsize radix_get_size(radix_compressed_t *tree); + +/** + * Return string that describes this radix tree (memory, nodes, compression etc) + * @param tree + * @return constant string + */ +const gchar *radix_get_info(radix_compressed_t *tree); + +/** + * Returns memory pool associated with the radix tree + */ +rspamd_mempool_t *radix_get_pool(radix_compressed_t *tree); + +#ifdef __cplusplus +} +#endif + +#endif diff --git a/src/libutil/ref.h b/src/libutil/ref.h new file mode 100644 index 0000000..2a3fd8d --- /dev/null +++ b/src/libutil/ref.h @@ -0,0 +1,91 @@ +/*- + * Copyright 2016 Vsevolod Stakhov + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
/*-
 * Copyright 2016 Vsevolod Stakhov
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *   http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
#ifndef REF_H_
#define REF_H_

#ifdef HAVE_CONFIG_H
#include "config.h"
#endif


/**
 * @file ref.h
 * A set of macros to handle refcounts.
 *
 * An object using these macros must have a member `ref` of type
 * `ref_entry_t`. All macros accept a NULL object and do nothing for it.
 * Macro arguments are evaluated more than once, so they must not have
 * side effects.
 */

typedef void (*ref_dtor_cb_t)(void *data);

typedef struct ref_entry_s {
	unsigned int refcount;
	ref_dtor_cb_t dtor;
} ref_entry_t;

/* Set destructor and start with zero references */
#define REF_INIT(obj, dtor_cb)                           \
	do {                                                 \
		if ((obj) != NULL) {                             \
			(obj)->ref.refcount = 0;                     \
			(obj)->ref.dtor = (ref_dtor_cb_t) (dtor_cb); \
		}                                                \
	} while (0)

/* Set destructor and start with a single reference */
#define REF_INIT_RETAIN(obj, dtor_cb)                    \
	do {                                                 \
		if ((obj) != NULL) {                             \
			(obj)->ref.refcount = 1;                     \
			(obj)->ref.dtor = (ref_dtor_cb_t) (dtor_cb); \
		}                                                \
	} while (0)

#ifdef HAVE_ATOMIC_BUILTINS
#define REF_RETAIN_ATOMIC(obj)                                             \
	do {                                                                   \
		if ((obj) != NULL) {                                               \
			__atomic_add_fetch(&(obj)->ref.refcount, 1, __ATOMIC_RELEASE); \
		}                                                                  \
	} while (0)

#define REF_RELEASE_ATOMIC(obj)                                                                    \
	do {                                                                                           \
		if ((obj) != NULL) {                                                                       \
			unsigned int _rc_priv = __atomic_sub_fetch(&(obj)->ref.refcount, 1, __ATOMIC_ACQ_REL); \
			if (_rc_priv == 0 && (obj)->ref.dtor) {                                                \
				(obj)->ref.dtor(obj);                                                              \
			}                                                                                      \
		}                                                                                          \
	} while (0)

#else
/* No atomic builtins: fall back to the plain (non-atomic) variants */
#define REF_RETAIN_ATOMIC REF_RETAIN
#define REF_RELEASE_ATOMIC REF_RELEASE
#endif

/* Add one reference */
#define REF_RETAIN(obj)            \
	do {                           \
		if ((obj) != NULL) {       \
			(obj)->ref.refcount++; \
		}                          \
	} while (0)

/* Drop one reference; call the destructor (if any) when the count reaches zero */
#define REF_RELEASE(obj)                                            \
	do {                                                            \
		if ((obj) != NULL) {                                        \
			if (--(obj)->ref.refcount == 0 && (obj)->ref.dtor) {    \
				(obj)->ref.dtor(obj);                               \
			}                                                       \
		}                                                           \
	} while (0)

#endif /* REF_H_ */
index 0000000..9f143ac --- /dev/null +++ b/src/libutil/regexp.c @@ -0,0 +1,1359 @@ +/* + * Copyright 2023 Vsevolod Stakhov + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +#include "config.h" +#include "regexp.h" +#include "cryptobox.h" +#include "ref.h" +#include "util.h" +#include "rspamd.h" +#include "contrib/fastutf8/fastutf8.h" + +#ifndef WITH_PCRE2 +/* Normal pcre path */ +#include <pcre.h> +#define PCRE_T pcre +#define PCRE_EXTRA_T pcre_extra +#define PCRE_JIT_T pcre_jit_stack +#define PCRE_FREE pcre_free +#define PCRE_JIT_STACK_FREE pcre_jit_stack_free +#define PCRE_FLAG(x) G_PASTE(PCRE_, x) +#else +/* PCRE 2 path */ +#ifndef PCRE2_CODE_UNIT_WIDTH +#define PCRE2_CODE_UNIT_WIDTH 8 +#endif + +#include <pcre2.h> +#define PCRE_T pcre2_code +#define PCRE_JIT_T pcre2_jit_stack +#define PCRE_FREE pcre2_code_free +#define PCRE_JIT_STACK_FREE pcre2_jit_stack_free + +#define PCRE_FLAG(x) G_PASTE(PCRE2_, x) +#endif + +typedef guchar regexp_id_t[rspamd_cryptobox_HASHBYTES]; + +#undef DISABLE_JIT_FAST + +struct rspamd_regexp_s { + gdouble exec_time; + gchar *pattern; + PCRE_T *re; + PCRE_T *raw_re; +#ifndef WITH_PCRE2 + PCRE_EXTRA_T *extra; + PCRE_EXTRA_T *raw_extra; +#else + pcre2_match_context *mcontext; + pcre2_match_context *raw_mcontext; +#endif + regexp_id_t id; + ref_entry_t ref; + gpointer ud; + gpointer re_class; + guint64 cache_id; + gsize match_limit; + guint max_hits; + gint flags; + gint pcre_flags; + gint ncaptures; +}; + +struct 
rspamd_regexp_cache { + GHashTable *tbl; +#ifdef HAVE_PCRE_JIT + PCRE_JIT_T *jstack; +#endif +}; + +static struct rspamd_regexp_cache *global_re_cache = NULL; +static gboolean can_jit = FALSE; +static gboolean check_jit = TRUE; +static const int max_re_cache_size = 8192; + +#ifdef WITH_PCRE2 +static pcre2_compile_context *pcre2_ctx = NULL; +#endif + +static GQuark +rspamd_regexp_quark(void) +{ + return g_quark_from_static_string("rspamd-regexp"); +} + +static void +rspamd_regexp_generate_id(const gchar *pattern, const gchar *flags, + regexp_id_t out) +{ + rspamd_cryptobox_hash_state_t st; + + rspamd_cryptobox_hash_init(&st, NULL, 0); + + if (flags) { + rspamd_cryptobox_hash_update(&st, flags, strlen(flags)); + } + + rspamd_cryptobox_hash_update(&st, pattern, strlen(pattern)); + rspamd_cryptobox_hash_final(&st, out); +} + +static void +rspamd_regexp_dtor(rspamd_regexp_t *re) +{ + if (re) { + if (re->raw_re && re->raw_re != re->re) { +#ifndef WITH_PCRE2 + /* PCRE1 version */ +#ifdef HAVE_PCRE_JIT + if (re->raw_extra) { + pcre_free_study(re->raw_extra); + } +#endif +#else + /* PCRE 2 version */ + if (re->raw_mcontext) { + pcre2_match_context_free(re->raw_mcontext); + } +#endif + PCRE_FREE(re->raw_re); + } + + if (re->re) { +#ifndef WITH_PCRE2 + /* PCRE1 version */ +#ifdef HAVE_PCRE_JIT + if (re->extra) { + pcre_free_study(re->extra); + } +#endif +#else + /* PCRE 2 version */ + if (re->mcontext) { + pcre2_match_context_free(re->mcontext); + } +#endif + PCRE_FREE(re->re); + } + + if (re->pattern) { + g_free(re->pattern); + } + + g_free(re); + } +} + +static void +rspamd_regexp_post_process(rspamd_regexp_t *r) +{ + if (global_re_cache == NULL) { + rspamd_regexp_library_init(NULL); + } +#if defined(WITH_PCRE2) + static const guint max_recursion_depth = 100000, max_backtrack = 1000000; + + /* Create match context */ + r->mcontext = pcre2_match_context_create(NULL); + g_assert(r->mcontext != NULL); + pcre2_set_recursion_limit(r->mcontext, max_recursion_depth); + 
pcre2_set_match_limit(r->mcontext, max_backtrack); + + if (r->raw_re && r->re != r->raw_re) { + r->raw_mcontext = pcre2_match_context_create(NULL); + g_assert(r->raw_mcontext != NULL); + pcre2_set_recursion_limit(r->raw_mcontext, max_recursion_depth); + pcre2_set_match_limit(r->raw_mcontext, max_backtrack); + } + else if (r->raw_re) { + r->raw_mcontext = r->mcontext; + } + else { + r->raw_mcontext = NULL; + } + +#ifdef HAVE_PCRE_JIT + guint jit_flags = can_jit ? PCRE2_JIT_COMPLETE : 0; + gsize jsz; + PCRE2_UCHAR errstr[128]; + int errcode; + + if (can_jit) { + if ((errcode = pcre2_jit_compile(r->re, jit_flags)) < 0) { + pcre2_get_error_message(errcode, errstr, G_N_ELEMENTS(errstr)); + msg_err("jit compilation is not supported: %s; pattern: \"%s\"", errstr, r->pattern); + r->flags |= RSPAMD_REGEXP_FLAG_DISABLE_JIT; + } + else { + if (!(pcre2_pattern_info(r->re, PCRE2_INFO_JITSIZE, &jsz) >= 0 && jsz > 0)) { + msg_err("cannot exec pcre2_pattern_info(PCRE2_INFO_JITSIZE) on \"%s\"", r->pattern); + r->flags |= RSPAMD_REGEXP_FLAG_DISABLE_JIT; + } + } + } + else { + r->flags |= RSPAMD_REGEXP_FLAG_DISABLE_JIT; + } + + if (!(r->flags & RSPAMD_REGEXP_FLAG_DISABLE_JIT)) { + pcre2_jit_stack_assign(r->mcontext, NULL, global_re_cache->jstack); + } + + if (r->raw_re && r->re != r->raw_re && !(r->flags & RSPAMD_REGEXP_FLAG_DISABLE_JIT)) { + if ((errcode = pcre2_jit_compile(r->raw_re, jit_flags)) < 0) { + pcre2_get_error_message(errcode, errstr, G_N_ELEMENTS(errstr)); + msg_debug("jit compilation is not supported for raw regexp: %s; pattern: \"%s\"", errstr, r->pattern); + r->flags |= RSPAMD_REGEXP_FLAG_DISABLE_JIT; + } + else { + if (!(pcre2_pattern_info(r->raw_re, PCRE2_INFO_JITSIZE, &jsz) >= 0 && jsz > 0)) { + msg_err("cannot exec pcre2_pattern_info(PCRE2_INFO_JITSIZE) on \"%s\"", r->pattern); + } + else if (!(r->flags & RSPAMD_REGEXP_FLAG_DISABLE_JIT)) { + g_assert(r->raw_mcontext != NULL); + pcre2_jit_stack_assign(r->raw_mcontext, NULL, global_re_cache->jstack); + } + } + } 
+#endif + +#else + const gchar *err_str = "unknown"; + gboolean try_jit = TRUE, try_raw_jit = TRUE; + gint study_flags = 0; + +#if defined(HAVE_PCRE_JIT) + study_flags |= PCRE_STUDY_JIT_COMPILE; +#endif + + /* Pcre 1 needs study */ + if (r->re) { + r->extra = pcre_study(r->re, study_flags, &err_str); + + if (r->extra == NULL) { + msg_debug("cannot optimize regexp pattern: '%s': %s", + r->pattern, err_str); + try_jit = FALSE; + r->flags |= RSPAMD_REGEXP_FLAG_DISABLE_JIT; + } + } + else { + g_assert_not_reached(); + } + + if (r->raw_re && r->raw_re != r->re) { + r->raw_extra = pcre_study(r->re, study_flags, &err_str); + } + else if (r->raw_re == r->re) { + r->raw_extra = r->extra; + } + + if (r->raw_extra == NULL) { + + msg_debug("cannot optimize raw regexp pattern: '%s': %s", + r->pattern, err_str); + try_raw_jit = FALSE; + } + /* JIT path */ + if (try_jit) { +#ifdef HAVE_PCRE_JIT + gint jit, n; + + if (can_jit) { + jit = 0; + n = pcre_fullinfo(r->re, r->extra, + PCRE_INFO_JIT, &jit); + + if (n != 0 || jit != 1) { + msg_debug("jit compilation of %s is not supported", r->pattern); + r->flags |= RSPAMD_REGEXP_FLAG_DISABLE_JIT; + } + else { + pcre_assign_jit_stack(r->extra, NULL, global_re_cache->jstack); + } + } +#endif + } + else { + msg_debug("cannot optimize regexp pattern: '%s': %s", + r->pattern, err_str); + r->flags |= RSPAMD_REGEXP_FLAG_DISABLE_JIT; + } + + if (try_raw_jit) { +#ifdef HAVE_PCRE_JIT + gint jit, n; + + if (can_jit) { + + if (r->raw_re != r->re) { + jit = 0; + n = pcre_fullinfo(r->raw_re, r->raw_extra, + PCRE_INFO_JIT, &jit); + + if (n != 0 || jit != 1) { + msg_debug("jit compilation of %s is not supported", r->pattern); + r->flags |= RSPAMD_REGEXP_FLAG_DISABLE_JIT; + } + else { + pcre_assign_jit_stack(r->raw_extra, NULL, + global_re_cache->jstack); + } + } + } +#endif + } +#endif /* WITH_PCRE2 */ +} + +rspamd_regexp_t * +rspamd_regexp_new_len(const gchar *pattern, gsize len, const gchar *flags, + GError **err) +{ + const gchar *start = pattern, 
*end = start + len, *flags_str = NULL, *flags_end = NULL; + gchar *err_str; + rspamd_regexp_t *res; + gboolean explicit_utf = FALSE; + PCRE_T *r; + gchar sep = 0, *real_pattern; +#ifndef WITH_PCRE2 + gint err_off; +#else + gsize err_off; +#endif + gint regexp_flags = 0, rspamd_flags = 0, err_code, ncaptures; + gboolean strict_flags = FALSE; + + rspamd_regexp_library_init(NULL); + + if (pattern == NULL) { + g_set_error(err, rspamd_regexp_quark(), EINVAL, + "cannot create regexp from a NULL pattern"); + return NULL; + } + + if (flags == NULL && start + 1 < end) { + /* We need to parse pattern and detect flags set */ + if (*start == '/') { + sep = '/'; + } + else if (*start == 'm' && start[1] != '\\' && g_ascii_ispunct(start[1])) { + start++; + sep = *start; + + /* Paired braces */ + if (sep == '{') { + sep = '}'; + } + + rspamd_flags |= RSPAMD_REGEXP_FLAG_FULL_MATCH; + } + if (sep == 0) { + /* We have no flags, no separators and just use all line as expr */ + start = pattern; + rspamd_flags &= ~RSPAMD_REGEXP_FLAG_FULL_MATCH; + } + else { + gchar *last_sep = rspamd_memrchr(pattern, sep, len); + + if (last_sep == NULL || last_sep <= start) { + g_set_error(err, rspamd_regexp_quark(), EINVAL, + "pattern is not enclosed with %c: %s", + sep, pattern); + return NULL; + } + flags_str = last_sep + 1; + flags_end = end; + end = last_sep; + start++; + } + } + else { + /* Strictly check all flags */ + strict_flags = TRUE; + start = pattern; + flags_str = flags; + if (flags) { + flags_end = flags + strlen(flags); + } + } + + rspamd_flags |= RSPAMD_REGEXP_FLAG_RAW; + +#ifndef WITH_PCRE2 + regexp_flags &= ~PCRE_FLAG(UTF8); + regexp_flags |= PCRE_FLAG(NEWLINE_ANYCRLF); +#else + regexp_flags &= ~PCRE_FLAG(UTF); +#endif + + if (flags_str != NULL) { + while (flags_str < flags_end) { + switch (*flags_str) { + case 'i': + regexp_flags |= PCRE_FLAG(CASELESS); + break; + case 'm': + regexp_flags |= PCRE_FLAG(MULTILINE); + break; + case 's': + regexp_flags |= PCRE_FLAG(DOTALL); + break; + 
case 'x': + regexp_flags |= PCRE_FLAG(EXTENDED); + break; + case 'u': + rspamd_flags &= ~RSPAMD_REGEXP_FLAG_RAW; + rspamd_flags |= RSPAMD_REGEXP_FLAG_UTF; +#ifndef WITH_PCRE2 + regexp_flags |= PCRE_FLAG(UTF8); +#else + regexp_flags |= PCRE_FLAG(UTF); +#endif + explicit_utf = TRUE; + break; + case 'O': + /* We optimize all regexps by default */ + rspamd_flags |= RSPAMD_REGEXP_FLAG_NOOPT; + break; + case 'L': + /* SOM_LEFTMOST hyperscan flag */ + rspamd_flags |= RSPAMD_REGEXP_FLAG_LEFTMOST; + break; + case 'r': + rspamd_flags |= RSPAMD_REGEXP_FLAG_RAW; + rspamd_flags &= ~RSPAMD_REGEXP_FLAG_UTF; +#ifndef WITH_PCRE2 + regexp_flags &= ~PCRE_FLAG(UTF8); +#else + regexp_flags &= ~PCRE_FLAG(UTF); +#endif + break; + default: + if (strict_flags) { + g_set_error(err, rspamd_regexp_quark(), EINVAL, + "invalid regexp flag: %c in pattern %s", + *flags_str, pattern); + return NULL; + } + msg_warn("invalid flag '%c' in pattern %s", *flags_str, pattern); + goto fin; + break; + } + flags_str++; + } + } +fin: + + real_pattern = g_malloc(end - start + 1); + rspamd_strlcpy(real_pattern, start, end - start + 1); + +#ifndef WITH_PCRE2 + r = pcre_compile(real_pattern, regexp_flags, + (const char **) &err_str, &err_off, NULL); + (void) err_code; +#else + r = pcre2_compile(real_pattern, PCRE2_ZERO_TERMINATED, + regexp_flags, + &err_code, &err_off, pcre2_ctx); + + if (r == NULL) { + err_str = g_alloca(1024); + memset(err_str, 0, 1024); + pcre2_get_error_message(err_code, err_str, 1024); + } +#endif + + if (r == NULL) { + g_set_error(err, rspamd_regexp_quark(), EINVAL, + "regexp parsing error: '%s' at position %d; pattern: %s", + err_str, (gint) err_off, real_pattern); + g_free(real_pattern); + + return NULL; + } + + /* Now allocate the target structure */ + res = g_malloc0(sizeof(*res)); + REF_INIT_RETAIN(res, rspamd_regexp_dtor); + res->flags = rspamd_flags; + res->pattern = real_pattern; + res->cache_id = RSPAMD_INVALID_ID; + res->pcre_flags = regexp_flags; + res->max_hits = 0; + res->re = 
r; + + if (rspamd_flags & RSPAMD_REGEXP_FLAG_RAW) { + res->raw_re = r; + } + else if (!explicit_utf) { +#ifndef WITH_PCRE2 + res->raw_re = pcre_compile(real_pattern, regexp_flags & ~PCRE_FLAG(UTF8), + (const char **) &err_str, &err_off, NULL); + (void) err_code; +#else + res->raw_re = pcre2_compile(real_pattern, PCRE2_ZERO_TERMINATED, + regexp_flags & ~PCRE_FLAG(UTF), + &err_code, &err_off, pcre2_ctx); + if (res->raw_re == NULL) { + err_str = g_alloca(1024); + memset(err_str, 0, 1024); + pcre2_get_error_message(err_code, err_str, 1024); + } +#endif + if (res->raw_re == NULL) { + msg_warn("raw regexp parsing error: '%s': '%s' at position %d", + err_str, real_pattern, (gint) err_off); + } + } + + rspamd_regexp_post_process(res); + rspamd_regexp_generate_id(pattern, flags, res->id); + +#ifndef WITH_PCRE2 + /* Check number of captures */ + if (pcre_fullinfo(res->raw_re, res->extra, PCRE_INFO_CAPTURECOUNT, + &ncaptures) == 0) { + res->ncaptures = ncaptures; + } +#else + /* Check number of captures */ + if (pcre2_pattern_info(res->raw_re, PCRE2_INFO_CAPTURECOUNT, + &ncaptures) == 0) { + res->ncaptures = ncaptures; + } +#endif + + return res; +} + +rspamd_regexp_t * +rspamd_regexp_new(const gchar *pattern, const gchar *flags, + GError **err) +{ + return rspamd_regexp_new_len(pattern, strlen(pattern), flags, err); +} + +#ifndef WITH_PCRE2 +gboolean +rspamd_regexp_search(const rspamd_regexp_t *re, const gchar *text, gsize len, + const gchar **start, const gchar **end, gboolean raw, + GArray *captures) +{ + pcre *r; + pcre_extra *ext; +#if defined(HAVE_PCRE_JIT) && defined(HAVE_PCRE_JIT_FAST) && !defined(DISABLE_JIT_FAST) + pcre_jit_stack *st = NULL; +#endif + const gchar *mt; + gsize remain = 0; + gint rc, match_flags = 0, *ovec, ncaptures, i; + const int junk = 0xdeadbabe; + + g_assert(re != NULL); + g_assert(text != NULL); + + if (len == 0) { + len = strlen(text); + } + + if (re->match_limit > 0 && len > re->match_limit) { + len = re->match_limit; + } + + if (end != NULL 
&& *end != NULL) { + /* Incremental search */ + mt = (*end); + + if ((gint) len > (mt - text)) { + remain = len - (mt - text); + } + } + else { + mt = text; + remain = len; + } + + if (remain == 0) { + return FALSE; + } + + match_flags = PCRE_NEWLINE_ANYCRLF; + + if ((re->flags & RSPAMD_REGEXP_FLAG_RAW) || raw) { + r = re->raw_re; + ext = re->raw_extra; +#if defined(HAVE_PCRE_JIT) && defined(HAVE_PCRE_JIT_FAST) && !defined(DISABLE_JIT_FAST) + st = global_re_cache->jstack; +#endif + } + else { + r = re->re; + ext = re->extra; +#if defined(HAVE_PCRE_JIT) && defined(HAVE_PCRE_JIT_FAST) && !defined(DISABLE_JIT_FAST) + if (rspamd_fast_utf8_validate(mt, remain) == 0) { + st = global_re_cache->jstack; + } + else { + msg_err("bad utf8 input for JIT re '%s'", re->pattern); + return FALSE; + } +#endif + } + + if (r == NULL) { + /* Invalid regexp type for the specified input */ + return FALSE; + } + + ncaptures = (re->ncaptures + 1) * 3; + ovec = g_alloca(sizeof(gint) * ncaptures); + + + for (i = 0; i < ncaptures; i++) { + ovec[i] = junk; + } + + if (!(re->flags & RSPAMD_REGEXP_FLAG_NOOPT)) { +#ifdef HAVE_PCRE_JIT +#if defined(HAVE_PCRE_JIT_FAST) && !defined(DISABLE_JIT_FAST) + /* XXX: flags seems to be broken with jit fast path */ + g_assert(remain > 0); + g_assert(mt != NULL); + + if (st != NULL && !(re->flags & RSPAMD_REGEXP_FLAG_DISABLE_JIT) && can_jit) { + rc = pcre_jit_exec(r, ext, mt, remain, 0, 0, ovec, + ncaptures, st); + } + else { + rc = pcre_exec(r, ext, mt, remain, 0, match_flags, ovec, + ncaptures); + } +#else + rc = pcre_exec(r, ext, mt, remain, 0, match_flags, ovec, + ncaptures); +#endif +#else + rc = pcre_exec(r, ext, mt, remain, 0, match_flags, ovec, + ncaptures); +#endif + } + else { + rc = pcre_exec(r, ext, mt, remain, 0, match_flags, ovec, + ncaptures); + } + + if (rc >= 0) { + if (rc > 0) { + if (start) { + *start = mt + ovec[0]; + } + if (end) { + *end = mt + ovec[1]; + } + } + else { + if (start) { + *start = mt; + } + if (end) { + *end = mt + remain; 
+ } + } + + if (captures != NULL && rc >= 1) { + struct rspamd_re_capture *elt; + + g_assert(g_array_get_element_size(captures) == + sizeof(struct rspamd_re_capture)); + g_array_set_size(captures, rc); + + for (i = 0; i < rc; i++) { + if (ovec[i * 2] != junk && ovec[i * 2] >= 0) { + elt = &g_array_index(captures, struct rspamd_re_capture, i); + elt->p = mt + ovec[i * 2]; + elt->len = (mt + ovec[i * 2 + 1]) - elt->p; + } + else { + /* Runtime match returned fewer captures than expected */ + g_array_set_size(captures, i); + break; + } + } + } + + if (re->flags & RSPAMD_REGEXP_FLAG_FULL_MATCH) { + /* We also ensure that the match is full */ + if (ovec[0] != 0 || (guint) ovec[1] < len) { + return FALSE; + } + } + + return TRUE; + } + + return FALSE; +} +#else +/* PCRE 2 version */ +gboolean +rspamd_regexp_search(const rspamd_regexp_t *re, const gchar *text, gsize len, + const gchar **start, const gchar **end, gboolean raw, + GArray *captures) +{ + pcre2_match_data *match_data; + pcre2_match_context *mcontext; + PCRE_T *r; + const gchar *mt; + PCRE2_SIZE remain = 0, *ovec; + const PCRE2_SIZE junk = 0xdeadbabeeeeeeeeULL; + gint rc, match_flags, novec, i; + gboolean ret = FALSE; + + g_assert(re != NULL); + g_assert(text != NULL); + + if (len == 0) { + len = strlen(text); + } + + if (re->match_limit > 0 && len > re->match_limit) { + len = re->match_limit; + } + + if (end != NULL && *end != NULL) { + /* Incremental search */ + mt = (*end); + + if ((gint) len > (mt - text)) { + remain = len - (mt - text); + } + } + else { + mt = text; + remain = len; + } + + if (remain == 0) { + return FALSE; + } + + match_flags = 0; + + if (raw || re->re == re->raw_re) { + r = re->raw_re; + mcontext = re->raw_mcontext; + } + else { + r = re->re; + mcontext = re->mcontext; + } + + if (r == NULL) { + /* Invalid regexp type for the specified input */ + return FALSE; + } + + match_data = pcre2_match_data_create(re->ncaptures + 1, NULL); + novec = pcre2_get_ovector_count(match_data); + ovec = 
pcre2_get_ovector_pointer(match_data); + + /* Fill ovec with crap, so we can stop if actual matches is less than announced */ + for (i = 0; i < novec; i++) { + ovec[i * 2] = junk; + ovec[i * 2 + 1] = junk; + } + +#ifdef HAVE_PCRE_JIT + if (!(re->flags & RSPAMD_REGEXP_FLAG_DISABLE_JIT) && can_jit) { + if (re->re != re->raw_re && rspamd_fast_utf8_validate(mt, remain) != 0) { + msg_err("bad utf8 input for JIT re '%s'", re->pattern); + return FALSE; + } + + rc = pcre2_jit_match(r, mt, remain, 0, match_flags, match_data, + mcontext); + } + else { + rc = pcre2_match(r, mt, remain, 0, match_flags, match_data, + mcontext); + } +#else + rc = pcre2_match(r, mt, remain, 0, match_flags, match_data, + mcontext); +#endif + + if (rc >= 0) { + if (novec > 0) { + if (start) { + *start = mt + ovec[0]; + } + if (end) { + *end = mt + ovec[1]; + } + } + else { + if (start) { + *start = mt; + } + if (end) { + *end = mt + remain; + } + } + + if (captures != NULL && novec >= 1) { + struct rspamd_re_capture *elt; + + g_assert(g_array_get_element_size(captures) == + sizeof(struct rspamd_re_capture)); + g_array_set_size(captures, novec); + + for (i = 0; i < novec; i++) { + if (ovec[i * 2] != junk && ovec[i * 2] != PCRE2_UNSET) { + elt = &g_array_index(captures, struct rspamd_re_capture, i); + elt->p = mt + ovec[i * 2]; + elt->len = (mt + ovec[i * 2 + 1]) - elt->p; + } + else { + g_array_set_size(captures, i); + break; + } + } + } + + ret = TRUE; + + if (re->flags & RSPAMD_REGEXP_FLAG_FULL_MATCH) { + /* We also ensure that the match is full */ + if (ovec[0] != 0 || (guint) ovec[1] < len) { + ret = FALSE; + } + } + } + + pcre2_match_data_free(match_data); + + return ret; +} +#endif + +const char * +rspamd_regexp_get_pattern(const rspamd_regexp_t *re) +{ + g_assert(re != NULL); + + return re->pattern; +} + +guint rspamd_regexp_set_flags(rspamd_regexp_t *re, guint new_flags) +{ + guint old_flags; + + g_assert(re != NULL); + old_flags = re->flags; + re->flags = new_flags; + + return old_flags; +} 
+ +guint rspamd_regexp_get_flags(const rspamd_regexp_t *re) +{ + g_assert(re != NULL); + + return re->flags; +} + +guint rspamd_regexp_get_pcre_flags(const rspamd_regexp_t *re) +{ + g_assert(re != NULL); + + return re->pcre_flags; +} + +guint rspamd_regexp_get_maxhits(const rspamd_regexp_t *re) +{ + g_assert(re != NULL); + + return re->max_hits; +} + +guint rspamd_regexp_set_maxhits(rspamd_regexp_t *re, guint new_maxhits) +{ + guint old_hits; + + g_assert(re != NULL); + old_hits = re->max_hits; + re->max_hits = new_maxhits; + + return old_hits; +} + +guint64 +rspamd_regexp_get_cache_id(const rspamd_regexp_t *re) +{ + g_assert(re != NULL); + + return re->cache_id; +} + +guint64 +rspamd_regexp_set_cache_id(rspamd_regexp_t *re, guint64 id) +{ + guint64 old; + + g_assert(re != NULL); + old = re->cache_id; + re->cache_id = id; + + return old; +} + +gsize rspamd_regexp_get_match_limit(const rspamd_regexp_t *re) +{ + g_assert(re != NULL); + + return re->match_limit; +} + +gsize rspamd_regexp_set_match_limit(rspamd_regexp_t *re, gsize lim) +{ + gsize old; + + g_assert(re != NULL); + old = re->match_limit; + re->match_limit = lim; + + return old; +} + +gboolean +rspamd_regexp_match(const rspamd_regexp_t *re, const gchar *text, gsize len, + gboolean raw) +{ + const gchar *start = NULL, *end = NULL; + + g_assert(re != NULL); + g_assert(text != NULL); + + if (len == 0) { + len = strlen(text); + } + + if (rspamd_regexp_search(re, text, len, &start, &end, raw, NULL)) { + if (start == text && end == text + len) { + return TRUE; + } + } + + return FALSE; +} + +void rspamd_regexp_unref(rspamd_regexp_t *re) +{ + REF_RELEASE(re); +} + +rspamd_regexp_t * +rspamd_regexp_ref(rspamd_regexp_t *re) +{ + g_assert(re != NULL); + + REF_RETAIN(re); + + return re; +} + +void rspamd_regexp_set_ud(rspamd_regexp_t *re, gpointer ud) +{ + g_assert(re != NULL); + + re->ud = ud; +} + +gpointer +rspamd_regexp_get_ud(const rspamd_regexp_t *re) +{ + g_assert(re != NULL); + + return re->ud; +} + +gboolean 
+rspamd_regexp_equal(gconstpointer a, gconstpointer b) +{ + const guchar *ia = a, *ib = b; + + return (memcmp(ia, ib, sizeof(regexp_id_t)) == 0); +} + +guint32 +rspamd_regexp_hash(gconstpointer a) +{ + const guchar *ia = a; + guint32 res; + + memcpy(&res, ia, sizeof(res)); + + return res; +} + +gboolean +rspamd_regexp_cmp(gconstpointer a, gconstpointer b) +{ + const guchar *ia = a, *ib = b; + + return memcmp(ia, ib, sizeof(regexp_id_t)); +} + +struct rspamd_regexp_cache * +rspamd_regexp_cache_new(void) +{ + struct rspamd_regexp_cache *ncache; + + ncache = g_malloc0(sizeof(*ncache)); + ncache->tbl = g_hash_table_new_full(rspamd_regexp_hash, rspamd_regexp_equal, + NULL, (GDestroyNotify) rspamd_regexp_unref); +#ifdef HAVE_PCRE_JIT +#ifdef WITH_PCRE2 + ncache->jstack = pcre2_jit_stack_create(32 * 1024, 1024 * 1024, NULL); +#else + ncache->jstack = pcre_jit_stack_alloc(32 * 1024, 1024 * 1024); +#endif +#endif + return ncache; +} + + +rspamd_regexp_t * +rspamd_regexp_cache_query(struct rspamd_regexp_cache *cache, + const gchar *pattern, + const gchar *flags) +{ + rspamd_regexp_t *res = NULL; + regexp_id_t id; + + if (cache == NULL) { + rspamd_regexp_library_init(NULL); + cache = global_re_cache; + } + + g_assert(cache != NULL); + rspamd_regexp_generate_id(pattern, flags, id); + + res = g_hash_table_lookup(cache->tbl, id); + + return res; +} + + +rspamd_regexp_t * +rspamd_regexp_cache_create(struct rspamd_regexp_cache *cache, + const gchar *pattern, + const gchar *flags, GError **err) +{ + rspamd_regexp_t *res; + + if (cache == NULL) { + rspamd_regexp_library_init(NULL); + cache = global_re_cache; + } + + g_assert(cache != NULL); + res = rspamd_regexp_cache_query(cache, pattern, flags); + + if (res != NULL) { + return res; + } + + res = rspamd_regexp_new(pattern, flags, err); + + if (res) { + /* REF_RETAIN (res); */ + if (g_hash_table_size(cache->tbl) < max_re_cache_size) { + g_hash_table_insert(cache->tbl, res->id, res); + } + else { + msg_warn("cannot insert regexp to 
the cache: maximum size is reached (%d expressions); " + "it might be cached regexp misuse; regexp pattern: %s", + max_re_cache_size, pattern); + } + } + + return res; +} + +gboolean +rspamd_regexp_cache_remove(struct rspamd_regexp_cache *cache, + rspamd_regexp_t *re) +{ + if (cache == NULL) { + cache = global_re_cache; + } + + g_assert(cache != NULL); + g_assert(re != NULL); + + return g_hash_table_remove(cache->tbl, re->id); +} + +void rspamd_regexp_cache_destroy(struct rspamd_regexp_cache *cache) +{ + if (cache != NULL) { + g_hash_table_destroy(cache->tbl); +#ifdef HAVE_PCRE_JIT +#ifdef WITH_PCRE2 + if (cache->jstack) { + pcre2_jit_stack_free(cache->jstack); + } +#else + if (cache->jstack) { + pcre_jit_stack_free(cache->jstack); + } +#endif +#endif + g_free(cache); + } +} + +RSPAMD_CONSTRUCTOR(rspamd_re_static_pool_ctor) +{ + global_re_cache = rspamd_regexp_cache_new(); +#ifdef WITH_PCRE2 + pcre2_ctx = pcre2_compile_context_create(NULL); + pcre2_set_newline(pcre2_ctx, PCRE_FLAG(NEWLINE_ANY)); +#endif +} + +RSPAMD_DESTRUCTOR(rspamd_re_static_pool_dtor) +{ + rspamd_regexp_cache_destroy(global_re_cache); +#ifdef WITH_PCRE2 + pcre2_compile_context_free(pcre2_ctx); +#endif +} + + +void rspamd_regexp_library_init(struct rspamd_config *cfg) +{ + if (cfg) { + if (cfg->disable_pcre_jit) { + can_jit = FALSE; + check_jit = FALSE; + } + else if (!can_jit) { + check_jit = TRUE; + } + } + + if (check_jit) { +#ifdef HAVE_PCRE_JIT + gint jit, rc; + gchar *str; + +#ifndef WITH_PCRE2 + rc = pcre_config(PCRE_CONFIG_JIT, &jit); +#else + rc = pcre2_config(PCRE2_CONFIG_JIT, &jit); +#endif + + if (rc == 0 && jit == 1) { +#ifndef WITH_PCRE2 +#ifdef PCRE_CONFIG_JITTARGET + pcre_config(PCRE_CONFIG_JITTARGET, &str); + msg_info("pcre is compiled with JIT for %s", str); +#else + msg_info("pcre is compiled with JIT for unknown target"); +#endif +#else + rc = pcre2_config(PCRE2_CONFIG_JITTARGET, NULL); + + if (rc > 0) { + str = g_alloca(rc); + pcre2_config(PCRE2_CONFIG_JITTARGET, str); + 
msg_info("pcre2 is compiled with JIT for %s", str); + } + else { + msg_info("pcre2 is compiled with JIT for unknown"); + } + +#endif /* WITH_PCRE2 */ + + if (getenv("VALGRIND") == NULL) { + can_jit = TRUE; + } + else { + msg_info("disabling PCRE jit as it does not play well with valgrind"); + can_jit = FALSE; + } + } + else { + msg_info("pcre is compiled without JIT support, so many optimizations" + " are impossible"); + can_jit = FALSE; + } +#else + msg_info("pcre is too old and has no JIT support, so many optimizations" + " are impossible"); + can_jit = FALSE; +#endif + check_jit = FALSE; + } +} + +gpointer +rspamd_regexp_get_id(const rspamd_regexp_t *re) +{ + g_assert(re != NULL); + + return (gpointer) re->id; +} + +gpointer +rspamd_regexp_get_class(const rspamd_regexp_t *re) +{ + g_assert(re != NULL); + + return re->re_class; +} + +gpointer +rspamd_regexp_set_class(rspamd_regexp_t *re, gpointer re_class) +{ + gpointer old_class; + + g_assert(re != NULL); + + old_class = re->re_class; + re->re_class = re_class; + + return old_class; +} + +rspamd_regexp_t * +rspamd_regexp_from_glob(const gchar *gl, gsize sz, GError **err) +{ + GString *out; + rspamd_regexp_t *re; + const gchar *end; + gboolean escaping = FALSE; + gint nbraces = 0; + + g_assert(gl != NULL); + + if (sz == 0) { + sz = strlen(gl); + } + + end = gl + sz; + out = g_string_sized_new(sz + 2); + g_string_append_c(out, '^'); + + while (gl < end) { + switch (*gl) { + case '*': + if (escaping) { + g_string_append(out, "\\*"); + } + else { + g_string_append(out, ".*"); + } + + escaping = FALSE; + break; + case '?': + if (escaping) { + g_string_append(out, "\\?"); + } + else { + g_string_append(out, "."); + } + + escaping = FALSE; + break; + case '.': + case '(': + case ')': + case '+': + case '|': + case '^': + case '$': + case '@': + case '%': + g_string_append_c(out, '\\'); + g_string_append_c(out, *gl); + escaping = FALSE; + break; + case '\\': + if (escaping) { + g_string_append(out, "\\\\"); + escaping = 
FALSE; + } + else { + escaping = TRUE; + } + break; + case '{': + if (escaping) { + g_string_append(out, "\\{"); + } + else { + g_string_append_c(out, '('); + nbraces++; + } + + escaping = FALSE; + break; + case '}': + if (nbraces > 0 && !escaping) { + g_string_append_c(out, ')'); + nbraces--; + } + else if (escaping) { + g_string_append(out, "\\}"); + } + else { + g_string_append(out, "}"); + } + + escaping = FALSE; + break; + case ',': + if (nbraces > 0 && !escaping) { + g_string_append_c(out, '|'); + } + else if (escaping) { + g_string_append(out, "\\,"); + } + else { + g_string_append_c(out, ','); + } + + break; + default: + escaping = FALSE; + g_string_append_c(out, *gl); + break; + } + + gl++; + } + + g_string_append_c(out, '$'); + re = rspamd_regexp_new(out->str, "i", err); + g_string_free(out, TRUE); + + return re; +} diff --git a/src/libutil/regexp.h b/src/libutil/regexp.h new file mode 100644 index 0000000..6222ba6 --- /dev/null +++ b/src/libutil/regexp.h @@ -0,0 +1,276 @@ +/*- + * Copyright 2016 Vsevolod Stakhov + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +#ifndef REGEXP_H_ +#define REGEXP_H_ + +#include "config.h" + +#ifndef WITH_PCRE2 +#define PCRE_FLAG(x) G_PASTE(PCRE_, x) +#else +#ifndef PCRE2_CODE_UNIT_WIDTH +#define PCRE2_CODE_UNIT_WIDTH 8 +#endif +#define PCRE_FLAG(x) G_PASTE(PCRE2_, x) +#endif + +#define RSPAMD_INVALID_ID ((guint64) -1LL) +#define RSPAMD_REGEXP_FLAG_RAW (1 << 1) +#define RSPAMD_REGEXP_FLAG_NOOPT (1 << 2) +#define RSPAMD_REGEXP_FLAG_FULL_MATCH (1 << 3) +#define RSPAMD_REGEXP_FLAG_PCRE_ONLY (1 << 4) +#define RSPAMD_REGEXP_FLAG_DISABLE_JIT (1 << 5) +#define RSPAMD_REGEXP_FLAG_UTF (1 << 6) +#define RSPAMD_REGEXP_FLAG_LEFTMOST (1 << 7) + + +#ifdef __cplusplus +extern "C" { +#endif + +struct rspamd_config; + +typedef struct rspamd_regexp_s rspamd_regexp_t; +struct rspamd_regexp_cache; +struct rspamd_re_capture { + const char *p; + gsize len; +}; + +/** + * Create new rspamd regexp + * @param pattern regexp pattern + * @param flags flags (may be enclosed inside pattern) + * @param err error pointer set if compilation failed + * @return new regexp object + */ +rspamd_regexp_t *rspamd_regexp_new(const gchar *pattern, const gchar *flags, + GError **err); + +/** + * Create new rspamd regexp from a pattern of explicit length + * @param pattern regexp pattern + * @param len length of the pattern in bytes + * @param flags flags (may be enclosed inside pattern) + * @param err error pointer set if compilation failed + * @return new regexp object + */ +rspamd_regexp_t *rspamd_regexp_new_len(const gchar *pattern, gsize len, const gchar *flags, + GError **err); + +/** + * Search the specified regexp in the text + * @param re + * @param text + * @param len length of text; 0 means strlen(text) + * @param start position of start of match + * @param end position of end of match; a non-NULL *end on entry resumes the search from there + * @param raw + * @param captures array of captured strings of type struct rspamd_re_capture or NULL + * @return TRUE if a match was found + */ +gboolean rspamd_regexp_search(const rspamd_regexp_t *re, + const gchar *text, gsize len, + const gchar **start, const gchar **end, gboolean raw, + GArray *captures); + + +/** + * Exact match of the specified text against the regexp 
+ * @param re + * @param text + * @param len + * @return + */ +gboolean rspamd_regexp_match(const rspamd_regexp_t *re, + const gchar *text, gsize len, gboolean raw); + +/** + * Increase refcount for a regexp object + */ +rspamd_regexp_t *rspamd_regexp_ref(rspamd_regexp_t *re); + +/** + * Unref regexp object + * @param re + */ +void rspamd_regexp_unref(rspamd_regexp_t *re); + +/** + * Set auxiliary userdata for the specified regexp + * @param re regexp object + * @param ud opaque pointer + */ +void rspamd_regexp_set_ud(rspamd_regexp_t *re, gpointer ud); + +/** + * Get userdata for a regexp object + * @param re regexp object + * @return opaque pointer + */ +gpointer rspamd_regexp_get_ud(const rspamd_regexp_t *re); + +/** + * Get regexp ID suitable for hashing + * @param re + * @return + */ +gpointer rspamd_regexp_get_id(const rspamd_regexp_t *re); + +/** + * Get pattern for the specified regexp object + * @param re + * @return + */ +const char *rspamd_regexp_get_pattern(const rspamd_regexp_t *re); + +/** + * Get PCRE flags for the regexp + */ +guint rspamd_regexp_get_pcre_flags(const rspamd_regexp_t *re); + +/** + * Get rspamd flags for the regexp + */ +guint rspamd_regexp_get_flags(const rspamd_regexp_t *re); + +/** + * Set rspamd flags for the regexp + * @return previous flags + */ +guint rspamd_regexp_set_flags(rspamd_regexp_t *re, guint new_flags); + +/** + * Get regexp maximum hits + */ +guint rspamd_regexp_get_maxhits(const rspamd_regexp_t *re); + +/** + * Set regexp maximum hits + * @return previous maximum hits value + */ +guint rspamd_regexp_set_maxhits(rspamd_regexp_t *re, guint new_maxhits); + +/** + * Returns cache id for a regexp + */ +guint64 rspamd_regexp_get_cache_id(const rspamd_regexp_t *re); + +/** + * Sets cache id for a regexp + * @return previous cache id + */ +guint64 rspamd_regexp_set_cache_id(rspamd_regexp_t *re, guint64 id); + +/** + * Returns match limit for a regexp + */ +gsize rspamd_regexp_get_match_limit(const rspamd_regexp_t *re); + +/** + * Sets match limit for a regexp + * @return previous match limit + */ +gsize rspamd_regexp_set_match_limit(rspamd_regexp_t *re, 
gsize lim); + +/** + * Get regexp class for the re object + */ +gpointer rspamd_regexp_get_class(const rspamd_regexp_t *re); + +/** + * Set regexp class for the re object + * @return old re class value + */ +gpointer rspamd_regexp_set_class(rspamd_regexp_t *re, gpointer re_class); + +/** + * Create new regexp cache + * @return + */ +struct rspamd_regexp_cache *rspamd_regexp_cache_new(void); + +/** + * Query rspamd cache for a specified regexp + * @param cache regexp cache. if NULL, the superglobal cache is used (*not* thread-safe) + * @param pattern + * @param flags + * @return + */ +rspamd_regexp_t *rspamd_regexp_cache_query(struct rspamd_regexp_cache *cache, + const gchar *pattern, + const gchar *flags); + +/** + * Create or get cached regexp from the specified cache + * @param cache regexp cache. if NULL, the superglobal cache is used (*not* thread-safe) + * @param pattern regexp pattern + * @param flags flags (may be enclosed inside pattern) + * @param err error pointer set if compilation failed + * @return new regexp object + */ +rspamd_regexp_t *rspamd_regexp_cache_create(struct rspamd_regexp_cache *cache, + const gchar *pattern, + const gchar *flags, GError **err); + +/** + * Remove regexp from the cache + * @param cache regexp cache. 
if NULL, the superglobal cache is used (*not* thread-safe) + * @param re re to remove + * @return TRUE if a regexp has been removed + */ +gboolean rspamd_regexp_cache_remove(struct rspamd_regexp_cache *cache, + rspamd_regexp_t *re); + +/** + * Destroy regexp cache and unref all elements inside it + * @param cache + */ +void rspamd_regexp_cache_destroy(struct rspamd_regexp_cache *cache); + +/** + * Return the value for regexp hash based on its ID + * @param a + * @return + */ +guint32 rspamd_regexp_hash(gconstpointer a); + +/** + * Compare two regexp objects based on theirs ID + * @param a + * @param b + * @return + */ +gboolean rspamd_regexp_equal(gconstpointer a, gconstpointer b); + +/** + * Acts like memcmp but for regexp + */ +gint rspamd_regexp_cmp(gconstpointer a, gconstpointer b); + +/** + * Initialize superglobal regexp cache and library + */ +void rspamd_regexp_library_init(struct rspamd_config *cfg); + +/** + * Create regexp from glob + * @param gl + * @param err + * @return + */ +rspamd_regexp_t *rspamd_regexp_from_glob(const gchar *gl, gsize sz, GError **err); + +#ifdef __cplusplus +} +#endif + +#endif /* REGEXP_H_ */ diff --git a/src/libutil/rrd.c b/src/libutil/rrd.c new file mode 100644 index 0000000..451e222 --- /dev/null +++ b/src/libutil/rrd.c @@ -0,0 +1,1502 @@ +/*- + * Copyright 2016 Vsevolod Stakhov + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +#include "config.h" +#include "rrd.h" +#include "util.h" +#include "cfg_file.h" +#include "logger.h" +#include "unix-std.h" +#include "cryptobox.h" +#include <math.h> + +#define RSPAMD_RRD_DS_COUNT METRIC_ACTION_MAX +#define RSPAMD_RRD_OLD_DS_COUNT 4 +#define RSPAMD_RRD_RRA_COUNT 4 + +#define msg_err_rrd(...) rspamd_default_log_function(G_LOG_LEVEL_CRITICAL, \ + "rrd", file->id, \ + G_STRFUNC, \ + __VA_ARGS__) +#define msg_warn_rrd(...) rspamd_default_log_function(G_LOG_LEVEL_WARNING, \ + "rrd", file->id, \ + G_STRFUNC, \ + __VA_ARGS__) +#define msg_info_rrd(...) rspamd_default_log_function(G_LOG_LEVEL_INFO, \ + "rrd", file->id, \ + G_STRFUNC, \ + __VA_ARGS__) +#define msg_debug_rrd(...) rspamd_conditional_debug_fast(NULL, NULL, \ + rspamd_rrd_log_id, "rrd", file->id, \ + G_STRFUNC, \ + __VA_ARGS__) + +INIT_LOG_MODULE(rrd) + +static GQuark +rrd_error_quark(void) +{ + return g_quark_from_static_string("rrd-error"); +} + +/** + * Convert rrd dst type from string to numeric value + */ +enum rrd_dst_type +rrd_dst_from_string(const gchar *str) +{ + if (g_ascii_strcasecmp(str, "counter") == 0) { + return RRD_DST_COUNTER; + } + else if (g_ascii_strcasecmp(str, "absolute") == 0) { + return RRD_DST_ABSOLUTE; + } + else if (g_ascii_strcasecmp(str, "gauge") == 0) { + return RRD_DST_GAUGE; + } + else if (g_ascii_strcasecmp(str, "cdef") == 0) { + return RRD_DST_CDEF; + } + else if (g_ascii_strcasecmp(str, "derive") == 0) { + return RRD_DST_DERIVE; + } + + return RRD_DST_INVALID; +} + +/** + * Convert numeric presentation of dst to string + */ +const gchar * +rrd_dst_to_string(enum rrd_dst_type type) +{ + switch (type) { + case RRD_DST_COUNTER: + return "COUNTER"; + case RRD_DST_ABSOLUTE: + return "ABSOLUTE"; + case RRD_DST_GAUGE: + return "GAUGE"; + case RRD_DST_CDEF: + return "CDEF"; + case RRD_DST_DERIVE: + return "DERIVE"; + default: + return "U"; + } + + return "U"; +} + +/** + * Convert rrd consolidation function type from string to numeric value + */ +enum 
rrd_cf_type +rrd_cf_from_string(const gchar *str) +{ + if (g_ascii_strcasecmp(str, "average") == 0) { + return RRD_CF_AVERAGE; + } + else if (g_ascii_strcasecmp(str, "minimum") == 0) { + return RRD_CF_MINIMUM; + } + else if (g_ascii_strcasecmp(str, "maximum") == 0) { + return RRD_CF_MAXIMUM; + } + else if (g_ascii_strcasecmp(str, "last") == 0) { + return RRD_CF_LAST; + } + /* XXX: add other CF functions supported by rrd */ + + return RRD_CF_INVALID; +} + +/** + * Convert numeric presentation of cf to string + */ +const gchar * +rrd_cf_to_string(enum rrd_cf_type type) +{ + switch (type) { + case RRD_CF_AVERAGE: + return "AVERAGE"; + case RRD_CF_MINIMUM: + return "MINIMUM"; + case RRD_CF_MAXIMUM: + return "MAXIMUM"; + case RRD_CF_LAST: + return "LAST"; + default: + return "U"; + } + + /* XXX: add other CF functions supported by rrd */ + + return "U"; +} + +void rrd_make_default_rra(const gchar *cf_name, + gulong pdp_cnt, + gulong rows, + struct rrd_rra_def *rra) +{ + g_assert(cf_name != NULL); + g_assert(rrd_cf_from_string(cf_name) != RRD_CF_INVALID); + + rra->pdp_cnt = pdp_cnt; + rra->row_cnt = rows; + rspamd_strlcpy(rra->cf_nam, cf_name, sizeof(rra->cf_nam)); + memset(rra->par, 0, sizeof(rra->par)); + rra->par[RRA_cdp_xff_val].dv = 0.5; +} + +void rrd_make_default_ds(const gchar *name, + const gchar *type, + gulong pdp_step, + struct rrd_ds_def *ds) +{ + g_assert(name != NULL); + g_assert(type != NULL); + g_assert(rrd_dst_from_string(type) != RRD_DST_INVALID); + + rspamd_strlcpy(ds->ds_nam, name, sizeof(ds->ds_nam)); + rspamd_strlcpy(ds->dst, type, sizeof(ds->dst)); + memset(ds->par, 0, sizeof(ds->par)); + ds->par[RRD_DS_mrhb_cnt].lv = pdp_step * 2; + ds->par[RRD_DS_min_val].dv = NAN; + ds->par[RRD_DS_max_val].dv = NAN; +} + +/** + * Check rrd file for correctness (size, cookies, etc) + */ +static gboolean +rspamd_rrd_check_file(const gchar *filename, gboolean need_data, GError **err) +{ + gint fd, i; + struct stat st; + struct rrd_file_head head; + struct 
rrd_rra_def rra; + gint head_size; + + fd = open(filename, O_RDWR); + if (fd == -1) { + g_set_error(err, + rrd_error_quark(), errno, "rrd open error: %s", strerror(errno)); + return FALSE; + } + + if (fstat(fd, &st) == -1) { + g_set_error(err, + rrd_error_quark(), errno, "rrd stat error: %s", strerror(errno)); + close(fd); + return FALSE; + } + if (st.st_size < (goffset) sizeof(struct rrd_file_head)) { + /* We have trimmed file */ + g_set_error(err, rrd_error_quark(), EINVAL, "rrd size is bad: %ud", + (guint) st.st_size); + close(fd); + return FALSE; + } + + /* Try to read header */ + if (read(fd, &head, sizeof(head)) != sizeof(head)) { + g_set_error(err, + rrd_error_quark(), errno, "rrd read head error: %s", + strerror(errno)); + close(fd); + return FALSE; + } + /* Check magic */ + if (memcmp(head.version, RRD_VERSION, sizeof(head.version)) != 0) { + g_set_error(err, + rrd_error_quark(), EINVAL, "rrd head error: bad cookie"); + close(fd); + return FALSE; + } + if (head.float_cookie != RRD_FLOAT_COOKIE) { + g_set_error(err, + rrd_error_quark(), EINVAL, "rrd head error: another architecture " + "(file cookie %g != our cookie %g)", + head.float_cookie, RRD_FLOAT_COOKIE); + close(fd); + return FALSE; + } + /* Check for other params */ + if (head.ds_cnt <= 0 || head.rra_cnt <= 0) { + g_set_error(err, + rrd_error_quark(), EINVAL, "rrd head cookies error: bad rra or ds count"); + close(fd); + return FALSE; + } + /* Now we can calculate the overall size of rrd */ + head_size = sizeof(struct rrd_file_head) + + sizeof(struct rrd_ds_def) * head.ds_cnt + + sizeof(struct rrd_rra_def) * head.rra_cnt + + sizeof(struct rrd_live_head) + + sizeof(struct rrd_pdp_prep) * head.ds_cnt + + sizeof(struct rrd_cdp_prep) * head.ds_cnt * head.rra_cnt + + sizeof(struct rrd_rra_ptr) * head.rra_cnt; + if (st.st_size < (goffset) head_size) { + g_set_error(err, + rrd_error_quark(), errno, "rrd file seems to have stripped header: %d", + head_size); + close(fd); + return FALSE; + } + + if 
(need_data) { + /* Now check rra */ + if (lseek(fd, sizeof(struct rrd_ds_def) * head.ds_cnt, + SEEK_CUR) == -1) { + g_set_error(err, + rrd_error_quark(), errno, "rrd head lseek error: %s", + strerror(errno)); + close(fd); + return FALSE; + } + for (i = 0; i < (gint) head.rra_cnt; i++) { + if (read(fd, &rra, sizeof(rra)) != sizeof(rra)) { + g_set_error(err, + rrd_error_quark(), errno, "rrd read rra error: %s", + strerror(errno)); + close(fd); + return FALSE; + } + head_size += rra.row_cnt * head.ds_cnt * sizeof(gdouble); + } + + if (st.st_size != head_size) { + g_set_error(err, + rrd_error_quark(), EINVAL, "rrd file seems to have incorrect size: %d, must be %d", + (gint) st.st_size, head_size); + close(fd); + return FALSE; + } + } + + close(fd); + return TRUE; +} + +/** + * Adjust pointers in mmapped rrd file + * @param file + */ +static void +rspamd_rrd_adjust_pointers(struct rspamd_rrd_file *file, gboolean completed) +{ + guint8 *ptr; + + ptr = file->map; + file->stat_head = (struct rrd_file_head *) ptr; + ptr += sizeof(struct rrd_file_head); + file->ds_def = (struct rrd_ds_def *) ptr; + ptr += sizeof(struct rrd_ds_def) * file->stat_head->ds_cnt; + file->rra_def = (struct rrd_rra_def *) ptr; + ptr += sizeof(struct rrd_rra_def) * file->stat_head->rra_cnt; + file->live_head = (struct rrd_live_head *) ptr; + ptr += sizeof(struct rrd_live_head); + file->pdp_prep = (struct rrd_pdp_prep *) ptr; + ptr += sizeof(struct rrd_pdp_prep) * file->stat_head->ds_cnt; + file->cdp_prep = (struct rrd_cdp_prep *) ptr; + ptr += sizeof(struct rrd_cdp_prep) * file->stat_head->rra_cnt * + file->stat_head->ds_cnt; + file->rra_ptr = (struct rrd_rra_ptr *) ptr; + if (completed) { + ptr += sizeof(struct rrd_rra_ptr) * file->stat_head->rra_cnt; + file->rrd_value = (gdouble *) ptr; + } + else { + file->rrd_value = NULL; + } +} + +static void +rspamd_rrd_calculate_checksum(struct rspamd_rrd_file *file) +{ + guchar sigbuf[rspamd_cryptobox_HASHBYTES]; + struct rrd_ds_def *ds; + guint i; + 
rspamd_cryptobox_hash_state_t st; + + if (file->finalized) { + rspamd_cryptobox_hash_init(&st, NULL, 0); + rspamd_cryptobox_hash_update(&st, file->filename, strlen(file->filename)); + + for (i = 0; i < file->stat_head->ds_cnt; i++) { + ds = &file->ds_def[i]; + rspamd_cryptobox_hash_update(&st, ds->ds_nam, sizeof(ds->ds_nam)); + } + + rspamd_cryptobox_hash_final(&st, sigbuf); + + file->id = rspamd_encode_base32(sigbuf, sizeof(sigbuf), RSPAMD_BASE32_DEFAULT); + } +} + +static int +rspamd_rrd_open_exclusive(const gchar *filename) +{ + struct timespec sleep_ts = { + .tv_sec = 0, + .tv_nsec = 1000000}; + gint fd; + + fd = open(filename, O_RDWR); + + if (fd == -1) { + return -1; + } + + for (;;) { + if (rspamd_file_lock(fd, TRUE) == -1) { + if (errno == EAGAIN || errno == EWOULDBLOCK) { + nanosleep(&sleep_ts, NULL); + continue; + } + else { + close(fd); + return -1; + } + } + else { + break; + } + } + + return fd; +}; + +/** + * Open completed or incompleted rrd file + * @param filename + * @param completed + * @param err + * @return + */ +static struct rspamd_rrd_file * +rspamd_rrd_open_common(const gchar *filename, gboolean completed, GError **err) +{ + struct rspamd_rrd_file *file; + gint fd; + struct stat st; + + if (!rspamd_rrd_check_file(filename, completed, err)) { + return NULL; + } + + file = g_malloc0(sizeof(struct rspamd_rrd_file)); + + /* Open file */ + fd = rspamd_rrd_open_exclusive(filename); + if (fd == -1) { + g_set_error(err, + rrd_error_quark(), errno, "rrd open error: %s", strerror(errno)); + g_free(file); + return FALSE; + } + + if (fstat(fd, &st) == -1) { + g_set_error(err, + rrd_error_quark(), errno, "rrd stat error: %s", strerror(errno)); + rspamd_file_unlock(fd, FALSE); + g_free(file); + close(fd); + return FALSE; + } + /* Mmap file */ + file->size = st.st_size; + if ((file->map = + mmap(NULL, st.st_size, PROT_READ | PROT_WRITE, + MAP_SHARED, fd, 0)) == MAP_FAILED) { + + rspamd_file_unlock(fd, FALSE); + close(fd); + g_set_error(err, + 
rrd_error_quark(), ENOMEM, "mmap failed: %s", strerror(errno)); + g_free(file); + return NULL; + } + + file->fd = fd; + + /* Adjust pointers */ + rspamd_rrd_adjust_pointers(file, completed); + + /* Mark it as finalized */ + file->finalized = completed; + + file->filename = g_strdup(filename); + rspamd_rrd_calculate_checksum(file); + + return file; +} + +/** + * Open (and mmap) existing RRD file + * @param filename path + * @param err error pointer + * @return rrd file structure + */ +struct rspamd_rrd_file * +rspamd_rrd_open(const gchar *filename, GError **err) +{ + struct rspamd_rrd_file *file; + + if ((file = rspamd_rrd_open_common(filename, TRUE, err))) { + msg_info_rrd("rrd file opened: %s", filename); + } + + return file; +} + +/** + * Create basic header for rrd file + * @param filename file path + * @param ds_count number of data sources + * @param rra_count number of round robin archives + * @param pdp_step step of primary data points + * @param err error pointer + * @return TRUE if file has been created + */ +struct rspamd_rrd_file * +rspamd_rrd_create(const gchar *filename, + gulong ds_count, + gulong rra_count, + gulong pdp_step, + gdouble initial_ticks, + GError **err) +{ + struct rspamd_rrd_file *new; + struct rrd_file_head head; + struct rrd_ds_def ds; + struct rrd_rra_def rra; + struct rrd_live_head lh; + struct rrd_pdp_prep pdp; + struct rrd_cdp_prep cdp; + struct rrd_rra_ptr rra_ptr; + gint fd; + guint i, j; + + /* Open file */ + fd = open(filename, O_RDWR | O_CREAT | O_EXCL, 0644); + if (fd == -1) { + g_set_error(err, + rrd_error_quark(), errno, "rrd create error: %s", + strerror(errno)); + return NULL; + } + + rspamd_file_lock(fd, FALSE); + + /* Fill header */ + memset(&head, 0, sizeof(head)); + head.rra_cnt = rra_count; + head.ds_cnt = ds_count; + head.pdp_step = pdp_step; + memcpy(head.cookie, RRD_COOKIE, sizeof(head.cookie)); + memcpy(head.version, RRD_VERSION, sizeof(head.version)); + head.float_cookie = RRD_FLOAT_COOKIE; + + if (write(fd, 
&head, sizeof(head)) != sizeof(head)) { + rspamd_file_unlock(fd, FALSE); + close(fd); + g_set_error(err, + rrd_error_quark(), errno, "rrd write error: %s", strerror(errno)); + return NULL; + } + + /* Fill DS section */ + memset(&ds, 0, sizeof(ds)); + memset(&ds.ds_nam, 0, sizeof(ds.ds_nam)); + memcpy(&ds.dst, "COUNTER", sizeof("COUNTER")); + memset(&ds.par, 0, sizeof(ds.par)); + for (i = 0; i < ds_count; i++) { + if (write(fd, &ds, sizeof(ds)) != sizeof(ds)) { + rspamd_file_unlock(fd, FALSE); + close(fd); + g_set_error(err, + rrd_error_quark(), errno, "rrd write error: %s", + strerror(errno)); + return NULL; + } + } + + /* Fill RRA section */ + memset(&rra, 0, sizeof(rra)); + memcpy(&rra.cf_nam, "AVERAGE", sizeof("AVERAGE")); + rra.pdp_cnt = 1; + memset(&rra.par, 0, sizeof(rra.par)); + for (i = 0; i < rra_count; i++) { + if (write(fd, &rra, sizeof(rra)) != sizeof(rra)) { + rspamd_file_unlock(fd, FALSE); + close(fd); + g_set_error(err, + rrd_error_quark(), errno, "rrd write error: %s", + strerror(errno)); + return NULL; + } + } + + /* Fill live header */ + memset(&lh, 0, sizeof(lh)); + lh.last_up = (glong) initial_ticks; + lh.last_up_usec = (glong) ((initial_ticks - lh.last_up) * 1e6f); + + if (write(fd, &lh, sizeof(lh)) != sizeof(lh)) { + rspamd_file_unlock(fd, FALSE); + close(fd); + g_set_error(err, + rrd_error_quark(), errno, "rrd write error: %s", strerror(errno)); + return NULL; + } + + /* Fill pdp prep */ + memset(&pdp, 0, sizeof(pdp)); + memcpy(&pdp.last_ds, "U", sizeof("U")); + memset(&pdp.scratch, 0, sizeof(pdp.scratch)); + pdp.scratch[PDP_val].dv = NAN; + pdp.scratch[PDP_unkn_sec_cnt].lv = 0; + + for (i = 0; i < ds_count; i++) { + if (write(fd, &pdp, sizeof(pdp)) != sizeof(pdp)) { + rspamd_file_unlock(fd, FALSE); + close(fd); + g_set_error(err, + rrd_error_quark(), errno, "rrd write error: %s", + strerror(errno)); + return NULL; + } + } + + /* Fill cdp prep */ + memset(&cdp, 0, sizeof(cdp)); + memset(&cdp.scratch, 0, sizeof(cdp.scratch)); + 
cdp.scratch[CDP_val].dv = NAN; + cdp.scratch[CDP_unkn_pdp_cnt].lv = 0; + + for (i = 0; i < rra_count; i++) { + for (j = 0; j < ds_count; j++) { + if (write(fd, &cdp, sizeof(cdp)) != sizeof(cdp)) { + rspamd_file_unlock(fd, FALSE); + close(fd); + g_set_error(err, + rrd_error_quark(), errno, "rrd write error: %s", + strerror(errno)); + return NULL; + } + } + } + + /* Set row pointers */ + memset(&rra_ptr, 0, sizeof(rra_ptr)); + for (i = 0; i < rra_count; i++) { + if (write(fd, &rra_ptr, sizeof(rra_ptr)) != sizeof(rra_ptr)) { + rspamd_file_unlock(fd, FALSE); + close(fd); + g_set_error(err, + rrd_error_quark(), errno, "rrd write error: %s", + strerror(errno)); + return NULL; + } + } + + rspamd_file_unlock(fd, FALSE); + close(fd); + + new = rspamd_rrd_open_common(filename, FALSE, err); + + return new; +} + +/** + * Add data sources to rrd file + * @param filename path to file + * @param ds array of struct rrd_ds_def + * @param err error pointer + * @return TRUE if data sources were added + */ +gboolean +rspamd_rrd_add_ds(struct rspamd_rrd_file *file, GArray *ds, GError **err) +{ + + if (file == NULL || file->stat_head->ds_cnt * sizeof(struct rrd_ds_def) != + ds->len) { + g_set_error(err, + rrd_error_quark(), EINVAL, "rrd add ds failed: wrong arguments"); + return FALSE; + } + + /* Straightforward memcpy */ + memcpy(file->ds_def, ds->data, ds->len); + + return TRUE; +} + +/** + * Add round robin archives to rrd file + * @param filename path to file + * @param ds array of struct rrd_rra_def + * @param err error pointer + * @return TRUE if archives were added + */ +gboolean +rspamd_rrd_add_rra(struct rspamd_rrd_file *file, GArray *rra, GError **err) +{ + if (file == NULL || file->stat_head->rra_cnt * + sizeof(struct rrd_rra_def) != + rra->len) { + g_set_error(err, + rrd_error_quark(), EINVAL, "rrd add rra failed: wrong arguments"); + return FALSE; + } + + /* Straightforward memcpy */ + memcpy(file->rra_def, rra->data, rra->len); + + return TRUE; +} + +/** + * Finalize rrd 
file header and initialize all RRA in the file + * @param filename file path + * @param err error pointer + * @return TRUE if rrd file is ready for use + */ +gboolean +rspamd_rrd_finalize(struct rspamd_rrd_file *file, GError **err) +{ + gint fd; + guint i; + gint count = 0; + gdouble vbuf[1024]; + struct stat st; + + if (file == NULL || file->filename == NULL || file->fd == -1) { + g_set_error(err, + rrd_error_quark(), EINVAL, "rrd add rra failed: wrong arguments"); + return FALSE; + } + + fd = file->fd; + + if (lseek(fd, 0, SEEK_END) == -1) { + g_set_error(err, + rrd_error_quark(), errno, "rrd seek error: %s", strerror(errno)); + close(fd); + return FALSE; + } + + /* Adjust CDP */ + for (i = 0; i < file->stat_head->rra_cnt; i++) { + file->cdp_prep->scratch[CDP_unkn_pdp_cnt].lv = 0; + /* Randomize row pointer (disabled) */ + /* file->rra_ptr->cur_row = g_random_int () % file->rra_def[i].row_cnt; */ + file->rra_ptr->cur_row = file->rra_def[i].row_cnt - 1; + /* Calculate values count */ + count += file->rra_def[i].row_cnt * file->stat_head->ds_cnt; + } + + munmap(file->map, file->size); + /* Write values */ + for (i = 0; i < G_N_ELEMENTS(vbuf); i++) { + vbuf[i] = NAN; + } + + while (count > 0) { + /* Write values in buffered matter */ + if (write(fd, vbuf, + MIN((gint) G_N_ELEMENTS(vbuf), count) * sizeof(gdouble)) == -1) { + g_set_error(err, + rrd_error_quark(), errno, "rrd write error: %s", + strerror(errno)); + close(fd); + return FALSE; + } + count -= G_N_ELEMENTS(vbuf); + } + + if (fstat(fd, &st) == -1) { + g_set_error(err, + rrd_error_quark(), errno, "rrd stat error: %s", strerror(errno)); + close(fd); + return FALSE; + } + + /* Mmap again */ + file->size = st.st_size; + if ((file->map = + mmap(NULL, st.st_size, PROT_READ | PROT_WRITE, MAP_SHARED, fd, + 0)) == MAP_FAILED) { + close(fd); + g_set_error(err, + rrd_error_quark(), ENOMEM, "mmap failed: %s", strerror(errno)); + + return FALSE; + } + + /* Adjust pointers */ + rspamd_rrd_adjust_pointers(file, TRUE); + + 
file->finalized = TRUE; + rspamd_rrd_calculate_checksum(file); + msg_info_rrd("rrd file created: %s", file->filename); + + return TRUE; +} + +/** + * Update pdp_prep data + * @param file rrd file + * @param vals new values + * @param pdp_new new pdp array + * @param interval time elapsed from the last update + * @return + */ +static gboolean +rspamd_rrd_update_pdp_prep(struct rspamd_rrd_file *file, + gdouble *vals, + gdouble *pdp_new, + gdouble interval) +{ + guint i; + enum rrd_dst_type type; + + for (i = 0; i < file->stat_head->ds_cnt; i++) { + type = rrd_dst_from_string(file->ds_def[i].dst); + + if (file->ds_def[i].par[RRD_DS_mrhb_cnt].lv < interval) { + rspamd_strlcpy(file->pdp_prep[i].last_ds, "U", + sizeof(file->pdp_prep[i].last_ds)); + pdp_new[i] = NAN; + msg_debug_rrd("adding unknown point interval %.3f is less than heartbeat %l", + interval, file->ds_def[i].par[RRD_DS_mrhb_cnt].lv); + } + else { + switch (type) { + case RRD_DST_COUNTER: + case RRD_DST_DERIVE: + if (file->pdp_prep[i].last_ds[0] == 'U') { + pdp_new[i] = NAN; + msg_debug_rrd("last point is NaN for point %ud", i); + } + else { + pdp_new[i] = vals[i] - strtod(file->pdp_prep[i].last_ds, + NULL); + msg_debug_rrd("new PDP %ud, %.3f", i, pdp_new[i]); + } + break; + case RRD_DST_GAUGE: + pdp_new[i] = vals[i] * interval; + msg_debug_rrd("new PDP %ud, %.3f", i, pdp_new[i]); + break; + case RRD_DST_ABSOLUTE: + pdp_new[i] = vals[i]; + msg_debug_rrd("new PDP %ud, %.3f", i, pdp_new[i]); + break; + default: + return FALSE; + } + } + + /* Copy value to the last_ds */ + if (!isnan(vals[i])) { + rspamd_snprintf(file->pdp_prep[i].last_ds, + sizeof(file->pdp_prep[i].last_ds), "%.4f", vals[i]); + } + else { + file->pdp_prep[i].last_ds[0] = 'U'; + file->pdp_prep[i].last_ds[1] = '\0'; + } + } + + + return TRUE; +} + +/** + * Update step for this pdp + * @param file + * @param pdp_new new pdp array + * @param pdp_temp temp pdp array + * @param interval time till last update + * @param pre_int pre interval + * 
@param post_int post intervall + * @param pdp_diff time till last pdp update + */ +static void +rspamd_rrd_update_pdp_step(struct rspamd_rrd_file *file, + gdouble *pdp_new, + gdouble *pdp_temp, + gdouble interval, + gulong pdp_diff) +{ + guint i; + rrd_value_t *scratch; + gulong heartbeat; + + + for (i = 0; i < file->stat_head->ds_cnt; i++) { + scratch = file->pdp_prep[i].scratch; + heartbeat = file->ds_def[i].par[RRD_DS_mrhb_cnt].lv; + + if (!isnan(pdp_new[i])) { + if (isnan(scratch[PDP_val].dv)) { + scratch[PDP_val].dv = 0; + } + } + + /* Check interval value for heartbeat for this DS */ + if ((interval > heartbeat) || + (file->stat_head->pdp_step / 2.0 < scratch[PDP_unkn_sec_cnt].lv)) { + pdp_temp[i] = NAN; + } + else { + pdp_temp[i] = scratch[PDP_val].dv / + ((double) (pdp_diff - scratch[PDP_unkn_sec_cnt].lv)); + } + + if (isnan(pdp_new[i])) { + scratch[PDP_unkn_sec_cnt].lv = interval; + scratch[PDP_val].dv = NAN; + } + else { + scratch[PDP_unkn_sec_cnt].lv = 0; + scratch[PDP_val].dv = pdp_new[i] / interval; + } + + msg_debug_rrd("new temp PDP %ud, %.3f -> %.3f, scratch: %3f", + i, pdp_new[i], pdp_temp[i], + scratch[PDP_val].dv); + } +} + +/** + * Update CDP for this rra + * @param file rrd file + * @param pdp_steps how much pdp steps elapsed from the last update + * @param pdp_offset offset from pdp + * @param rra_steps how much steps must be updated for this rra + * @param rra_index index of desired rra + * @param pdp_temp temporary pdp points + */ +static void +rspamd_rrd_update_cdp(struct rspamd_rrd_file *file, + gdouble pdp_steps, + gdouble pdp_offset, + gulong *rra_steps, + gulong rra_index, + gdouble *pdp_temp) +{ + guint i; + struct rrd_rra_def *rra; + rrd_value_t *scratch; + enum rrd_cf_type cf; + gdouble last_cdp = INFINITY, cur_cdp = INFINITY; + gulong pdp_in_cdp; + + rra = &file->rra_def[rra_index]; + cf = rrd_cf_from_string(rra->cf_nam); + + /* Iterate over all DS for this RRA */ + for (i = 0; i < file->stat_head->ds_cnt; i++) { + /* Get CDP for 
this RRA and DS */ + scratch = + file->cdp_prep[rra_index * file->stat_head->ds_cnt + i].scratch; + if (rra->pdp_cnt > 1) { + /* Do we have any CDP to update for this rra ? */ + if (rra_steps[rra_index] > 0) { + + if (isnan(pdp_temp[i])) { + /* New pdp is nan */ + /* Increment unknown points count */ + scratch[CDP_unkn_pdp_cnt].lv += pdp_offset; + /* Reset secondary value */ + scratch[CDP_secondary_val].dv = NAN; + } + else { + scratch[CDP_secondary_val].dv = pdp_temp[i]; + } + + /* Check XFF for this rra */ + if (scratch[CDP_unkn_pdp_cnt].lv > rra->pdp_cnt * + rra->par[RRA_cdp_xff_val].lv) { + /* XFF is reached */ + scratch[CDP_primary_val].dv = NAN; + } + else { + /* Need to initialize CDP using specified consolidation */ + switch (cf) { + case RRD_CF_AVERAGE: + last_cdp = + isnan(scratch[CDP_val].dv) ? 0.0 : scratch[CDP_val].dv; + cur_cdp = isnan(pdp_temp[i]) ? 0.0 : pdp_temp[i]; + scratch[CDP_primary_val].dv = + (last_cdp + cur_cdp * + pdp_offset) / + (rra->pdp_cnt - scratch[CDP_unkn_pdp_cnt].lv); + break; + case RRD_CF_MAXIMUM: + last_cdp = + isnan(scratch[CDP_val].dv) ? -INFINITY : scratch[CDP_val].dv; + cur_cdp = isnan(pdp_temp[i]) ? -INFINITY : pdp_temp[i]; + scratch[CDP_primary_val].dv = MAX(last_cdp, cur_cdp); + break; + case RRD_CF_MINIMUM: + last_cdp = + isnan(scratch[CDP_val].dv) ? INFINITY : scratch[CDP_val].dv; + cur_cdp = isnan(pdp_temp[i]) ? 
INFINITY : pdp_temp[i]; + scratch[CDP_primary_val].dv = MIN(last_cdp, cur_cdp); + break; + case RRD_CF_LAST: + default: + scratch[CDP_primary_val].dv = pdp_temp[i]; + last_cdp = INFINITY; + break; + } + } + + /* Init carry of this CDP */ + pdp_in_cdp = (pdp_steps - pdp_offset) / rra->pdp_cnt; + if (pdp_in_cdp == 0 || isnan(pdp_temp[i])) { + /* Set overflow */ + switch (cf) { + case RRD_CF_AVERAGE: + scratch[CDP_val].dv = 0; + break; + case RRD_CF_MAXIMUM: + scratch[CDP_val].dv = -INFINITY; + break; + case RRD_CF_MINIMUM: + scratch[CDP_val].dv = INFINITY; + break; + default: + scratch[CDP_val].dv = NAN; + break; + } + } + else { + /* Special carry for average */ + if (cf == RRD_CF_AVERAGE) { + scratch[CDP_val].dv = pdp_temp[i] * pdp_in_cdp; + } + else { + scratch[CDP_val].dv = pdp_temp[i]; + } + } + + scratch[CDP_unkn_pdp_cnt].lv = 0; + + msg_debug_rrd("update cdp for DS %d with value %.3f, " + "stored value: %.3f, carry: %.3f", + i, last_cdp, + scratch[CDP_primary_val].dv, scratch[CDP_val].dv); + } + /* In this case we just need to update cdp_prep for this RRA */ + else { + if (isnan(pdp_temp[i])) { + /* Just increase undefined zone */ + scratch[CDP_unkn_pdp_cnt].lv += pdp_steps; + } + else { + /* Calculate cdp value */ + last_cdp = scratch[CDP_val].dv; + switch (cf) { + case RRD_CF_AVERAGE: + if (isnan(last_cdp)) { + scratch[CDP_val].dv = pdp_temp[i] * pdp_steps; + } + else { + scratch[CDP_val].dv = last_cdp + pdp_temp[i] * + pdp_steps; + } + break; + case RRD_CF_MAXIMUM: + scratch[CDP_val].dv = MAX(last_cdp, pdp_temp[i]); + break; + case RRD_CF_MINIMUM: + scratch[CDP_val].dv = MIN(last_cdp, pdp_temp[i]); + break; + case RRD_CF_LAST: + scratch[CDP_val].dv = pdp_temp[i]; + break; + default: + scratch[CDP_val].dv = NAN; + break; + } + } + + msg_debug_rrd("aggregate cdp %d with pdp %.3f, " + "stored value: %.3f", + i, pdp_temp[i], scratch[CDP_val].dv); + } + } + else { + /* We have nothing to consolidate, but we may miss some pdp */ + if (pdp_steps > 2) { + /* Just 
write PDP value */ + scratch[CDP_primary_val].dv = pdp_temp[i]; + scratch[CDP_secondary_val].dv = pdp_temp[i]; + } + } + } +} + +/** + * Update RRA in a file + * @param file rrd file + * @param rra_steps steps for each rra + * @param now current time + */ +void rspamd_rrd_write_rra(struct rspamd_rrd_file *file, gulong *rra_steps) +{ + guint i, j, ds_cnt; + struct rrd_rra_def *rra; + struct rrd_cdp_prep *cdp; + gdouble *rra_row = file->rrd_value, *cur_row; + + + ds_cnt = file->stat_head->ds_cnt; + /* Iterate over all RRA */ + for (i = 0; i < file->stat_head->rra_cnt; i++) { + rra = &file->rra_def[i]; + + if (rra_steps[i] > 0) { + + /* Move row ptr */ + if (++file->rra_ptr[i].cur_row >= rra->row_cnt) { + file->rra_ptr[i].cur_row = 0; + } + /* Calculate seek */ + cdp = &file->cdp_prep[ds_cnt * i]; + cur_row = rra_row + ds_cnt * file->rra_ptr[i].cur_row; + /* Iterate over DS */ + for (j = 0; j < ds_cnt; j++) { + cur_row[j] = cdp[j].scratch[CDP_primary_val].dv; + msg_debug_rrd("write cdp %d: %.3f", j, cur_row[j]); + } + } + + rra_row += rra->row_cnt * ds_cnt; + } +} + +/** + * Add record to rrd file + * @param file rrd file object + * @param points points (must be row suitable for this RRA, depending on ds count) + * @param err error pointer + * @return TRUE if a row has been added + */ +gboolean +rspamd_rrd_add_record(struct rspamd_rrd_file *file, + GArray *points, + gdouble ticks, + GError **err) +{ + gdouble interval, *pdp_new, *pdp_temp; + guint i; + glong seconds, microseconds; + gulong pdp_steps, cur_pdp_count, prev_pdp_step, cur_pdp_step, + prev_pdp_age, cur_pdp_age, *rra_steps, pdp_offset; + + if (file == NULL || file->stat_head->ds_cnt * sizeof(gdouble) != + points->len) { + g_set_error(err, + rrd_error_quark(), EINVAL, + "rrd add points failed: wrong arguments"); + return FALSE; + } + + /* Get interval */ + seconds = (glong) ticks; + microseconds = (glong) ((ticks - seconds) * 1000000.); + interval = ticks - ((gdouble) file->live_head->last_up + + 
file->live_head->last_up_usec / 1000000.); + + msg_debug_rrd("update rrd record after %.3f seconds", interval); + + /* Update PDP preparation values */ + pdp_new = g_malloc0(sizeof(gdouble) * file->stat_head->ds_cnt); + pdp_temp = g_malloc0(sizeof(gdouble) * file->stat_head->ds_cnt); + /* How much steps need to be updated in each RRA */ + rra_steps = g_malloc0(sizeof(gulong) * file->stat_head->rra_cnt); + + if (!rspamd_rrd_update_pdp_prep(file, (gdouble *) points->data, pdp_new, + interval)) { + g_set_error(err, + rrd_error_quark(), EINVAL, + "rrd update pdp failed: wrong arguments"); + g_free(pdp_new); + g_free(pdp_temp); + g_free(rra_steps); + return FALSE; + } + + /* Calculate elapsed steps */ + /* Age in seconds for previous pdp store */ + prev_pdp_age = file->live_head->last_up % file->stat_head->pdp_step; + /* Time in seconds for last pdp update */ + prev_pdp_step = file->live_head->last_up - prev_pdp_age; + /* Age in seconds from current time to required pdp time */ + cur_pdp_age = seconds % file->stat_head->pdp_step; + /* Time of desired pdp step */ + cur_pdp_step = seconds - cur_pdp_age; + cur_pdp_count = cur_pdp_step / file->stat_head->pdp_step; + pdp_steps = (cur_pdp_step - prev_pdp_step) / file->stat_head->pdp_step; + + + if (pdp_steps == 0) { + /* Simple update of pdp prep */ + for (i = 0; i < file->stat_head->ds_cnt; i++) { + if (isnan(pdp_new[i])) { + /* Increment unknown period */ + file->pdp_prep[i].scratch[PDP_unkn_sec_cnt].lv += floor( + interval); + } + else { + if (isnan(file->pdp_prep[i].scratch[PDP_val].dv)) { + /* Reset pdp to the current value */ + file->pdp_prep[i].scratch[PDP_val].dv = pdp_new[i]; + } + else { + /* Increment pdp value */ + file->pdp_prep[i].scratch[PDP_val].dv += pdp_new[i]; + } + } + } + } + else { + /* Complex update of PDP, CDP and RRA */ + + /* Update PDP for this step */ + rspamd_rrd_update_pdp_step(file, + pdp_new, + pdp_temp, + interval, + pdp_steps * file->stat_head->pdp_step); + + + /* Update CDP points for each 
RRA*/ + for (i = 0; i < file->stat_head->rra_cnt; i++) { + /* Calculate pdp offset for this RRA */ + pdp_offset = file->rra_def[i].pdp_cnt - cur_pdp_count % + file->rra_def[i].pdp_cnt; + /* How much steps we got for this RRA */ + if (pdp_offset <= pdp_steps) { + rra_steps[i] = + (pdp_steps - pdp_offset) / file->rra_def[i].pdp_cnt + 1; + } + else { + /* This rra have not passed enough pdp steps */ + rra_steps[i] = 0; + } + + msg_debug_rrd("cdp: %ud, rra steps: %ul(%ul), pdp steps: %ul", + i, rra_steps[i], pdp_offset, pdp_steps); + + /* Update this specific CDP */ + rspamd_rrd_update_cdp(file, + pdp_steps, + pdp_offset, + rra_steps, + i, + pdp_temp); + } + + /* Write RRA */ + rspamd_rrd_write_rra(file, rra_steps); + } + file->live_head->last_up = seconds; + file->live_head->last_up_usec = microseconds; + + /* Sync and invalidate */ + msync(file->map, file->size, MS_ASYNC | MS_INVALIDATE); + + g_free(pdp_new); + g_free(pdp_temp); + g_free(rra_steps); + + return TRUE; +} + +/** + * Close rrd file + * @param file + * @return + */ +gint rspamd_rrd_close(struct rspamd_rrd_file *file) +{ + if (file == NULL) { + errno = EINVAL; + return -1; + } + + munmap(file->map, file->size); + close(file->fd); + g_free(file->filename); + g_free(file->id); + + g_free(file); + + return 0; +} + +static struct rspamd_rrd_file * +rspamd_rrd_create_file(const gchar *path, gboolean finalize, GError **err) +{ + struct rspamd_rrd_file *file; + struct rrd_ds_def ds[RSPAMD_RRD_DS_COUNT]; + struct rrd_rra_def rra[RSPAMD_RRD_RRA_COUNT]; + gint i; + GArray ar; + + /* Try to create new rrd file */ + + file = rspamd_rrd_create(path, RSPAMD_RRD_DS_COUNT, RSPAMD_RRD_RRA_COUNT, + 1, rspamd_get_calendar_ticks(), err); + + if (file == NULL) { + return NULL; + } + + /* Create DS and RRA */ + + for (i = METRIC_ACTION_REJECT; i < METRIC_ACTION_MAX; i++) { + rrd_make_default_ds(rspamd_action_to_str(i), + rrd_dst_to_string(RRD_DST_COUNTER), 1, &ds[i]); + } + + ar.data = (gchar *) ds; + ar.len = sizeof(ds); + + 
if (!rspamd_rrd_add_ds(file, &ar, err)) { + rspamd_rrd_close(file); + return NULL; + } + + /* Once per minute for 1 day */ + rrd_make_default_rra(rrd_cf_to_string(RRD_CF_AVERAGE), + 60, 24 * 60, &rra[0]); + /* Once per 5 minutes for 1 week */ + rrd_make_default_rra(rrd_cf_to_string(RRD_CF_AVERAGE), + 5 * 60, 7 * 24 * 60 / 5, &rra[1]); + /* Once per 10 mins for 1 month */ + rrd_make_default_rra(rrd_cf_to_string(RRD_CF_AVERAGE), + 60 * 10, 30 * 24 * 6, &rra[2]); + /* Once per hour for 1 year */ + rrd_make_default_rra(rrd_cf_to_string(RRD_CF_AVERAGE), + 60 * 60, 365 * 24, &rra[3]); + ar.data = (gchar *) rra; + ar.len = sizeof(rra); + + if (!rspamd_rrd_add_rra(file, &ar, err)) { + rspamd_rrd_close(file); + return NULL; + } + + if (finalize && !rspamd_rrd_finalize(file, err)) { + rspamd_rrd_close(file); + return NULL; + } + + return file; +} + +static void +rspamd_rrd_convert_ds(struct rspamd_rrd_file *old, + struct rspamd_rrd_file *cur, gint idx_old, gint idx_new) +{ + struct rrd_pdp_prep *pdp_prep_old, *pdp_prep_new; + struct rrd_cdp_prep *cdp_prep_old, *cdp_prep_new; + gdouble *val_old, *val_new; + gulong rra_cnt, i, j, points_cnt, old_ds, new_ds; + + rra_cnt = old->stat_head->rra_cnt; + pdp_prep_old = &old->pdp_prep[idx_old]; + pdp_prep_new = &cur->pdp_prep[idx_new]; + memcpy(pdp_prep_new, pdp_prep_old, sizeof(*pdp_prep_new)); + val_old = old->rrd_value; + val_new = cur->rrd_value; + old_ds = old->stat_head->ds_cnt; + new_ds = cur->stat_head->ds_cnt; + + for (i = 0; i < rra_cnt; i++) { + cdp_prep_old = &old->cdp_prep[i * old_ds] + idx_old; + cdp_prep_new = &cur->cdp_prep[i * new_ds] + idx_new; + memcpy(cdp_prep_new, cdp_prep_old, sizeof(*cdp_prep_new)); + points_cnt = old->rra_def[i].row_cnt; + + for (j = 0; j < points_cnt; j++) { + val_new[j * new_ds + idx_new] = val_old[j * old_ds + idx_old]; + } + + val_new += points_cnt * new_ds; + val_old += points_cnt * old_ds; + } +} + +static struct rspamd_rrd_file * +rspamd_rrd_convert(const gchar *path, struct 
rspamd_rrd_file *old, + GError **err) +{ + struct rspamd_rrd_file *rrd; + gchar tpath[PATH_MAX]; + + g_assert(old != NULL); + + rspamd_snprintf(tpath, sizeof(tpath), "%s.new", path); + rrd = rspamd_rrd_create_file(tpath, TRUE, err); + + if (rrd) { + /* Copy old data */ + memcpy(rrd->live_head, old->live_head, sizeof(*rrd->live_head)); + memcpy(rrd->rra_ptr, old->rra_ptr, + sizeof(*old->rra_ptr) * rrd->stat_head->rra_cnt); + + /* + * Old DSes: + * 0 - spam -> reject + * 1 - probable spam -> add header + * 2 - greylist -> greylist + * 3 - ham -> ham + */ + rspamd_rrd_convert_ds(old, rrd, 0, METRIC_ACTION_REJECT); + rspamd_rrd_convert_ds(old, rrd, 1, METRIC_ACTION_ADD_HEADER); + rspamd_rrd_convert_ds(old, rrd, 2, METRIC_ACTION_GREYLIST); + rspamd_rrd_convert_ds(old, rrd, 3, METRIC_ACTION_NOACTION); + + if (unlink(path) == -1) { + g_set_error(err, rrd_error_quark(), errno, "cannot unlink old rrd file %s: %s", + path, strerror(errno)); + unlink(tpath); + rspamd_rrd_close(rrd); + + return NULL; + } + + if (rename(tpath, path) == -1) { + g_set_error(err, rrd_error_quark(), errno, "cannot rename old rrd file %s: %s", + path, strerror(errno)); + unlink(tpath); + rspamd_rrd_close(rrd); + + return NULL; + } + } + + return rrd; +} + +struct rspamd_rrd_file * +rspamd_rrd_file_default(const gchar *path, + GError **err) +{ + struct rspamd_rrd_file *file, *nf; + + g_assert(path != NULL); + + if (access(path, R_OK) != -1) { + /* We can open rrd file */ + file = rspamd_rrd_open(path, err); + + if (file == NULL) { + return NULL; + } + + + if (file->stat_head->rra_cnt != RSPAMD_RRD_RRA_COUNT) { + msg_err_rrd("rrd file is not suitable for rspamd: it has " + "%ul ds and %ul rra", + file->stat_head->ds_cnt, + file->stat_head->rra_cnt); + g_set_error(err, rrd_error_quark(), EINVAL, "bad rrd file"); + rspamd_rrd_close(file); + + return NULL; + } + else if (file->stat_head->ds_cnt == RSPAMD_RRD_OLD_DS_COUNT) { + /* Old rrd, need to convert */ + msg_info_rrd("rrd file %s is not suitable for 
rspamd, convert it", + path); + + nf = rspamd_rrd_convert(path, file, err); + rspamd_rrd_close(file); + + return nf; + } + else if (file->stat_head->ds_cnt == RSPAMD_RRD_DS_COUNT) { + return file; + } + else { + msg_err_rrd("rrd file is not suitable for rspamd: it has " + "%ul ds and %ul rra", + file->stat_head->ds_cnt, + file->stat_head->rra_cnt); + g_set_error(err, rrd_error_quark(), EINVAL, "bad rrd file"); + rspamd_rrd_close(file); + + return NULL; + } + } + + file = rspamd_rrd_create_file(path, TRUE, err); + + return file; +} + +struct rspamd_rrd_query_result * +rspamd_rrd_query(struct rspamd_rrd_file *file, + gulong rra_num) +{ + struct rspamd_rrd_query_result *res; + struct rrd_rra_def *rra; + const gdouble *rra_offset = NULL; + guint i; + + g_assert(file != NULL); + + + if (rra_num > file->stat_head->rra_cnt) { + msg_err_rrd("requested unexisting rra: %l", rra_num); + + return NULL; + } + + res = g_malloc0(sizeof(*res)); + res->ds_count = file->stat_head->ds_cnt; + res->last_update = (gdouble) file->live_head->last_up + + ((gdouble) file->live_head->last_up_usec / 1e6f); + res->pdp_per_cdp = file->rra_def[rra_num].pdp_cnt; + res->rra_rows = file->rra_def[rra_num].row_cnt; + rra_offset = file->rrd_value; + + for (i = 0; i < file->stat_head->rra_cnt; i++) { + rra = &file->rra_def[i]; + + if (i == rra_num) { + res->cur_row = file->rra_ptr[i].cur_row % rra->row_cnt; + break; + } + + rra_offset += rra->row_cnt * res->ds_count; + } + + res->data = rra_offset; + + return res; +} diff --git a/src/libutil/rrd.h b/src/libutil/rrd.h new file mode 100644 index 0000000..3d81477 --- /dev/null +++ b/src/libutil/rrd.h @@ -0,0 +1,362 @@ +/*- + * Copyright 2016 Vsevolod Stakhov + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +#ifndef RRD_H_ +#define RRD_H_ + +#include "config.h" + +/** + * This file contains basic structures and functions to operate with round-robin databases + */ + +#define RRD_COOKIE "RRD" +#define RRD_VERSION "0003" +#define RRD_FLOAT_COOKIE ((double) 8.642135E130) + +#ifdef __cplusplus +extern "C" { +#endif + +typedef union { + unsigned long lv; + double dv; +} rrd_value_t; + +struct rrd_file_head { + /* Data Base Identification Section ** */ + gchar cookie[4]; /* RRD */ + gchar version[5]; /* version of the format */ + gdouble float_cookie; /* is it the correct double representation ? */ + + /* Data Base Structure Definition **** */ + gulong ds_cnt; /* how many different ds provide input to the rrd */ + gulong rra_cnt; /* how many rras will be maintained in the rrd */ + gulong pdp_step; /* pdp interval in seconds */ + + rrd_value_t par[10]; /* global parameters ... unused + at the moment */ +}; + +enum rrd_dst_type { + RRD_DST_INVALID = -1, + RRD_DST_COUNTER = 0, /* data source types available */ + RRD_DST_ABSOLUTE, + RRD_DST_GAUGE, + RRD_DST_DERIVE, + RRD_DST_CDEF +}; +enum rrd_ds_param { + RRD_DS_mrhb_cnt = 0, /* minimum required heartbeat */ + RRD_DS_min_val, /* the processed input of a ds must */ + RRD_DS_max_val, /* be between max_val and min_val + * both can be set to UNKNOWN if you + * do not care. 
Data outside the limits + * set to UNKNOWN */ + RRD_DS_cdef = RRD_DS_mrhb_cnt +}; /* pointer to encoded rpn expression only applies to DST_CDEF */ + + +/* The magic number here is one less than DS_NAM_SIZE */ +#define RRD_DS_NAM_SIZE 20 + +#define RRD_DST_SIZE 20 + +struct rrd_ds_def { + gchar ds_nam[RRD_DS_NAM_SIZE]; /* Name of the data source (null terminated) */ + gchar dst[RRD_DST_SIZE]; /* Type of data source (null terminated) */ + rrd_value_t par[10]; /* indexed by enum rrd_ds_param */ +}; + +/* RRA definition */ + +enum rrd_cf_type { + RRD_CF_INVALID = -1, + RRD_CF_AVERAGE = 0, /* data consolidation functions */ + RRD_CF_MINIMUM, + RRD_CF_MAXIMUM, + RRD_CF_LAST, +}; + + +#define MAX_RRA_PAR_EN 10 + +enum rrd_rra_param { + RRA_cdp_xff_val = 0, /* what part of the consolidated + * datapoint must be known, to produce a + * valid entry in the rra */ +}; + + +#define RRD_CF_NAM_SIZE 20 + +struct rrd_rra_def { + gchar cf_nam[RRD_CF_NAM_SIZE]; /* consolidation function (null term) */ + gulong row_cnt; /* number of entries in the store */ + gulong pdp_cnt; /* how many primary data points are + * required for a consolidated data point?*/ + rrd_value_t par[MAX_RRA_PAR_EN]; /* indexed by enum rrd_rra_param */ +}; + +struct rrd_live_head { + time_t last_up; /* when was rrd last updated */ + glong last_up_usec; /* micro seconds part of the update timestamp. Always >= 0 */ +}; + +#define RRD_LAST_DS_LEN 30 + +enum rrd_pdp_param { + PDP_unkn_sec_cnt = 0, /* how many seconds of the current + * pdp value is unknown data? */ + PDP_val +}; /* current value of the pdp. + this depends on dst */ + +struct rrd_pdp_prep { + gchar last_ds[RRD_LAST_DS_LEN]; /* the last reading from the data + * source. this is stored in ASCII + * to cater for very large counters + * we might encounter in connection + * with SNMP. 
*/ + rrd_value_t scratch[10]; /* indexed by enum rrd_pdp_param */ +}; + +#define RRD_MAX_CDP_PAR_EN 10 +#define RRD_MAX_CDP_FAILURES_IDX 8 +/* max CDP scratch entries avail to record violations for a FAILURES RRA */ +#define RRD_MAX_FAILURES_WINDOW_LEN 28 + +enum rrd_cdp_param { + CDP_val = 0, + /* the base_interval is always an + * average */ + CDP_unkn_pdp_cnt, + /* how many unknown pdp were + * integrated. This and the cdp_xff + * will decide if this is going to + * be a UNKNOWN or a valid value */ + CDP_hw_intercept, + /* Current intercept coefficient for the Holt-Winters + * prediction algorithm. */ + CDP_hw_last_intercept, + /* Last iteration intercept coefficient for the Holt-Winters + * prediction algorithm. */ + CDP_hw_slope, + /* Current slope coefficient for the Holt-Winters + * prediction algorithm. */ + CDP_hw_last_slope, + /* Last iteration slope coefficient. */ + CDP_null_count, + /* Number of sequential Unknown (DNAN) values + 1 preceding + * the current prediction. + * */ + CDP_last_null_count, + /* Last iteration count of Unknown (DNAN) values. */ + CDP_primary_val = 8, + /* optimization for bulk updates: the value of the first CDP + * value to be written in the bulk update. */ + CDP_secondary_val = 9, + /* optimization for bulk updates: the value of subsequent + * CDP values to be written in the bulk update. */ + CDP_hw_seasonal = CDP_hw_intercept, + /* Current seasonal coefficient for the Holt-Winters + * prediction algorithm. This is stored in CDP prep to avoid + * redundant seek operations. */ + CDP_hw_last_seasonal = CDP_hw_last_intercept, + /* Last iteration seasonal coefficient. 
*/ + CDP_seasonal_deviation = CDP_hw_intercept, + CDP_last_seasonal_deviation = CDP_hw_last_intercept, + CDP_init_seasonal = CDP_null_count +}; + +struct rrd_cdp_prep { + rrd_value_t scratch[RRD_MAX_CDP_PAR_EN]; + /* indexed by enum rrd_cdp_param; + * init state should be NAN */ +}; + +struct rrd_rra_ptr { + gulong cur_row; /* current row in the rra */ +}; + +/* Final rrd file structure */ +struct rspamd_rrd_file { + struct rrd_file_head *stat_head; /* the static header */ + struct rrd_ds_def *ds_def; /* list of data source definitions */ + struct rrd_rra_def *rra_def; /* list of round robin archive def */ + struct rrd_live_head *live_head; /* rrd v >= 3 last_up with us */ + struct rrd_pdp_prep *pdp_prep; /* pdp data prep area */ + struct rrd_cdp_prep *cdp_prep; /* cdp prep area */ + struct rrd_rra_ptr *rra_ptr; /* list of rra pointers */ + gdouble *rrd_value; /* list of rrd values */ + + gchar *filename; + guint8 *map; /* mmapped area */ + gsize size; /* its size */ + gboolean finalized; + gchar *id; + gint fd; +}; + + +/* Public API */ + +/** + * Open (and mmap) existing RRD file + * @param filename path + * @param err error pointer + * @return rrd file structure + */ +struct rspamd_rrd_file *rspamd_rrd_open(const gchar *filename, GError **err); + +/** + * Create basic header for rrd file + * @param filename file path + * @param ds_count number of data sources + * @param rra_count number of round robin archives + * @param pdp_step step of primary data points + * @param initial_ticks initial timestamp for the new file (NOTE(review): units are not visible in this header - confirm) + * @param err error pointer + * @return rrd file structure (the prototype returns a pointer, not a boolean) + */ +struct rspamd_rrd_file *rspamd_rrd_create(const gchar *filename, + gulong ds_count, + gulong rra_count, + gulong pdp_step, + gdouble initial_ticks, + GError **err); + +/** + * Add data sources to rrd file + * @param file rrd file object + * @param ds array of struct rrd_ds_def + * @param err 
error pointer + * @return TRUE if data sources were added + */ +gboolean rspamd_rrd_add_ds(struct rspamd_rrd_file *file, + GArray *ds, + 
GError **err); + +/** + * Add round robin archives to rrd file + * @param file rrd file object + * @param rra array of struct rrd_rra_def + * @param err error pointer + * @return TRUE if archives were added + */ +gboolean rspamd_rrd_add_rra(struct rspamd_rrd_file *file, + GArray *rra, + GError **err); + +/** + * Finalize rrd file header and initialize all RRA in the file + * @param file rrd file object + * @param err error pointer + * @return TRUE if rrd file is ready for use + */ +gboolean rspamd_rrd_finalize(struct rspamd_rrd_file *file, GError **err); + +/** + * Add record to rrd file + * @param file rrd file object + * @param points points (must be row suitable for this RRA, depending on ds count) + * @param ticks timestamp of the record (NOTE(review): units are not visible in this header - confirm) + * @param err error pointer + * @return TRUE if a row has been added + */ +gboolean rspamd_rrd_add_record(struct rspamd_rrd_file *file, + GArray *points, + gdouble ticks, + GError **err); + +/** + * Close rrd file + * @param file rrd file object + * @return + */ +gint rspamd_rrd_close(struct rspamd_rrd_file *file); + +/* + * Conversion functions + */ + +/** + * Convert rrd dst type from string to numeric value + */ +enum rrd_dst_type rrd_dst_from_string(const gchar *str); + +/** + * Convert numeric presentation of dst to string + */ +const gchar *rrd_dst_to_string(enum rrd_dst_type type); + +/** + * Convert rrd consolidation function type from string to numeric value + */ +enum rrd_cf_type rrd_cf_from_string(const gchar *str); + +/** + * Convert numeric presentation of cf to string + */ +const gchar *rrd_cf_to_string(enum rrd_cf_type type); + +/* Default RRA and DS */ + +/** + * Create default RRA + */ +void rrd_make_default_rra(const gchar *cf_name, + gulong pdp_cnt, + gulong rows, + struct rrd_rra_def *rra); + +/** + * Create default DS + */ +void rrd_make_default_ds(const gchar *name, + const gchar *type, + gulong pdp_step, + struct rrd_ds_def *ds); + +/** + * Open or 
create the default rspamd rrd file + */ +struct rspamd_rrd_file *rspamd_rrd_file_default(const gchar *path, + 
GError **err); + +/** + * Returned by querying rrd database + */ +struct rspamd_rrd_query_result { + gulong rra_rows; + gulong pdp_per_cdp; + gulong ds_count; + gdouble last_update; + gulong cur_row; + const gdouble *data; +}; + +/** + * Return RRA data + * @param file rrd file + * @param rra_num number of rra to return data for + * @return query result structure, that should be freed (using g_slice_free1) after usage + */ +struct rspamd_rrd_query_result *rspamd_rrd_query(struct rspamd_rrd_file *file, + gulong rra_num); + +#ifdef __cplusplus +} +#endif + +#endif /* RRD_H_ */ diff --git a/src/libutil/shingles.c b/src/libutil/shingles.c new file mode 100644 index 0000000..42d5168 --- /dev/null +++ b/src/libutil/shingles.c @@ -0,0 +1,412 @@ +/*- + * Copyright 2016 Vsevolod Stakhov + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +#include "shingles.h" +#include "fstring.h" +#include "cryptobox.h" +#include "images.h" +#include "libstat/stat_api.h" + +#define SHINGLES_WINDOW 3 +#define SHINGLES_KEY_SIZE rspamd_cryptobox_SIPKEYBYTES + +static guint +rspamd_shingles_keys_hash(gconstpointer k) +{ + return rspamd_cryptobox_fast_hash(k, SHINGLES_KEY_SIZE, + rspamd_hash_seed()); +} + +static gboolean +rspamd_shingles_keys_equal(gconstpointer k1, gconstpointer k2) +{ + return (memcmp(k1, k2, SHINGLES_KEY_SIZE) == 0); +} + +static void +rspamd_shingles_keys_free(gpointer p) +{ + guchar **k = p; + guint i; + + for (i = 0; i < RSPAMD_SHINGLE_SIZE; i++) { + g_free(k[i]); + } + + g_free(k); +} + +static guchar ** +rspamd_shingles_keys_new(void) +{ + guchar **k; + guint i; + + k = g_malloc0(sizeof(guchar *) * RSPAMD_SHINGLE_SIZE); + + for (i = 0; i < RSPAMD_SHINGLE_SIZE; i++) { + k[i] = g_malloc0(sizeof(guchar) * SHINGLES_KEY_SIZE); + } + + return k; +} + +static guchar ** +rspamd_shingles_get_keys_cached(const guchar key[SHINGLES_KEY_SIZE]) +{ + static GHashTable *ht = NULL; + guchar **keys = NULL, *key_cpy; + rspamd_cryptobox_hash_state_t bs; + const guchar *cur_key; + guchar shabuf[rspamd_cryptobox_HASHBYTES], *out_key; + guint i; + + if (ht == NULL) { + ht = g_hash_table_new_full(rspamd_shingles_keys_hash, + rspamd_shingles_keys_equal, g_free, rspamd_shingles_keys_free); + } + else { + keys = g_hash_table_lookup(ht, key); + } + + if (keys == NULL) { + keys = rspamd_shingles_keys_new(); + key_cpy = g_malloc(SHINGLES_KEY_SIZE); + memcpy(key_cpy, key, SHINGLES_KEY_SIZE); + + /* Generate keys */ + rspamd_cryptobox_hash_init(&bs, NULL, 0); + cur_key = key; + + for (i = 0; i < RSPAMD_SHINGLE_SIZE; i++) { + /* + * To generate a set of hashes we just apply sha256 to the + * initial key as many times as many hashes are required and + * xor left and right parts of sha256 to get a single 16 bytes SIP key. 
+ */ + out_key = keys[i]; + rspamd_cryptobox_hash_update(&bs, cur_key, 16); + rspamd_cryptobox_hash_final(&bs, shabuf); + + memcpy(out_key, shabuf, 16); + rspamd_cryptobox_hash_init(&bs, NULL, 0); + cur_key = out_key; + } + + g_hash_table_insert(ht, key_cpy, keys); + } + + return keys; +} + +struct rspamd_shingle *RSPAMD_OPTIMIZE("unroll-loops") + rspamd_shingles_from_text(GArray *input, + const guchar key[16], + rspamd_mempool_t *pool, + rspamd_shingles_filter filter, + gpointer filterd, + enum rspamd_shingle_alg alg) +{ + struct rspamd_shingle *res; + guint64 **hashes; + guchar **keys; + rspamd_fstring_t *row; + rspamd_stat_token_t *word; + guint64 val; + gint i, j, k; + gsize hlen, ilen = 0, beg = 0, widx = 0; + enum rspamd_cryptobox_fast_hash_type ht; + + if (pool != NULL) { + res = rspamd_mempool_alloc(pool, sizeof(*res)); + } + else { + res = g_malloc(sizeof(*res)); + } + + row = rspamd_fstring_sized_new(256); + + for (i = 0; i < input->len; i++) { + word = &g_array_index(input, rspamd_stat_token_t, i); + + if (!((word->flags & RSPAMD_STAT_TOKEN_FLAG_SKIPPED) || word->stemmed.len == 0)) { + ilen++; + } + } + + /* Init hashes pipes and keys */ + hashes = g_malloc(sizeof(*hashes) * RSPAMD_SHINGLE_SIZE); + hlen = ilen > SHINGLES_WINDOW ? 
(ilen - SHINGLES_WINDOW + 1) : 1; + keys = rspamd_shingles_get_keys_cached(key); + + for (i = 0; i < RSPAMD_SHINGLE_SIZE; i++) { + hashes[i] = g_malloc(hlen * sizeof(guint64)); + } + + /* Now parse input words into a vector of hashes using rolling window */ + if (alg == RSPAMD_SHINGLES_OLD) { + for (i = 0; i <= (gint) ilen; i++) { + if (i - beg >= SHINGLES_WINDOW || i == (gint) ilen) { + for (j = beg; j < i; j++) { + + word = NULL; + while (widx < input->len) { + word = &g_array_index(input, rspamd_stat_token_t, widx); + + if ((word->flags & RSPAMD_STAT_TOKEN_FLAG_SKIPPED) || word->stemmed.len == 0) { + widx++; + } + else { + break; + } + } + + if (word == NULL) { + /* Nothing but exceptions */ + for (i = 0; i < RSPAMD_SHINGLE_SIZE; i++) { + g_free(hashes[i]); + } + + g_free(hashes); + + if (pool == NULL) { + g_free(res); + } + + rspamd_fstring_free(row); + + return NULL; + } + + row = rspamd_fstring_append(row, word->stemmed.begin, + word->stemmed.len); + } + + /* Now we need to create a new row here */ + for (j = 0; j < RSPAMD_SHINGLE_SIZE; j++) { + rspamd_cryptobox_siphash((guchar *) &val, row->str, row->len, + keys[j]); + g_assert(hlen > beg); + hashes[j][beg] = val; + } + + beg++; + widx++; + + row = rspamd_fstring_assign(row, "", 0); + } + } + } + else { + guint64 window[SHINGLES_WINDOW * RSPAMD_SHINGLE_SIZE], seed; + + switch (alg) { + case RSPAMD_SHINGLES_XXHASH: + ht = RSPAMD_CRYPTOBOX_XXHASH64; + break; + case RSPAMD_SHINGLES_MUMHASH: + ht = RSPAMD_CRYPTOBOX_MUMHASH; + break; + default: + ht = RSPAMD_CRYPTOBOX_HASHFAST_INDEPENDENT; + break; + } + + memset(window, 0, sizeof(window)); + for (i = 0; i <= ilen; i++) { + if (i - beg >= SHINGLES_WINDOW || i == ilen) { + + for (j = 0; j < RSPAMD_SHINGLE_SIZE; j++) { + /* Shift hashes window to right */ + for (k = 0; k < SHINGLES_WINDOW - 1; k++) { + window[j * SHINGLES_WINDOW + k] = + window[j * SHINGLES_WINDOW + k + 1]; + } + + word = NULL; + + while (widx < input->len) { + word = &g_array_index(input, 
rspamd_stat_token_t, widx); + + if ((word->flags & RSPAMD_STAT_TOKEN_FLAG_SKIPPED) || word->stemmed.len == 0) { + widx++; + } + else { + break; + } + } + + if (word == NULL) { + /* Nothing but exceptions */ + for (i = 0; i < RSPAMD_SHINGLE_SIZE; i++) { + g_free(hashes[i]); + } + + if (pool == NULL) { + g_free(res); + } + + g_free(hashes); + rspamd_fstring_free(row); + + return NULL; + } + + /* Insert the last element to the pipe */ + memcpy(&seed, keys[j], sizeof(seed)); + window[j * SHINGLES_WINDOW + SHINGLES_WINDOW - 1] = + rspamd_cryptobox_fast_hash_specific(ht, + word->stemmed.begin, word->stemmed.len, + seed); + val = 0; + for (k = 0; k < SHINGLES_WINDOW; k++) { + val ^= window[j * SHINGLES_WINDOW + k] >> + (8 * (SHINGLES_WINDOW - k - 1)); + } + + g_assert(hlen > beg); + hashes[j][beg] = val; + } + + beg++; + widx++; + } + } + } + + /* Now we need to filter all hashes and make a shingles result */ + for (i = 0; i < RSPAMD_SHINGLE_SIZE; i++) { + res->hashes[i] = filter(hashes[i], hlen, + i, key, filterd); + g_free(hashes[i]); + } + + g_free(hashes); + + rspamd_fstring_free(row); + + return res; +} + +struct rspamd_shingle *RSPAMD_OPTIMIZE("unroll-loops") + rspamd_shingles_from_image(guchar *dct, + const guchar key[16], + rspamd_mempool_t *pool, + rspamd_shingles_filter filter, + gpointer filterd, + enum rspamd_shingle_alg alg) +{ + struct rspamd_shingle *shingle; + guint64 **hashes; + guchar **keys; + guint64 d; + guint64 val; + gint i, j; + gsize hlen, beg = 0; + enum rspamd_cryptobox_fast_hash_type ht; + guint64 res[SHINGLES_WINDOW * RSPAMD_SHINGLE_SIZE], seed; + + if (pool != NULL) { + shingle = rspamd_mempool_alloc(pool, sizeof(*shingle)); + } + else { + shingle = g_malloc(sizeof(*shingle)); + } + + /* Init hashes pipes and keys */ + hashes = g_malloc(sizeof(*hashes) * RSPAMD_SHINGLE_SIZE); + hlen = RSPAMD_DCT_LEN / NBBY + 1; + keys = rspamd_shingles_get_keys_cached(key); + + for (i = 0; i < RSPAMD_SHINGLE_SIZE; i++) { + hashes[i] = g_malloc(hlen * 
sizeof(guint64)); + } + + switch (alg) { + case RSPAMD_SHINGLES_OLD: + ht = RSPAMD_CRYPTOBOX_MUMHASH; + break; + case RSPAMD_SHINGLES_XXHASH: + ht = RSPAMD_CRYPTOBOX_XXHASH64; + break; + case RSPAMD_SHINGLES_MUMHASH: + ht = RSPAMD_CRYPTOBOX_MUMHASH; + break; + default: + ht = RSPAMD_CRYPTOBOX_HASHFAST_INDEPENDENT; + break; + } + + memset(res, 0, sizeof(res)); +#define INNER_CYCLE_SHINGLES(s, e) \ + for (j = (s); j < (e); j++) { \ + d = dct[beg]; \ + memcpy(&seed, keys[j], sizeof(seed)); \ + val = rspamd_cryptobox_fast_hash_specific(ht, \ + &d, sizeof(d), \ + seed); \ + hashes[j][beg] = val; \ + } + for (i = 0; i < RSPAMD_DCT_LEN / NBBY; i++) { + INNER_CYCLE_SHINGLES(0, RSPAMD_SHINGLE_SIZE / 4); + INNER_CYCLE_SHINGLES(RSPAMD_SHINGLE_SIZE / 4, RSPAMD_SHINGLE_SIZE / 2); + INNER_CYCLE_SHINGLES(RSPAMD_SHINGLE_SIZE / 2, 3 * RSPAMD_SHINGLE_SIZE / 4); + INNER_CYCLE_SHINGLES(3 * RSPAMD_SHINGLE_SIZE / 4, RSPAMD_SHINGLE_SIZE); + + beg++; + } +#undef INNER_CYCLE_SHINGLES + /* Now we need to filter all hashes and make a shingles result */ + for (i = 0; i < RSPAMD_SHINGLE_SIZE; i++) { + shingle->hashes[i] = filter(hashes[i], hlen, + i, key, filterd); + g_free(hashes[i]); + } + + g_free(hashes); + + return shingle; +} + +guint64 +rspamd_shingles_default_filter(guint64 *input, gsize count, + gint shno, const guchar *key, gpointer ud) +{ + guint64 minimal = G_MAXUINT64; + gsize i; + + for (i = 0; i < count; i++) { + if (minimal > input[i]) { + minimal = input[i]; + } + } + + return minimal; +} + + +gdouble rspamd_shingles_compare(const struct rspamd_shingle *a, + const struct rspamd_shingle *b) +{ + gint i, common = 0; + + for (i = 0; i < RSPAMD_SHINGLE_SIZE; i++) { + if (a->hashes[i] == b->hashes[i]) { + common++; + } + } + + return (gdouble) common / (gdouble) RSPAMD_SHINGLE_SIZE; +} diff --git a/src/libutil/shingles.h b/src/libutil/shingles.h new file mode 100644 index 0000000..9a0ca69 --- /dev/null +++ b/src/libutil/shingles.h @@ -0,0 +1,101 @@ +/*- + * Copyright 2016 Vsevolod 
Stakhov + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +#ifndef SHINGLES_H_ +#define SHINGLES_H_ + +#include "config.h" +#include "mem_pool.h" + +#define RSPAMD_SHINGLE_SIZE 32 + +#ifdef __cplusplus +extern "C" { +#endif + +struct rspamd_shingle { + guint64 hashes[RSPAMD_SHINGLE_SIZE]; +}; + +enum rspamd_shingle_alg { + RSPAMD_SHINGLES_OLD = 0, + RSPAMD_SHINGLES_XXHASH, + RSPAMD_SHINGLES_MUMHASH, + RSPAMD_SHINGLES_FAST +}; + +/** + * Shingles filtering function + * @param input input array of hashes + * @param count number of hashes in the vector + * @param shno index of the hash row being filtered (0 .. RSPAMD_SHINGLE_SIZE - 1) + * @param key the key that was passed to the shingles generator + * @param ud opaque data passed to the generator as `filterd` + * @return shingle value + */ +typedef guint64 (*rspamd_shingles_filter)(guint64 *input, gsize count, + gint shno, const guchar *key, gpointer ud); + +/** + * Generate shingles from an array of words; words that are flagged as skipped + * or have an empty stemmed form are ignored + * @param input array of `rspamd_stat_token_t` + * @param key secret key used to generate shingles + * @param pool pool to allocate shingles array (g_malloc is used if NULL) + * @param filter hashes filtering function + * @param filterd opaque data for filtering function + * @param alg hashing algorithm + * @return shingles array, or NULL if no word can be fetched from input + */ +struct rspamd_shingle *rspamd_shingles_from_text(GArray *input, + const guchar key[16], + rspamd_mempool_t *pool, + rspamd_shingles_filter filter, + gpointer filterd, + enum rspamd_shingle_alg alg); + +/** + * Generate shingles from the DCT matrix of an image + * @param dct discrete cosine transform matrix (must be 64x64) + * @param key secret key used to generate shingles + * @param pool pool to 
allocate shingles array (g_malloc is used if NULL) + * @param filter hashes filtering function + * @param filterd opaque data for filtering function + * @param alg hashing algorithm + * @return shingles array + */ +struct rspamd_shingle *rspamd_shingles_from_image(guchar *dct, + const guchar key[16], + rspamd_mempool_t *pool, + rspamd_shingles_filter filter, + gpointer filterd, + enum rspamd_shingle_alg alg); + +/** + * Compares two shingles and return result as a floating point value - 1.0 + * for completely similar shingles and 0.0 for completely different ones + * @param a first shingle + * @param b second shingle + * @return number of equal hash slots divided by RSPAMD_SHINGLE_SIZE + */ +gdouble rspamd_shingles_compare(const struct rspamd_shingle *a, + const struct rspamd_shingle *b); + +/** + * Default filtering function: returns the minimal hash of the input vector + * (G_MAXUINT64 if count is 0) + */ +guint64 rspamd_shingles_default_filter(guint64 *input, gsize count, + gint shno, const guchar *key, gpointer ud); + +#ifdef __cplusplus +} +#endif + +#endif /* SHINGLES_H_ */ diff --git a/src/libutil/sqlite_utils.c b/src/libutil/sqlite_utils.c new file mode 100644 index 0000000..8aeb598 --- /dev/null +++ b/src/libutil/sqlite_utils.c @@ -0,0 +1,620 @@ +/*- + * Copyright 2016 Vsevolod Stakhov + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +#include "config.h" +#include "libserver/logger.h" +#include "libutil/sqlite_utils.h" +#include "unix-std.h" + + +static GQuark +rspamd_sqlite3_quark(void) +{ + return g_quark_from_static_string("rspamd-sqlite3"); +} + +GArray * +rspamd_sqlite3_init_prstmt(sqlite3 *db, + struct rspamd_sqlite3_prstmt *init_stmt, + gint max_idx, + GError **err) +{ + gint i; + GArray *res; + struct rspamd_sqlite3_prstmt *nst; + + res = g_array_sized_new(FALSE, TRUE, sizeof(struct rspamd_sqlite3_prstmt), + max_idx); + g_array_set_size(res, max_idx); + + for (i = 0; i < max_idx; i++) { + nst = &g_array_index(res, struct rspamd_sqlite3_prstmt, i); + memcpy(nst, &init_stmt[i], sizeof(*nst)); + + if (sqlite3_prepare_v2(db, init_stmt[i].sql, -1, + &nst->stmt, NULL) != SQLITE_OK) { + g_set_error(err, rspamd_sqlite3_quark(), + -1, "Cannot initialize prepared sql `%s`: %s", + nst->sql, sqlite3_errmsg(db)); + rspamd_sqlite3_close_prstmt(db, res); + + return NULL; + } + } + + return res; +} + +int rspamd_sqlite3_run_prstmt(rspamd_mempool_t *pool, sqlite3 *db, GArray *stmts, + gint idx, ...) 
+{ + gint retcode; + va_list ap; + sqlite3_stmt *stmt; + gint i, rowid, nargs, j; + gint64 len; + gpointer p; + struct rspamd_sqlite3_prstmt *nst; + const char *argtypes; + + if (idx < 0 || idx >= (gint) stmts->len) { + + return -1; + } + + nst = &g_array_index(stmts, struct rspamd_sqlite3_prstmt, idx); + stmt = nst->stmt; + + g_assert(nst != NULL); + + msg_debug_pool("executing `%s`", nst->sql); + argtypes = nst->args; + sqlite3_clear_bindings(stmt); + sqlite3_reset(stmt); + va_start(ap, idx); + nargs = 1; + + for (i = 0, rowid = 1; argtypes[i] != '\0'; i++) { + switch (argtypes[i]) { + case 'T': + + for (j = 0; j < nargs; j++, rowid++) { + sqlite3_bind_text(stmt, rowid, va_arg(ap, const char *), -1, + SQLITE_STATIC); + } + + nargs = 1; + break; + case 'V': + case 'B': + + for (j = 0; j < nargs; j++, rowid++) { + len = va_arg(ap, gint64); + sqlite3_bind_text(stmt, rowid, va_arg(ap, const char *), len, + SQLITE_STATIC); + } + + nargs = 1; + break; + case 'I': + + for (j = 0; j < nargs; j++, rowid++) { + sqlite3_bind_int64(stmt, rowid, va_arg(ap, gint64)); + } + + nargs = 1; + break; + case 'S': + + for (j = 0; j < nargs; j++, rowid++) { + sqlite3_bind_int(stmt, rowid, va_arg(ap, gint)); + } + + nargs = 1; + break; + case '*': + nargs = va_arg(ap, gint); + break; + } + } + + retcode = sqlite3_step(stmt); + + if (retcode == nst->result) { + argtypes = nst->ret; + + for (i = 0; argtypes != NULL && argtypes[i] != '\0'; i++) { + switch (argtypes[i]) { + case 'T': + *va_arg(ap, char **) = g_strdup(sqlite3_column_text(stmt, i)); + break; + case 'I': + *va_arg(ap, gint64 *) = sqlite3_column_int64(stmt, i); + break; + case 'S': + *va_arg(ap, int *) = sqlite3_column_int(stmt, i); + break; + case 'L': + *va_arg(ap, gint64 *) = sqlite3_last_insert_rowid(db); + break; + case 'B': + len = sqlite3_column_bytes(stmt, i); + g_assert(len >= 0); + p = g_malloc(len); + memcpy(p, sqlite3_column_blob(stmt, i), len); + *va_arg(ap, gint64 *) = len; + *va_arg(ap, gpointer *) = p; + break; 
+ } + } + + if (!(nst->flags & RSPAMD_SQLITE3_STMT_MULTIPLE)) { + sqlite3_clear_bindings(stmt); + sqlite3_reset(stmt); + } + + va_end(ap); + + return SQLITE_OK; + } + else if (retcode != SQLITE_DONE && retcode != SQLITE_OK && retcode != SQLITE_ROW) { + msg_warn_pool("failed to execute query %s: %d, %s", nst->sql, + retcode, sqlite3_errmsg(db)); + } + + if (!(nst->flags & RSPAMD_SQLITE3_STMT_MULTIPLE)) { + sqlite3_clear_bindings(stmt); + sqlite3_reset(stmt); + } + + va_end(ap); + + return retcode; +} + +void rspamd_sqlite3_close_prstmt(sqlite3 *db, GArray *stmts) +{ + guint i; + struct rspamd_sqlite3_prstmt *nst; + + for (i = 0; i < stmts->len; i++) { + nst = &g_array_index(stmts, struct rspamd_sqlite3_prstmt, i); + if (nst->stmt != NULL) { + sqlite3_finalize(nst->stmt); + } + } + + g_array_free(stmts, TRUE); + + return; +} + +static gboolean +rspamd_sqlite3_wait(rspamd_mempool_t *pool, const gchar *lock) +{ + gint fd; + pid_t pid; + gssize r; + struct timespec sleep_ts = { + .tv_sec = 0, + .tv_nsec = 1000000}; + + while ((fd = open(lock, O_WRONLY | O_CREAT | O_EXCL, 00600)) == -1) { + if (errno != EBUSY && errno != EEXIST) { + msg_err_pool_check("cannot open lock file %s: %s", lock, + strerror(errno)); + + return FALSE; + } + + fd = open(lock, O_RDONLY); + + if (fd == -1) { + msg_err_pool_check("cannot open lock file %s: %s", lock, + strerror(errno)); + + return FALSE; + } + + r = read(fd, &pid, sizeof(pid)); + + if (r != sizeof(pid)) { + msg_warn_pool_check("stale lock file %s, removing", lock); + unlink(lock); + close(fd); + + return TRUE; + } + + /* Now check for process existence */ + if (pid == getpid()) { + msg_warn_pool_check("lock file %s, belongs to me, removing", lock); + unlink(lock); + close(fd); + + return TRUE; + } + else if (kill(pid, 0) == -1) { + if (errno == ESRCH) { + /* Process is already dead */ + msg_warn_pool_check("stale lock file %s from pid: %P, removing", + lock, pid); + unlink(lock); + close(fd); + + return TRUE; + } + } + + close(fd); + 
+ if (nanosleep(&sleep_ts, NULL) == -1 && errno != EINTR) { + msg_err_pool_check("cannot sleep open lock file %s: %s", lock, + strerror(errno)); + + return FALSE; + } + } + + unlink(lock); + close(fd); + + return TRUE; +} + +#define RSPAMD_SQLITE_MMAP_LIMIT 268435456 +#define RSPAMD_SQLITE_CACHE_SIZE 262144 + +sqlite3 * +rspamd_sqlite3_open_or_create(rspamd_mempool_t *pool, const gchar *path, const gchar *create_sql, guint version, GError **err) +{ + sqlite3 *sqlite; + gint rc, flags, lock_fd; + gchar lock_path[PATH_MAX], dbdir[PATH_MAX], *pdir; + static const char sqlite_wal[] = + "PRAGMA journal_mode=\"wal\";" + "PRAGMA wal_autocheckpoint = 16;" + "PRAGMA journal_size_limit = 1536;", + exclusive_lock_sql[] = "PRAGMA locking_mode=\"exclusive\";", + + fsync_sql[] = "PRAGMA synchronous=\"NORMAL\";", + + foreign_keys[] = "PRAGMA foreign_keys=\"ON\";", + +#if defined(__LP64__) || defined(_LP64) + enable_mmap[] = "PRAGMA mmap_size=" G_STRINGIFY(RSPAMD_SQLITE_MMAP_LIMIT) ";", +#endif + + other_pragmas[] = "PRAGMA read_uncommitted=\"ON\";" + "PRAGMA cache_size=" G_STRINGIFY(RSPAMD_SQLITE_CACHE_SIZE) ";", + db_version[] = "PRAGMA user_version;"; + gboolean create = FALSE, has_lock = FALSE; + + flags = SQLITE_OPEN_READWRITE; +#ifdef SQLITE_OPEN_SHAREDCACHE + flags |= SQLITE_OPEN_SHAREDCACHE; +#endif +#ifdef SQLITE_OPEN_WAL + flags |= SQLITE_OPEN_WAL; +#endif + + rspamd_strlcpy(dbdir, path, sizeof(dbdir)); + pdir = dirname(dbdir); + + if (access(pdir, W_OK) == -1) { + g_set_error(err, rspamd_sqlite3_quark(), + errno, "cannot open sqlite directory %s: %s", + pdir, strerror(errno)); + + return NULL; + } + + rspamd_snprintf(lock_path, sizeof(lock_path), "%s.lock", path); + + if (access(path, R_OK) == -1) { + flags |= SQLITE_OPEN_CREATE; + create = TRUE; + } + + + rspamd_snprintf(lock_path, sizeof(lock_path), "%s.lock", path); + lock_fd = open(lock_path, O_WRONLY | O_CREAT | O_EXCL, 00600); + + if (lock_fd == -1) { + if (errno == EEXIST || errno == EBUSY) { + 
msg_debug_pool_check("checking %s to wait for db being initialized", lock_path); + + if (!rspamd_sqlite3_wait(pool, lock_path)) { + g_set_error(err, rspamd_sqlite3_quark(), + errno, "cannot create sqlite file %s: %s", + path, strerror(errno)); + + return NULL; + } + + + /* At this point we have database created */ + create = FALSE; + has_lock = FALSE; + } + else { + g_set_error(err, rspamd_sqlite3_quark(), + errno, "cannot lock sqlite file %s: %s", + path, strerror(errno)); + } + } + else { + pid_t myself = getpid(); + msg_debug_pool_check("locking %s to block other processes", lock_path); + (void) write(lock_fd, &myself, sizeof(myself)); + + g_assert(rspamd_file_lock(lock_fd, FALSE)); + has_lock = TRUE; + } + + if ((rc = sqlite3_open_v2(path, &sqlite, + flags, NULL)) != SQLITE_OK) { +#if SQLITE_VERSION_NUMBER >= 3008000 + g_set_error(err, rspamd_sqlite3_quark(), + rc, "cannot open sqlite db %s: %s", + path, sqlite3_errstr(rc)); +#else + g_set_error(err, rspamd_sqlite3_quark(), + rc, "cannot open sqlite db %s: %d", + path, rc); +#endif + + if (has_lock && lock_fd != -1) { + msg_debug_pool_check("removing lock from %s", lock_path); + rspamd_file_unlock(lock_fd, FALSE); + unlink(lock_path); + close(lock_fd); + } + + return NULL; + } + + if (create && has_lock) { + while ((rc = sqlite3_exec(sqlite, sqlite_wal, NULL, NULL, NULL)) != SQLITE_OK) { + if (rc == SQLITE_BUSY) { + struct timespec sleep_ts = { + .tv_sec = 0, + .tv_nsec = 1000000}; + + nanosleep(&sleep_ts, NULL); + + continue; + } + + msg_warn_pool_check("WAL mode is not supported (%s), locking issues might occur", + sqlite3_errmsg(sqlite)); + break; + } + + if (sqlite3_exec(sqlite, exclusive_lock_sql, NULL, NULL, NULL) != SQLITE_OK) { + msg_warn_pool_check("cannot exclusively lock database to create schema: %s", + sqlite3_errmsg(sqlite)); + } + + if (create_sql) { + while ((rc = sqlite3_exec(sqlite, create_sql, NULL, NULL, NULL)) != SQLITE_OK) { + if (rc == SQLITE_BUSY) { + struct timespec sleep_ts = { + 
.tv_sec = 0, + .tv_nsec = 1000000}; + + nanosleep(&sleep_ts, NULL); + + continue; + } + + g_set_error(err, rspamd_sqlite3_quark(), + -1, "cannot execute create sql `%s`: %s", + create_sql, sqlite3_errmsg(sqlite)); + sqlite3_close(sqlite); + rspamd_file_unlock(lock_fd, FALSE); + unlink(lock_path); + if (lock_fd != -1) { + close(lock_fd); + } + + return NULL; + } + } + + sqlite3_close(sqlite); + + /* Reopen in normal mode */ + msg_debug_pool_check("reopening %s in normal mode", path); + flags &= ~SQLITE_OPEN_CREATE; + + if ((rc = sqlite3_open_v2(path, &sqlite, + flags, NULL)) != SQLITE_OK) { +#if SQLITE_VERSION_NUMBER >= 3008000 + g_set_error(err, rspamd_sqlite3_quark(), + rc, "cannot open sqlite db after creation %s: %s", + path, sqlite3_errstr(rc)); +#else + g_set_error(err, rspamd_sqlite3_quark(), + rc, "cannot open sqlite db after creation %s: %d", + path, rc); +#endif + rspamd_file_unlock(lock_fd, FALSE); + unlink(lock_path); + + if (lock_fd != -1) { + close(lock_fd); + } + + return NULL; + } + } + else if (has_lock && version > 0) { + /* Check user version */ + sqlite3_stmt *stmt = NULL; + guint32 db_ver; + GString *new_ver_sql; + + if (sqlite3_prepare(sqlite, db_version, -1, &stmt, NULL) != SQLITE_OK) { + msg_warn_pool_check("Cannot get user version pragma: %s", + sqlite3_errmsg(sqlite)); + } + else { + if (sqlite3_step(stmt) != SQLITE_ROW) { + msg_warn_pool_check("Cannot get user version pragma, step failed: %s", + sqlite3_errmsg(sqlite)); + sqlite3_finalize(stmt); + } + else { + db_ver = sqlite3_column_int(stmt, 0); + sqlite3_reset(stmt); + sqlite3_finalize(stmt); + + if (version > db_ver) { + msg_warn_pool_check("Database version %ud is less than " + "desired version %ud, run create script", + db_ver, + version); + + if (create_sql) { + if (sqlite3_exec(sqlite, create_sql, NULL, NULL, NULL) != SQLITE_OK) { + g_set_error(err, rspamd_sqlite3_quark(), + -1, "cannot execute create sql `%s`: %s", + create_sql, sqlite3_errmsg(sqlite)); + sqlite3_close(sqlite); + 
rspamd_file_unlock(lock_fd, FALSE); + unlink(lock_path); + if (lock_fd != -1) { + close(lock_fd); + } + + return NULL; + } + } + + new_ver_sql = g_string_new("PRAGMA user_version="); + rspamd_printf_gstring(new_ver_sql, "%ud", version); + + if (sqlite3_exec(sqlite, new_ver_sql->str, NULL, NULL, NULL) != SQLITE_OK) { + g_set_error(err, rspamd_sqlite3_quark(), + -1, "cannot execute update version sql `%s`: %s", + new_ver_sql->str, sqlite3_errmsg(sqlite)); + sqlite3_close(sqlite); + rspamd_file_unlock(lock_fd, FALSE); + unlink(lock_path); + if (lock_fd != -1) { + close(lock_fd); + } + + g_string_free(new_ver_sql, TRUE); + + return NULL; + } + + g_string_free(new_ver_sql, TRUE); + } + else if (db_ver > version) { + msg_warn_pool_check("Database version %ud is more than " + "desired version %ud, this could cause" + " unexpected behaviour", + db_ver, + version); + } + } + } + } + + while ((rc = sqlite3_exec(sqlite, sqlite_wal, NULL, NULL, NULL)) != SQLITE_OK) { + if (rc == SQLITE_BUSY) { + struct timespec sleep_ts = { + .tv_sec = 0, + .tv_nsec = 1000000}; + + nanosleep(&sleep_ts, NULL); + + continue; + } + + msg_warn_pool_check("WAL mode is not supported (%s), locking issues might occur", + sqlite3_errmsg(sqlite)); + break; + } + + if (sqlite3_exec(sqlite, fsync_sql, NULL, NULL, NULL) != SQLITE_OK) { + msg_warn_pool_check("cannot set synchronous: %s", + sqlite3_errmsg(sqlite)); + } + + if ((rc = sqlite3_exec(sqlite, foreign_keys, NULL, NULL, NULL)) != + SQLITE_OK) { + msg_warn_pool_check("cannot enable foreign keys: %s", + sqlite3_errmsg(sqlite)); + } + +#if defined(__LP64__) || defined(_LP64) + if ((rc = sqlite3_exec(sqlite, enable_mmap, NULL, NULL, NULL)) != SQLITE_OK) { + msg_warn_pool_check("cannot enable mmap: %s", + sqlite3_errmsg(sqlite)); + } +#endif + + if ((rc = sqlite3_exec(sqlite, other_pragmas, NULL, NULL, NULL)) != + SQLITE_OK) { + msg_warn_pool_check("cannot execute tuning pragmas: %s", + sqlite3_errmsg(sqlite)); + } + + if (has_lock && lock_fd != -1) { + 
msg_debug_pool_check("removing lock from %s", lock_path); + rspamd_file_unlock(lock_fd, FALSE); + unlink(lock_path); + close(lock_fd); + } + + return sqlite; +} + +gboolean +rspamd_sqlite3_sync(sqlite3 *db, gint *wal_frames, gint *wal_checkpoints) +{ + gint wf = 0, wc = 0, mode; + +#ifdef SQLITE_OPEN_WAL +#ifdef SQLITE_CHECKPOINT_TRUNCATE + mode = SQLITE_CHECKPOINT_TRUNCATE; +#elif defined(SQLITE_CHECKPOINT_RESTART) + mode = SQLITE_CHECKPOINT_RESTART; +#elif defined(SQLITE_CHECKPOINT_FULL) + mode = SQLITE_CHECKPOINT_FULL; +#endif + /* Perform wal checkpoint (might be long) */ + if (sqlite3_wal_checkpoint_v2(db, + NULL, + mode, + &wf, + &wc) != SQLITE_OK) { + return FALSE; + } +#endif + + if (wal_frames) { + *wal_frames = wf; + } + if (wal_checkpoints) { + *wal_checkpoints = wc; + } + + return TRUE; +} diff --git a/src/libutil/sqlite_utils.h b/src/libutil/sqlite_utils.h new file mode 100644 index 0000000..5411a47 --- /dev/null +++ b/src/libutil/sqlite_utils.h @@ -0,0 +1,90 @@ +/*- + * Copyright 2016 Vsevolod Stakhov + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +#ifndef SRC_LIBUTIL_SQLITE_UTILS_H_ +#define SRC_LIBUTIL_SQLITE_UTILS_H_ + +#include "config.h" +#include "mem_pool.h" +#include "sqlite3.h" + +#define RSPAMD_SQLITE3_STMT_MULTIPLE (1 << 0) + +#ifdef __cplusplus +extern "C" { +#endif + +struct rspamd_sqlite3_prstmt { + gint idx; + const gchar *sql; + const gchar *args; + sqlite3_stmt *stmt; + gint result; + const gchar *ret; + gint flags; +}; + +/** + * Create prepared statements for specified database from init statements + * @param db + * @param max_idx + * @param err + * @return new prepared statements array or NULL + */ +GArray *rspamd_sqlite3_init_prstmt(sqlite3 *db, + struct rspamd_sqlite3_prstmt *init_stmt, + gint max_idx, + GError **err); + +/** + * Run prepared statements by its index getting parameters and setting results from + * varargs structure + * @param db + * @param stmts + * @param idx + * @return + */ +gint rspamd_sqlite3_run_prstmt(rspamd_mempool_t *pool, sqlite3 *db, GArray *stmts, + gint idx, ...); + +/** + * Close and free prepared statements + * @param db + * @param stmts + */ +void rspamd_sqlite3_close_prstmt(sqlite3 *db, GArray *stmts); + +/** + * Creates or opens sqlite database trying to share it between processes + * @param path + * @param create_sql + * @return + */ +sqlite3 *rspamd_sqlite3_open_or_create(rspamd_mempool_t *pool, + const gchar *path, const gchar *create_sql, + guint32 version, GError **err); + + +/** + * Sync sqlite3 db ensuring that all wal things are done + * @param db + */ +gboolean rspamd_sqlite3_sync(sqlite3 *db, gint *wal_frames, gint *wal_checkpoints); + +#ifdef __cplusplus +} +#endif + +#endif /* SRC_LIBUTIL_SQLITE_UTILS_H_ */ diff --git a/src/libutil/str_util.c b/src/libutil/str_util.c new file mode 100644 index 0000000..bc99f2a --- /dev/null +++ b/src/libutil/str_util.c @@ -0,0 +1,3886 @@ +/*- + * Copyright 2016 Vsevolod Stakhov + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with 
the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +#include "config.h" +#include "util.h" +#include "cryptobox.h" +#include "url.h" +#include "str_util.h" +#include "logger.h" +#include "contrib/t1ha/t1ha.h" +#include <unicode/uversion.h> +#include <unicode/ucnv.h> +#if U_ICU_VERSION_MAJOR_NUM >= 44 +#include <unicode/unorm2.h> +#endif +#include <math.h> + +#ifdef __x86_64__ +#include <immintrin.h> +#endif + +#include "contrib/fastutf8/fastutf8.h" + +const guchar lc_map[256] = { + 0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, + 0x08, 0x09, 0x0a, 0x0b, 0x0c, 0x0d, 0x0e, 0x0f, + 0x10, 0x11, 0x12, 0x13, 0x14, 0x15, 0x16, 0x17, + 0x18, 0x19, 0x1a, 0x1b, 0x1c, 0x1d, 0x1e, 0x1f, + 0x20, 0x21, 0x22, 0x23, 0x24, 0x25, 0x26, 0x27, + 0x28, 0x29, 0x2a, 0x2b, 0x2c, 0x2d, 0x2e, 0x2f, + 0x30, 0x31, 0x32, 0x33, 0x34, 0x35, 0x36, 0x37, + 0x38, 0x39, 0x3a, 0x3b, 0x3c, 0x3d, 0x3e, 0x3f, + 0x40, 0x61, 0x62, 0x63, 0x64, 0x65, 0x66, 0x67, + 0x68, 0x69, 0x6a, 0x6b, 0x6c, 0x6d, 0x6e, 0x6f, + 0x70, 0x71, 0x72, 0x73, 0x74, 0x75, 0x76, 0x77, + 0x78, 0x79, 0x7a, 0x5b, 0x5c, 0x5d, 0x5e, 0x5f, + 0x60, 0x61, 0x62, 0x63, 0x64, 0x65, 0x66, 0x67, + 0x68, 0x69, 0x6a, 0x6b, 0x6c, 0x6d, 0x6e, 0x6f, + 0x70, 0x71, 0x72, 0x73, 0x74, 0x75, 0x76, 0x77, + 0x78, 0x79, 0x7a, 0x7b, 0x7c, 0x7d, 0x7e, 0x7f, + 0x80, 0x81, 0x82, 0x83, 0x84, 0x85, 0x86, 0x87, + 0x88, 0x89, 0x8a, 0x8b, 0x8c, 0x8d, 0x8e, 0x8f, + 0x90, 0x91, 0x92, 0x93, 0x94, 0x95, 0x96, 0x97, + 0x98, 0x99, 0x9a, 0x9b, 0x9c, 0x9d, 0x9e, 0x9f, + 0xa0, 0xa1, 0xa2, 0xa3, 0xa4, 0xa5, 0xa6, 0xa7, + 0xa8, 0xa9, 0xaa, 0xab, 0xac, 0xad, 0xae, 0xaf, + 0xb0, 0xb1, 0xb2, 
0xb3, 0xb4, 0xb5, 0xb6, 0xb7, + 0xb8, 0xb9, 0xba, 0xbb, 0xbc, 0xbd, 0xbe, 0xbf, + 0xc0, 0xc1, 0xc2, 0xc3, 0xc4, 0xc5, 0xc6, 0xc7, + 0xc8, 0xc9, 0xca, 0xcb, 0xcc, 0xcd, 0xce, 0xcf, + 0xd0, 0xd1, 0xd2, 0xd3, 0xd4, 0xd5, 0xd6, 0xd7, + 0xd8, 0xd9, 0xda, 0xdb, 0xdc, 0xdd, 0xde, 0xdf, + 0xe0, 0xe1, 0xe2, 0xe3, 0xe4, 0xe5, 0xe6, 0xe7, + 0xe8, 0xe9, 0xea, 0xeb, 0xec, 0xed, 0xee, 0xef, + 0xf0, 0xf1, 0xf2, 0xf3, 0xf4, 0xf5, 0xf6, 0xf7, + 0xf8, 0xf9, 0xfa, 0xfb, 0xfc, 0xfd, 0xfe, 0xff}; + +guint rspamd_str_lc(gchar *str, guint size) +{ + guint leftover = size % 4; + guint fp, i; + const uint8_t *s = (const uint8_t *) str; + gchar *dest = str; + guchar c1, c2, c3, c4; + + fp = size - leftover; + + for (i = 0; i != fp; i += 4) { + c1 = s[i], c2 = s[i + 1], c3 = s[i + 2], c4 = s[i + 3]; + dest[0] = lc_map[c1]; + dest[1] = lc_map[c2]; + dest[2] = lc_map[c3]; + dest[3] = lc_map[c4]; + dest += 4; + } + + switch (leftover) { + case 3: + *dest++ = lc_map[(guchar) str[i++]]; + /* FALLTHRU */ + case 2: + *dest++ = lc_map[(guchar) str[i++]]; + /* FALLTHRU */ + case 1: + *dest = lc_map[(guchar) str[i]]; + } + + return size; +} + +gsize rspamd_str_copy_lc(const gchar *src, gchar *dst, gsize size) +{ + gchar *d = dst; + + /* Find aligned start */ + while ((0xf & (uintptr_t) src) && size > 0) { + *d++ = lc_map[(guchar) *src++]; + size--; + } + + /* Aligned start in src */ +#ifdef __x86_64__ + while (size >= 16) { + __m128i sv = _mm_load_si128((const __m128i *) src); + /* From A */ + __m128i rangeshift = _mm_sub_epi8(sv, _mm_set1_epi8((char) ('A' + 128))); + /* To Z */ + __m128i nomodify = _mm_cmpgt_epi8(rangeshift, _mm_set1_epi8(-128 + 25)); + /* ^ ' ' */ + __m128i flip = _mm_andnot_si128(nomodify, _mm_set1_epi8(0x20)); + __m128i uc = _mm_xor_si128(sv, flip); + _mm_storeu_si128((__m128i *) d, uc); + d += 16; + src += 16; + size -= 16; + } +#endif + + /* Leftover */ + while (size > 0) { + *d++ = lc_map[(guchar) *src++]; + size--; + } + + return (d - dst); +} + +gint rspamd_lc_cmp(const gchar 
*s, const gchar *d, gsize l) +{ + gsize fp, i; + guchar c1, c2, c3, c4; + union { + guchar c[4]; + guint32 n; + } cmp1, cmp2; + gsize leftover = l % 4; + gint ret = 0; + + fp = l - leftover; + + for (i = 0; i != fp; i += 4) { + c1 = s[i], c2 = s[i + 1], c3 = s[i + 2], c4 = s[i + 3]; + cmp1.c[0] = lc_map[c1]; + cmp1.c[1] = lc_map[c2]; + cmp1.c[2] = lc_map[c3]; + cmp1.c[3] = lc_map[c4]; + + c1 = d[i], c2 = d[i + 1], c3 = d[i + 2], c4 = d[i + 3]; + cmp2.c[0] = lc_map[c1]; + cmp2.c[1] = lc_map[c2]; + cmp2.c[2] = lc_map[c3]; + cmp2.c[3] = lc_map[c4]; + + if (cmp1.n != cmp2.n) { + return cmp1.n - cmp2.n; + } + } + + while (leftover > 0) { + if (g_ascii_tolower(s[i]) != g_ascii_tolower(d[i])) { + return s[i] - d[i]; + } + + leftover--; + i++; + } + + return ret; +} + +/* + * The purpose of this function is fast and in place conversion of a unicode + * string to lower case, so some locale peculiarities are simply ignored + * If the target string is longer than initial one, then we just trim it + */ +guint rspamd_str_lc_utf8(gchar *str, guint size) +{ + guchar *d = (guchar *) str, tst[6]; + gint32 i = 0, prev = 0; + UChar32 uc; + + while (i < size) { + prev = i; + + U8_NEXT((guint8 *) str, i, size, uc); + uc = u_tolower(uc); + + gint32 olen = 0; + U8_APPEND_UNSAFE(tst, olen, uc); + + if (olen <= (i - prev)) { + memcpy(d, tst, olen); + d += olen; + } + else { + /* Lowercasing has increased the length, so we need to ignore it */ + d += i - prev; + } + } + + return d - (guchar *) str; +} + +gboolean +rspamd_strcase_equal(gconstpointer v, gconstpointer v2) +{ + if (g_ascii_strcasecmp((const gchar *) v, (const gchar *) v2) == 0) { + return TRUE; + } + + return FALSE; +} + +guint64 +rspamd_icase_hash(const gchar *in, gsize len, guint64 seed) +{ + guint leftover = len % sizeof(guint64); + guint fp, i; + const uint8_t *s = (const uint8_t *) in; + union { + struct { + guchar c1, c2, c3, c4, c5, c6, c7, c8; + } c; + guint64 pp; + } u; + guint64 h = seed; + + fp = len - leftover; + + 
for (i = 0; i != fp; i += 8) { + u.c.c1 = s[i], u.c.c2 = s[i + 1], u.c.c3 = s[i + 2], u.c.c4 = s[i + 3]; + u.c.c5 = s[i + 4], u.c.c6 = s[i + 5], u.c.c7 = s[i + 6], u.c.c8 = s[i + 7]; + u.c.c1 = lc_map[u.c.c1]; + u.c.c2 = lc_map[u.c.c2]; + u.c.c3 = lc_map[u.c.c3]; + u.c.c4 = lc_map[u.c.c4]; + u.c.c5 = lc_map[u.c.c5]; + u.c.c6 = lc_map[u.c.c6]; + u.c.c7 = lc_map[u.c.c7]; + u.c.c8 = lc_map[u.c.c8]; + h = t1ha(&u.pp, sizeof(u), h); + } + + u.pp = 0; + + switch (leftover) { + case 7: + u.c.c7 = lc_map[(guchar) s[i++]]; /* FALLTHRU */ + case 6: + u.c.c6 = lc_map[(guchar) s[i++]]; /* FALLTHRU */ + case 5: + u.c.c5 = lc_map[(guchar) s[i++]]; /* FALLTHRU */ + case 4: + u.c.c4 = lc_map[(guchar) s[i++]]; /* FALLTHRU */ + case 3: + u.c.c3 = lc_map[(guchar) s[i++]]; /* FALLTHRU */ + case 2: + u.c.c2 = lc_map[(guchar) s[i++]]; /* FALLTHRU */ + case 1: + u.c.c1 = lc_map[(guchar) s[i]]; + break; + } + + h = t1ha(&u.pp, sizeof(u), h); + + return h; +} + +guint rspamd_strcase_hash(gconstpointer key) +{ + const gchar *p = key; + gsize len; + + len = strlen(p); + + return (guint) rspamd_icase_hash(p, len, rspamd_hash_seed()); +} + +guint rspamd_str_hash(gconstpointer key) +{ + gsize len; + + len = strlen((const gchar *) key); + + return (guint) rspamd_cryptobox_fast_hash(key, len, rspamd_hash_seed()); +} + +gboolean +rspamd_str_equal(gconstpointer v, gconstpointer v2) +{ + return strcmp((const gchar *) v, (const gchar *) v2) == 0; +} + +gboolean +rspamd_ftok_icase_equal(gconstpointer v, gconstpointer v2) +{ + const rspamd_ftok_t *f1 = v, *f2 = v2; + + if (f1->len == f2->len && + rspamd_lc_cmp(f1->begin, f2->begin, f1->len) == 0) { + return TRUE; + } + + return FALSE; +} + + +guint rspamd_ftok_icase_hash(gconstpointer key) +{ + const rspamd_ftok_t *f = key; + + return (guint) rspamd_icase_hash(f->begin, f->len, rspamd_hash_seed()); +} + +gboolean +rspamd_gstring_icase_equal(gconstpointer v, gconstpointer v2) +{ + const GString *f1 = v, *f2 = v2; + if (f1->len == f2->len && + 
rspamd_lc_cmp(f1->str, f2->str, f1->len) == 0) { + return TRUE; + } + + return FALSE; +} + +guint rspamd_gstring_icase_hash(gconstpointer key) +{ + const GString *f = key; + + return (guint) rspamd_icase_hash(f->str, f->len, rspamd_hash_seed()); +} + +/* https://graphics.stanford.edu/~seander/bithacks.html#ZeroInWord */ +#define MEM_ALIGN (sizeof(gsize) - 1) +#if defined(__LP64__) || defined(_LP64) +#define WORD_TYPE guint64 +#define ZEROMASK 0x7F7F7F7F7F7F7F7FLLU +#else +#define WORD_TYPE guint32 +#define ZEROMASK 0x7F7F7F7FU +#endif + +#define HASZERO(x) ~(((((x) &ZEROMASK) + ZEROMASK) | (x)) | ZEROMASK) + +gsize rspamd_strlcpy_fast(gchar *dst, const gchar *src, gsize siz) +{ + gchar *d = dst; + const gchar *s = src; + gsize n = siz; + WORD_TYPE *wd; + const WORD_TYPE *ws; + + /* Copy as many bytes as will fit */ + if (n-- != 0) { + if (((uintptr_t) s & MEM_ALIGN) == ((uintptr_t) d & MEM_ALIGN)) { + /* Init copy byte by byte */ + for (; ((uintptr_t) s & MEM_ALIGN) && n && (*d = *s); n--, s++, d++) + ; + if (n && *s) { + wd = (void *) d; + ws = (const void *) s; + /* + * Copy by 32 or 64 bits (causes valgrind warnings) + */ + for (; n >= sizeof(WORD_TYPE) && !HASZERO(*ws); + n -= sizeof(WORD_TYPE), ws++, wd++) { + *wd = *ws; + } + + d = (void *) wd; + s = (const void *) ws; + } + } + + /* Copy the rest */ + for (; n && (*d = *s); n--, s++, d++) + ; + + *d = 0; + } + else { + return 0; + } + + return (d - dst); +} + +gsize rspamd_null_safe_copy(const gchar *src, gsize srclen, + gchar *dest, gsize destlen) +{ + gsize copied = 0, si = 0, di = 0; + + if (destlen == 0) { + return 0; + } + + while (si < srclen && di + 1 < destlen) { + if (src[si] != '\0') { + dest[di++] = src[si++]; + copied++; + } + else { + si++; + } + } + + dest[di] = '\0'; + + return copied; +} + + +size_t +rspamd_strlcpy_safe(gchar *dst, const gchar *src, gsize siz) +{ + gchar *d = dst; + gsize nleft = siz; + + if (nleft != 0) { + while (--nleft != 0) { + if ((*d++ = *src++) == '\0') { + d--; + 
break; + } + } + } + + if (nleft == 0) { + if (siz != 0) { + *d = '\0'; + } + } + + return (d - dst); +} + +/* + * Try to convert string of length to long + */ +gboolean +rspamd_strtol(const gchar *s, gsize len, glong *value) +{ + const gchar *p = s, *end = s + len; + gchar c; + glong v = 0; + const glong cutoff = G_MAXLONG / 10, cutlim = G_MAXLONG % 10; + gboolean neg; + + /* Case negative values */ + if (*p == '-') { + neg = TRUE; + p++; + } + else { + neg = FALSE; + } + /* Some preparations for range errors */ + + while (p < end) { + c = *p; + if (c >= '0' && c <= '9') { + c -= '0'; + if (v > cutoff || (v == cutoff && c > cutlim)) { + /* Range error */ + *value = neg ? G_MINLONG : G_MAXLONG; + return FALSE; + } + else { + v *= 10; + v += c; + } + } + else { + return FALSE; + } + p++; + } + + *value = neg ? -(v) : v; + return TRUE; +} + +/* + * Try to convert string of length to long + */ +#define CONV_STR_LIM_DECIMAL(max_num) \ + do { \ + while (p < end) { \ + c = *p; \ + if (c >= '0' && c <= '9') { \ + c -= '0'; \ + if (v > cutoff || (v == cutoff && (guint8) c > cutlim)) { \ + *value = (max_num); \ + return FALSE; \ + } \ + else { \ + v *= 10; \ + v += c; \ + } \ + } \ + else { \ + *value = v; \ + return FALSE; \ + } \ + p++; \ + } \ + } while (0) + +gboolean +rspamd_strtoul(const gchar *s, gsize len, gulong *value) +{ + const gchar *p = s, *end = s + len; + gchar c; + gulong v = 0; + const gulong cutoff = G_MAXULONG / 10, cutlim = G_MAXULONG % 10; + + /* Some preparations for range errors */ + CONV_STR_LIM_DECIMAL(G_MAXULONG); + + *value = v; + return TRUE; +} + +gboolean +rspamd_strtou64(const gchar *s, gsize len, guint64 *value) +{ + const gchar *p = s, *end = s + len; + gchar c; + guint64 v = 0; + const guint64 cutoff = G_MAXUINT64 / 10, cutlim = G_MAXUINT64 % 10; + + /* Some preparations for range errors */ + CONV_STR_LIM_DECIMAL(G_MAXUINT64); + + *value = v; + return TRUE; +} + +gboolean +rspamd_xstrtoul(const gchar *s, gsize len, gulong *value) +{ + 
const gchar *p = s, *end = s + len; + gchar c; + gulong v = 0; + const gulong cutoff = G_MAXULONG / 10, cutlim = G_MAXULONG % 10; + + /* Some preparations for range errors */ + while (p < end) { + c = g_ascii_tolower(*p); + if (c >= '0' && c <= '9') { + c -= '0'; + if (v > cutoff || (v == cutoff && (guint8) c > cutlim)) { + /* Range error */ + *value = G_MAXULONG; + return FALSE; + } + else { + v *= 16; + v += c; + } + } + else if (c >= 'a' || c <= 'f') { + c = c - 'a' + 10; + if (v > cutoff || (v == cutoff && (guint8) c > cutlim)) { + /* Range error */ + *value = G_MAXULONG; + return FALSE; + } + else { + v *= 16; + v += c; + } + } + else { + *value = v; + + return FALSE; + } + p++; + } + + *value = v; + return TRUE; +} + +/** + * Utility function to provide mem_pool copy for rspamd_hash_table_copy function + * @param data string to copy + * @param ud memory pool to use + * @return + */ +gpointer +rspamd_str_pool_copy(gconstpointer data, gpointer ud) +{ + rspamd_mempool_t *pool = ud; + + return data ? 
rspamd_mempool_strdup(pool, data) : NULL; +} + +/* + * We use here z-base32 encoding described here: + * http://philzimmermann.com/docs/human-oriented-base-32-encoding.txt + */ + +gint rspamd_encode_base32_buf(const guchar *in, gsize inlen, gchar *out, gsize outlen, + enum rspamd_base32_type type) +{ + static const char b32_default[] = "ybndrfg8ejkmcpqxot1uwisza345h769", + b32_bleach[] = "qpzry9x8gf2tvdw0s3jn54khce6mua7l", + b32_rfc[] = "ABCDEFGHIJKLMNOPQRSTUVWXYZ234567", + *b32; + gchar *o, *end; + gsize i; + gint remain = -1, x; + bool inverse_order = true; + + end = out + outlen; + o = out; + + switch (type) { + case RSPAMD_BASE32_DEFAULT: + b32 = b32_default; + break; + case RSPAMD_BASE32_BLEACH: + b32 = b32_bleach; + inverse_order = false; + break; + case RSPAMD_BASE32_RFC: + b32 = b32_rfc; + inverse_order = false; + break; + default: + g_assert_not_reached(); + abort(); + } + + if (inverse_order) { + /* Zbase32 as used in Rspamd */ + for (i = 0; i < inlen && o < end - 1; i++) { + switch (i % 5) { + case 0: + /* 8 bits of input and 3 to remain */ + x = in[i]; + remain = in[i] >> 5; + *o++ = b32[x & 0x1F]; + break; + case 1: + /* 11 bits of input, 1 to remain */ + x = remain | in[i] << 3; + *o++ = b32[x & 0x1F]; + *o++ = b32[x >> 5 & 0x1F]; + remain = x >> 10; + break; + case 2: + /* 9 bits of input, 4 to remain */ + x = remain | in[i] << 1; + *o++ = b32[x & 0x1F]; + remain = x >> 5; + break; + case 3: + /* 12 bits of input, 2 to remain */ + x = remain | in[i] << 4; + *o++ = b32[x & 0x1F]; + *o++ = b32[x >> 5 & 0x1F]; + remain = x >> 10 & 0x3; + break; + case 4: + /* 10 bits of output, nothing to remain */ + x = remain | in[i] << 2; + *o++ = b32[x & 0x1F]; + *o++ = b32[x >> 5 & 0x1F]; + remain = -1; + break; + default: + /* Not to be happen */ + break; + } + } + } + else { + /* Traditional base32 with no bits inversion */ + for (i = 0; i < inlen && o < end - 1; i++) { + switch (i % 5) { + case 0: + /* 8 bits of input and 3 to remain */ + x = in[i] >> 3; + 
remain = (in[i] & 7) << 2; + *o++ = b32[x & 0x1F]; + break; + case 1: + /* 11 bits of input, 1 to remain */ + x = (remain << 6) | in[i]; + *o++ = b32[(x >> 6) & 0x1F]; + *o++ = b32[(x >> 1) & 0x1F]; + remain = (x & 0x1) << 4; + break; + case 2: + /* 9 bits of input, 4 to remain */ + x = (remain << 4) | in[i]; + *o++ = b32[(x >> 4) & 0x1F]; + remain = (x & 15) << 1; + break; + case 3: + /* 12 bits of input, 2 to remain */ + x = (remain << 7) | in[i]; + *o++ = b32[(x >> 7) & 0x1F]; + *o++ = b32[(x >> 2) & 0x1F]; + remain = (x & 3) << 3; + break; + case 4: + /* 10 bits of output, nothing to remain */ + x = (remain << 5) | in[i]; + *o++ = b32[(x >> 5) & 0x1F]; + *o++ = b32[x & 0x1F]; + remain = -1; + break; + default: + /* Not to be happen */ + break; + } + } + } + if (remain >= 0 && o < end) { + *o++ = b32[remain & 0x1F]; + } + + if (o <= end) { + return (o - out); + } + + return -1; +} + +gchar * +rspamd_encode_base32(const guchar *in, gsize inlen, enum rspamd_base32_type type) +{ + gsize allocated_len = inlen * 8 / 5 + 2; + gchar *out; + gint outlen; + + out = g_malloc(allocated_len); + outlen = rspamd_encode_base32_buf(in, inlen, out, + allocated_len - 1, type); + + if (outlen >= 0) { + out[outlen] = 0; + + return out; + } + + g_free(out); + + return NULL; +} + +enum rspamd_base32_type +rspamd_base32_decode_type_from_str(const gchar *str) +{ + enum rspamd_base32_type ret = RSPAMD_BASE32_INVALID; + + if (str == NULL) { + return RSPAMD_BASE32_DEFAULT; + } + + if (strcmp(str, "default") == 0 || strcmp(str, "zbase") == 0) { + ret = RSPAMD_BASE32_ZBASE; + } + else if (strcmp(str, "bleach") == 0) { + ret = RSPAMD_BASE32_BLEACH; + } + else if (strcmp(str, "rfc") == 0) { + ret = RSPAMD_BASE32_RFC; + } + + return ret; +} + +static const guchar b32_dec_zbase[] = { + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, + 0xff, 0xff, 
0xff, 0xff, 0xff, 0xff, 0xff, 0xff, + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, + 0xff, 0x12, 0xff, 0x19, 0x1a, 0x1b, 0x1e, 0x1d, + 0x07, 0x1f, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, + 0xff, 0x18, 0x01, 0x0c, 0x03, 0x08, 0x05, 0x06, + 0x1c, 0x15, 0x09, 0x0a, 0xff, 0x0b, 0x02, 0x10, + 0x0d, 0x0e, 0x04, 0x16, 0x11, 0x13, 0xff, 0x14, + 0x0f, 0x00, 0x17, 0xff, 0xff, 0xff, 0xff, 0xff, + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff}; +static const guchar b32_dec_bleach[] = { + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, + 0x0f, 0xff, 0x0a, 0x11, 0x15, 0x14, 0x1a, 0x1e, + 0x07, 0x05, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, + 0xff, 0xff, 0xff, 
0xff, 0xff, 0xff, 0xff, 0xff, + 0xff, 0x1d, 0xff, 0x18, 0x0d, 0x19, 0x09, 0x08, + 0x17, 0xff, 0x12, 0x16, 0x1f, 0x1b, 0x13, 0xff, + 0x01, 0x00, 0x03, 0x10, 0x0b, 0x1c, 0x0c, 0x0e, + 0x06, 0x04, 0x02, 0xff, 0xff, 0xff, 0xff, 0xff, + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff}; +static const guchar b32_dec_rfc[] = { + 0xff, + 0xff, + 0xff, + 0xff, + 0xff, + 0xff, + 0xff, + 0xff, + 0xff, + 0xff, + 0xff, + 0xff, + 0xff, + 0xff, + 0xff, + 0xff, + 0xff, + 0xff, + 0xff, + 0xff, + 0xff, + 0xff, + 0xff, + 0xff, + 0xff, + 0xff, + 0xff, + 0xff, + 0xff, + 0xff, + 0xff, + 0xff, + 0xff, + 0xff, + 0xff, + 0xff, + 0xff, + 0xff, + 0xff, + 0xff, + 0xff, + 0xff, + 0xff, + 0xff, + 0xff, + 0xff, + 0xff, + 0xff, + 0xff, + 0xff, + 0x1a, + 0x1b, + 0x1c, + 0x1d, + 0x1e, + 0x1f, + 0xff, + 0xff, + 0xff, + 0xff, + 0xff, + 0xff, + 0xff, + 0xff, + 0xff, + 0x00, + 0x01, + 0x02, + 0x03, + 0x04, + 0x05, + 0x06, + 0x07, + 0x08, + 0x09, + 0x0a, + 0x0b, + 0x0c, + 0x0d, + 0x0e, + 0x0f, + 0x10, + 0x11, + 0x12, + 0x13, + 0x14, + 0x15, + 0x16, + 0x17, + 0x18, + 0x19, + 0xff, + 0xff, + 0xff, + 0xff, + 0xff, + 0xff, + 0xff, + 0xff, + 0xff, + 0xff, + 0xff, + 0xff, + 0xff, + 0xff, + 0xff, + 0xff, + 0xff, + 0xff, + 0xff, + 0xff, + 0xff, + 0xff, + 0xff, + 0xff, + 0xff, + 
0xff, + 0xff, + 0xff, + 0xff, + 0xff, + 0xff, + 0xff, + 0xff, + 0xff, + 0xff, + 0xff, + 0xff, + 0xff, + 0xff, + 0xff, + 0xff, + 0xff, + 0xff, + 0xff, + 0xff, + 0xff, + 0xff, + 0xff, + 0xff, + 0xff, + 0xff, + 0xff, + 0xff, + 0xff, + 0xff, + 0xff, + 0xff, + 0xff, + 0xff, + 0xff, + 0xff, + 0xff, + 0xff, + 0xff, + 0xff, + 0xff, + 0xff, + 0xff, + 0xff, + 0xff, + 0xff, + 0xff, + 0xff, + 0xff, + 0xff, + 0xff, + 0xff, + 0xff, + 0xff, + 0xff, + 0xff, + 0xff, + 0xff, + 0xff, + 0xff, + 0xff, + 0xff, + 0xff, + 0xff, + 0xff, + 0xff, + 0xff, + 0xff, + 0xff, + 0xff, + 0xff, + 0xff, + 0xff, + 0xff, + 0xff, + 0xff, + 0xff, + 0xff, + 0xff, + 0xff, + 0xff, + 0xff, + 0xff, + 0xff, + 0xff, + 0xff, + 0xff, + 0xff, + 0xff, + 0xff, + 0xff, + 0xff, + 0xff, + 0xff, + 0xff, + 0xff, + 0xff, + 0xff, + 0xff, + 0xff, + 0xff, + 0xff, + 0xff, + 0xff, + 0xff, + 0xff, + 0xff, + 0xff, + 0xff, + 0xff, + 0xff, + 0xff, + 0xff, + 0xff, + 0xff, + 0xff, + 0xff, + 0xff, + 0xff, + 0xff, + 0xff, + 0xff, + 0xff, + 0xff, + 0xff, + 0xff, + 0xff, + 0xff, + 0xff, + 0xff, + 0xff, + 0xff, + 0xff, + 0xff, + 0xff, + 0xff, + 0xff, + 0xff, + 0xff, + 0xff, +}; + + +gint rspamd_decode_base32_buf(const gchar *in, gsize inlen, guchar *out, gsize outlen, + enum rspamd_base32_type type) +{ + guchar *o, *end, decoded; + guchar c; + guint acc = 0U; + guint processed_bits = 0; + gsize i; + const guchar *b32_dec; + bool inverse_bits = true; + + end = out + outlen; + o = out; + + switch (type) { + case RSPAMD_BASE32_DEFAULT: + b32_dec = b32_dec_zbase; + break; + case RSPAMD_BASE32_BLEACH: + b32_dec = b32_dec_bleach; + inverse_bits = false; + break; + case RSPAMD_BASE32_RFC: + b32_dec = b32_dec_rfc; + inverse_bits = false; + break; + default: + g_assert_not_reached(); + abort(); + } + + if (inverse_bits) { + for (i = 0; i < inlen; i++) { + c = (guchar) in[i]; + + if (processed_bits >= 8) { + /* Emit from left to right */ + processed_bits -= 8; + *o++ = acc & 0xFF; + acc >>= 8; + } + + decoded = b32_dec[c]; + if (decoded == 0xff || 
o >= end) { + return -1; + } + + acc = (decoded << processed_bits) | acc; + processed_bits += 5; + } + + if (processed_bits > 0 && o < end) { + *o++ = (acc & 0xFF); + } + else if (o > end) { + return -1; + } + } + else { + for (i = 0; i < inlen; i++) { + c = (guchar) in[i]; + + decoded = b32_dec[c]; + if (decoded == 0xff) { + return -1; + } + + acc = (acc << 5) | decoded; + processed_bits += 5; + + if (processed_bits >= 8) { + /* Emit from right to left */ + processed_bits -= 8; + + /* Output buffer overflow */ + if (o >= end) { + return -1; + } + + *o++ = (acc >> processed_bits) & 0xFF; + /* Preserve lowers at the higher parts of the input */ + acc = (acc & ((1u << processed_bits) - 1)); + } + } + + if (processed_bits > 0 && o < end && acc != 0) { + *o++ = (acc & 0xFF); + } + else if (o > end) { + return -1; + } + } + + return (o - out); +} +

/**
 * Decode a base32 string into a newly allocated buffer.
 * @param in base32 text
 * @param inlen length of the text
 * @param outlen optional output: decoded length (0 on failure)
 * @param type alphabet / bit order selector
 * @return buffer allocated with g_malloc (release with g_free) with a NUL
 *         byte appended after the decoded data, or NULL if decoding failed
 */
guchar *
rspamd_decode_base32(const gchar *in, gsize inlen, gsize *outlen,
					 enum rspamd_base32_type type)
{
	guchar *res;

	gsize allocated_len = inlen * 5 / 8 + 2;
	gssize olen;

	res = g_malloc(allocated_len);

	/* One byte is held back for the terminator */
	olen = rspamd_decode_base32_buf(in, inlen, res, allocated_len - 1,
									type);

	if (olen >= 0) {
		res[olen] = '\0';
	}
	else {
		g_free(res);

		if (outlen) {
			*outlen = 0;
		}

		return NULL;
	}

	if (outlen) {
		*outlen = olen;
	}

	return res;
}


/**
 * Encode `inlen` bytes as base64 into a newly allocated NUL-terminated string.
 *
 * @param in input bytes
 * @param inlen number of input bytes
 * @param str_len maximum line length; <= 0 disables line splitting, otherwise
 *        it must be greater than 8
 * @param outlen optional output: length of the result without the NUL
 * @param fold if TRUE, a tab is emitted after every inserted newline
 * @param how newline flavour used for line splitting
 * @return string allocated with g_malloc (release with g_free)
 */
gchar *
rspamd_encode_base64_common(const guchar *in, gsize inlen, gint str_len,
							gsize *outlen, gboolean fold, enum rspamd_newlines_type how)
{
#define ADD_SPLIT                                                                                \
	do {                                                                                         \
		if (how == RSPAMD_TASK_NEWLINES_CR || how == RSPAMD_TASK_NEWLINES_CRLF) *o++ = '\r';     \
		if (how == RSPAMD_TASK_NEWLINES_LF || how == RSPAMD_TASK_NEWLINES_CRLF) *o++ = '\n';     \
		if (fold) *o++ = '\t';                                                                   \
	} while (0)
#define CHECK_SPLIT                           \
	do {                                      \
		if (str_len > 0 && cols >= str_len) { \
			ADD_SPLIT;                        \
			cols = 0;                         \
		}                                     \
	} while (0)

	gsize allocated_len = (inlen / 3) * 4 + 5;
	gchar *out, *o;
	guint64 n;
	guint32 rem, t, carry;
	gint cols, shift;
	static const char b64_enc[] =
		"ABCDEFGHIJKLMNOPQRSTUVWXYZ"
		"abcdefghijklmnopqrstuvwxyz"
		"0123456789+/";

	if (str_len > 0) {
		g_assert(str_len > 8);
		/* Reserve room for the line separators (and the folding tab) */
		if (fold) {
			switch (how) {
			case RSPAMD_TASK_NEWLINES_CR:
			case RSPAMD_TASK_NEWLINES_LF:
				allocated_len += (allocated_len / str_len + 1) * 2 + 1;
				break;
			default:
				allocated_len += (allocated_len / str_len + 1) * 3 + 1;
				break;
			}
		}
		else {
			switch (how) {
			case RSPAMD_TASK_NEWLINES_CR:
			case RSPAMD_TASK_NEWLINES_LF:
				allocated_len += (allocated_len / str_len + 1) * 1 + 1;
				break;
			default:
				allocated_len += (allocated_len / str_len + 1) * 2 + 1;
				break;
			}
		}
	}

	out = g_malloc(allocated_len);
	o = out;
	cols = 0;

	/*
	 * Fast path: load 8 bytes, emit the 8 characters that encode the first 6
	 * of them. The load needs all 8 bytes to be inside the input, so the
	 * last (up to 7) bytes are left to the byte-wise loop below.
	 */
	while (inlen >= sizeof(n)) {
		memcpy(&n, in, sizeof(n));
		n = GUINT64_TO_BE(n);

		if (str_len <= 0 || cols <= str_len - 8) {
			*o++ = b64_enc[(n >> 58) & 0x3F];
			*o++ = b64_enc[(n >> 52) & 0x3F];
			*o++ = b64_enc[(n >> 46) & 0x3F];
			*o++ = b64_enc[(n >> 40) & 0x3F];
			*o++ = b64_enc[(n >> 34) & 0x3F];
			*o++ = b64_enc[(n >> 28) & 0x3F];
			*o++ = b64_enc[(n >> 22) & 0x3F];
			*o++ = b64_enc[(n >> 16) & 0x3F];
			cols += 8;
		}
		else {
			/* The line ends inside this group: fill it, split, continue */
			cols = str_len - cols;
			shift = 58;
			while (cols) {
				*o++ = b64_enc[(n >> shift) & 0x3F];
				shift -= 6;
				cols--;
			}

			ADD_SPLIT;

			/* Remaining bytes */
			while (shift >= 16) {
				*o++ = b64_enc[(n >> shift) & 0x3F];
				shift -= 6;
				cols++;
			}
		}

		in += 6;
		inlen -= 6;
	}

	CHECK_SPLIT;

	rem = 0;
	carry = 0;

	for (;;) {
		/* Padding + remaining data, one input byte per case */
		switch (rem) {
		case 0:
			if (inlen-- == 0) {
				goto end;
			}
			t = *in++;
			*o++ = b64_enc[t >> 2];
			carry = (t << 4) & 0x30;
			rem = 1;
			cols++;
			/* FALLTHRU */
		case 1:
			if (inlen-- == 0) {
				goto end;
			}
			CHECK_SPLIT;
			t = *in++;
			*o++ = b64_enc[carry | (t >> 4)];
			carry = (t << 2) & 0x3C;
			rem = 2;
			cols++;
			/* FALLTHRU */
		default:
			if (inlen-- == 0) {
				goto end;
			}
			CHECK_SPLIT;
			t = *in++;
			*o++ = b64_enc[carry | (t >> 6)];
			cols++;
			CHECK_SPLIT;
			*o++ = b64_enc[t & 0x3F];
			cols++;
			CHECK_SPLIT;
			rem = 0;
		}
	}

end:
	if (rem == 1) {
		/* One byte in the last group: two characters plus "==" */
		*o++ = b64_enc[carry];
		cols++;
		CHECK_SPLIT;
		*o++ = '=';
		cols++;
		CHECK_SPLIT;
		*o++ = '=';
		cols++;
		CHECK_SPLIT;
	}
	else if (rem == 2) {
		/* Two bytes in the last group: three characters plus "=" */
		*o++ = b64_enc[carry];
		cols++;
		CHECK_SPLIT;
		*o++ = '=';
		cols++;
	}

	CHECK_SPLIT;

	*o = '\0';

	if (outlen != NULL) {
		*outlen = o - out;
	}

	return out;
}

/**
 * Base64-encode with CRLF line splitting and no folding tab.
 * See rspamd_encode_base64_common for the meaning of the arguments.
 */
gchar *
rspamd_encode_base64(const guchar *in, gsize inlen, gint str_len,
					 gsize *outlen)
{
	return rspamd_encode_base64_common(in, inlen, str_len, outlen, FALSE,
									   RSPAMD_TASK_NEWLINES_CRLF);
}

/**
 * Base64-encode with folding (a tab after every inserted newline) using the
 * requested newline flavour.
 * See rspamd_encode_base64_common for the meaning of the arguments.
 */
gchar *
rspamd_encode_base64_fold(const guchar *in, gsize inlen, gint str_len,
						  gsize *outlen, enum rspamd_newlines_type how)
{
	return rspamd_encode_base64_common(in, inlen, str_len, outlen, TRUE, how);
}

#define QP_RANGE(x) (((x) >= 33 && (x) <= 60) || ((x) >= 62 && (x) <= 126) || (x) == '\r' || (x) == '\n' || (x) == ' ' || (x) == '\t')
#define QP_SPAN_NORMAL(span, str_len) ((str_len) > 0 && \
									   ((span) + 1) >= (str_len))
#define QP_SPAN_SPECIAL(span, str_len) ((str_len) > 0 && \
										((span) + 4) >= (str_len))

gchar * +rspamd_encode_qp_fold(const guchar *in, gsize inlen, gint str_len, + gsize *outlen, enum rspamd_newlines_type how) +{ + gsize olen = 0, span = 0, i = 0, seen_spaces = 0; + gchar *out; + gint ch, last_sp; + const guchar *end = in + inlen, *p = in; + static const gchar hexdigests[16] = "0123456789ABCDEF"; + + while (p < end) { + ch = *p; + + if (QP_RANGE(ch)) { + olen++; + span++; + + if (ch == '\r' || ch == '\n') { + if (seen_spaces > 0) { + /* We must encode spaces at the end of line */ + olen += 3; + seen_spaces = 0; + /* Special stuff for space character at the end */ + if (QP_SPAN_SPECIAL(span, str_len)) { + if (how == RSPAMD_TASK_NEWLINES_CRLF) { + /* =\r\n */ + olen += 3; + } + else { + olen += 2; + } + } + /* Continue with the same `ch` but without spaces logic */ + continue; + } + + span = 0; + } + else if 
(ch == ' ' || ch == '\t') { + seen_spaces++; + last_sp = ch; + } + else { + seen_spaces = 0; + } + } + else { + if (QP_SPAN_SPECIAL(span, str_len)) { + if (how == RSPAMD_TASK_NEWLINES_CRLF) { + /* =\r\n */ + olen += 3; + } + else { + olen += 2; + } + span = 0; + } + + olen += 3; + span += 3; + } + + if (QP_SPAN_NORMAL(span, str_len)) { + if (how == RSPAMD_TASK_NEWLINES_CRLF) { + /* =\r\n */ + olen += 3; + } + else { + olen += 2; + } + span = 0; + } + + p++; + } + + if (seen_spaces > 0) { + /* Reserve length for the last space encoded */ + olen += 3; + } + + out = g_malloc(olen + 1); + p = in; + i = 0; + span = 0; + seen_spaces = 0; + + while (p < end) { + ch = *p; + + if (QP_RANGE(ch)) { + if (ch == '\r' || ch == '\n') { + if (seen_spaces > 0) { + if (QP_SPAN_SPECIAL(span, str_len)) { + /* Add soft newline */ + i--; + + if (p + 1 < end || span + 3 >= str_len) { + switch (how) { + default: + case RSPAMD_TASK_NEWLINES_CRLF: + out[i++] = '='; + out[i++] = '\r'; + out[i++] = '\n'; + break; + case RSPAMD_TASK_NEWLINES_LF: + out[i++] = '='; + out[i++] = '\n'; + break; + case RSPAMD_TASK_NEWLINES_CR: + out[i++] = '='; + out[i++] = '\r'; + break; + } + } + + /* Now write encoded `last_sp` but after newline */ + out[i++] = '='; + out[i++] = hexdigests[((last_sp >> 4) & 0xF)]; + out[i++] = hexdigests[(last_sp & 0xF)]; + + span = 0; + } + else { + /* Encode last space */ + --i; + out[i++] = '='; + out[i++] = hexdigests[((last_sp >> 4) & 0xF)]; + out[i++] = hexdigests[(last_sp & 0xF)]; + seen_spaces = 0; + } + + continue; + } + span = 0; + } + else if (ch == ' ' || ch == '\t') { + seen_spaces++; + last_sp = ch; + span++; + } + else { + seen_spaces = 0; + span++; + } + + out[i++] = ch; + } + else { + if (QP_SPAN_SPECIAL(span, str_len)) { + /* Add new line and then continue */ + if (p + 1 < end || span + 3 >= str_len) { + switch (how) { + default: + case RSPAMD_TASK_NEWLINES_CRLF: + out[i++] = '='; + out[i++] = '\r'; + out[i++] = '\n'; + break; + case RSPAMD_TASK_NEWLINES_LF: + 
out[i++] = '='; + out[i++] = '\n'; + break; + case RSPAMD_TASK_NEWLINES_CR: + out[i++] = '='; + out[i++] = '\r'; + break; + } + span = 0; + } + } + + out[i++] = '='; + out[i++] = hexdigests[((ch >> 4) & 0xF)]; + out[i++] = hexdigests[(ch & 0xF)]; + span += 3; + seen_spaces = 0; + } + + if (QP_SPAN_NORMAL(span, str_len)) { + /* Add new line and then continue */ + if (p + 1 < end || span > str_len || seen_spaces) { + switch (how) { + default: + case RSPAMD_TASK_NEWLINES_CRLF: + out[i++] = '='; + out[i++] = '\r'; + out[i++] = '\n'; + break; + case RSPAMD_TASK_NEWLINES_LF: + out[i++] = '='; + out[i++] = '\n'; + break; + case RSPAMD_TASK_NEWLINES_CR: + out[i++] = '='; + out[i++] = '\r'; + break; + } + span = 0; + seen_spaces = 0; + } + } + + g_assert(i <= olen); + p++; + } + + /* Deal with the last space character */ + if (seen_spaces > 0) { + i--; + out[i++] = '='; + out[i++] = hexdigests[((last_sp >> 4) & 0xF)]; + out[i++] = hexdigests[(last_sp & 0xF)]; + } + + out[i] = '\0'; + + if (outlen) { + *outlen = i; + } + + return out; +} + +#define MIN3(a, b, c) ((a) < (b) ? ((a) < (c) ? (a) : (c)) : ((b) < (c) ? 
(b) : (c))) + +gint rspamd_strings_levenshtein_distance(const gchar *s1, gsize s1len, + const gchar *s2, gsize s2len, + guint replace_cost) +{ + gchar c1, c2, last_c2, last_c1; + static GArray *current_row = NULL, *prev_row = NULL, *transp_row = NULL; + gint eq; + static const guint max_cmp = 8192; + gint ret; + + g_assert(s1 != NULL); + g_assert(s2 != NULL); + + if (s1len == 0) { + s1len = strlen(s1); + } + if (s2len == 0) { + s2len = strlen(s2); + } + + if (MAX(s1len, s2len) > max_cmp) { + /* Cannot compare too many characters */ + return max_cmp; + } + + if (s1len > s2len) { + /* Exchange s1 and s2 */ + const gchar *tmp; + gsize tmplen; + + tmp = s2; + s2 = s1; + s1 = tmp; + + tmplen = s2len; + s2len = s1len; + s1len = tmplen; + } + + /* Adjust static space */ + if (current_row == NULL) { + current_row = g_array_sized_new(FALSE, FALSE, sizeof(gint), s1len + 1); + prev_row = g_array_sized_new(FALSE, FALSE, sizeof(gint), s1len + 1); + transp_row = g_array_sized_new(FALSE, FALSE, sizeof(gint), s1len + 1); + g_array_set_size(current_row, s1len + 1); + g_array_set_size(prev_row, s1len + 1); + g_array_set_size(transp_row, s1len + 1); + } + else if (current_row->len < s1len + 1) { + g_array_set_size(current_row, s1len + 1); + g_array_set_size(prev_row, s1len + 1); + g_array_set_size(transp_row, s1len + 1); + } + + memset(current_row->data, 0, (s1len + 1) * sizeof(gint)); + memset(transp_row->data, 0, (s1len + 1) * sizeof(gint)); + + for (gint i = 0; i <= s1len; i++) { + g_array_index(prev_row, gint, i) = i; + } + + last_c2 = '\0'; + + for (gint i = 1; i <= s2len; i++) { + c2 = s2[i - 1]; + g_array_index(current_row, gint, 0) = i; + last_c1 = '\0'; + + for (gint j = 1; j <= s1len; j++) { + c1 = s1[j - 1]; + eq = c1 == c2 ? 
0 : replace_cost; + ret = MIN3(g_array_index(current_row, gint, j - 1) + 1, /* Insert */ + g_array_index(prev_row, gint, j) + 1, /* Remove */ + g_array_index(prev_row, gint, j - 1) + eq /* Replace */); + + /* Take reordering into account */ + if (c1 == last_c2 && c2 == last_c1 && j >= 2) { + ret = MIN(ret, g_array_index(transp_row, gint, j - 2) + eq); + } + + g_array_index(current_row, gint, j) = ret; + last_c1 = c1; + } + + last_c2 = c2; + + /* Exchange pointers */ + GArray *tmp; + tmp = transp_row; + transp_row = prev_row; + prev_row = current_row; + current_row = tmp; + } + + ret = g_array_index(prev_row, gint, s1len); + + return ret; +} + +GString * +rspamd_header_value_fold(const gchar *name, gsize name_len, + const gchar *value, + gsize value_len, + guint fold_max, + enum rspamd_newlines_type how, + const gchar *fold_on_chars) +{ + GString *res; + const guint default_fold_max = 76; + guint cur_len; + const gchar *p, *c, *end, *fold_sequence; + guint nspaces = 0; + gboolean first_token = TRUE; + enum { + fold_before = 0, + fold_after + } fold_type = fold_before; + enum { + read_token = 0, + read_quoted, + after_quote, + fold_token, + } state = read_token, + next_state = read_token; + + g_assert(name != NULL); + g_assert(value != NULL); + + /* Filter insane values */ + if (fold_max < 20) { + fold_max = default_fold_max; + } + + switch (how) { + case RSPAMD_TASK_NEWLINES_LF: + fold_sequence = "\n\t"; + break; + case RSPAMD_TASK_NEWLINES_CR: + fold_sequence = "\r\t"; + break; + case RSPAMD_TASK_NEWLINES_CRLF: + default: + fold_sequence = "\r\n\t"; + break; + } + + res = g_string_sized_new(value_len); + + c = value; + p = c; + end = value + value_len; + /* name:<WSP> */ + cur_len = name_len + 2; + + while (p < end) { + switch (state) { + + case read_token: + if (fold_on_chars) { + if (strchr(fold_on_chars, *p) != NULL) { + fold_type = fold_after; + state = fold_token; + next_state = read_token; + } + + p++; + } + else { + if (*p == ',' || *p == ';') { + /* We have 
something similar to the token's end, so check len */ + if (cur_len > fold_max * 0.8 && cur_len < fold_max) { + /* We want fold */ + fold_type = fold_after; + state = fold_token; + next_state = read_token; + } + else if (cur_len > fold_max && !first_token) { + fold_type = fold_before; + state = fold_token; + next_state = read_token; + } + else { + g_string_append_len(res, c, p - c + 1); + c = p + 1; + first_token = FALSE; + } + p++; + } + else if (*p == '"') { + /* Fold before quoted tokens */ + g_string_append_len(res, c, p - c); + c = p; + state = read_quoted; + } + else if (*p == '\r' || *p == '\n') { + if (cur_len > fold_max && !first_token) { + fold_type = fold_before; + state = fold_token; + next_state = read_token; + } + else { + /* We need to ensure that it is a folding and not something else */ + + const char *t = p; + bool seen_fold = false; + + while (t < end) { + if (*t == ' ' || *t == '\t') { + seen_fold = true; + break; + } + else if (!g_ascii_isspace(*t)) { + break; + } + + t++; + } + + if (seen_fold) { + /* Reset line length */ + cur_len = 0; + + while (g_ascii_isspace(*p)) { + p++; + } + + g_string_append_len(res, c, p - c); + c = p; + first_token = TRUE; + } + else { + /* Not seen folding, inject it */ + g_string_append_len(res, c, p - c); + g_string_append(res, fold_sequence); + p = t; /* Adjust p to ensure that we do not append extra stuff */ + state = read_token; + first_token = TRUE; + c = p; + } + } + } + else if (g_ascii_isspace(*p)) { + if (cur_len > fold_max * 0.8 && cur_len < fold_max) { + /* We want fold */ + fold_type = fold_after; + state = fold_token; + next_state = read_token; + } + else if (cur_len > fold_max && !first_token) { + fold_type = fold_before; + state = fold_token; + next_state = read_token; + } + else { + g_string_append_len(res, c, p - c); + c = p; + first_token = FALSE; + p++; + cur_len++; + } + } + else { + p++; + cur_len++; + } + } + break; + case fold_token: + /* Here, we have token start at 'c' and token end at 'p' 
*/ + if (fold_type == fold_after) { + nspaces = 0; + if (p > c) { + g_string_append_len(res, c, p - c); + + /* + * Check any spaces that are appended to the result + * before folding + */ + const gchar *last = &res->str[res->len - 1]; + + while (g_ascii_isspace(*last)) { + last--; + nspaces++; + res->len--; + } + } + + g_string_append(res, fold_sequence); + + /* Skip space if needed */ + if (g_ascii_isspace(*p)) { + p++; + } + + /* Move leftover spaces */ + while (nspaces) { + g_string_append_c(res, ' '); + nspaces--; + } + + cur_len = 0; + } + else { + const gchar *last; + + /* Skip space if needed */ + if (g_ascii_isspace(*c) && p > c) { + c++; + } + + /* Avoid double folding */ + last = &res->str[res->len - 1]; + last--; + + if (*last != '\r' && *last != '\n') { + last++; + while (g_ascii_isspace(*last)) { + last--; + nspaces++; + res->len--; + } + + g_string_append(res, fold_sequence); + } + + /* Move leftover spaces */ + cur_len = nspaces; + + while (nspaces) { + g_string_append_c(res, ' '); + nspaces--; + } + + if (p > c) { + g_string_append_len(res, c, p - c); + cur_len += p - c; + } + else { + cur_len = 0; + } + } + + first_token = TRUE; + c = p; + state = next_state; + break; + + case read_quoted: + if (p != c && *p == '"') { + state = after_quote; + } + p++; + cur_len++; + break; + + case after_quote: + state = read_token; + /* Skip one more character after the quote */ + p++; + cur_len++; + g_string_append_len(res, c, p - c); + c = p; + first_token = TRUE; + break; + } + } + + /* Last token */ + switch (state) { + case read_token: + if (!fold_on_chars && cur_len > fold_max && !first_token) { + if (g_ascii_isspace(*c)) { + c++; + } + g_string_append(res, fold_sequence); + g_string_append_len(res, c, p - c); + } + else { + g_string_append_len(res, c, p - c); + } + break; + case read_quoted: + case after_quote: + g_string_append_len(res, c, p - c); + break; + case fold_token: + /* Here, we have token start at 'c' and token end at 'p' */ + if 
(g_ascii_isspace(res->str[res->len - 1])) { + g_string_append_len(res, c, p - c); + } + else { + if (*c != '\r' && *c != '\n') { + /* We need to add folding as well */ + g_string_append(res, fold_sequence); + g_string_append_len(res, c, p - c); + } + else { + g_string_append_len(res, c, p - c); + } + } + break; + default: + g_assert(p == c); + break; + } + + return res; +} + +static inline bool rspamd_substring_cmp_func(guchar a, guchar b) +{ + return a == b; +} + +static inline bool rspamd_substring_casecmp_func(guchar a, guchar b) +{ + return lc_map[a] == lc_map[b]; +} + +typedef bool (*rspamd_cmpchar_func_t)(guchar a, guchar b); + +static inline void +rspamd_substring_preprocess_kmp(const gchar *pat, gsize len, goffset *fsm, + rspamd_cmpchar_func_t f) +{ + goffset i, j; + + i = 0; + j = -1; + fsm[0] = -1; + + while (i < len) { + while (j > -1 && !f(pat[i], pat[j])) { + j = fsm[j]; + } + + i++; + j++; + + if (i < len && j < len && f(pat[i], pat[j])) { + fsm[i] = fsm[j]; + } + else { + fsm[i] = j; + } + } +} + +static inline goffset +rspamd_substring_search_preprocessed(const gchar *in, gsize inlen, + const gchar *srch, + gsize srchlen, + const goffset *fsm, + rspamd_cmpchar_func_t f) +{ + goffset i, j, k, ell; + + for (ell = 1; f(srch[ell - 1], srch[ell]); ell++) {} + if (ell == srchlen) { + ell = 0; + } + + /* Searching */ + i = ell; + j = k = 0; + + while (j <= inlen - srchlen) { + while (i < srchlen && f(srch[i], in[i + j])) { + ++i; + } + + if (i >= srchlen) { + while (k < ell && f(srch[k], in[j + k])) { + ++k; + } + + if (k >= ell) { + return j; + } + } + + j += (i - fsm[i]); + + if (i == ell) { + k = MAX(0, k - 1); + } + else { + if (fsm[i] <= ell) { + k = MAX(0, fsm[i]); + i = ell; + } + else { + k = ell; + i = fsm[i]; + } + } + } + + return -1; +} + +static inline goffset +rspamd_substring_search_common(const gchar *in, gsize inlen, + const gchar *srch, gsize srchlen, rspamd_cmpchar_func_t f) +{ + static goffset st_fsm[128]; + goffset *fsm, ret; + + if 
(G_LIKELY(srchlen < G_N_ELEMENTS(st_fsm))) { + fsm = st_fsm; + } + else { + fsm = g_malloc((srchlen + 1) * sizeof(*fsm)); + } + + rspamd_substring_preprocess_kmp(srch, srchlen, fsm, f); + ret = rspamd_substring_search_preprocessed(in, inlen, srch, srchlen, fsm, f); + + if (G_UNLIKELY(srchlen >= G_N_ELEMENTS(st_fsm))) { + g_free(fsm); + } + + return ret; +} + +goffset +rspamd_substring_search(const gchar *in, gsize inlen, + const gchar *srch, gsize srchlen) +{ + if (inlen > srchlen) { + if (G_UNLIKELY(srchlen == 1)) { + const gchar *p; + + p = memchr(in, srch[0], inlen); + + if (p) { + return p - in; + } + + return (-1); + } + else if (G_UNLIKELY(srchlen == 0)) { + return 0; + } + + return rspamd_substring_search_common(in, inlen, srch, srchlen, + rspamd_substring_cmp_func); + } + else if (inlen == srchlen) { + return (rspamd_lc_cmp(srch, in, srchlen) == 0 ? 0 : -1); + } + else { + return (-1); + } + + return (-1); +} + +goffset +rspamd_substring_search_caseless(const gchar *in, gsize inlen, + const gchar *srch, gsize srchlen) +{ + if (inlen > srchlen) { + if (G_UNLIKELY(srchlen == 1)) { + goffset i; + gchar s = lc_map[(guchar) srch[0]]; + + for (i = 0; i < inlen; i++) { + if (lc_map[(guchar) in[i]] == s) { + return i; + } + } + + return (-1); + } + + return rspamd_substring_search_common(in, inlen, srch, srchlen, + rspamd_substring_casecmp_func); + } + else if (inlen == srchlen) { + return rspamd_lc_cmp(srch, in, srchlen) == 0 ? 
0 : (-1); + } + + return (-1); +} + +goffset +rspamd_string_find_eoh(GString *input, goffset *body_start) +{ + const gchar *p, *c = NULL, *end; + enum { + skip_char = 0, + got_cr, + got_lf, + got_linebreak, + got_linebreak_cr, + got_linebreak_lf, + obs_fws + } state = skip_char; + + g_assert(input != NULL); + + p = input->str; + end = p + input->len; + + while (p < end) { + switch (state) { + case skip_char: + if (*p == '\r') { + p++; + state = got_cr; + } + else if (*p == '\n') { + p++; + state = got_lf; + } + else { + p++; + } + break; + + case got_cr: + if (*p == '\r') { + /* + * Double \r\r, so need to check the current char + * if it is '\n', then we have \r\r\n sequence, that is NOT + * double end of line + */ + if (p < end && p[1] == '\n') { + p++; + state = got_lf; + } + else { + /* We have \r\r[^\n] */ + if (body_start) { + *body_start = p - input->str + 1; + } + + return p - input->str; + } + } + else if (*p == '\n') { + p++; + state = got_lf; + } + else if (g_ascii_isspace(*p)) { + /* We have \r<space>*, allow to stay in this state */ + c = p; + p++; + state = obs_fws; + } + else { + p++; + state = skip_char; + } + break; + case got_lf: + if (*p == '\n') { + /* We have \n\n, which is obviously end of headers */ + if (body_start) { + *body_start = p - input->str + 1; + } + return p - input->str; + } + else if (*p == '\r') { + state = got_linebreak; + } + else if (g_ascii_isspace(*p)) { + /* We have \n<space>*, allow to stay in this state */ + c = p; + p++; + state = obs_fws; + } + else { + p++; + state = skip_char; + } + break; + case got_linebreak: + if (*p == '\r') { + c = p; + p++; + state = got_linebreak_cr; + } + else if (*p == '\n') { + c = p; + p++; + state = got_linebreak_lf; + } + else if (g_ascii_isspace(*p)) { + /* We have <linebreak><space>*, allow to stay in this state */ + c = p; + p++; + state = obs_fws; + } + else { + p++; + state = skip_char; + } + break; + case got_linebreak_cr: + if (*p == '\r') { + /* Got double \r\r after \n, so does 
not treat it as EOH */ + state = got_linebreak_cr; + p++; + } + else if (*p == '\n') { + state = got_linebreak_lf; + p++; + } + else if (g_ascii_isspace(*p)) { + /* We have \r\n<space>*, allow to keep in this state */ + c = p; + state = obs_fws; + p++; + } + else { + p++; + state = skip_char; + } + break; + case got_linebreak_lf: + g_assert(c != NULL); + if (body_start) { + /* \r\n\r\n */ + *body_start = p - input->str; + } + + return c - input->str; + case obs_fws: + if (*p == ' ' || *p == '\t') { + p++; + } + else if (*p == '\r') { + /* Perform lookahead due to #2349 */ + if (end - p > 2) { + if (p[1] == '\n' && g_ascii_isspace(p[2])) { + /* Real obs_fws state, switch */ + c = p; + p++; + state = got_cr; + } + else if (g_ascii_isspace(p[1])) { + p++; + state = obs_fws; + } + else { + /* + * <nline> <wsp>+ \r <nwsp>. + * It is an empty header likely, so we can go further... + * https://tools.ietf.org/html/rfc2822#section-4.2 + */ + c = p; + p++; + state = got_cr; + } + } + else { + /* shortage */ + if (body_start) { + *body_start = p - input->str + 1; + } + + return p - input->str; + } + } + else if (*p == '\n') { + /* Perform lookahead due to #2349 */ + if (end - p > 1) { + /* Continue folding with an empty line */ + if (p[1] == ' ' || p[1] == '\t') { + c = p; + p++; + state = obs_fws; + } + else if (p[1] == '\r') { + /* WTF state: we have seen spaces, \n and then it follows \r */ + c = p; + p++; + state = got_lf; + } + else if (p[1] == '\n') { + /* + * Switching to got_lf state here will let us to finish + * the cycle. + */ + c = p; + p++; + state = got_lf; + } + else { + /* + * <nline> <wsp>+ \n <nwsp>. + * It is an empty header likely, so we can go further... 
+ * https://tools.ietf.org/html/rfc2822#section-4.2 + */ + c = p; + p++; + state = got_lf; + } + } + else { + /* shortage */ + if (body_start) { + *body_start = p - input->str + 1; + } + + return p - input->str; + } + } + else { + p++; + state = skip_char; + } + break; + } + } + + if (state == got_linebreak_lf) { + if (body_start) { + /* \r\n\r\n */ + *body_start = p - input->str; + } + + return c - input->str; + } + + return -1; +} + +gint rspamd_encode_hex_buf(const guchar *in, gsize inlen, gchar *out, + gsize outlen) +{ + gchar *o, *end; + const guchar *p; + static const gchar hexdigests[16] = "0123456789abcdef"; + + end = out + outlen; + o = out; + p = in; + + while (inlen > 0 && o < end - 1) { + *o++ = hexdigests[((*p >> 4) & 0xF)]; + *o++ = hexdigests[((*p++) & 0xF)]; + inlen--; + } + + if (o <= end) { + return (o - out); + } + + return -1; +} + +gchar * +rspamd_encode_hex(const guchar *in, gsize inlen) +{ + gchar *out; + gsize outlen = inlen * 2 + 1; + gint olen; + + if (in == NULL) { + return NULL; + } + + out = g_malloc(outlen); + olen = rspamd_encode_hex_buf(in, inlen, out, outlen - 1); + + if (olen >= 0) { + out[olen] = '\0'; + } + else { + g_free(out); + + return NULL; + } + + return out; +} + +gssize +rspamd_decode_hex_buf(const gchar *in, gsize inlen, + guchar *out, gsize outlen) +{ + guchar *o, *end, ret = 0; + const gchar *p; + gchar c; + + end = out + outlen; + o = out; + p = in; + + /* We ignore trailing chars if we have not even input */ + inlen = inlen - inlen % 2; + + while (inlen > 1 && o < end) { + c = *p++; + + if (c >= '0' && c <= '9') ret = c - '0'; + else if (c >= 'A' && c <= 'F') + ret = c - 'A' + 10; + else if (c >= 'a' && c <= 'f') + ret = c - 'a' + 10; + + c = *p++; + ret *= 16; + + if (c >= '0' && c <= '9') ret += c - '0'; + else if (c >= 'A' && c <= 'F') + ret += c - 'A' + 10; + else if (c >= 'a' && c <= 'f') + ret += c - 'a' + 10; + + *o++ = ret; + + inlen -= 2; + } + + if (o <= end) { + return (o - out); + } + + return -1; +} + 
+guchar * +rspamd_decode_hex(const gchar *in, gsize inlen) +{ + guchar *out; + gsize outlen = (inlen / 2 + inlen % 2) + 1; + gint olen; + + if (in == NULL) { + return NULL; + } + + out = g_malloc(outlen); + + olen = rspamd_decode_hex_buf(in, inlen, out, outlen - 1); + + if (olen >= 0) { + out[olen] = '\0'; + + return out; + } + + g_free(out); + + return NULL; +} + +gssize +rspamd_decode_qp_buf(const gchar *in, gsize inlen, + gchar *out, gsize outlen) +{ + gchar *o, *end, *pos, c; + const gchar *p; + guchar ret; + gssize remain, processed; + + p = in; + o = out; + end = out + outlen; + remain = inlen; + + while (remain > 0 && o < end) { + if (*p == '=') { + remain--; + + if (remain == 0) { + /* Last '=' character, bugon */ + if (end - o > 0) { + *o++ = *p; + } + else { + /* Buffer overflow */ + return (-1); + } + + break; + } + + p++; + decode: + /* Decode character after '=' */ + c = *p++; + remain--; + ret = 0; + + if (c >= '0' && c <= '9') { + ret = c - '0'; + } + else if (c >= 'A' && c <= 'F') { + ret = c - 'A' + 10; + } + else if (c >= 'a' && c <= 'f') { + ret = c - 'a' + 10; + } + else if (c == '\r') { + /* Eat one more endline */ + if (remain > 0 && *p == '\n') { + p++; + remain--; + } + + continue; + } + else if (c == '\n') { + /* Soft line break */ + continue; + } + else { + /* Hack, hack, hack, treat =<garbage> as =<garbage> */ + if (end - o > 1) { + *o++ = '='; + *o++ = *(p - 1); + } + else { + return (-1); + } + + continue; + } + + if (remain > 0) { + c = *p++; + ret *= 16; + remain--; + + if (c >= '0' && c <= '9') { + ret += c - '0'; + } + else if (c >= 'A' && c <= 'F') { + ret += c - 'A' + 10; + } + else if (c >= 'a' && c <= 'f') { + ret += c - 'a' + 10; + } + else { + /* Treat =<good><rubbish> as =<good><rubbish> */ + if (end - o > 2) { + *o++ = '='; + *o++ = *(p - 2); + *o++ = *(p - 1); + } + else { + return (-1); + } + + continue; + } + + if (end - o > 0) { + *o++ = (gchar) ret; + } + else { + return (-1); + } + } + } + else { + if (end - o >= 
remain) { + if ((pos = memccpy(o, p, '=', remain)) == NULL) { + /* All copied */ + o += remain; + break; + } + else { + processed = pos - o; + remain -= processed; + p += processed; + + if (remain > 0) { + o = pos - 1; + /* + * Skip comparison and jump inside decode branch, + * as we know that we have found match + */ + goto decode; + } + else { + /* Last '=' character, bugon */ + o = pos; + + if (end - o > 0) { + *o = '='; + } + else { + /* Buffer overflow */ + return (-1); + } + + break; + } + } + } + else { + /* Buffer overflow */ + return (-1); + } + } + } + + return (o - out); +} + +gssize +rspamd_decode_uue_buf(const gchar *in, gsize inlen, + gchar *out, gsize outlen) +{ + gchar *o, *out_end; + const gchar *p; + gssize remain; + gboolean base64 = FALSE; + goffset pos; + const gchar *nline = "\r\n"; + + p = in; + o = out; + out_end = out + outlen; + remain = inlen; + + /* Skip newlines */ +#define SKIP_NEWLINE \ + do { \ + while (remain > 0 && (*p == '\n' || *p == '\r')) { \ + p++; \ + remain--; \ + } \ + } while (0) + SKIP_NEWLINE; + + /* First of all, we need to read the first line (and probably skip it) */ + if (remain < sizeof("begin-base64 ")) { + /* Obviously truncated */ + return -1; + } + + if (memcmp(p, "begin ", sizeof("begin ") - 1) == 0) { + p += sizeof("begin ") - 1; + remain -= sizeof("begin ") - 1; + + pos = rspamd_memcspn(p, nline, remain); + } + else if (memcmp(p, "begin-base64 ", sizeof("begin-base64 ") - 1) == 0) { + base64 = TRUE; + p += sizeof("begin-base64 ") - 1; + remain -= sizeof("begin-base64 ") - 1; + pos = rspamd_memcspn(p, nline, remain); + } + else { + /* Crap */ + return (-1); + } + + if (pos == -1 || remain == 0) { + /* Crap */ + return (-1); + } + +#define DEC(c) (((c) - ' ') & 077) /* single character decode */ +#define IS_DEC(c) ((((c) - ' ') >= 0) && (((c) - ' ') <= 077 + 1)) +#define CHAR_OUT(c) \ + do { \ + if (o < out_end) { *o++ = c; } \ + else { \ + return (-1); \ + } \ + } while (0) + + remain -= pos; + p = p + pos; + 
SKIP_NEWLINE; + + if (base64) { + if (!rspamd_cryptobox_base64_decode(p, + remain, + out, &outlen)) { + return (-1); + } + + return outlen; + } + + while (remain > 0 && o < out_end) { + /* Main cycle */ + const gchar *eol; + gint i, ch; + + pos = rspamd_memcspn(p, nline, remain); + + if (pos == 0) { + /* Skip empty lines */ + SKIP_NEWLINE; + + if (remain == 0) { + break; + } + } + + eol = p + pos; + remain -= eol - p; + + if ((i = DEC(*p)) <= 0) { + /* Last pos */ + break; + } + + /* i can be less than eol - p, it means uue padding which we ignore */ + for (++p; i > 0 && p < eol; p += 4, i -= 3) { + if (i >= 3 && p + 3 < eol) { + /* Process 4 bytes of input */ + if (!IS_DEC(*p)) { + return (-1); + } + if (!IS_DEC(*(p + 1))) { + return (-1); + } + if (!IS_DEC(*(p + 2))) { + return (-1); + } + if (!IS_DEC(*(p + 3))) { + return (-1); + } + ch = DEC(p[0]) << 2 | DEC(p[1]) >> 4; + CHAR_OUT(ch); + ch = DEC(p[1]) << 4 | DEC(p[2]) >> 2; + CHAR_OUT(ch); + ch = DEC(p[2]) << 6 | DEC(p[3]); + CHAR_OUT(ch); + } + else { + if (i >= 1 && p + 1 < eol) { + if (!IS_DEC(*p)) { + return (-1); + } + if (!IS_DEC(*(p + 1))) { + return (-1); + } + + ch = DEC(p[0]) << 2 | DEC(p[1]) >> 4; + CHAR_OUT(ch); + } + if (i >= 2 && p + 2 < eol) { + if (!IS_DEC(*(p + 1))) { + return (-1); + } + if (!IS_DEC(*(p + 2))) { + return (-1); + } + + ch = DEC(p[1]) << 4 | DEC(p[2]) >> 2; + CHAR_OUT(ch); + } + } + } + /* Skip newline */ + p = eol; + SKIP_NEWLINE; + } + + return (o - out); +} + +#define BITOP(a, b, op) \ + ((a)[(gsize) (b) / (8 * sizeof *(a))] op(gsize) 1 << ((gsize) (b) % (8 * sizeof *(a)))) + + +gsize rspamd_memcspn(const gchar *s, const gchar *e, gsize len) +{ + gsize byteset[32 / sizeof(gsize)]; + const gchar *p = s, *end = s + len; + + if (!e[1]) { + for (; p < end && *p != *e; p++) + ; + return p - s; + } + + memset(byteset, 0, sizeof byteset); + + for (; *e && BITOP(byteset, *(guchar *) e, |=); e++) + ; + for (; p < end && !BITOP(byteset, *(guchar *) p, &); p++) + ; + + return p - s; +} 
+ +gsize rspamd_memspn(const gchar *s, const gchar *e, gsize len) +{ + gsize byteset[32 / sizeof(gsize)]; + const gchar *p = s, *end = s + len; + + if (!e[1]) { + for (; p < end && *p == *e; p++) + ; + return p - s; + } + + memset(byteset, 0, sizeof byteset); + + for (; *e && BITOP(byteset, *(guchar *) e, |=); e++) + ; + for (; p < end && BITOP(byteset, *(guchar *) p, &); p++) + ; + + return p - s; +} + +gssize +rspamd_decode_qp2047_buf(const gchar *in, gsize inlen, + gchar *out, gsize outlen) +{ + gchar *o, *end, c; + const gchar *p; + guchar ret; + gsize remain, processed; + + p = in; + o = out; + end = out + outlen; + remain = inlen; + + while (remain > 0 && o < end) { + if (*p == '=') { + p++; + remain--; + + if (remain == 0) { + if (end - o > 0) { + *o++ = *p; + break; + } + } + decode: + /* Decode character after '=' */ + c = *p++; + remain--; + ret = 0; + + if (c >= '0' && c <= '9') { ret = c - '0'; } + else if (c >= 'A' && c <= 'F') { + ret = c - 'A' + 10; + } + else if (c >= 'a' && c <= 'f') { + ret = c - 'a' + 10; + } + else if (c == '\r' || c == '\n') { + /* Soft line break */ + while (remain > 0 && (*p == '\r' || *p == '\n')) { + remain--; + p++; + } + + continue; + } + + if (remain > 0) { + c = *p++; + ret *= 16; + + if (c >= '0' && c <= '9') { ret += c - '0'; } + else if (c >= 'A' && c <= 'F') { + ret += c - 'A' + 10; + } + else if (c >= 'a' && c <= 'f') { + ret += c - 'a' + 10; + } + + if (end - o > 0) { + *o++ = (gchar) ret; + } + else { + return (-1); + } + + remain--; + } + } + else { + if (end - o >= remain) { + processed = rspamd_memcspn(p, "=_", remain); + memcpy(o, p, processed); + o += processed; + + if (processed == remain) { + break; + } + else { + + remain -= processed; + p += processed; + + if (G_LIKELY(*p == '=')) { + p++; + /* Skip comparison, as we know that we have found match */ + remain--; + goto decode; + } + else { + *o++ = ' '; + p++; + remain--; + } + } + } + else { + /* Buffer overflow */ + return (-1); + } + } + } + + return 
(o - out); +} + +gssize +rspamd_encode_qp2047_buf(const gchar *in, gsize inlen, + gchar *out, gsize outlen) +{ + gchar *o = out, *end = out + outlen, c; + static const gchar hexdigests[16] = "0123456789ABCDEF"; + + while (inlen > 0 && o < end) { + c = *in; + + if (g_ascii_isalnum(c)) { + *o++ = c; + } + else if (c == ' ') { + *o++ = '_'; + } + else if (end - o >= 3) { + *o++ = '='; + *o++ = hexdigests[((c >> 4) & 0xF)]; + *o++ = hexdigests[(c & 0xF)]; + } + else { + return (-1); + } + + in++; + inlen--; + } + + if (inlen != 0) { + return (-1); + } + + return (o - out); +} + + +/* + * GString ucl emitting functions + */ +static int +rspamd_gstring_append_character(unsigned char c, size_t len, void *ud) +{ + GString *buf = ud; + gsize old_len; + + if (len == 1) { + g_string_append_c(buf, c); + } + else { + if (buf->allocated_len - buf->len <= len) { + old_len = buf->len; + g_string_set_size(buf, buf->len + len + 1); + buf->len = old_len; + } + memset(&buf->str[buf->len], c, len); + buf->len += len; + } + + return 0; +} + +static int +rspamd_gstring_append_len(const unsigned char *str, size_t len, void *ud) +{ + GString *buf = ud; + + g_string_append_len(buf, str, len); + + return 0; +} + +static int +rspamd_gstring_append_int(int64_t val, void *ud) +{ + GString *buf = ud; + + rspamd_printf_gstring(buf, "%L", (intmax_t) val); + return 0; +} + +static int +rspamd_gstring_append_double(double val, void *ud) +{ + GString *buf = ud; + const double delta = 0.0000001; + + if (isfinite(val)) { + if (val == (double) (int) val) { + rspamd_printf_gstring(buf, "%.1f", val); + } + else if (fabs(val - (double) (int) val) < delta) { + /* Write at maximum precision */ + rspamd_printf_gstring(buf, "%.*g", DBL_DIG, val); + } + else { + rspamd_printf_gstring(buf, "%f", val); + } + } + else { + rspamd_printf_gstring(buf, "null"); + } + + return 0; +} + +void rspamd_ucl_emit_gstring_comments(const ucl_object_t *obj, + enum ucl_emitter emit_type, + GString *target, + const ucl_object_t 
*comments) +{ + struct ucl_emitter_functions func = { + .ucl_emitter_append_character = rspamd_gstring_append_character, + .ucl_emitter_append_len = rspamd_gstring_append_len, + .ucl_emitter_append_int = rspamd_gstring_append_int, + .ucl_emitter_append_double = rspamd_gstring_append_double}; + + func.ud = target; + ucl_object_emit_full(obj, emit_type, &func, comments); +} + +/* + * FString ucl emitting functions + */ +static int +rspamd_fstring_emit_append_character(unsigned char c, size_t len, void *ud) +{ + rspamd_fstring_t **buf = ud; + + *buf = rspamd_fstring_append_chars(*buf, c, len); + + return 0; +} + +static int +rspamd_fstring_emit_append_len(const unsigned char *str, size_t len, void *ud) +{ + rspamd_fstring_t **buf = ud; + + *buf = rspamd_fstring_append(*buf, str, len); + + return 0; +} + +static int +rspamd_fstring_emit_append_int(int64_t val, void *ud) +{ + rspamd_fstring_t **buf = ud; + + rspamd_printf_fstring(buf, "%L", (intmax_t) val); + return 0; +} + +static int +rspamd_fstring_emit_append_double(double val, void *ud) +{ + rspamd_fstring_t **buf = ud; +#define MAX_PRECISION 6 + + if (isfinite(val)) { + if (val == (double) ((gint) val)) { + rspamd_printf_fstring(buf, "%.1f", val); + } + else { + rspamd_printf_fstring(buf, "%." 
G_STRINGIFY(MAX_PRECISION) "f", + val); + } + } + else { + rspamd_printf_fstring(buf, "null"); + } + + return 0; +} + +void rspamd_ucl_emit_fstring_comments(const ucl_object_t *obj, + enum ucl_emitter emit_type, + rspamd_fstring_t **buf, + const ucl_object_t *comments) +{ + struct ucl_emitter_functions func = { + .ucl_emitter_append_character = rspamd_fstring_emit_append_character, + .ucl_emitter_append_len = rspamd_fstring_emit_append_len, + .ucl_emitter_append_int = rspamd_fstring_emit_append_int, + .ucl_emitter_append_double = rspamd_fstring_emit_append_double}; + + func.ud = buf; + ucl_object_emit_full(obj, emit_type, &func, comments); +} + +#ifndef HAVE_MEMRCHR +void * +rspamd_memrchr(const void *m, gint c, gsize len) +{ + const guint8 *p = m; + + for (gsize i = len; i > 0; i--) { + if (p[i - 1] == c) { + return (void *) (p + i - 1); + } + } + + return NULL; +} +#endif + +struct UConverter * +rspamd_get_utf8_converter(void) +{ + static UConverter *utf8_conv = NULL; + UErrorCode uc_err = U_ZERO_ERROR; + + if (utf8_conv == NULL) { + utf8_conv = ucnv_open("UTF-8", &uc_err); + if (!U_SUCCESS(uc_err)) { + msg_err("FATAL error: cannot open converter for utf8: %s", + u_errorName(uc_err)); + + g_assert_not_reached(); + } + + ucnv_setFromUCallBack(utf8_conv, + UCNV_FROM_U_CALLBACK_SUBSTITUTE, + NULL, + NULL, + NULL, + &uc_err); + ucnv_setToUCallBack(utf8_conv, + UCNV_TO_U_CALLBACK_SUBSTITUTE, + NULL, + NULL, + NULL, + &uc_err); + } + + return utf8_conv; +} + + +const struct UNormalizer2 * +rspamd_get_unicode_normalizer(void) +{ +#if U_ICU_VERSION_MAJOR_NUM >= 44 + UErrorCode uc_err = U_ZERO_ERROR; + static const UNormalizer2 *norm = NULL; + + if (norm == NULL) { + norm = unorm2_getInstance(NULL, "nfkc", UNORM2_COMPOSE, &uc_err); + g_assert(U_SUCCESS(uc_err)); + } + + return norm; +#else + /* Old libicu */ + return NULL; +#endif +} + +gchar * +rspamd_str_regexp_escape(const gchar *pattern, gsize slen, + gsize *dst_len, enum rspamd_regexp_escape_flags flags) +{ + const 
gchar *p, *end = pattern + slen; + gchar *res, *d, t, *tmp_utf = NULL, *dend; + gsize len; + static const gchar hexdigests[16] = "0123456789abcdef"; + + len = 0; + p = pattern; + + /* [-[\]{}()*+?.,\\^$|#\s] need to be escaped */ + while (p < end) { + t = *p++; + + switch (t) { + case '[': + case ']': + case '-': + case '\\': + case '{': + case '}': + case '(': + case ')': + case '*': + case '+': + case '?': + case '.': + case ',': + case '^': + case '$': + case '|': + case '#': + if (!(flags & RSPAMD_REGEXP_ESCAPE_RE)) { + len++; + } + break; + default: + if (g_ascii_isspace(t)) { + len++; + } + else { + if (!g_ascii_isprint(t) || (t & 0x80)) { + + if (flags & RSPAMD_REGEXP_ESCAPE_UTF) { + /* \x{code}, where code can be up to 5 digits */ + len += 4; + } + else { + /* \\xHH -> 4 symbols */ + len += 3; + } + } + } + break; + } + } + + if (flags & RSPAMD_REGEXP_ESCAPE_UTF) { + if (rspamd_fast_utf8_validate(pattern, slen) != 0) { + tmp_utf = rspamd_str_make_utf_valid(pattern, slen, NULL, NULL); + } + } + + if (len == 0) { + /* No need to escape anything */ + + if (dst_len) { + *dst_len = slen; + } + + if (tmp_utf) { + return tmp_utf; + } + else { + return g_strdup(pattern); + } + } + + /* Escape logic */ + if (tmp_utf) { + pattern = tmp_utf; + } + + len = slen + len; + res = g_malloc(len + 1); + p = pattern; + d = res; + dend = d + len; + + while (p < end) { + g_assert(d < dend); + t = *p++; + + switch (t) { + case '[': + case ']': + case '\\': + case '{': + case '}': + case '(': + case ')': + case '.': + case ',': + case '^': + case '$': + case '|': + case '#': + if (!(flags & RSPAMD_REGEXP_ESCAPE_RE)) { + *d++ = '\\'; + } + break; + case '-': + if (flags & RSPAMD_REGEXP_ESCAPE_GLOB) { + /* + * For glob patterns, we need to ensure that a previous character is alphanumeric + * and there is `[` symbol somewhere before + */ + bool seen_brace = false; + const char *search = p; + + while (search > pattern) { + if (!g_ascii_isalnum(*search) && *search != '-') { + break; + 
} + if (*search == '[') { + seen_brace = true; + break; + } + + search--; + } + + if (!seen_brace) { + /* Escape `-` symbol */ + *d++ = '\\'; + } + } + else if (!(flags & RSPAMD_REGEXP_ESCAPE_RE)) { + *d++ = '\\'; + } + break; + case '*': + case '?': + case '+': + if (flags & RSPAMD_REGEXP_ESCAPE_GLOB) { + /* Treat * as .* and ? as .? */ + *d++ = '.'; + } + else if (!(flags & RSPAMD_REGEXP_ESCAPE_RE)) { + *d++ = '\\'; + } + break; + default: + if (g_ascii_isspace(t)) { + if (!(flags & RSPAMD_REGEXP_ESCAPE_RE)) { + *d++ = '\\'; + } + } + else if (t & 0x80 || !g_ascii_isprint(t)) { + if (!(flags & RSPAMD_REGEXP_ESCAPE_UTF)) { + *d++ = '\\'; + *d++ = 'x'; + *d++ = hexdigests[((t >> 4) & 0xF)]; + *d++ = hexdigests[((t) &0xF)]; + continue; /* To avoid *d++ = t; */ + } + else { + if (flags & (RSPAMD_REGEXP_ESCAPE_RE | RSPAMD_REGEXP_ESCAPE_GLOB)) { + UChar32 uc; + gint32 off = p - pattern - 1; + U8_NEXT(pattern, off, slen, uc); + + if (uc > 0) { + d += rspamd_snprintf(d, dend - d, + "\\x{%xd}", uc); + p = pattern + off; + } + + continue; /* To avoid *d++ = t; */ + } + } + } + break; + } + + *d++ = t; + } + + *d = '\0'; + + if (dst_len) { + *dst_len = d - res; + } + + if (tmp_utf) { + g_free(tmp_utf); + } + + return res; +} + + +gchar * +rspamd_str_make_utf_valid(const guchar *src, gsize slen, + gsize *dstlen, + rspamd_mempool_t *pool) +{ + UChar32 uc; + goffset err_offset; + const guchar *p; + gchar *dst, *d; + gsize remain = slen, dlen = 0; + + if (src == NULL) { + return NULL; + } + + if (slen == 0) { + if (dstlen) { + *dstlen = 0; + } + + return pool ? 
rspamd_mempool_strdup(pool, "") : g_strdup(""); + } + + p = src; + dlen = slen + 1; /* As we add '\0' */ + + /* Check space required */ + while (remain > 0 && (err_offset = rspamd_fast_utf8_validate(p, remain)) > 0) { + gint i = 0; + + err_offset--; /* As it returns it 1 indexed */ + p += err_offset; + remain -= err_offset; + dlen += err_offset; + + /* Each invalid character of input requires 3 bytes of output (+2 bytes) */ + while (i < remain) { + U8_NEXT(p, i, remain, uc); + + if (uc < 0) { + dlen += 2; + } + else { + break; + } + } + + p += i; + remain -= i; + } + + if (pool) { + dst = rspamd_mempool_alloc(pool, dlen + 1); + } + else { + dst = g_malloc(dlen + 1); + } + + p = src; + d = dst; + remain = slen; + + while (remain > 0 && (err_offset = rspamd_fast_utf8_validate(p, remain)) > 0) { + /* Copy valid */ + err_offset--; /* As it returns it 1 indexed */ + memcpy(d, p, err_offset); + d += err_offset; + + /* Append 0xFFFD for each bad character */ + gint i = 0; + + p += err_offset; + remain -= err_offset; + + while (i < remain) { + gint old_i = i; + U8_NEXT(p, i, remain, uc); + + if (uc < 0) { + *d++ = '\357'; + *d++ = '\277'; + *d++ = '\275'; + } + else { + /* Adjust p and remaining stuff and go to the outer cycle */ + i = old_i; + break; + } + } + /* + * Now p is the first valid utf8 character and remain is the rest of the string + * so we can continue our loop + */ + p += i; + remain -= i; + } + + if (err_offset == 0 && remain > 0) { + /* Last piece */ + memcpy(d, p, remain); + d += remain; + } + + /* Last '\0' */ + g_assert(dlen > d - dst); + *d = '\0'; + + if (dstlen) { + *dstlen = d - dst; + } + + return dst; +} + +gsize rspamd_gstring_strip(GString *s, const gchar *strip_chars) +{ + const gchar *p, *sc; + gsize strip_len = 0, total = 0; + + p = s->str + s->len - 1; + + while (p >= s->str) { + gboolean seen = FALSE; + + sc = strip_chars; + + while (*sc != '\0') { + if (*p == *sc) { + strip_len++; + seen = TRUE; + break; + } + + sc++; + } + + if (!seen) { 
+ break; + } + + p--; + } + + if (strip_len > 0) { + s->len -= strip_len; + s->str[s->len] = '\0'; + total += strip_len; + } + + if (s->len > 0) { + strip_len = rspamd_memspn(s->str, strip_chars, s->len); + + if (strip_len > 0) { + memmove(s->str, s->str + strip_len, s->len - strip_len); + s->len -= strip_len; + total += strip_len; + } + } + + return total; +} + +const gchar *rspamd_string_len_strip(const gchar *in, + gsize *len, + const gchar *strip_chars) +{ + const gchar *p, *sc; + gsize strip_len = 0, old_len = *len; + + p = in + old_len - 1; + + /* Trail */ + while (p >= in) { + gboolean seen = FALSE; + + sc = strip_chars; + + while (*sc != '\0') { + if (*p == *sc) { + strip_len++; + seen = TRUE; + break; + } + + sc++; + } + + if (!seen) { + break; + } + + p--; + } + + if (strip_len > 0) { + *len -= strip_len; + } + + /* Head */ + old_len = *len; + + if (old_len > 0) { + strip_len = rspamd_memspn(in, strip_chars, old_len); + + if (strip_len > 0) { + *len -= strip_len; + + return in + strip_len; + } + } + + return in; +} + +gchar ** +rspamd_string_len_split(const gchar *in, gsize len, const gchar *spill, + gint max_elts, rspamd_mempool_t *pool) +{ + const gchar *p = in, *end = in + len; + gsize detected_elts = 0; + gchar **res; + + /* Detect number of elements */ + while (p < end) { + gsize cur_fragment = rspamd_memcspn(p, spill, end - p); + + if (cur_fragment > 0) { + detected_elts++; + p += cur_fragment; + + if (max_elts > 0 && detected_elts >= max_elts) { + break; + } + } + + /* Something like a,,b produces {'a', 'b'} not {'a', '', 'b'} */ + p += rspamd_memspn(p, spill, end - p); + } + + res = pool ? rspamd_mempool_alloc(pool, sizeof(gchar *) * (detected_elts + 1)) : g_malloc(sizeof(gchar *) * (detected_elts + 1)); + /* Last one */ + res[detected_elts] = NULL; + detected_elts = 0; + p = in; + + while (p < end) { + gsize cur_fragment = rspamd_memcspn(p, spill, end - p); + + if (cur_fragment > 0) { + gchar *elt; + + elt = pool ? 
rspamd_mempool_alloc(pool, cur_fragment + 1) : g_malloc(cur_fragment + 1); + + memcpy(elt, p, cur_fragment); + elt[cur_fragment] = '\0'; + + res[detected_elts++] = elt; + p += cur_fragment; + + if (max_elts > 0 && detected_elts >= max_elts) { + break; + } + } + + p += rspamd_memspn(p, spill, end - p); + } + + return res; +} + +#if defined(__x86_64__) +#include <x86intrin.h> +#endif + +static inline gboolean +rspamd_str_has_8bit_u64(const guchar *beg, gsize len) +{ + guint8 orb = 0; + + if (len >= 16) { + const guchar *nextd = beg + sizeof(guint64); + guint64 n1 = 0, n2 = 0; + + do { + guint64 t; + memcpy(&t, beg, sizeof(t)); + n1 |= t; + memcpy(&t, nextd, sizeof(t)); + n2 |= t; + beg += 16; + nextd += 16; + len -= 16; + } while (len >= 16); + + /* + * Idea from Benny Halevy <bhalevy@scylladb.com> + * - 7-th bit set ==> orb = !(non-zero) - 1 = 0 - 1 = 0xFF + * - 7-th bit clear ==> orb = !0 - 1 = 1 - 1 = 0x00 + */ + orb = !((n1 | n2) & 0x8080808080808080ULL) - 1; + } + + while (len--) { + orb |= *beg++; + } + + return orb >= 0x80; +} + +gboolean +rspamd_str_has_8bit(const guchar *beg, gsize len) +{ +#if defined(__x86_64__) + if (len >= 32) { + const uint8_t *nextd = beg + 16; + + __m128i n1 = _mm_set1_epi8(0), n2; + + n2 = n1; + + while (len >= 32) { + __m128i xmm1 = _mm_loadu_si128((const __m128i *) beg); + __m128i xmm2 = _mm_loadu_si128((const __m128i *) nextd); + + n1 = _mm_or_si128(n1, xmm1); + n2 = _mm_or_si128(n2, xmm2); + + beg += 32; + nextd += 32; + len -= 32; + } + + n1 = _mm_or_si128(n1, n2); + + /* We assume 2 complement here */ + if (_mm_movemask_epi8(n1)) { + return TRUE; + } + } +#endif + + return rspamd_str_has_8bit_u64(beg, len); +} diff --git a/src/libutil/str_util.h b/src/libutil/str_util.h new file mode 100644 index 0000000..07560cc --- /dev/null +++ b/src/libutil/str_util.h @@ -0,0 +1,565 @@ +/*- + * Copyright 2016 Vsevolod Stakhov + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in 
compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +#ifndef SRC_LIBUTIL_STR_UTIL_H_ +#define SRC_LIBUTIL_STR_UTIL_H_ + +#include "config.h" +#include "ucl.h" +#include "fstring.h" + +#include <stdalign.h> + +#ifdef __cplusplus +extern "C" { +#endif + +enum rspamd_newlines_type { + RSPAMD_TASK_NEWLINES_CR = 0, + RSPAMD_TASK_NEWLINES_LF, + RSPAMD_TASK_NEWLINES_CRLF, + RSPAMD_TASK_NEWLINES_MAX +}; + +/** + * Compare two memory regions of size `l` using case insensitive matching + */ +gint rspamd_lc_cmp(const gchar *s, const gchar *d, gsize l); + +/** + * Convert string to lowercase in-place using ASCII conversion + */ +guint rspamd_str_lc(gchar *str, guint size); + +/** + * Performs ascii copy & lowercase + * @param src + * @param size + * @return + */ +gsize rspamd_str_copy_lc(const gchar *src, gchar *dst, gsize size); + +/** + * Convert string to lowercase in-place using utf (limited) conversion + */ +guint rspamd_str_lc_utf8(gchar *str, guint size); + +/* + * Hash table utility functions for case insensitive hashing + */ +guint64 rspamd_icase_hash(const gchar *in, gsize len, guint64 seed); + +guint rspamd_strcase_hash(gconstpointer key); + +gboolean rspamd_strcase_equal(gconstpointer v, gconstpointer v2); + +/* + * Hash table utility functions for case sensitive hashing + */ +guint rspamd_str_hash(gconstpointer key); + +gboolean rspamd_str_equal(gconstpointer v, gconstpointer v2); + + +/* + * Hash table utility functions for hashing fixed strings + */ +guint rspamd_ftok_icase_hash(gconstpointer key); + +gboolean rspamd_ftok_icase_equal(gconstpointer v, 
gconstpointer v2); + +/* Use in khash for speed */ +#define rspamd_ftok_hash(key) _wyhash32((key)->begin, (key)->len, 0) +#define rspamd_ftok_equal(v1, v2) ((v1)->len == (v2)->len && memcmp((v1)->begin, (v2)->begin, (v1)->len) == 0) + +guint rspamd_gstring_icase_hash(gconstpointer key); + +gboolean rspamd_gstring_icase_equal(gconstpointer v, gconstpointer v2); + +/** + * Copy src to dest limited to len, in compare with standard strlcpy(3) rspamd strlcpy does not + * traverse the whole string and it is possible to use it for non NULL terminated strings. This is + * more like memccpy(dst, src, size, '\0') + * + * @param dst destination string + * @param src source string + * @param siz length of destination buffer + * @return bytes copied + */ +gsize rspamd_strlcpy_fast(gchar *dst, const gchar *src, gsize siz); + +gsize rspamd_strlcpy_safe(gchar *dst, const gchar *src, gsize siz); + +#if defined(__has_feature) +#if __has_feature(address_sanitizer) +#define rspamd_strlcpy rspamd_strlcpy_safe +#else +#ifdef __SANITIZE_ADDRESS__ +#define rspamd_strlcpy rspamd_strlcpy_safe +#else +#define rspamd_strlcpy rspamd_strlcpy_fast +#endif +#endif +#else +#ifdef __SANITIZE_ADDRESS__ +#define rspamd_strlcpy rspamd_strlcpy_safe +#else +#define rspamd_strlcpy rspamd_strlcpy_fast +#endif +#endif + +/** + * Copies `srclen` characters from `src` to `dst` ignoring \0 + * @param src + * @param srclen + * @param dest + * @param destlen + * @return number of bytes copied + */ +gsize rspamd_null_safe_copy(const gchar *src, gsize srclen, + gchar *dest, gsize destlen); + +/* + * Try to convert string of length to long + */ +gboolean rspamd_strtol(const gchar *s, gsize len, glong *value); + +/* + * Try to convert a string of length to unsigned long + */ +gboolean rspamd_strtoul(const gchar *s, gsize len, gulong *value); +gboolean rspamd_strtou64(const gchar *s, gsize len, guint64 *value); + +/* + * Try to convert a hex string of length to unsigned long + */ +gboolean rspamd_xstrtoul(const 
gchar *s, gsize len, gulong *value); + +/** + * Utility function to provide mem_pool copy for rspamd_hash_table_copy function + * @param data string to copy + * @param ud memory pool to use + * @return + */ +gpointer rspamd_str_pool_copy(gconstpointer data, gpointer ud); + +/** + * Encode string using hex encoding + * @param in input + * @param inlen input length + * @return freshly allocated hex encoding of a specified string + */ +gchar *rspamd_encode_hex(const guchar *in, gsize inlen); + +/** + * Decode string using hex encoding + * @param in input + * @param inlen input length + * @return freshly allocated hex decoded value or NULL if input is invalid + */ +guchar *rspamd_decode_hex(const gchar *in, gsize inlen); + +enum rspamd_base32_type { + RSPAMD_BASE32_DEFAULT = 0, + RSPAMD_BASE32_ZBASE = 0, + RSPAMD_BASE32_BLEACH, + RSPAMD_BASE32_RFC, + RSPAMD_BASE32_INVALID = -1, +}; + +/** + * Returns base32 type from a string or RSPAMD_BASE32_INVALID + * @param str + * @return + */ +enum rspamd_base32_type rspamd_base32_decode_type_from_str(const gchar *str); + +/** + * Encode string using base32 encoding + * @param in input + * @param inlen input length + * @return freshly allocated base32 encoding of a specified string + */ +gchar *rspamd_encode_base32(const guchar *in, gsize inlen, + enum rspamd_base32_type type); + +/** + * Decode string using base32 encoding + * @param in input + * @param inlen input length + * @return freshly allocated base32 decoded value or NULL if input is invalid + */ +guchar *rspamd_decode_base32(const gchar *in, gsize inlen, gsize *outlen, enum rspamd_base32_type type); + +/** + * Encode string using base32 encoding + * @param in input + * @param inlen input length + * @param out output buf + * @param outlen output buf len + * @return encoded len if `outlen` is enough to encode `inlen` + */ +gint rspamd_encode_base32_buf(const guchar *in, gsize inlen, gchar *out, + gsize outlen, enum rspamd_base32_type type); + +/** + * Decode string
using base32 encoding + * @param in input + * @param inlen input length + * @param out output buf (may overlap with `in`) + * @param outlen output buf len + * @return decoded len if in is valid base32 and `outlen` is enough to encode `inlen` + */ +gint rspamd_decode_base32_buf(const gchar *in, gsize inlen, guchar *out, + gsize outlen, enum rspamd_base32_type type); + +/** + * Encode string using hex encoding + * @param in input + * @param inlen input length + * @param out output buf + * @param outlen output buf len + * @return encoded len if `outlen` is enough to encode `inlen` + */ +gint rspamd_encode_hex_buf(const guchar *in, gsize inlen, gchar *out, + gsize outlen); + + +/** + * Decode string using hex encoding + * @param in input + * @param inlen input length + * @param out output buf (may overlap with `in`) + * @param outlen output buf len + * @return decoded len if in is valid hex and `outlen` is enough to encode `inlen` + */ +gssize rspamd_decode_hex_buf(const gchar *in, gsize inlen, + guchar *out, gsize outlen); + +/** + * Common version of base64 encoder + * @param in + * @param inlen + * @param str_len + * @param outlen + * @param fold + * @param how + * @return + */ +gchar * +rspamd_encode_base64_common(const guchar *in, + gsize inlen, + gint str_len, + gsize *outlen, + gboolean fold, + enum rspamd_newlines_type how); + +/** + * Encode string using base64 encoding + * @param in input + * @param inlen input length + * @param str_len maximum string length (if <= 0 then no lines are split) + * @return freshly allocated base64 encoded value or NULL if input is invalid + */ +gchar *rspamd_encode_base64(const guchar *in, gsize inlen, gint str_len, + gsize *outlen); + +/** + * Encode and fold string using base64 encoding + * @param in input + * @param inlen input length + * @param str_len maximum string length (if <= 0 then no lines are split) + * @return freshly allocated base64 encoded value or NULL if input is invalid + */ +gchar 
*rspamd_encode_base64_fold(const guchar *in, gsize inlen, gint str_len, + gsize *outlen, enum rspamd_newlines_type how); + +/** + * Encode and fold string using quoted printable encoding + * @param in input + * @param inlen input length + * @param str_len maximum string length (if <= 0 then no lines are split) + * @return freshly allocated quoted-printable encoded value or NULL if input is invalid + */ +gchar *rspamd_encode_qp_fold(const guchar *in, gsize inlen, gint str_len, + gsize *outlen, enum rspamd_newlines_type how); + +/** + * Decode quoted-printable encoded buffer, input and output must not overlap + * @param in input + * @param inlen length of input + * @param out output + * @param outlen length of output + * @return real size of decoded output or (-1) if outlen is not enough + */ +gssize rspamd_decode_qp_buf(const gchar *in, gsize inlen, + gchar *out, gsize outlen); + +/** + * Decode uuencode encoded buffer, input and output must not overlap + * @param in input + * @param inlen length of input + * @param out output + * @param outlen length of output + * @return real size of decoded output or (-1) if outlen is not enough + */ +gssize rspamd_decode_uue_buf(const gchar *in, gsize inlen, + gchar *out, gsize outlen); + +/** + * Decode quoted-printable encoded buffer using rfc2047 format, input and output must not overlap + * @param in input + * @param inlen length of input + * @param out output + * @param outlen length of output + * @return real size of decoded output or (-1) if outlen is not enough + */ +gssize rspamd_decode_qp2047_buf(const gchar *in, gsize inlen, + gchar *out, gsize outlen); + +/** + * Encode quoted-printable buffer using rfc2047 format, input and output must not overlap + * @param in input + * @param inlen length of input + * @param out output + * @param outlen length of output + * @return real size of encoded output or (-1) if outlen is not enough + */ +gssize rspamd_encode_qp2047_buf(const gchar *in, gsize inlen, + gchar *out, gsize outlen); + +#ifndef g_tolower +#define g_tolower(x) (((x) >= 'A' && (x) <= 'Z') ?
(x) - 'A' + 'a' : (x)) +#endif + +/** + * Return levenstein distance between two strings + * @param s1 + * @param s1len + * @param s2 + * @param s2len + * @return + */ +gint rspamd_strings_levenshtein_distance(const gchar *s1, gsize s1len, + const gchar *s2, gsize s2len, guint replace_cost); + +/** + * Fold header using rfc822 rules, return new GString from the previous one + * @param name name of header (used just for folding) + * @param value value of header + * @param fold_max + * @param how + * @param fold_on_chars + * @return new GString with the folded value + */ +GString *rspamd_header_value_fold(const gchar *name, + gsize name_len, + const gchar *value, + gsize value_len, + guint fold_max, + enum rspamd_newlines_type how, + const gchar *fold_on_chars); + +/** + * Search for a substring `srch` in the text `in` using Apostolico-Crochemore algorithm + * http://www-igm.univ-mlv.fr/~lecroq/string/node12.html#SECTION00120 + * @param in input + * @param inlen input len + * @param srch search string + * @param srchlen length of the search string + * @return position of the first substring match or (-1) if not found + */ +goffset rspamd_substring_search(const gchar *in, gsize inlen, + const gchar *srch, gsize srchlen); + +/** + * Search for a substring `srch` in the text `in` using Apostolico-Crochemore algorithm in caseless matter (ASCII only) + * http://www-igm.univ-mlv.fr/~lecroq/string/node12.html#SECTION00120 + * @param in input + * @param inlen input len + * @param srch search string + * @param srchlen length of the search string + * @return position of the first substring match or (-1) if not found + */ +goffset rspamd_substring_search_caseless(const gchar *in, gsize inlen, + const gchar *srch, gsize srchlen); + +/** + * Search for end-of-headers mark in the input string. Returns position just after + * the last header in message (but before the last newline character). 
+ * Hence, to obtain the real EOH position, it is also required to skip + * space characters + */ +goffset rspamd_string_find_eoh(GString *input, goffset *body_start); + + +#define rspamd_ucl_emit_gstring(o, t, target) \ + rspamd_ucl_emit_gstring_comments((o), (t), (target), NULL) + +/** + * Emit UCL object to gstring + * @param obj object to emit + * @param emit_type emitter type + * @param comments optional comments object + * @param target target string + */ +void rspamd_ucl_emit_gstring_comments(const ucl_object_t *obj, + enum ucl_emitter emit_type, + GString *target, + const ucl_object_t *comments); + +#define rspamd_ucl_emit_fstring(o, t, target) \ + rspamd_ucl_emit_fstring_comments((o), (t), (target), NULL) + +/** + * Emit UCL object to fstring + * @param obj object to emit + * @param emit_type emitter type + * * @param comments optional comments object + * @param target target string + */ +void rspamd_ucl_emit_fstring_comments(const ucl_object_t *obj, + enum ucl_emitter emit_type, + rspamd_fstring_t **target, + const ucl_object_t *comments); + +extern const guchar lc_map[256]; + +/** + * Search for the last occurrence of character `c` in memory block of size `len` + * @param m + * @param c + * @param len + * @return pointer to the last occurrence or NULL + */ +#ifdef HAVE_MEMRCHR +#define rspamd_memrchr memrchr +#else +void *rspamd_memrchr(const void *m, gint c, gsize len); +#endif + +/** + * Return length of memory segment starting in `s` that contains no chars from `e` + * @param s any input + * @param e zero terminated string of exceptions + * @param len length of `s` + * @return segment size + */ +gsize rspamd_memcspn(const gchar *s, const gchar *e, gsize len); + +/** + * Return length of memory segment starting in `s` that contains only chars from `e` + * @param s any input + * @param e zero terminated string of inclusions + * @param len length of `s` + * @return segment size + */ +gsize rspamd_memspn(const gchar *s, const gchar *e, gsize len); + +/* 
https://graphics.stanford.edu/~seander/bithacks.html#HasMoreInWord */ +#define rspamd_str_hasmore(x, n) ((((x) + ~0UL / 255 * (127 - (n))) | (x)) & ~0UL / 255 * 128) +/* + * Check if a pointer is aligned; n must be power of two + */ +#define rspamd_is_aligned(p, n) (((uintptr_t) (p) & ((uintptr_t) (n) -1)) == 0) +#define rspamd_is_aligned_as(p, v) rspamd_is_aligned(p, RSPAMD_ALIGNOF(__typeof((v)))) +gboolean rspamd_str_has_8bit(const guchar *beg, gsize len); + +struct UConverter; + +struct UConverter *rspamd_get_utf8_converter(void); + +struct UNormalizer2; + +const struct UNormalizer2 *rspamd_get_unicode_normalizer(void); + + +enum rspamd_regexp_escape_flags { + RSPAMD_REGEXP_ESCAPE_ASCII = 0, + RSPAMD_REGEXP_ESCAPE_UTF = 1u << 0, + RSPAMD_REGEXP_ESCAPE_GLOB = 1u << 1, + RSPAMD_REGEXP_ESCAPE_RE = 1u << 2, +}; + +/** + * Escapes special characters when reading plain data to be processed in pcre + * @param pattern pattern to process + * @param slen source length + * @param dst_len destination length pointer (can be NULL) + * @param allow_glob allow glob expressions to be translated into pcre + * @return newly allocated zero terminated escaped pattern + */ +gchar * +rspamd_str_regexp_escape(const gchar *pattern, gsize slen, + gsize *dst_len, enum rspamd_regexp_escape_flags flags) G_GNUC_WARN_UNUSED_RESULT; + +/** + * Returns copy of src (zero terminated) where all unicode is made valid or replaced + * to FFFD characters. 
Caller must free string after usage + * @param src + * @param slen + * @param dstelen + * @return + */ +gchar *rspamd_str_make_utf_valid(const guchar *src, gsize slen, gsize *dstlen, + rspamd_mempool_t *pool) G_GNUC_WARN_UNUSED_RESULT; + +/** + * Strips characters in `strip_chars` from start and end of the GString + * @param s + * @param strip_chars + */ +gsize rspamd_gstring_strip(GString *s, const gchar *strip_chars); + +/** + * Strips characters in `strip_chars` from start and end of the sized string + * @param s + * @param strip_chars + */ +const gchar *rspamd_string_len_strip(const gchar *in, + gsize *len, const gchar *strip_chars) G_GNUC_WARN_UNUSED_RESULT; + +/** + * Returns a NULL terminated list of zero terminated strings based on splitting of + * the base string into parts. If pool is not NULL then memory is allocated from + * the pool. Otherwise, it is allocated from the heap using `g_malloc` (so + * g_strfreev could be used to free stuff) + * @param in + * @param len + * @param spill + * @param max_elts + * @return + */ +gchar **rspamd_string_len_split(const gchar *in, gsize len, + const gchar *spill, gint max_elts, rspamd_mempool_t *pool); + +#define IS_ZERO_WIDTH_SPACE(uc) ((uc) == 0x200B || \ + (uc) == 0x200C || \ + (uc) == 0x200D || \ + (uc) == 0xFEFF || \ + (uc) == 0x00AD) +#define IS_OBSCURED_CHAR(uc) (((uc) >= 0x200B && (uc) <= 0x200F) || \ + ((uc) >= 0x2028 && (uc) <= 0x202F) || \ + ((uc) >= 0x205F && (uc) <= 0x206F) || \ + (uc) == 0xFEFF) + +#define RSPAMD_LEN_CHECK_STARTS_WITH(s, len, lit) \ + ((len) >= sizeof(lit) - 1 && g_ascii_strncasecmp((s), (lit), sizeof(lit) - 1) == 0) + +#ifdef __cplusplus +} +#endif + +#endif /* SRC_LIBUTIL_STR_UTIL_H_ */ diff --git a/src/libutil/unix-std.h b/src/libutil/unix-std.h new file mode 100644 index 0000000..0ce2442 --- /dev/null +++ b/src/libutil/unix-std.h @@ -0,0 +1,79 @@ +/*- + * Copyright 2016 Vsevolod Stakhov + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use 
this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +#ifndef RSPAMD_UNIX_STD_H_H +#define RSPAMD_UNIX_STD_H_H + +#include "config.h" + +/* + * Default unix system includes + */ + +/* sys/file.h */ +#ifdef HAVE_SYS_FILE_H +#include <sys/file.h> +#endif + +/* sys/uio.h */ +#ifdef HAVE_SYS_UIO_H +#include <sys/uio.h> +#endif + +/* sys/mman */ +#ifdef HAVE_SYS_MMAN_H +#include <sys/mman.h> +#endif + + +/* timedb */ +#ifdef HAVE_SYS_TIMEB_H +#include <sys/timeb.h> +#endif + +#ifdef HAVE_UNISTD_H +#include <unistd.h> +#endif + +/* strings */ +#ifdef HAVE_STRINGS_H +#include <strings.h> +#endif + +/* fcntl */ +#ifdef HAVE_FCNTL_H +#include <fcntl.h> +#endif + +#ifdef HAVE_SYS_STAT_H +#include <sys/stat.h> +#endif + + +#ifdef HAVE_DIRENT_H +#include <dirent.h> +#endif + +#include <signal.h> + +#ifdef HAVE_LIBGEN_H +#include <libgen.h> +#endif + +#ifdef HAVE_SYS_SOCKET_H +#include <sys/socket.h> +#endif + +#endif diff --git a/src/libutil/upstream.c b/src/libutil/upstream.c new file mode 100644 index 0000000..f536a2c --- /dev/null +++ b/src/libutil/upstream.c @@ -0,0 +1,1761 @@ +/* + * Copyright 2023 Vsevolod Stakhov + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +#include "config.h" +#include "upstream.h" +#include "ottery.h" +#include "ref.h" +#include "cfg_file.h" +#include "rdns.h" +#include "cryptobox.h" +#include "utlist.h" +#include "contrib/libev/ev.h" +#include "logger.h" +#include "contrib/librdns/rdns.h" +#include "contrib/mumhash/mum.h" + +#include <math.h> + + +struct upstream_inet_addr_entry { + rspamd_inet_addr_t *addr; + guint priority; + struct upstream_inet_addr_entry *next; +}; + +struct upstream_addr_elt { + rspamd_inet_addr_t *addr; + guint priority; + guint errors; +}; + +struct upstream_list_watcher { + rspamd_upstream_watch_func func; + GFreeFunc dtor; + gpointer ud; + enum rspamd_upstreams_watch_event events_mask; + struct upstream_list_watcher *next, *prev; +}; + +struct upstream { + guint weight; + guint cur_weight; + guint errors; + guint checked; + guint dns_requests; + gint active_idx; + guint ttl; + gchar *name; + ev_timer ev; + gdouble last_fail; + gdouble last_resolve; + gpointer ud; + enum rspamd_upstream_flag flags; + struct upstream_list *ls; + GList *ctx_pos; + struct upstream_ctx *ctx; + + struct { + GPtrArray *addr; /* struct upstream_addr_elt */ + guint cur; + } addrs; + + struct upstream_inet_addr_entry *new_addrs; + gpointer data; + gchar uid[8]; + ref_entry_t ref; +#ifdef UPSTREAMS_THREAD_SAFE + rspamd_mutex_t *lock; +#endif +}; + +struct upstream_limits { + gdouble revive_time; + gdouble revive_jitter; + gdouble error_time; + gdouble dns_timeout; + gdouble lazy_resolve_time; + guint max_errors; + guint dns_retransmits; +}; + +struct upstream_list { + gchar 
/*
 * Log an error on behalf of the `upstream` variable in the calling scope.
 *
 * Errors are emitted with G_LOG_LEVEL_CRITICAL. The previous definition used
 * G_LOG_LEVEL_INFO, which made errors indistinguishable from
 * msg_info_upstream() output and dropped them whenever the configured log
 * level was stricter than info.
 */
#define msg_err_upstream(...) rspamd_default_log_function(G_LOG_LEVEL_CRITICAL, \
														   "upstream", upstream->uid, \
														   G_STRFUNC,                 \
														   __VA_ARGS__)
ctx->limits.dns_retransmits = cfg->dns_retransmits; + } + if (cfg->dns_timeout) { + ctx->limits.dns_timeout = cfg->dns_timeout; + } + + ctx->event_loop = event_loop; + ctx->res = resolver; + ctx->configured = TRUE; + + /* Start lazy resolving */ + if (event_loop && resolver) { + GList *cur; + struct upstream *upstream; + + cur = ctx->upstreams->head; + + while (cur) { + upstream = cur->data; + if (!ev_can_stop(&upstream->ev) && upstream->ls && + !(upstream->flags & RSPAMD_UPSTREAM_FLAG_NORESOLVE)) { + gdouble when; + + if (upstream->flags & RSPAMD_UPSTREAM_FLAG_SRV_RESOLVE) { + /* Resolve them immediately ! */ + when = 0.0; + } + else { + when = rspamd_time_jitter(upstream->ls->limits->lazy_resolve_time, + upstream->ls->limits->lazy_resolve_time * .1); + } + + ev_timer_init(&upstream->ev, rspamd_upstream_lazy_resolve_cb, + when, 0); + upstream->ev.data = upstream; + ev_timer_start(ctx->event_loop, &upstream->ev); + } + + cur = g_list_next(cur); + } + } +} + +static void +rspamd_upstream_ctx_dtor(struct upstream_ctx *ctx) +{ + GList *cur; + struct upstream *u; + + cur = ctx->upstreams->head; + + while (cur) { + u = cur->data; + u->ctx = NULL; + u->ctx_pos = NULL; + cur = g_list_next(cur); + } + + g_queue_free(ctx->upstreams); + rspamd_mempool_delete(ctx->pool); + g_free(ctx); +} + +void rspamd_upstreams_library_unref(struct upstream_ctx *ctx) +{ + REF_RELEASE(ctx); +} + +struct upstream_ctx * +rspamd_upstreams_library_init(void) +{ + struct upstream_ctx *ctx; + + ctx = g_malloc0(sizeof(*ctx)); + memcpy(&ctx->limits, &default_limits, sizeof(ctx->limits)); + ctx->pool = rspamd_mempool_new(rspamd_mempool_suggest_size(), + "upstreams", 0); + + ctx->upstreams = g_queue_new(); + REF_INIT_RETAIN(ctx, rspamd_upstream_ctx_dtor); + + return ctx; +} + +static gint +rspamd_upstream_af_to_weight(const rspamd_inet_addr_t *addr) +{ + int ret; + + switch (rspamd_inet_address_get_af(addr)) { + case AF_UNIX: + ret = 2; + break; + case AF_INET: + ret = 1; + break; + default: + ret = 
0; + break; + } + + return ret; +} + +/* + * Select IPv4 addresses before IPv6 + */ +static gint +rspamd_upstream_addr_sort_func(gconstpointer a, gconstpointer b) +{ + const struct upstream_addr_elt *ip1 = *(const struct upstream_addr_elt **) a, + *ip2 = *(const struct upstream_addr_elt **) b; + gint w1, w2; + + if (ip1->priority == 0 && ip2->priority == 0) { + w1 = rspamd_upstream_af_to_weight(ip1->addr); + w2 = rspamd_upstream_af_to_weight(ip2->addr); + } + else { + w1 = ip1->priority; + w2 = ip2->priority; + } + + /* Inverse order */ + return w2 - w1; +} + +static void +rspamd_upstream_set_active(struct upstream_list *ls, struct upstream *upstream) +{ + RSPAMD_UPSTREAM_LOCK(ls); + g_ptr_array_add(ls->alive, upstream); + upstream->active_idx = ls->alive->len - 1; + + if (upstream->ctx && upstream->ctx->configured && + !(upstream->flags & RSPAMD_UPSTREAM_FLAG_NORESOLVE)) { + + if (ev_can_stop(&upstream->ev)) { + ev_timer_stop(upstream->ctx->event_loop, &upstream->ev); + } + + /* Start lazy (or not so lazy) names resolution */ + gdouble when; + + if (upstream->flags & RSPAMD_UPSTREAM_FLAG_SRV_RESOLVE) { + /* Resolve them immediately ! 
*/ + when = 0.0; + } + else { + when = rspamd_time_jitter(upstream->ls->limits->lazy_resolve_time, + upstream->ls->limits->lazy_resolve_time * .1); + } + ev_timer_init(&upstream->ev, rspamd_upstream_lazy_resolve_cb, + when, 0); + upstream->ev.data = upstream; + msg_debug_upstream("start lazy resolving for %s in %.0f seconds", + upstream->name, when); + ev_timer_start(upstream->ctx->event_loop, &upstream->ev); + } + + RSPAMD_UPSTREAM_UNLOCK(ls); +} + +static void +rspamd_upstream_addr_elt_dtor(gpointer a) +{ + struct upstream_addr_elt *elt = a; + + if (elt) { + rspamd_inet_address_free(elt->addr); + g_free(elt); + } +} + +static void +rspamd_upstream_update_addrs(struct upstream *upstream) +{ + guint addr_cnt, i, port; + gboolean seen_addr, reset_errors = FALSE; + struct upstream_inet_addr_entry *cur, *tmp; + GPtrArray *new_addrs; + struct upstream_addr_elt *addr_elt, *naddr; + + /* + * We need first of all get the saved port, since DNS gives us no + * idea about what port has been used previously + */ + RSPAMD_UPSTREAM_LOCK(upstream); + + if (upstream->addrs.addr->len > 0 && upstream->new_addrs) { + addr_elt = g_ptr_array_index(upstream->addrs.addr, 0); + port = rspamd_inet_address_get_port(addr_elt->addr); + + /* Now calculate new addrs count */ + addr_cnt = 0; + LL_FOREACH(upstream->new_addrs, cur) + { + addr_cnt++; + } + + /* At 10% probability reset errors on addr elements */ + if (rspamd_random_double_fast() > 0.9) { + reset_errors = TRUE; + msg_debug_upstream("reset errors on upstream %s", + upstream->name); + } + + new_addrs = g_ptr_array_new_full(addr_cnt, rspamd_upstream_addr_elt_dtor); + + /* Copy addrs back */ + LL_FOREACH(upstream->new_addrs, cur) + { + seen_addr = FALSE; + naddr = NULL; + /* Ports are problematic, set to compare in the next block */ + rspamd_inet_address_set_port(cur->addr, port); + + PTR_ARRAY_FOREACH(upstream->addrs.addr, i, addr_elt) + { + if (rspamd_inet_address_compare(addr_elt->addr, cur->addr, FALSE) == 0) { + naddr = 
g_malloc0(sizeof(*naddr)); + naddr->addr = cur->addr; + naddr->errors = reset_errors ? 0 : addr_elt->errors; + seen_addr = TRUE; + + break; + } + } + + if (!seen_addr) { + naddr = g_malloc0(sizeof(*naddr)); + naddr->addr = cur->addr; + naddr->errors = 0; + msg_debug_upstream("new address for %s: %s", + upstream->name, + rspamd_inet_address_to_string_pretty(naddr->addr)); + } + else { + msg_debug_upstream("existing address for %s: %s", + upstream->name, + rspamd_inet_address_to_string_pretty(cur->addr)); + } + + g_ptr_array_add(new_addrs, naddr); + } + + /* Free old addresses */ + g_ptr_array_free(upstream->addrs.addr, TRUE); + + upstream->addrs.cur = 0; + upstream->addrs.addr = new_addrs; + g_ptr_array_sort(upstream->addrs.addr, rspamd_upstream_addr_sort_func); + } + + LL_FOREACH_SAFE(upstream->new_addrs, cur, tmp) + { + /* Do not free inet address pointer since it has been transferred to up */ + g_free(cur); + } + + upstream->new_addrs = NULL; + RSPAMD_UPSTREAM_UNLOCK(upstream); +} + +static void +rspamd_upstream_dns_cb(struct rdns_reply *reply, void *arg) +{ + struct upstream *up = (struct upstream *) arg; + struct rdns_reply_entry *entry; + struct upstream_inet_addr_entry *up_ent; + + if (reply->code == RDNS_RC_NOERROR) { + entry = reply->entries; + + RSPAMD_UPSTREAM_LOCK(up); + while (entry) { + + if (entry->type == RDNS_REQUEST_A) { + up_ent = g_malloc0(sizeof(*up_ent)); + up_ent->addr = rspamd_inet_address_new(AF_INET, + &entry->content.a.addr); + LL_PREPEND(up->new_addrs, up_ent); + } + else if (entry->type == RDNS_REQUEST_AAAA) { + up_ent = g_malloc0(sizeof(*up_ent)); + up_ent->addr = rspamd_inet_address_new(AF_INET6, + &entry->content.aaa.addr); + LL_PREPEND(up->new_addrs, up_ent); + } + entry = entry->next; + } + + RSPAMD_UPSTREAM_UNLOCK(up); + } + + up->dns_requests--; + + if (up->dns_requests == 0) { + rspamd_upstream_update_addrs(up); + } + + REF_RELEASE(up); +} + +struct rspamd_upstream_srv_dns_cb { + struct upstream *up; + guint priority; + guint 
port; + guint requests_inflight; +}; + +/* Used when we have resolved SRV record and resolved addrs */ +static void +rspamd_upstream_dns_srv_phase2_cb(struct rdns_reply *reply, void *arg) +{ + struct rspamd_upstream_srv_dns_cb *cbdata = + (struct rspamd_upstream_srv_dns_cb *) arg; + struct upstream *up; + struct rdns_reply_entry *entry; + struct upstream_inet_addr_entry *up_ent; + + up = cbdata->up; + + if (reply->code == RDNS_RC_NOERROR) { + entry = reply->entries; + + RSPAMD_UPSTREAM_LOCK(up); + while (entry) { + + if (entry->type == RDNS_REQUEST_A) { + up_ent = g_malloc0(sizeof(*up_ent)); + up_ent->addr = rspamd_inet_address_new(AF_INET, + &entry->content.a.addr); + up_ent->priority = cbdata->priority; + rspamd_inet_address_set_port(up_ent->addr, cbdata->port); + LL_PREPEND(up->new_addrs, up_ent); + } + else if (entry->type == RDNS_REQUEST_AAAA) { + up_ent = g_malloc0(sizeof(*up_ent)); + up_ent->addr = rspamd_inet_address_new(AF_INET6, + &entry->content.aaa.addr); + up_ent->priority = cbdata->priority; + rspamd_inet_address_set_port(up_ent->addr, cbdata->port); + LL_PREPEND(up->new_addrs, up_ent); + } + entry = entry->next; + } + + RSPAMD_UPSTREAM_UNLOCK(up); + } + + up->dns_requests--; + cbdata->requests_inflight--; + + if (cbdata->requests_inflight == 0) { + g_free(cbdata); + } + + if (up->dns_requests == 0) { + rspamd_upstream_update_addrs(up); + } + + REF_RELEASE(up); +} + +static void +rspamd_upstream_dns_srv_cb(struct rdns_reply *reply, void *arg) +{ + struct upstream *upstream = (struct upstream *) arg; + struct rdns_reply_entry *entry; + struct rspamd_upstream_srv_dns_cb *ncbdata; + + if (reply->code == RDNS_RC_NOERROR) { + entry = reply->entries; + + RSPAMD_UPSTREAM_LOCK(upstream); + while (entry) { + /* XXX: we ignore weight as it contradicts with upstreams logic */ + if (entry->type == RDNS_REQUEST_SRV) { + msg_debug_upstream("got srv reply for %s: %s " + "(weight=%d, priority=%d, port=%d)", + upstream->name, entry->content.srv.target, + 
entry->content.srv.weight, entry->content.srv.priority, + entry->content.srv.port); + ncbdata = g_malloc0(sizeof(*ncbdata)); + ncbdata->priority = entry->content.srv.weight; + ncbdata->port = entry->content.srv.port; + /* XXX: for all entries? */ + upstream->ttl = entry->ttl; + + if (rdns_make_request_full(upstream->ctx->res, + rspamd_upstream_dns_srv_phase2_cb, ncbdata, + upstream->ls->limits->dns_timeout, + upstream->ls->limits->dns_retransmits, + 1, entry->content.srv.target, RDNS_REQUEST_A) != NULL) { + upstream->dns_requests++; + REF_RETAIN(upstream); + ncbdata->requests_inflight++; + } + + if (rdns_make_request_full(upstream->ctx->res, + rspamd_upstream_dns_srv_phase2_cb, ncbdata, + upstream->ls->limits->dns_timeout, + upstream->ls->limits->dns_retransmits, + 1, entry->content.srv.target, RDNS_REQUEST_AAAA) != NULL) { + upstream->dns_requests++; + REF_RETAIN(upstream); + ncbdata->requests_inflight++; + } + + if (ncbdata->requests_inflight == 0) { + g_free(ncbdata); + } + } + entry = entry->next; + } + + RSPAMD_UPSTREAM_UNLOCK(upstream); + } + + upstream->dns_requests--; + REF_RELEASE(upstream); +} + +static void +rspamd_upstream_revive_cb(struct ev_loop *loop, ev_timer *w, int revents) +{ + struct upstream *upstream = (struct upstream *) w->data; + + RSPAMD_UPSTREAM_LOCK(upstream); + ev_timer_stop(loop, w); + + msg_debug_upstream("revive upstream %s", upstream->name); + + if (upstream->ls) { + rspamd_upstream_set_active(upstream->ls, upstream); + } + + RSPAMD_UPSTREAM_UNLOCK(upstream); + g_assert(upstream->ref.refcount > 1); + REF_RELEASE(upstream); +} + +static void +rspamd_upstream_resolve_addrs(const struct upstream_list *ls, + struct upstream *upstream) +{ + /* XXX: maybe make it configurable */ + static const gdouble min_resolve_interval = 60.0; + + if (upstream->ctx->res != NULL && + upstream->ctx->configured && + upstream->dns_requests == 0 && + !(upstream->flags & RSPAMD_UPSTREAM_FLAG_NORESOLVE)) { + + gdouble now = ev_now(upstream->ctx->event_loop); 
+ + if (now - upstream->last_resolve < min_resolve_interval) { + msg_info_upstream("do not resolve upstream %s as it was checked %.0f " + "seconds ago (%.0f is minimum)", + upstream->name, now - upstream->last_resolve, + min_resolve_interval); + + return; + } + + /* Resolve name of the upstream one more time */ + if (upstream->name[0] != '/') { + upstream->last_resolve = now; + + /* + * If upstream name has a port, then we definitely need to resolve + * merely host part! + */ + char dns_name[253 + 1]; /* 253 == max dns name + \0 */ + const char *semicolon_pos = strchr(upstream->name, ':'); + + if (semicolon_pos != NULL && semicolon_pos > upstream->name) { + if (sizeof(dns_name) > semicolon_pos - upstream->name) { + rspamd_strlcpy(dns_name, upstream->name, + semicolon_pos - upstream->name + 1); + } + else { + /* XXX: truncated */ + msg_err_upstream("internal error: upstream name is larger than" + "max DNS name: %s", + upstream->name); + rspamd_strlcpy(dns_name, upstream->name, sizeof(dns_name)); + } + } + else { + rspamd_strlcpy(dns_name, upstream->name, sizeof(dns_name)); + } + + if (upstream->flags & RSPAMD_UPSTREAM_FLAG_SRV_RESOLVE) { + if (rdns_make_request_full(upstream->ctx->res, + rspamd_upstream_dns_srv_cb, upstream, + ls->limits->dns_timeout, ls->limits->dns_retransmits, + 1, dns_name, RDNS_REQUEST_SRV) != NULL) { + upstream->dns_requests++; + REF_RETAIN(upstream); + } + } + else { + if (rdns_make_request_full(upstream->ctx->res, + rspamd_upstream_dns_cb, upstream, + ls->limits->dns_timeout, ls->limits->dns_retransmits, + 1, dns_name, RDNS_REQUEST_A) != NULL) { + upstream->dns_requests++; + REF_RETAIN(upstream); + } + + if (rdns_make_request_full(upstream->ctx->res, + rspamd_upstream_dns_cb, upstream, + ls->limits->dns_timeout, ls->limits->dns_retransmits, + 1, dns_name, RDNS_REQUEST_AAAA) != NULL) { + upstream->dns_requests++; + REF_RETAIN(upstream); + } + } + } + } + else if (upstream->dns_requests != 0) { + msg_info_upstream("do not resolve upstream %s 
as another request for " + "resolving has been already issued", + upstream->name); + } +} + +static void +rspamd_upstream_lazy_resolve_cb(struct ev_loop *loop, ev_timer *w, int revents) +{ + struct upstream *up = (struct upstream *) w->data; + + RSPAMD_UPSTREAM_LOCK(up); + ev_timer_stop(loop, w); + + if (up->ls) { + rspamd_upstream_resolve_addrs(up->ls, up); + + if (up->ttl == 0 || up->ttl > up->ls->limits->lazy_resolve_time) { + w->repeat = rspamd_time_jitter(up->ls->limits->lazy_resolve_time, + up->ls->limits->lazy_resolve_time * .1); + } + else { + w->repeat = up->ttl; + } + + ev_timer_again(loop, w); + } + + RSPAMD_UPSTREAM_UNLOCK(up); +} + +static void +rspamd_upstream_set_inactive(struct upstream_list *ls, struct upstream *upstream) +{ + gdouble ntim; + guint i; + struct upstream *cur; + struct upstream_list_watcher *w; + + RSPAMD_UPSTREAM_LOCK(ls); + g_ptr_array_remove_index(ls->alive, upstream->active_idx); + upstream->active_idx = -1; + + /* We need to update all indices */ + for (i = 0; i < ls->alive->len; i++) { + cur = g_ptr_array_index(ls->alive, i); + cur->active_idx = i; + } + + if (upstream->ctx) { + rspamd_upstream_resolve_addrs(ls, upstream); + + REF_RETAIN(upstream); + ntim = rspamd_time_jitter(ls->limits->revive_time, + ls->limits->revive_time * ls->limits->revive_jitter); + + if (ev_can_stop(&upstream->ev)) { + ev_timer_stop(upstream->ctx->event_loop, &upstream->ev); + } + + msg_debug_upstream("mark upstream %s inactive; revive in %.0f seconds", + upstream->name, ntim); + ev_timer_init(&upstream->ev, rspamd_upstream_revive_cb, ntim, 0); + upstream->ev.data = upstream; + + if (upstream->ctx->event_loop != NULL && upstream->ctx->configured) { + ev_timer_start(upstream->ctx->event_loop, &upstream->ev); + } + } + + DL_FOREACH(upstream->ls->watchers, w) + { + if (w->events_mask & RSPAMD_UPSTREAM_WATCH_OFFLINE) { + w->func(upstream, RSPAMD_UPSTREAM_WATCH_OFFLINE, upstream->errors, w->ud); + } + } + + RSPAMD_UPSTREAM_UNLOCK(ls); +} + +void 
rspamd_upstream_fail(struct upstream *upstream, + gboolean addr_failure, + const gchar *reason) +{ + gdouble error_rate = 0, max_error_rate = 0; + gdouble sec_last, sec_cur; + struct upstream_addr_elt *addr_elt; + struct upstream_list_watcher *w; + + msg_debug_upstream("upstream %s failed; reason: %s", + upstream->name, + reason); + + if (upstream->ctx && upstream->active_idx != -1 && upstream->ls) { + sec_cur = rspamd_get_ticks(FALSE); + + RSPAMD_UPSTREAM_LOCK(upstream); + if (upstream->errors == 0) { + /* We have the first error */ + upstream->last_fail = sec_cur; + upstream->errors = 1; + + if (upstream->ls && upstream->dns_requests == 0) { + /* Try to re-resolve address immediately */ + rspamd_upstream_resolve_addrs(upstream->ls, upstream); + } + + DL_FOREACH(upstream->ls->watchers, w) + { + if (w->events_mask & RSPAMD_UPSTREAM_WATCH_FAILURE) { + w->func(upstream, RSPAMD_UPSTREAM_WATCH_FAILURE, 1, w->ud); + } + } + } + else { + sec_last = upstream->last_fail; + + if (sec_cur >= sec_last) { + upstream->errors++; + + + DL_FOREACH(upstream->ls->watchers, w) + { + if (w->events_mask & RSPAMD_UPSTREAM_WATCH_FAILURE) { + w->func(upstream, RSPAMD_UPSTREAM_WATCH_FAILURE, + upstream->errors, w->ud); + } + } + + if (sec_cur - sec_last >= upstream->ls->limits->error_time) { + error_rate = ((gdouble) upstream->errors) / (sec_cur - sec_last); + max_error_rate = ((gdouble) upstream->ls->limits->max_errors) / + upstream->ls->limits->error_time; + } + + if (error_rate > max_error_rate) { + /* Remove upstream from the active list */ + if (upstream->ls->ups->len > 1) { + msg_debug_upstream("mark upstream %s inactive; " + "reason: %s; %.2f " + "error rate (%d errors), " + "%.2f max error rate, " + "%.1f first error time, " + "%.1f current ts, " + "%d upstreams left", + upstream->name, + reason, + error_rate, + upstream->errors, + max_error_rate, + sec_last, + sec_cur, + upstream->ls->alive->len - 1); + rspamd_upstream_set_inactive(upstream->ls, upstream); + upstream->errors = 0; 
+ } + else { + msg_debug_upstream("cannot mark last alive upstream %s " + "inactive; reason: %s; %.2f " + "error rate (%d errors), " + "%.2f max error rate, " + "%.1f first error time, " + "%.1f current ts", + upstream->name, + reason, + error_rate, + upstream->errors, + max_error_rate, + sec_last, + sec_cur); + /* Just re-resolve addresses */ + if (sec_cur - sec_last > upstream->ls->limits->revive_time) { + upstream->errors = 0; + rspamd_upstream_resolve_addrs(upstream->ls, upstream); + } + } + } + else if (sec_cur - sec_last >= upstream->ls->limits->error_time) { + /* Forget the whole interval */ + upstream->last_fail = sec_cur; + upstream->errors = 1; + } + } + } + + if (addr_failure) { + /* Also increase count of errors for this specific address */ + if (upstream->addrs.addr) { + addr_elt = g_ptr_array_index(upstream->addrs.addr, + upstream->addrs.cur); + addr_elt->errors++; + } + } + + RSPAMD_UPSTREAM_UNLOCK(upstream); + } +} + +void rspamd_upstream_ok(struct upstream *upstream) +{ + struct upstream_addr_elt *addr_elt; + struct upstream_list_watcher *w; + + RSPAMD_UPSTREAM_LOCK(upstream); + if (upstream->errors > 0 && upstream->active_idx != -1 && upstream->ls) { + /* We touch upstream if and only if it is active */ + msg_debug_upstream("reset errors on upstream %s (was %ud)", upstream->name, upstream->errors); + upstream->errors = 0; + + if (upstream->addrs.addr) { + addr_elt = g_ptr_array_index(upstream->addrs.addr, upstream->addrs.cur); + addr_elt->errors = 0; + } + + DL_FOREACH(upstream->ls->watchers, w) + { + if (w->events_mask & RSPAMD_UPSTREAM_WATCH_SUCCESS) { + w->func(upstream, RSPAMD_UPSTREAM_WATCH_SUCCESS, 0, w->ud); + } + } + } + + RSPAMD_UPSTREAM_UNLOCK(upstream); +} + +void rspamd_upstream_set_weight(struct upstream *up, guint weight) +{ + RSPAMD_UPSTREAM_LOCK(up); + up->weight = weight; + RSPAMD_UPSTREAM_UNLOCK(up); +} + +#define SEED_CONSTANT 0xa574de7df64e9b9dULL + +struct upstream_list * +rspamd_upstreams_create(struct upstream_ctx *ctx) +{ + 
struct upstream_list *ls; + + ls = g_malloc0(sizeof(*ls)); + ls->hash_seed = SEED_CONSTANT; + ls->ups = g_ptr_array_new(); + ls->alive = g_ptr_array_new(); + +#ifdef UPSTREAMS_THREAD_SAFE + ls->lock = rspamd_mutex_new(); +#endif + ls->cur_elt = 0; + ls->ctx = ctx; + ls->rot_alg = RSPAMD_UPSTREAM_UNDEF; + + if (ctx) { + ls->limits = &ctx->limits; + } + else { + ls->limits = &default_limits; + } + + return ls; +} + +gsize rspamd_upstreams_count(struct upstream_list *ups) +{ + return ups != NULL ? ups->ups->len : 0; +} + +gsize rspamd_upstreams_alive(struct upstream_list *ups) +{ + return ups != NULL ? ups->alive->len : 0; +} + +static void +rspamd_upstream_dtor(struct upstream *up) +{ + struct upstream_inet_addr_entry *cur, *tmp; + + if (up->new_addrs) { + LL_FOREACH_SAFE(up->new_addrs, cur, tmp) + { + /* Here we need to free pointer as well */ + rspamd_inet_address_free(cur->addr); + g_free(cur); + } + } + + if (up->addrs.addr) { + g_ptr_array_free(up->addrs.addr, TRUE); + } + +#ifdef UPSTREAMS_THREAD_SAFE + rspamd_mutex_free(up->lock); +#endif + + if (up->ctx) { + + if (ev_can_stop(&up->ev)) { + ev_timer_stop(up->ctx->event_loop, &up->ev); + } + + g_queue_delete_link(up->ctx->upstreams, up->ctx_pos); + REF_RELEASE(up->ctx); + } + + g_free(up); +} + +rspamd_inet_addr_t * +rspamd_upstream_addr_next(struct upstream *up) +{ + guint idx, next_idx; + struct upstream_addr_elt *e1, *e2; + + do { + idx = up->addrs.cur; + next_idx = (idx + 1) % up->addrs.addr->len; + e1 = g_ptr_array_index(up->addrs.addr, idx); + e2 = g_ptr_array_index(up->addrs.addr, next_idx); + up->addrs.cur = next_idx; + } while (e2->errors > e1->errors); + + return e2->addr; +} + +rspamd_inet_addr_t * +rspamd_upstream_addr_cur(const struct upstream *up) +{ + struct upstream_addr_elt *elt; + + elt = g_ptr_array_index(up->addrs.addr, up->addrs.cur); + + return elt->addr; +} + +const gchar * +rspamd_upstream_name(struct upstream *up) +{ + return up->name; +} + +gint rspamd_upstream_port(struct upstream 
*up) +{ + struct upstream_addr_elt *elt; + + elt = g_ptr_array_index(up->addrs.addr, up->addrs.cur); + return rspamd_inet_address_get_port(elt->addr); +} + +gboolean +rspamd_upstreams_add_upstream(struct upstream_list *ups, const gchar *str, + guint16 def_port, enum rspamd_upstream_parse_type parse_type, + void *data) +{ + struct upstream *upstream; + GPtrArray *addrs = NULL; + guint i, slen; + rspamd_inet_addr_t *addr; + enum rspamd_parse_host_port_result ret = RSPAMD_PARSE_ADDR_FAIL; + + upstream = g_malloc0(sizeof(*upstream)); + slen = strlen(str); + + switch (parse_type) { + case RSPAMD_UPSTREAM_PARSE_DEFAULT: + if (slen > sizeof("service=") && + RSPAMD_LEN_CHECK_STARTS_WITH(str, slen, "service=")) { + const gchar *plus_pos, *service_pos, *semicolon_pos; + + /* Accept service=srv_name+hostname[:priority] */ + service_pos = str + sizeof("service=") - 1; + plus_pos = strchr(service_pos, '+'); + + if (plus_pos != NULL) { + semicolon_pos = strchr(plus_pos + 1, ':'); + + if (semicolon_pos) { + upstream->weight = strtoul(semicolon_pos + 1, NULL, 10); + } + else { + semicolon_pos = plus_pos + strlen(plus_pos); + } + + /* + * Now our name is _service._tcp.<domain> + * where <domain> is string between semicolon_pos and plus_pos +1 + * while service is a string between service_pos and plus_pos + */ + guint namelen = (semicolon_pos - (plus_pos + 1)) + + (plus_pos - service_pos) + + (sizeof("tcp") - 1) + + 4; + addrs = g_ptr_array_sized_new(1); + upstream->name = ups->ctx ? rspamd_mempool_alloc(ups->ctx->pool, namelen + 1) : g_malloc(namelen + 1); + + rspamd_snprintf(upstream->name, namelen + 1, + "_%*s._tcp.%*s", + (gint) (plus_pos - service_pos), service_pos, + (gint) (semicolon_pos - (plus_pos + 1)), plus_pos + 1); + upstream->flags |= RSPAMD_UPSTREAM_FLAG_SRV_RESOLVE; + ret = RSPAMD_PARSE_ADDR_RESOLVED; + } + } + else { + ret = rspamd_parse_host_port_priority(str, &addrs, + &upstream->weight, + &upstream->name, def_port, + FALSE, + ups->ctx ? 
ups->ctx->pool : NULL); + } + break; + case RSPAMD_UPSTREAM_PARSE_NAMESERVER: + addrs = g_ptr_array_sized_new(1); + if (rspamd_parse_inet_address(&addr, str, strlen(str), + RSPAMD_INET_ADDRESS_PARSE_DEFAULT)) { + if (ups->ctx) { + upstream->name = rspamd_mempool_strdup(ups->ctx->pool, str); + } + else { + upstream->name = g_strdup(str); + } + if (rspamd_inet_address_get_port(addr) == 0) { + rspamd_inet_address_set_port(addr, def_port); + } + + g_ptr_array_add(addrs, addr); + ret = RSPAMD_PARSE_ADDR_NUMERIC; + + if (ups->ctx) { + rspamd_mempool_add_destructor(ups->ctx->pool, + (rspamd_mempool_destruct_t) rspamd_inet_address_free, + addr); + rspamd_mempool_add_destructor(ups->ctx->pool, + (rspamd_mempool_destruct_t) rspamd_ptr_array_free_hard, + addrs); + } + } + else { + g_ptr_array_free(addrs, TRUE); + } + + break; + } + + if (ret == RSPAMD_PARSE_ADDR_FAIL) { + g_free(upstream); + return FALSE; + } + else { + upstream->flags |= ups->flags; + + if (ret == RSPAMD_PARSE_ADDR_NUMERIC) { + /* Add noresolve flag */ + upstream->flags |= RSPAMD_UPSTREAM_FLAG_NORESOLVE; + } + for (i = 0; i < addrs->len; i++) { + addr = g_ptr_array_index(addrs, i); + rspamd_upstream_add_addr(upstream, rspamd_inet_address_copy(addr, NULL)); + } + } + + if (upstream->weight == 0 && ups->rot_alg == RSPAMD_UPSTREAM_MASTER_SLAVE) { + /* Special heuristic for master-slave rotation */ + if (ups->ups->len == 0) { + /* Prioritize the first */ + upstream->weight = 1; + } + } + + g_ptr_array_add(ups->ups, upstream); + upstream->ud = data; + upstream->cur_weight = upstream->weight; + upstream->ls = ups; + REF_INIT_RETAIN(upstream, rspamd_upstream_dtor); +#ifdef UPSTREAMS_THREAD_SAFE + upstream->lock = rspamd_mutex_new(); +#endif + upstream->ctx = ups->ctx; + + if (upstream->ctx) { + REF_RETAIN(ups->ctx); + g_queue_push_tail(ups->ctx->upstreams, upstream); + upstream->ctx_pos = g_queue_peek_tail_link(ups->ctx->upstreams); + } + + guint h = rspamd_cryptobox_fast_hash(upstream->name, + 
strlen(upstream->name), 0); + memset(upstream->uid, 0, sizeof(upstream->uid)); + rspamd_encode_base32_buf((const guchar *) &h, sizeof(h), + upstream->uid, sizeof(upstream->uid) - 1, RSPAMD_BASE32_DEFAULT); + + msg_debug_upstream("added upstream %s (%s)", upstream->name, + upstream->flags & RSPAMD_UPSTREAM_FLAG_NORESOLVE ? "numeric ip" : "DNS name"); + g_ptr_array_sort(upstream->addrs.addr, rspamd_upstream_addr_sort_func); + rspamd_upstream_set_active(ups, upstream); + + return TRUE; +} + +void rspamd_upstreams_set_flags(struct upstream_list *ups, + enum rspamd_upstream_flag flags) +{ + ups->flags = flags; +} + +void rspamd_upstreams_set_rotation(struct upstream_list *ups, + enum rspamd_upstream_rotation rot) +{ + ups->rot_alg = rot; +} + +gboolean +rspamd_upstream_add_addr(struct upstream *up, rspamd_inet_addr_t *addr) +{ + struct upstream_addr_elt *elt; + /* + * XXX: slow and inefficient + */ + if (up->addrs.addr == NULL) { + up->addrs.addr = g_ptr_array_new_full(8, rspamd_upstream_addr_elt_dtor); + } + + elt = g_malloc0(sizeof(*elt)); + elt->addr = addr; + g_ptr_array_add(up->addrs.addr, elt); + g_ptr_array_sort(up->addrs.addr, rspamd_upstream_addr_sort_func); + + return TRUE; +} + +gboolean +rspamd_upstreams_parse_line_len(struct upstream_list *ups, + const gchar *str, gsize len, guint16 def_port, void *data) +{ + const gchar *end = str + len, *p = str; + const gchar *separators = ";, \n\r\t"; + gchar *tmp; + guint span_len; + gboolean ret = FALSE; + + if (RSPAMD_LEN_CHECK_STARTS_WITH(p, len, "random:")) { + ups->rot_alg = RSPAMD_UPSTREAM_RANDOM; + p += sizeof("random:") - 1; + } + else if (RSPAMD_LEN_CHECK_STARTS_WITH(p, len, "master-slave:")) { + ups->rot_alg = RSPAMD_UPSTREAM_MASTER_SLAVE; + p += sizeof("master-slave:") - 1; + } + else if (RSPAMD_LEN_CHECK_STARTS_WITH(p, len, "round-robin:")) { + ups->rot_alg = RSPAMD_UPSTREAM_ROUND_ROBIN; + p += sizeof("round-robin:") - 1; + } + else if (RSPAMD_LEN_CHECK_STARTS_WITH(p, len, "hash:")) { + ups->rot_alg = 
RSPAMD_UPSTREAM_HASHED; + p += sizeof("hash:") - 1; + } + + while (p < end) { + span_len = rspamd_memcspn(p, separators, end - p); + + if (span_len > 0) { + tmp = g_malloc(span_len + 1); + rspamd_strlcpy(tmp, p, span_len + 1); + + if (rspamd_upstreams_add_upstream(ups, tmp, def_port, + RSPAMD_UPSTREAM_PARSE_DEFAULT, + data)) { + ret = TRUE; + } + + g_free(tmp); + } + + p += span_len; + /* Skip separators */ + if (p < end) { + p += rspamd_memspn(p, separators, end - p); + } + } + + if (!ups->ups_line) { + ups->ups_line = g_malloc(len + 1); + rspamd_strlcpy(ups->ups_line, str, len + 1); + } + + return ret; +} + + +gboolean +rspamd_upstreams_parse_line(struct upstream_list *ups, + const gchar *str, guint16 def_port, void *data) +{ + return rspamd_upstreams_parse_line_len(ups, str, strlen(str), + def_port, data); +} + +gboolean +rspamd_upstreams_from_ucl(struct upstream_list *ups, + const ucl_object_t *in, guint16 def_port, void *data) +{ + gboolean ret = FALSE; + const ucl_object_t *cur; + ucl_object_iter_t it = NULL; + + it = ucl_object_iterate_new(in); + + while ((cur = ucl_object_iterate_safe(it, true)) != NULL) { + if (ucl_object_type(cur) == UCL_STRING) { + ret = rspamd_upstreams_parse_line(ups, ucl_object_tostring(cur), + def_port, data); + } + } + + ucl_object_iterate_free(it); + + return ret; +} + +void rspamd_upstreams_destroy(struct upstream_list *ups) +{ + guint i; + struct upstream *up; + struct upstream_list_watcher *w, *tmp; + + if (ups != NULL) { + g_ptr_array_free(ups->alive, TRUE); + + for (i = 0; i < ups->ups->len; i++) { + up = g_ptr_array_index(ups->ups, i); + up->ls = NULL; + REF_RELEASE(up); + } + + DL_FOREACH_SAFE(ups->watchers, w, tmp) + { + if (w->dtor) { + w->dtor(w->ud); + } + g_free(w); + } + + g_free(ups->ups_line); + g_ptr_array_free(ups->ups, TRUE); +#ifdef UPSTREAMS_THREAD_SAFE + rspamd_mutex_free(ups->lock); +#endif + g_free(ups); + } +} + +static void +rspamd_upstream_restore_cb(gpointer elt, gpointer ls) +{ + struct upstream *up = 
(struct upstream *) elt;
+	struct upstream_list *ups = (struct upstream_list *) ls;
+	struct upstream_list_watcher *w;
+
+	/* Here the upstreams list is already locked */
+	RSPAMD_UPSTREAM_LOCK(up);
+
+	if (ev_can_stop(&up->ev)) {
+		ev_timer_stop(up->ctx->event_loop, &up->ev);
+	}
+
+	g_ptr_array_add(ups->alive, up);
+	up->active_idx = ups->alive->len - 1;
+	RSPAMD_UPSTREAM_UNLOCK(up);
+
+	DL_FOREACH(up->ls->watchers, w)
+	{
+		if (w->events_mask & RSPAMD_UPSTREAM_WATCH_ONLINE) {
+			w->func(up, RSPAMD_UPSTREAM_WATCH_ONLINE, up->errors, w->ud);
+		}
+	}
+
+	/* For revive event */
+	g_assert(up->ref.refcount > 1);
+	REF_RELEASE(up);
+}
+
+/*
+ * Picks a random element of ups->alive, skipping `except`.
+ *
+ * Returns NULL when no upstream is alive. When exactly one upstream is alive
+ * it is returned even if it equals `except` (the same choice the
+ * single-upstream fast path of rspamd_upstream_get_common() makes), because
+ * retrying could never produce a different element.
+ */
+static struct upstream *
+rspamd_upstream_get_random(struct upstream_list *ups,
+						   struct upstream *except)
+{
+	if (ups->alive->len == 0) {
+		/* `len - 1` would wrap around and index out of bounds */
+		return NULL;
+	}
+
+	if (ups->alive->len == 1) {
+		/* Nothing else to choose from; looping on `except` would never end */
+		return g_ptr_array_index(ups->alive, 0);
+	}
+
+	for (;;) {
+		guint idx = ottery_rand_range(ups->alive->len - 1);
+		struct upstream *up;
+
+		up = g_ptr_array_index(ups->alive, idx);
+
+		if (except && up == except) {
+			continue;
+		}
+
+		return up;
+	}
+}
+
+/*
+ * Selects the alive upstream with the largest weight, skipping `except`.
+ *
+ * With `use_cur` set the decision is based on cur_weight, and the winner's
+ * cur_weight is decremented (or reloaded from weight once it reaches zero);
+ * otherwise the static weight is compared and nothing is modified.
+ * If no candidate has a positive weight, the upstream with the lowest
+ * `checked + errors * 2` score is returned instead.
+ * Returns NULL if there is no candidate at all.
+ */
+static struct upstream *
+rspamd_upstream_get_round_robin(struct upstream_list *ups,
+								struct upstream *except,
+								gboolean use_cur)
+{
+	guint max_weight = 0, min_checked = G_MAXUINT;
+	struct upstream *up = NULL, *selected = NULL, *min_checked_sel = NULL;
+	guint i;
+
+	/* Select upstream with the maximum cur_weight */
+	RSPAMD_UPSTREAM_LOCK(ups);
+
+	for (i = 0; i < ups->alive->len; i++) {
+		up = g_ptr_array_index(ups->alive, i);
+
+		if (except != NULL && up == except) {
+			continue;
+		}
+
+		if (use_cur) {
+			if (up->cur_weight > max_weight) {
+				selected = up;
+				max_weight = up->cur_weight;
+			}
+		}
+		else {
+			if (up->weight > max_weight) {
+				selected = up;
+				max_weight = up->weight;
+			}
+		}
+
+		/*
+		 * This code is used when all upstreams have zero weight
+		 * The logic is to select least currently used upstream and penalise
+		 * upstream with errors. The error penalty should not be too high
+		 * to avoid sudden traffic drop in this case.
+		 */
+		if (up->checked + up->errors * 2 < min_checked) {
+			min_checked_sel = up;
+			/*
+			 * Remember the penalised score, not the bare counter: the
+			 * comparison above must use the same metric on both sides,
+			 * otherwise an upstream with errors sets an artificially low
+			 * threshold that better candidates cannot beat.
+			 */
+			min_checked = up->checked + up->errors * 2;
+		}
+	}
+
+	if (max_weight == 0) {
+		/* All upstreams have zero weight */
+		if (min_checked_sel != NULL && min_checked > G_MAXUINT / 2) {
+			/* Reset all checked counters to avoid overflow */
+			for (i = 0; i < ups->alive->len; i++) {
+				up = g_ptr_array_index(ups->alive, i);
+				up->checked = 0;
+			}
+		}
+
+		selected = min_checked_sel;
+	}
+
+	if (use_cur && selected) {
+		if (selected->cur_weight > 0) {
+			selected->cur_weight--;
+		}
+		else {
+			selected->cur_weight = selected->weight;
+		}
+	}
+
+	RSPAMD_UPSTREAM_UNLOCK(ups);
+
+	return selected;
+}
+
+/*
+ * The key idea of this function is obtained from the following paper:
+ * A Fast, Minimal Memory, Consistent Hash Algorithm
+ * John Lamping, Eric Veach
+ *
+ * http://arxiv.org/abs/1406.2294
+ *
+ * Returns (guint32) -1 when nbuckets is 0: callers must not use the result
+ * as an index in that case.
+ *
+ * NOTE(review): the reference code in the paper updates the key as
+ * `key = key * 2862933555777941757ULL + 1`, whereas the statement below
+ * multiplies by (2862933555777941757ULL + 1). It is intentionally left
+ * untouched here because changing it would remap every key to a different
+ * bucket; confirm whether the deviation is deliberate before altering it.
+ */
+static guint32
+rspamd_consistent_hash(guint64 key, guint32 nbuckets)
+{
+	gint64 b = -1, j = 0;
+
+	while (j < nbuckets) {
+		b = j;
+		key *= 2862933555777941757ULL + 1;
+		j = (b + 1) * (double) (1ULL << 31) / (double) ((key >> 33) + 1ULL);
+	}
+
+	return b;
+}
+
+/*
+ * Maps `key` to an upstream using rspamd_consistent_hash() over the full
+ * list (ups->ups). If the chosen upstream is inactive or equals `except`,
+ * the key is re-mixed and the lookup retried up to max_tries times; after
+ * that a random alive upstream is returned.
+ * Returns NULL only if the list has no upstreams or none is alive.
+ */
+static struct upstream *
+rspamd_upstream_get_hashed(struct upstream_list *ups,
+						   struct upstream *except,
+						   const guint8 *key, guint keylen)
+{
+	guint64 k;
+	guint32 idx;
+	static const guint max_tries = 20;
+	struct upstream *up = NULL;
+	gboolean found = FALSE;
+
+	if (ups->ups->len == 0) {
+		/* rspamd_consistent_hash() has no valid bucket to return */
+		return NULL;
+	}
+
+	/* Generate 64 bits input key */
+	k = rspamd_cryptobox_fast_hash_specific(RSPAMD_CRYPTOBOX_XXHASH64,
+											key, keylen, ups->hash_seed);
+
+	RSPAMD_UPSTREAM_LOCK(ups);
+	/*
+	 * Select new upstream from all upstreams
+	 */
+	for (guint i = 0; i < max_tries; i++) {
+		idx = rspamd_consistent_hash(k, ups->ups->len);
+		up = g_ptr_array_index(ups->ups, idx);
+
+		if (up->active_idx < 0 || (except != NULL && up == except)) {
+			/* Found inactive or excluded upstream */
+			k = mum_hash_step(k, ups->hash_seed);
+		}
+		else {
+			/*
+			 * Record the outcome while the list is still locked, so that
+			 * an excluded upstream seen on the last try is not returned.
+			 */
+			found = TRUE;
+			break;
+		}
+	}
+	RSPAMD_UPSTREAM_UNLOCK(ups);
+
+	if (found) {
+		return up;
+	}
+
+	/* We failed to find any active upstream */
+	up = rspamd_upstream_get_random(ups, except);
+
+	if (up != NULL) {
+		msg_info("failed to find hashed upstream for %s, fallback to random: %s",
+				 ups->ups_line, up->name);
+	}
+
+	return up;
+}
+
+static struct upstream *
+rspamd_upstream_get_common(struct upstream_list *ups,
+						   struct upstream *except,
+						   enum rspamd_upstream_rotation default_type,
+						   const guchar *key, gsize keylen,
+						   gboolean forced)
+{
+	enum rspamd_upstream_rotation type;
+	struct upstream *up = NULL;
+
+	RSPAMD_UPSTREAM_LOCK(ups);
+	if (ups->alive->len == 0) {
+		/* We have no upstreams alive */
+		msg_warn("there are no alive upstreams left for %s, revive all of them",
+				 ups->ups_line);
+		g_ptr_array_foreach(ups->ups, rspamd_upstream_restore_cb, ups);
+	}
+	RSPAMD_UPSTREAM_UNLOCK(ups);
+
+	if (ups->alive->len == 1 && default_type != RSPAMD_UPSTREAM_SEQUENTIAL) {
+		/* Fast path */
+		up = g_ptr_array_index(ups->alive, 0);
+		goto end;
+	}
+
+	if (!forced) {
+		type = ups->rot_alg != RSPAMD_UPSTREAM_UNDEF ? ups->rot_alg : default_type;
+	}
+	else {
+		type = default_type != RSPAMD_UPSTREAM_UNDEF ?
default_type : ups->rot_alg; + } + + if (type == RSPAMD_UPSTREAM_HASHED && (keylen == 0 || key == NULL)) { + /* Cannot use hashed rotation when no key is specified, switch to random */ + type = RSPAMD_UPSTREAM_RANDOM; + } + + switch (type) { + default: + case RSPAMD_UPSTREAM_RANDOM: + up = rspamd_upstream_get_random(ups, except); + break; + case RSPAMD_UPSTREAM_HASHED: + up = rspamd_upstream_get_hashed(ups, except, key, keylen); + break; + case RSPAMD_UPSTREAM_ROUND_ROBIN: + up = rspamd_upstream_get_round_robin(ups, except, TRUE); + break; + case RSPAMD_UPSTREAM_MASTER_SLAVE: + up = rspamd_upstream_get_round_robin(ups, except, FALSE); + break; + case RSPAMD_UPSTREAM_SEQUENTIAL: + if (ups->cur_elt >= ups->alive->len) { + ups->cur_elt = 0; + return NULL; + } + + up = g_ptr_array_index(ups->alive, ups->cur_elt++); + break; + } + +end: + if (up) { + up->checked++; + } + + return up; +} + +struct upstream * +rspamd_upstream_get(struct upstream_list *ups, + enum rspamd_upstream_rotation default_type, + const guchar *key, gsize keylen) +{ + return rspamd_upstream_get_common(ups, NULL, default_type, key, keylen, FALSE); +} + +struct upstream * +rspamd_upstream_get_forced(struct upstream_list *ups, + enum rspamd_upstream_rotation forced_type, + const guchar *key, gsize keylen) +{ + return rspamd_upstream_get_common(ups, NULL, forced_type, key, keylen, TRUE); +} + +struct upstream *rspamd_upstream_get_except(struct upstream_list *ups, + struct upstream *except, + enum rspamd_upstream_rotation default_type, + const guchar *key, gsize keylen) +{ + return rspamd_upstream_get_common(ups, except, default_type, key, keylen, FALSE); +} + +void rspamd_upstream_reresolve(struct upstream_ctx *ctx) +{ + GList *cur; + struct upstream *up; + + cur = ctx->upstreams->head; + + while (cur) { + up = cur->data; + REF_RETAIN(up); + rspamd_upstream_resolve_addrs(up->ls, up); + REF_RELEASE(up); + cur = g_list_next(cur); + } +} + +gpointer +rspamd_upstream_set_data(struct upstream *up, gpointer 
data) +{ + gpointer prev_data = up->data; + up->data = data; + + return prev_data; +} + +gpointer +rspamd_upstream_get_data(struct upstream *up) +{ + return up->data; +} + + +void rspamd_upstreams_foreach(struct upstream_list *ups, + rspamd_upstream_traverse_func cb, void *ud) +{ + struct upstream *up; + guint i; + + for (i = 0; i < ups->ups->len; i++) { + up = g_ptr_array_index(ups->ups, i); + + cb(up, i, ud); + } +} + +void rspamd_upstreams_set_limits(struct upstream_list *ups, + gdouble revive_time, + gdouble revive_jitter, + gdouble error_time, + gdouble dns_timeout, + guint max_errors, + guint dns_retransmits) +{ + struct upstream_limits *nlimits; + g_assert(ups != NULL); + + nlimits = rspamd_mempool_alloc(ups->ctx->pool, sizeof(*nlimits)); + memcpy(nlimits, ups->limits, sizeof(*nlimits)); + + if (!isnan(revive_time)) { + nlimits->revive_time = revive_time; + } + + if (!isnan(revive_jitter)) { + nlimits->revive_jitter = revive_jitter; + } + + if (!isnan(error_time)) { + nlimits->error_time = error_time; + } + + if (!isnan(dns_timeout)) { + nlimits->dns_timeout = dns_timeout; + } + + if (max_errors > 0) { + nlimits->max_errors = max_errors; + } + + if (dns_retransmits > 0) { + nlimits->dns_retransmits = dns_retransmits; + } + + ups->limits = nlimits; +} + +void rspamd_upstreams_add_watch_callback(struct upstream_list *ups, + enum rspamd_upstreams_watch_event events, + rspamd_upstream_watch_func func, + GFreeFunc dtor, + gpointer ud) +{ + struct upstream_list_watcher *nw; + + g_assert((events & RSPAMD_UPSTREAM_WATCH_ALL) != 0); + + nw = g_malloc(sizeof(*nw)); + nw->func = func; + nw->events_mask = events; + nw->ud = ud; + nw->dtor = dtor; + + DL_APPEND(ups->watchers, nw); +} + +struct upstream * +rspamd_upstream_ref(struct upstream *up) +{ + REF_RETAIN(up); + return up; +} + +void rspamd_upstream_unref(struct upstream *up) +{ + REF_RELEASE(up); +} diff --git a/src/libutil/upstream.h b/src/libutil/upstream.h new file mode 100644 index 0000000..22a020c --- 
/dev/null +++ b/src/libutil/upstream.h @@ -0,0 +1,344 @@ +/* + * Copyright 2023 Vsevolod Stakhov + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef UPSTREAM_H +#define UPSTREAM_H + +#include "config.h" +#include "util.h" +#include "rdns.h" +#include "ucl.h" + +#ifdef __cplusplus +extern "C" { +#endif + +/* Forward declaration */ +struct ev_loop; + +enum rspamd_upstream_rotation { + RSPAMD_UPSTREAM_RANDOM = 0, + RSPAMD_UPSTREAM_HASHED, + RSPAMD_UPSTREAM_ROUND_ROBIN, + RSPAMD_UPSTREAM_MASTER_SLAVE, + RSPAMD_UPSTREAM_SEQUENTIAL, + RSPAMD_UPSTREAM_UNDEF +}; + +enum rspamd_upstream_flag { + RSPAMD_UPSTREAM_FLAG_NORESOLVE = (1 << 0), + RSPAMD_UPSTREAM_FLAG_SRV_RESOLVE = (1 << 1), +}; + +struct rspamd_config; +/* Opaque upstream structures */ +struct upstream; +struct upstream_list; +struct upstream_ctx; + +/** + * Init upstreams library + * @param resolver + */ +struct upstream_ctx *rspamd_upstreams_library_init(void); + +/** + * Remove reference from upstreams library + */ +void rspamd_upstreams_library_unref(struct upstream_ctx *ctx); + +/** + * Configure attributes of upstreams library + * @param cfg + */ +void rspamd_upstreams_library_config(struct rspamd_config *cfg, + struct upstream_ctx *ctx, struct ev_loop *event_loop, + struct rdns_resolver *resolver); + +/** + * Upstream error logic + * 1. During error time we count upstream_ok and upstream_fail + * 2. 
If failcount is more than maxerrors then we mark upstream as unavailable for dead time
+ * 3. After dead time we mark upstream as alive and go to the step 1
+ * 4. If all upstreams are dead, marks every upstream as alive
+ */
+
+/**
+ * Add an error to an upstream
+ */
+void rspamd_upstream_fail(struct upstream *upstream, gboolean addr_failure, const gchar *reason);
+
+/**
+ * Increase upstream successes count
+ */
+void rspamd_upstream_ok(struct upstream *up);
+
+/**
+ * Set weight for an upstream
+ * @param up
+ */
+void rspamd_upstream_set_weight(struct upstream *up, guint weight);
+
+/**
+ * Create new list of upstreams
+ * @return
+ */
+struct upstream_list *rspamd_upstreams_create(struct upstream_ctx *ctx);
+
+/**
+ * Sets the flags of the upstream list; the value replaces the previously
+ * stored flags as a whole, it is not OR-ed into them
+ * @param ups
+ * @param flags
+ */
+void rspamd_upstreams_set_flags(struct upstream_list *ups,
+								enum rspamd_upstream_flag flags);
+
+/**
+ * Sets custom limits for upstreams
+ * This function allocates memory from the upstreams ctx pool and should
+ * not be called in cycles/constantly as this memory is likely persistent
+ * The current limits are copied first, so a NaN passed for any of the
+ * gdouble arguments, or 0 for any of the guint arguments, keeps the value
+ * that is currently in effect
+ * @param ups list to update, must not be NULL
+ * @param revive_time
+ * @param revive_jitter
+ * @param error_time
+ * @param dns_timeout
+ * @param max_errors
+ * @param dns_retransmits
+ */
+void rspamd_upstreams_set_limits(struct upstream_list *ups,
+								 gdouble revive_time,
+								 gdouble revive_jitter,
+								 gdouble error_time,
+								 gdouble dns_timeout,
+								 guint max_errors,
+								 guint dns_retransmits);
+
+/**
+ * Sets rotation policy for upstreams list
+ * @param ups
+ * @param rot
+ */
+void rspamd_upstreams_set_rotation(struct upstream_list *ups,
+								   enum rspamd_upstream_rotation rot);
+
+/**
+ * Destroy list of upstreams; passing NULL is allowed and does nothing
+ * @param ups
+ */
+void rspamd_upstreams_destroy(struct upstream_list *ups);
+
+/**
+ * Returns count of upstreams in a list
+ * @param ups
+ * @return
+ */
+gsize rspamd_upstreams_count(struct upstream_list *ups);
+
+/**
+ * Returns the number of upstreams in the list that are currently alive
+ * (NOTE(review): inferred from the function name, the implementation is
+ * not shown next to this declaration - confirm)
+ * @param ups
+ * @return
+ */
+gsize
rspamd_upstreams_alive(struct upstream_list *ups); + +enum rspamd_upstream_parse_type { + RSPAMD_UPSTREAM_PARSE_DEFAULT = 0, + RSPAMD_UPSTREAM_PARSE_NAMESERVER, +}; + +/** + * Add upstream from the string + * @param ups upstream list + * @param str string in format "name[:port[:priority]]" + * @param def_port default port number + * @param data optional userdata + * @return TRUE if upstream has been added + */ +gboolean rspamd_upstreams_add_upstream(struct upstream_list *ups, const gchar *str, + guint16 def_port, enum rspamd_upstream_parse_type parse_type, + void *data); + +/** + * Add multiple upstreams from comma, semicolon or space separated line + * @param ups upstream list + * @param str string in format "(<ups>([<sep>+]<ups>)*)+" + * @param def_port default port number + * @param data optional userdata + * @return TRUE if **any** of upstreams has been added + */ +gboolean rspamd_upstreams_parse_line(struct upstream_list *ups, + const gchar *str, guint16 def_port, void *data); + + +gboolean rspamd_upstreams_parse_line_len(struct upstream_list *ups, + const gchar *str, gsize len, + guint16 def_port, + void *data); + +/** + * Parse upstreams list from the UCL object + * @param ups + * @param in + * @param def_port + * @param data + * @return + */ +gboolean rspamd_upstreams_from_ucl(struct upstream_list *ups, + const ucl_object_t *in, guint16 def_port, void *data); + + +typedef void (*rspamd_upstream_traverse_func)(struct upstream *up, guint idx, + void *ud); + +/** + * Traverse upstreams list calling the function specified + * @param ups + * @param cb + * @param ud + */ +void rspamd_upstreams_foreach(struct upstream_list *ups, + rspamd_upstream_traverse_func cb, void *ud); + +enum rspamd_upstreams_watch_event { + RSPAMD_UPSTREAM_WATCH_SUCCESS = 1u << 0, + RSPAMD_UPSTREAM_WATCH_FAILURE = 1u << 1, + RSPAMD_UPSTREAM_WATCH_OFFLINE = 1u << 2, + RSPAMD_UPSTREAM_WATCH_ONLINE = 1u << 3, + RSPAMD_UPSTREAM_WATCH_ALL = (1u << 0) | (1u << 1) | (1u << 2) | (1u << 3), +}; + 
+typedef void (*rspamd_upstream_watch_func)(struct upstream *up, + enum rspamd_upstreams_watch_event event, + guint cur_errors, + void *ud); + +/** + * Adds new watcher to the upstreams list + * @param ups + * @param events + * @param func + * @param ud + */ +void rspamd_upstreams_add_watch_callback(struct upstream_list *ups, + enum rspamd_upstreams_watch_event events, + rspamd_upstream_watch_func func, + GFreeFunc free_func, + gpointer ud); + +/** + * Returns the next IP address of the upstream (internal rotation) + * @param up + * @return + */ +rspamd_inet_addr_t *rspamd_upstream_addr_next(struct upstream *up); + +/** + * Returns the current IP address of the upstream + * @param up + * @return + */ +rspamd_inet_addr_t *rspamd_upstream_addr_cur(const struct upstream *up); + +/** + * Add custom address for an upstream (ownership of addr is transferred to upstream) + * @param up + * @return + */ +gboolean rspamd_upstream_add_addr(struct upstream *up, + rspamd_inet_addr_t *addr); + +/** + * Returns the symbolic name of the upstream + * @param up + * @return + */ +const gchar *rspamd_upstream_name(struct upstream *up); + +/** + * Returns the port of the current address for the upstream + * @param up + * @return + */ +gint rspamd_upstream_port(struct upstream *up); + +/** + * Sets opaque user data associated with this upstream + * @param up + * @param data + * @return old data + */ +gpointer rspamd_upstream_set_data(struct upstream *up, gpointer data); + +/** + * Gets opaque user data associated with this upstream + * @param up + * @return + */ +gpointer rspamd_upstream_get_data(struct upstream *up); + +/** + * Get new upstream from the list + * @param ups upstream list + * @param type type of rotation algorithm, for `RSPAMD_UPSTREAM_HASHED` it is required to specify `key` and `keylen` as arguments + * @return + */ +struct upstream *rspamd_upstream_get(struct upstream_list *ups, + enum rspamd_upstream_rotation default_type, + const guchar *key, gsize keylen); + +/** + 
* Get new upstream from the list + * @param ups upstream list + * @param type type of rotation algorithm, for `RSPAMD_UPSTREAM_HASHED` it is required to specify `key` and `keylen` as arguments + * @return + */ +struct upstream *rspamd_upstream_get_forced(struct upstream_list *ups, + enum rspamd_upstream_rotation forced_type, + const guchar *key, gsize keylen); + +/** + * Get new upstream from the list excepting the upstream specified + * @param ups upstream list + * @param type type of rotation algorithm, for `RSPAMD_UPSTREAM_HASHED` it is required to specify `key` and `keylen` as arguments + * @return + */ +struct upstream *rspamd_upstream_get_except(struct upstream_list *ups, + struct upstream *except, + enum rspamd_upstream_rotation default_type, + const guchar *key, gsize keylen); + +/** + * Re-resolve addresses for all upstreams registered + */ +void rspamd_upstream_reresolve(struct upstream_ctx *ctx); + +/** + * Share ownership on upstream + * @param up + * @return + */ +struct upstream *rspamd_upstream_ref(struct upstream *up); +/** + * Unshare ownership on upstream + * @param up + */ +void rspamd_upstream_unref(struct upstream *up); + +#ifdef __cplusplus +} +#endif + +#endif /* UPSTREAM_H */ diff --git a/src/libutil/uthash_strcase.h b/src/libutil/uthash_strcase.h new file mode 100644 index 0000000..86075ee --- /dev/null +++ b/src/libutil/uthash_strcase.h @@ -0,0 +1,91 @@ +/*- + * Copyright 2016 Vsevolod Stakhov + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+#ifndef UTHASH_STRCASE_H_
+#define UTHASH_STRCASE_H_
+
+#ifdef UTHASH_H
+#error Invalid include order: uthash is already included
+#endif
+
+#include "libcryptobox/cryptobox.h"
+#include "libutil/util.h"
+
+/* Utils for uthash tuning */
+#ifndef HASH_CASELESS
+#define HASH_FUNCTION(key, keylen, num_bkts, hashv, bkt)                                      \
+	do {                                                                                      \
+		hashv = (__typeof(hashv)) rspamd_cryptobox_fast_hash(key, keylen, rspamd_hash_seed()); \
+		bkt = (hashv) & ((num_bkts) - 1);                                                     \
+	} while (0)
+
+#define HASH_KEYCMP(a, b, len) memcmp(a, b, len)
+#else
+/*
+ * Caseless variant: the key is consumed in 8-byte groups and every byte is
+ * mapped through lc_map before it is fed to the hash, so that keys which
+ * HASH_KEYCMP (rspamd_lc_cmp) treats as equal also produce equal hashes.
+ * Each of the eight bytes of a group must be mapped into its own slot;
+ * writing the mapped bytes 5..8 into slots c1..c4 would discard the first
+ * four bytes and leave the last four unmapped.
+ * The trailing (keylen % 8) bytes are mapped into a zeroed group and hashed
+ * as one final update.
+ */
+#define HASH_FUNCTION(key, keylen, num_bkts, hashv, bkt)                                        \
+	do {                                                                                        \
+		unsigned _len = (keylen);                                                               \
+		rspamd_cryptobox_fast_hash_state_t _hst;                                                \
+		unsigned _leftover = (keylen) % 8;                                                      \
+		unsigned _fp, _i;                                                                       \
+		const uint8_t *_s = (const uint8_t *) (key);                                            \
+		union {                                                                                 \
+			struct {                                                                            \
+				unsigned char c1, c2, c3, c4, c5, c6, c7, c8;                                   \
+			} c;                                                                                \
+			uint64_t pp;                                                                        \
+		} _u;                                                                                   \
+		_fp = _len - _leftover;                                                                 \
+		rspamd_cryptobox_fast_hash_init(&_hst, rspamd_hash_seed());                             \
+		for (_i = 0; _i != _fp; _i += 8) {                                                      \
+			_u.c.c1 = _s[_i], _u.c.c2 = _s[_i + 1], _u.c.c3 = _s[_i + 2], _u.c.c4 = _s[_i + 3]; \
+			_u.c.c5 = _s[_i + 4], _u.c.c6 = _s[_i + 5], _u.c.c7 = _s[_i + 6], _u.c.c8 = _s[_i + 7]; \
+			_u.c.c1 = lc_map[_u.c.c1];                                                          \
+			_u.c.c2 = lc_map[_u.c.c2];                                                          \
+			_u.c.c3 = lc_map[_u.c.c3];                                                          \
+			_u.c.c4 = lc_map[_u.c.c4];                                                          \
+			_u.c.c5 = lc_map[_u.c.c5];                                                          \
+			_u.c.c6 = lc_map[_u.c.c6];                                                          \
+			_u.c.c7 = lc_map[_u.c.c7];                                                          \
+			_u.c.c8 = lc_map[_u.c.c8];                                                          \
+			rspamd_cryptobox_fast_hash_update(&_hst, &_u, sizeof(_u));                          \
+		}                                                                                       \
+		_u.pp = 0;                                                                              \
+		switch (_leftover) {                                                                    \
+		case 7:                                                                                 \
+			/* fallthrough */ _u.c.c7 = lc_map[(unsigned char) _s[_i++]];                       \
+		case 6:                                                                                 \
+			/* fallthrough */ _u.c.c6 = lc_map[(unsigned char) _s[_i++]];                       \
+		case 5:                                                                                 \
+			/* fallthrough */ _u.c.c5 = lc_map[(unsigned char) _s[_i++]];                       \
+		case 4:                                                                                 \
+			/* fallthrough */ _u.c.c4 = lc_map[(unsigned char) _s[_i++]];                       \
+		case 3:                                                                                 \
+			/* fallthrough */ _u.c.c3 = lc_map[(unsigned char) _s[_i++]];                       \
+		case 2:                                                                                 \
+			/* fallthrough */ _u.c.c2 = lc_map[(unsigned char) _s[_i++]];                       \
+		case 1:                                                                                 \
+			/* fallthrough */ _u.c.c1 = lc_map[(unsigned char) _s[_i]];                         \
+			rspamd_cryptobox_fast_hash_update(&_hst, &_u, sizeof(_u));                          \
+			break;                                                                              \
+		}                                                                                       \
+		hashv = (__typeof(hashv)) rspamd_cryptobox_fast_hash_final(&_hst);                      \
+		bkt = (hashv) & ((num_bkts) - 1);                                                       \
+	} while (0)
+#define HASH_KEYCMP(a, b, len) rspamd_lc_cmp(a, b, len)
+#endif
+
+#include "uthash.h"
+
+#endif /* UTHASH_STRCASE_H_ */
diff --git a/src/libutil/util.c b/src/libutil/util.c
new file mode 100644
index 0000000..04200e3
--- /dev/null
+++ b/src/libutil/util.c
@@ -0,0 +1,2746 @@
+/*
+ * Copyright 2024 Vsevolod Stakhov
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */ +#include "config.h" +#include "util.h" +#include "unix-std.h" + +#include "ottery.h" +#include "cryptobox.h" +#include "contrib/libev/ev.h" + +#ifdef HAVE_TERMIOS_H +#include <termios.h> +#endif +#ifdef HAVE_READPASSPHRASE_H +#include <readpassphrase.h> +#endif +/* libutil */ +#ifdef HAVE_LIBUTIL_H +#include <libutil.h> +#endif +#ifdef __APPLE__ +#include <mach/mach_time.h> +#include <mach/mach_init.h> +#include <mach/thread_act.h> +#include <mach/mach_port.h> +#endif +/* poll */ +#ifdef HAVE_POLL_H +#include <poll.h> +#endif + +#ifdef HAVE_SIGINFO_H +#include <siginfo.h> +#endif +/* sys/wait */ +#ifdef HAVE_SYS_WAIT_H +#include <sys/wait.h> +#endif +/* sys/resource.h */ +#ifdef HAVE_SYS_RESOURCE_H +#include <sys/resource.h> +#endif +#ifdef HAVE_RDTSC +#ifdef __x86_64__ +#include <x86intrin.h> +#endif +#endif + +#include <math.h> /* for pow */ +#include <glob.h> /* in fact, we require this file ultimately */ + +#include "zlib.h" +#include "contrib/uthash/utlist.h" +#include "blas-config.h" + +/* Check log messages intensity once per minute */ +#define CHECK_TIME 60 +/* More than 2 log messages per second */ +#define BUF_INTENSITY 2 +/* Default connect timeout for sync sockets */ +#define CONNECT_TIMEOUT 3 + +/* + * Should be defined in a single point + */ +const struct rspamd_controller_pbkdf pbkdf_list[] = { + {.name = "PBKDF2-blake2b", + .alias = "pbkdf2", + .description = "standard CPU intensive \"slow\" KDF using blake2b hash function", + .type = RSPAMD_CRYPTOBOX_PBKDF2, + .id = RSPAMD_PBKDF_ID_V1, + .complexity = 16000, + .salt_len = 20, + .key_len = rspamd_cryptobox_HASHBYTES / 2}, + {.name = "Catena-Butterfly", + .alias = "catena", + .description = "modern CPU and memory intensive KDF", + .type = RSPAMD_CRYPTOBOX_CATENA, + .id = RSPAMD_PBKDF_ID_V2, + .complexity = 10, + .salt_len = 20, + .key_len = rspamd_cryptobox_HASHBYTES / 2}}; + +gint rspamd_socket_nonblocking(gint fd) +{ + gint ofl; + + ofl = fcntl(fd, F_GETFL, 0); + + if (fcntl(fd, F_SETFL, ofl 
| O_NONBLOCK) == -1) { + return -1; + } + return 0; +} + +gint rspamd_socket_blocking(gint fd) +{ + gint ofl; + + ofl = fcntl(fd, F_GETFL, 0); + + if (fcntl(fd, F_SETFL, ofl & (~O_NONBLOCK)) == -1) { + return -1; + } + return 0; +} + +gint rspamd_socket_poll(gint fd, gint timeout, short events) +{ + gint r; + struct pollfd fds[1]; + + fds->fd = fd; + fds->events = events; + fds->revents = 0; + while ((r = poll(fds, 1, timeout)) < 0) { + if (errno != EINTR) { + break; + } + } + + return r; +} + +gint rspamd_socket_create(gint af, gint type, gint protocol, gboolean async) +{ + gint fd; + + fd = socket(af, type, protocol); + if (fd == -1) { + return -1; + } + + /* Set close on exec */ + if (fcntl(fd, F_SETFD, FD_CLOEXEC) == -1) { + close(fd); + return -1; + } + if (async) { + if (rspamd_socket_nonblocking(fd) == -1) { + close(fd); + return -1; + } + } + + return fd; +} + +static gint +rspamd_inet_socket_create(gint type, struct addrinfo *addr, gboolean is_server, + gboolean async, GList **list) +{ + gint fd = -1, r, on = 1, s_error; + struct addrinfo *cur; + gpointer ptr; + socklen_t optlen; + + cur = addr; + while (cur) { + /* Create socket */ + fd = rspamd_socket_create(cur->ai_family, type, cur->ai_protocol, TRUE); + if (fd == -1) { + goto out; + } + + if (is_server) { + (void) setsockopt(fd, SOL_SOCKET, SO_REUSEADDR, (const void *) &on, + sizeof(gint)); +#ifdef HAVE_IPV6_V6ONLY + if (cur->ai_family == AF_INET6) { + setsockopt(fd, IPPROTO_IPV6, IPV6_V6ONLY, (const void *) &on, + sizeof(gint)); + } +#endif + r = bind(fd, cur->ai_addr, cur->ai_addrlen); + } + else { + r = connect(fd, cur->ai_addr, cur->ai_addrlen); + } + + if (r == -1) { + if (errno != EINPROGRESS) { + goto out; + } + if (!async) { + /* Try to poll */ + if (rspamd_socket_poll(fd, CONNECT_TIMEOUT * 1000, + POLLOUT) <= 0) { + errno = ETIMEDOUT; + goto out; + } + else { + /* Make synced again */ + if (rspamd_socket_blocking(fd) < 0) { + goto out; + } + } + } + } + else { + /* Still need to check 
SO_ERROR on socket */ + optlen = sizeof(s_error); + + if (getsockopt(fd, SOL_SOCKET, SO_ERROR, (void *) &s_error, &optlen) != -1) { + if (s_error) { + errno = s_error; + goto out; + } + } + } + if (list == NULL) { + /* Go out immediately */ + break; + } + else if (fd != -1) { + ptr = GINT_TO_POINTER(fd); + *list = g_list_prepend(*list, ptr); + cur = cur->ai_next; + continue; + } + out: + if (fd != -1) { + close(fd); + } + fd = -1; + cur = cur->ai_next; + } + + return (fd); +} + +gint rspamd_socket_tcp(struct addrinfo *addr, gboolean is_server, gboolean async) +{ + return rspamd_inet_socket_create(SOCK_STREAM, addr, is_server, async, NULL); +} + +gint rspamd_socket_udp(struct addrinfo *addr, gboolean is_server, gboolean async) +{ + return rspamd_inet_socket_create(SOCK_DGRAM, addr, is_server, async, NULL); +} + +gint rspamd_socket_unix(const gchar *path, + struct sockaddr_un *addr, + gint type, + gboolean is_server, + gboolean async) +{ + + socklen_t optlen; + gint fd = -1, s_error, r, serrno, on = 1; + struct stat st; + + if (path == NULL) + return -1; + + addr->sun_family = AF_UNIX; + + rspamd_strlcpy(addr->sun_path, path, sizeof(addr->sun_path)); +#ifdef FREEBSD + addr->sun_len = SUN_LEN(addr); +#endif + + if (is_server) { + /* Unlink socket if it exists already */ + if (lstat(addr->sun_path, &st) != -1) { + if (S_ISSOCK(st.st_mode)) { + if (unlink(addr->sun_path) == -1) { + goto out; + } + } + else { + goto out; + } + } + } + fd = socket(PF_LOCAL, type, 0); + + if (fd == -1) { + return -1; + } + + if (rspamd_socket_nonblocking(fd) < 0) { + goto out; + } + + /* Set close on exec */ + if (fcntl(fd, F_SETFD, FD_CLOEXEC) == -1) { + goto out; + } + if (is_server) { + (void) setsockopt(fd, SOL_SOCKET, SO_REUSEADDR, (const void *) &on, + sizeof(gint)); + r = bind(fd, (struct sockaddr *) addr, SUN_LEN(addr)); + } + else { + r = connect(fd, (struct sockaddr *) addr, SUN_LEN(addr)); + } + + if (r == -1) { + if (errno != EINPROGRESS) { + goto out; + } + if (!async) { + /* 
Try to poll */
+			if (rspamd_socket_poll(fd, CONNECT_TIMEOUT * 1000, POLLOUT) <= 0) {
+				errno = ETIMEDOUT;
+				goto out;
+			}
+			else {
+				/* Make synced again */
+				if (rspamd_socket_blocking(fd) < 0) {
+					goto out;
+				}
+			}
+		}
+	}
+	else {
+		/* Still need to check SO_ERROR on socket */
+		optlen = sizeof(s_error);
+
+		if (getsockopt(fd, SOL_SOCKET, SO_ERROR, (void *) &s_error, &optlen) != -1) {
+			if (s_error) {
+				errno = s_error;
+				goto out;
+			}
+		}
+	}
+
+
+	return (fd);
+
+out:
+	serrno = errno;
+	if (fd != -1) {
+		close(fd);
+	}
+	errno = serrno;
+	return (-1);
+}
+
+static int
+rspamd_prefer_v4_hack(const struct addrinfo *a1, const struct addrinfo *a2)
+{
+	return a1->ai_addr->sa_family - a2->ai_addr->sa_family;
+}
+
+/**
+ * Make a universal socket
+ *
+ * A `credits` value starting with '/' is treated as a unix socket path;
+ * anything else is passed to getaddrinfo() together with `port`.
+ * @param credits host, ip or path to unix socket
+ * @param port port (used for network sockets)
+ * @param type socket type, passed on as the getaddrinfo() ai_socktype hint
+ * @param async make this socket async
+ * @param is_server make this socket as server socket
+ * @param try_resolve try name resolution for a socket (BLOCKING)
+ * @return socket descriptor or -1 on failure; for a client unix socket errno
+ * is ENOENT if the path cannot be stat()-ed and EINVAL if it is not a socket
+ */
+gint rspamd_socket(const gchar *credits, guint16 port,
+				   gint type, gboolean async, gboolean is_server, gboolean try_resolve)
+{
+	struct sockaddr_un un;
+	struct stat st;
+	struct addrinfo hints, *res;
+	gint r;
+	gchar portbuf[8];
+
+	if (*credits == '/') {
+		if (is_server) {
+			return rspamd_socket_unix(credits, &un, type, is_server, async);
+		}
+		else {
+			r = stat(credits, &st);
+			if (r == -1) {
+				/* Unix socket doesn't exist, it must be created first */
+				errno = ENOENT;
+				return -1;
+			}
+			else {
+				/*
+				 * The file type is an enumerated value inside st_mode, not
+				 * a bit flag: S_IFSOCK shares bits with S_IFREG and
+				 * S_IFDIR, so a plain `& S_IFSOCK` test lets regular files
+				 * and directories through. S_ISSOCK compares the whole
+				 * type field.
+				 */
+				if (!S_ISSOCK(st.st_mode)) {
+					/* Path is not valid socket */
+					errno = EINVAL;
+					return -1;
+				}
+				else {
+					return rspamd_socket_unix(credits,
+											  &un,
+											  type,
+											  is_server,
+											  async);
+				}
+			}
+		}
+	}
+	else {
+		/* TCP related part */
+		memset(&hints, 0, sizeof(hints));
+		hints.ai_family = AF_UNSPEC; /* Allow IPv4 or IPv6 */
+		hints.ai_socktype = type;    /* Type of the socket */
+		hints.ai_flags = is_server ? AI_PASSIVE : 0;
+		hints.ai_protocol = 0; /* Any protocol */
+		hints.ai_canonname = NULL;
+		hints.ai_addr = NULL;
+		hints.ai_next = NULL;
+
+		if (!try_resolve) {
+			hints.ai_flags |= AI_NUMERICHOST | AI_NUMERICSERV;
+		}
+
+		rspamd_snprintf(portbuf, sizeof(portbuf), "%d", (int) port);
+		if ((r = getaddrinfo(credits, portbuf, &hints, &res)) == 0) {
+			LL_SORT2(res, rspamd_prefer_v4_hack, ai_next);
+			r = rspamd_inet_socket_create(type, res, is_server, async, NULL);
+			freeaddrinfo(res);
+			return r;
+		}
+		else {
+			return -1;
+		}
+	}
+}
+
+/*
+ * Creates a pair of connected AF_LOCAL sockets with FD_CLOEXEC set on both.
+ * Despite its name, `af` is used as the socket type: it is compared with
+ * SOCK_SEQPACKET and passed as the type argument of socketpair().
+ *
+ * NOTE(review): the function is declared gboolean, yet the socketpair()
+ * failure path returns -1, which is non-zero and therefore tests as true in
+ * `if (!rspamd_socketpair(...))`, while the fcntl() failure path returns
+ * FALSE. Callers are not visible here, so the value is left as is; confirm
+ * how callers test the result before unifying both paths to FALSE.
+ */
+gboolean
+rspamd_socketpair(gint pair[2], gint af)
+{
+	gint r = -1, serrno;
+
+#ifdef HAVE_SOCK_SEQPACKET
+	if (af == SOCK_SEQPACKET) {
+		r = socketpair(AF_LOCAL, SOCK_SEQPACKET, 0, pair);
+
+		if (r == -1) {
+			r = socketpair(AF_LOCAL, SOCK_DGRAM, 0, pair);
+		}
+	}
+#endif
+	if (r == -1) {
+		r = socketpair(AF_LOCAL, af, 0, pair);
+	}
+
+	if (r == -1) {
+		return -1;
+	}
+
+	/* Set close on exec */
+	if (fcntl(pair[0], F_SETFD, FD_CLOEXEC) == -1) {
+		goto out;
+	}
+	if (fcntl(pair[1], F_SETFD, FD_CLOEXEC) == -1) {
+		goto out;
+	}
+
+	return TRUE;
+
+out:
+	serrno = errno;
+	close(pair[0]);
+	close(pair[1]);
+	errno = serrno;
+
+	return FALSE;
+}
+
+#ifdef HAVE_SA_SIGINFO
+void rspamd_signals_init(struct sigaction *signals, void (*sig_handler)(gint,
+																		 siginfo_t *,
+																		 void *))
+#else
+void rspamd_signals_init(struct sigaction *signals, void (*sig_handler)(gint))
+#endif
+{
+	struct sigaction sigpipe_act;
+	/* Setting up signal handlers */
+	/* SIGUSR1 - reopen config file */
+	/* SIGUSR2 - worker is ready for accept */
+	sigemptyset(&signals->sa_mask);
+	sigaddset(&signals->sa_mask, SIGTERM);
+	sigaddset(&signals->sa_mask, SIGINT);
+	sigaddset(&signals->sa_mask, SIGHUP);
+	sigaddset(&signals->sa_mask, SIGCHLD);
+	sigaddset(&signals->sa_mask, SIGUSR1);
+	sigaddset(&signals->sa_mask, SIGUSR2);
+	sigaddset(&signals->sa_mask, SIGALRM);
+#ifdef SIGPOLL
+	sigaddset(&signals->sa_mask, SIGPOLL);
+#endif
+#ifdef SIGIO
+	sigaddset(&signals->sa_mask,
SIGIO); +#endif + +#ifdef HAVE_SA_SIGINFO + signals->sa_flags = SA_SIGINFO; + signals->sa_handler = NULL; + signals->sa_sigaction = sig_handler; +#else + signals->sa_handler = sig_handler; + signals->sa_flags = 0; +#endif + sigaction(SIGTERM, signals, NULL); + sigaction(SIGINT, signals, NULL); + sigaction(SIGHUP, signals, NULL); + sigaction(SIGCHLD, signals, NULL); + sigaction(SIGUSR1, signals, NULL); + sigaction(SIGUSR2, signals, NULL); + sigaction(SIGALRM, signals, NULL); +#ifdef SIGPOLL + sigaction(SIGPOLL, signals, NULL); +#endif +#ifdef SIGIO + sigaction(SIGIO, signals, NULL); +#endif + + /* Ignore SIGPIPE as we handle write errors manually */ + sigemptyset(&sigpipe_act.sa_mask); + sigaddset(&sigpipe_act.sa_mask, SIGPIPE); + sigpipe_act.sa_handler = SIG_IGN; + sigpipe_act.sa_flags = 0; + sigaction(SIGPIPE, &sigpipe_act, NULL); +} + +#ifndef HAVE_SETPROCTITLE + +#ifdef LINUX +static gchar *title_buffer = NULL; +static size_t title_buffer_size = 0; +static gchar *title_progname, *title_progname_full; +gchar **old_environ = NULL; + +static void +rspamd_title_dtor(gpointer d) +{ + /* Restore old environment */ + if (old_environ != NULL) { + environ = old_environ; + } + + gchar **env = (gchar **) d; + guint i; + + for (i = 0; env[i] != NULL; i++) { + g_free(env[i]); + } + + g_free(env); +} +#endif /* ifdef LINUX */ + +#endif /* ifndef HAVE_SETPROCTITLE */ + +gint rspamd_init_title(rspamd_mempool_t *pool, + gint argc, gchar *argv[], gchar *envp[]) +{ +#if defined(LINUX) && !defined(HAVE_SETPROCTITLE) + gchar *begin_of_buffer = 0, *end_of_buffer = 0; + gint i; + + for (i = 0; i < argc; ++i) { + if (!begin_of_buffer) { + begin_of_buffer = argv[i]; + } + if (!end_of_buffer || end_of_buffer + 1 == argv[i]) { + end_of_buffer = argv[i] + strlen(argv[i]); + } + } + + for (i = 0; envp[i]; ++i) { + if (!begin_of_buffer) { + begin_of_buffer = envp[i]; + } + if (!end_of_buffer || end_of_buffer + 1 == envp[i]) { + end_of_buffer = envp[i] + strlen(envp[i]); + } + } + + if 
(!end_of_buffer) { + return 0; + } + + gchar **new_environ = g_malloc((i + 1) * sizeof(envp[0])); + + for (i = 0; envp[i]; ++i) { + new_environ[i] = g_strdup(envp[i]); + } + + new_environ[i] = NULL; + + if (program_invocation_name) { + title_progname_full = g_strdup(program_invocation_name); + + gchar *p = strrchr(title_progname_full, '/'); + + if (p) { + title_progname = p + 1; + } + else { + title_progname = title_progname_full; + } + + program_invocation_name = title_progname_full; + program_invocation_short_name = title_progname; + } + + old_environ = environ; + environ = new_environ; + title_buffer = begin_of_buffer; + title_buffer_size = end_of_buffer - begin_of_buffer; + + rspamd_mempool_add_destructor(pool, + rspamd_title_dtor, + new_environ); +#endif + + return 0; +} + +gint rspamd_setproctitle(const gchar *fmt, ...) +{ +#ifdef HAVE_SETPROCTITLE + if (fmt) { + static char titlebuf[4096]; + va_list ap; + + va_start(ap, fmt); + rspamd_vsnprintf(titlebuf, sizeof(titlebuf), fmt, ap); + va_end(ap); + + setproctitle("%s", titlebuf); + } +#else +#if defined(LINUX) + if (!title_buffer || !title_buffer_size) { + errno = ENOMEM; + return -1; + } + + memset(title_buffer, '\0', title_buffer_size); + + ssize_t written; + + if (fmt) { + va_list ap; + + written = rspamd_snprintf(title_buffer, + title_buffer_size, + "%s: ", + title_progname); + if (written < 0 || (size_t) written >= title_buffer_size) + return -1; + + va_start(ap, fmt); + rspamd_vsnprintf(title_buffer + written, + title_buffer_size - written, + fmt, + ap); + va_end(ap); + } + else { + written = rspamd_snprintf(title_buffer, + title_buffer_size, + "%s", + title_progname); + if (written < 0 || (size_t) written >= title_buffer_size) + return -1; + } + + written = strlen(title_buffer); + memset(title_buffer + written, '\0', title_buffer_size - written); +#elif defined(__APPLE__) + /* OSX is broken, ignore this brain damaged system */ +#else + /* Last resort (usually broken, but eh...) 
*/ + GString *dest; + va_list ap; + + dest = g_string_new(""); + va_start(ap, fmt); + rspamd_vprintf_gstring(dest, fmt, ap); + va_end(ap); + + g_set_prgname(dest->str); + g_string_free(dest, TRUE); + +#endif /* defined(LINUX) */ + +#endif /* HAVE_SETPROCTITLE */ + return 0; +} + + +#ifndef HAVE_PIDFILE +static gint _rspamd_pidfile_remove(rspamd_pidfh_t *pfh, gint freeit); + +static gint +rspamd_pidfile_verify(rspamd_pidfh_t *pfh) +{ + struct stat sb; + + if (pfh == NULL || pfh->pf_fd == -1) + return (-1); + /* + * Check remembered descriptor. + */ + if (fstat(pfh->pf_fd, &sb) == -1) + return (errno); + if (sb.st_dev != pfh->pf_dev || sb.st_ino != pfh->pf_ino) + return -1; + return 0; +} + +static gint +rspamd_pidfile_read(const gchar *path, pid_t *pidptr) +{ + gchar buf[16], *endptr; + gint error, fd, i; + + fd = open(path, O_RDONLY); + if (fd == -1) + return (errno); + + i = read(fd, buf, sizeof(buf) - 1); + error = errno; /* Remember errno in case close() wants to change it. */ + close(fd); + if (i == -1) + return error; + else if (i == 0) + return EAGAIN; + buf[i] = '\0'; + + *pidptr = strtol(buf, &endptr, 10); + if (endptr != &buf[i]) + return EINVAL; + + return 0; +} + +rspamd_pidfh_t * +rspamd_pidfile_open(const gchar *path, mode_t mode, pid_t *pidptr) +{ + rspamd_pidfh_t *pfh; + struct stat sb; + gint error, fd, len, count; + struct timespec rqtp; + + pfh = g_malloc(sizeof(*pfh)); + if (pfh == NULL) + return NULL; + + if (path == NULL) + len = snprintf(pfh->pf_path, + sizeof(pfh->pf_path), + "/var/run/%s.pid", + g_get_prgname()); + else + len = snprintf(pfh->pf_path, sizeof(pfh->pf_path), "%s", path); + if (len >= (gint) sizeof(pfh->pf_path)) { + g_free(pfh); + errno = ENAMETOOLONG; + return NULL; + } + + /* + * Open the PID file and obtain exclusive lock. + * We truncate PID file here only to remove old PID immediately, + * PID file will be truncated again in pidfile_write(), so + * pidfile_write() can be called multiple times. 
+ */ + fd = open(pfh->pf_path, O_WRONLY | O_CREAT | O_TRUNC | O_NONBLOCK, mode); + rspamd_file_lock(fd, TRUE); + if (fd == -1) { + count = 0; + rqtp.tv_sec = 0; + rqtp.tv_nsec = 5000000; + if (errno == EWOULDBLOCK && pidptr != NULL) { + again: + errno = rspamd_pidfile_read(pfh->pf_path, pidptr); + if (errno == 0) + errno = EEXIST; + else if (errno == EAGAIN) { + if (++count <= 3) { + nanosleep(&rqtp, 0); + goto again; + } + } + } + g_free(pfh); + return NULL; + } + /* + * Remember file information, so in pidfile_write() we are sure we write + * to the proper descriptor. + */ + if (fstat(fd, &sb) == -1) { + error = errno; + unlink(pfh->pf_path); + close(fd); + g_free(pfh); + errno = error; + return NULL; + } + + pfh->pf_fd = fd; + pfh->pf_dev = sb.st_dev; + pfh->pf_ino = sb.st_ino; + + return pfh; +} + +gint rspamd_pidfile_write(rspamd_pidfh_t *pfh) +{ + gchar pidstr[16]; + gint error, fd; + + /* + * Check remembered descriptor, so we don't overwrite some other + * file if pidfile was closed and descriptor reused. + */ + errno = rspamd_pidfile_verify(pfh); + if (errno != 0) { + /* + * Don't close descriptor, because we are not sure if it's ours. + */ + return -1; + } + fd = pfh->pf_fd; + + /* + * Truncate PID file, so multiple calls of pidfile_write() are allowed. 
+ */ + if (ftruncate(fd, 0) == -1) { + error = errno; + _rspamd_pidfile_remove(pfh, 0); + errno = error; + return -1; + } + + rspamd_snprintf(pidstr, sizeof(pidstr), "%P", getpid()); + if (pwrite(fd, pidstr, strlen(pidstr), 0) != (ssize_t) strlen(pidstr)) { + error = errno; + _rspamd_pidfile_remove(pfh, 0); + errno = error; + return -1; + } + + return 0; +} + +gint rspamd_pidfile_close(rspamd_pidfh_t *pfh) +{ + gint error; + + error = rspamd_pidfile_verify(pfh); + if (error != 0) { + errno = error; + return -1; + } + + if (close(pfh->pf_fd) == -1) + error = errno; + g_free(pfh); + if (error != 0) { + errno = error; + return -1; + } + return 0; +} + +static gint +_rspamd_pidfile_remove(rspamd_pidfh_t *pfh, gint freeit) +{ + gint error; + + error = rspamd_pidfile_verify(pfh); + if (error != 0) { + errno = error; + return -1; + } + + if (unlink(pfh->pf_path) == -1) + error = errno; + if (!rspamd_file_unlock(pfh->pf_fd, FALSE)) { + if (error == 0) + error = errno; + } + if (close(pfh->pf_fd) == -1) { + if (error == 0) + error = errno; + } + if (freeit) + g_free(pfh); + else + pfh->pf_fd = -1; + if (error != 0) { + errno = error; + return -1; + } + return 0; +} + +gint rspamd_pidfile_remove(rspamd_pidfh_t *pfh) +{ + + return (_rspamd_pidfile_remove(pfh, 1)); +} +#endif + +/* Replace %r with rcpt value and %f with from value, new string is allocated in pool */ +gchar * +resolve_stat_filename(rspamd_mempool_t *pool, + gchar *pattern, + gchar *rcpt, + gchar *from) +{ + gint need_to_format = 0, len = 0; + gint rcptlen, fromlen; + gchar *c = pattern, *new, *s; + + if (rcpt) { + rcptlen = strlen(rcpt); + } + else { + rcptlen = 0; + } + + if (from) { + fromlen = strlen(from); + } + else { + fromlen = 0; + } + + /* Calculate length */ + while (*c++) { + if (*c == '%' && *(c + 1) == 'r') { + len += rcptlen; + c += 2; + need_to_format = 1; + continue; + } + else if (*c == '%' && *(c + 1) == 'f') { + len += fromlen; + c += 2; + need_to_format = 1; + continue; + } + len++; + } + + 
/* Do not allocate extra memory if we do not need to format string */ + if (!need_to_format) { + return pattern; + } + + /* Allocate new string */ + new = rspamd_mempool_alloc(pool, len); + c = pattern; + s = new; + + /* Format string */ + while (*c++) { + if (*c == '%' && *(c + 1) == 'r') { + c += 2; + memcpy(s, rcpt, rcptlen); + s += rcptlen; + continue; + } + *s++ = *c; + } + + *s = '\0'; + + return new; +} + +const gchar * +rspamd_log_check_time(gdouble start, gdouble end, gint resolution) +{ + gdouble diff; + static gchar res[64]; + gchar fmt[32]; + + diff = (end - start) * 1000.0; + + rspamd_snprintf(fmt, sizeof(fmt), "%%.%dfms", resolution); + rspamd_snprintf(res, sizeof(res), fmt, diff); + + return (const gchar *) res; +} + + +#ifdef HAVE_FLOCK +/* Flock version */ +gboolean +rspamd_file_lock(gint fd, gboolean async) +{ + gint flags; + + if (async) { + flags = LOCK_EX | LOCK_NB; + } + else { + flags = LOCK_EX; + } + + if (flock(fd, flags) == -1) { + return FALSE; + } + + return TRUE; +} + +gboolean +rspamd_file_unlock(gint fd, gboolean async) +{ + gint flags; + + if (async) { + flags = LOCK_UN | LOCK_NB; + } + else { + flags = LOCK_UN; + } + + if (flock(fd, flags) == -1) { + if (async && errno == EAGAIN) { + return FALSE; + } + + return FALSE; + } + + return TRUE; +} +#else /* HAVE_FLOCK */ +/* Fctnl version */ +gboolean +rspamd_file_lock(gint fd, gboolean async) +{ + struct flock fl = { + .l_type = F_WRLCK, + .l_whence = SEEK_SET, + .l_start = 0, + .l_len = 0}; + + if (fcntl(fd, async ? F_SETLK : F_SETLKW, &fl) == -1) { + if (async && (errno == EAGAIN || errno == EACCES)) { + return FALSE; + } + + return FALSE; + } + + return TRUE; +} + +gboolean +rspamd_file_unlock(gint fd, gboolean async) +{ + struct flock fl = { + .l_type = F_UNLCK, + .l_whence = SEEK_SET, + .l_start = 0, + .l_len = 0}; + + if (fcntl(fd, async ? 
F_SETLK : F_SETLKW, &fl) == -1) { + if (async && (errno == EAGAIN || errno == EACCES)) { + return FALSE; + } + + return FALSE; + } + + return TRUE; +} +#endif /* HAVE_FLOCK */ + + +#if ((GLIB_MAJOR_VERSION == 2) && (GLIB_MINOR_VERSION < 22)) +void g_ptr_array_unref(GPtrArray *array) +{ + g_ptr_array_free(array, TRUE); +} +gboolean +g_int64_equal(gconstpointer v1, gconstpointer v2) +{ + return *((const gint64 *) v1) == *((const gint64 *) v2); +} +guint g_int64_hash(gconstpointer v) +{ + guint64 v64 = *(guint64 *) v; + + return (guint) (v ^ (v >> 32)); +} +#endif +#if ((GLIB_MAJOR_VERSION == 2) && (GLIB_MINOR_VERSION < 14)) +void g_queue_clear(GQueue *queue) +{ + g_return_if_fail(queue != NULL); + + g_list_free(queue->head); + queue->head = queue->tail = NULL; + queue->length = 0; +} +#endif +#if ((GLIB_MAJOR_VERSION == 2) && (GLIB_MINOR_VERSION < 30)) +GPtrArray * +g_ptr_array_new_full(guint reserved_size, + GDestroyNotify element_free_func) +{ + GPtrArray *array; + + array = g_ptr_array_sized_new(reserved_size); + g_ptr_array_set_free_func(array, element_free_func); + + return array; +} +#endif +#if ((GLIB_MAJOR_VERSION == 2) && (GLIB_MINOR_VERSION < 32)) +void g_queue_free_full(GQueue *queue, GDestroyNotify free_func) +{ + GList *cur; + + cur = queue->head; + + while (cur) { + free_func(cur->data); + cur = g_list_next(cur); + } + + g_queue_free(queue); +} +#endif + +#if ((GLIB_MAJOR_VERSION == 2) && (GLIB_MINOR_VERSION < 40)) +void g_ptr_array_insert(GPtrArray *array, gint index_, gpointer data) +{ + g_return_if_fail(array); + g_return_if_fail(index_ >= -1); + g_return_if_fail(index_ <= (gint) array->len); + + g_ptr_array_set_size(array, array->len + 1); + + if (index_ < 0) { + index_ = array->len; + } + + if (index_ < array->len) { + memmove(&(array->pdata[index_ + 1]), &(array->pdata[index_]), + (array->len - index_) * sizeof(gpointer)); + } + + array->pdata[index_] = data; +} +#endif + +#if ((GLIB_MAJOR_VERSION == 2) && (GLIB_MINOR_VERSION < 32)) +const gchar * 
+g_environ_getenv(gchar **envp, const gchar *variable) +{ + gsize len; + gint i; + + if (envp == NULL) { + return NULL; + } + + len = strlen(variable); + + for (i = 0; envp[i]; i++) { + if (strncmp(envp[i], variable, len) == 0 && envp[i][len] == '=') { + return envp[i] + len + 1; + } + } + + return NULL; +} +#endif + +gint rspamd_fallocate(gint fd, off_t offset, off_t len) +{ +#if defined(HAVE_FALLOCATE) + return fallocate(fd, 0, offset, len); +#elif defined(HAVE_POSIX_FALLOCATE) + return posix_fallocate(fd, offset, len); +#else + /* Return 0 as nothing can be done on this system */ + return 0; +#endif +} + + +/** + * Create new mutex + * @return mutex or NULL + */ +inline rspamd_mutex_t * +rspamd_mutex_new(void) +{ + rspamd_mutex_t *new; + + new = g_malloc0(sizeof(rspamd_mutex_t)); +#if ((GLIB_MAJOR_VERSION == 2) && (GLIB_MINOR_VERSION > 30)) + g_mutex_init(&new->mtx); +#else + g_static_mutex_init(&new->mtx); +#endif + + return new; +} + +/** + * Lock mutex + * @param mtx + */ +inline void +rspamd_mutex_lock(rspamd_mutex_t *mtx) +{ +#if ((GLIB_MAJOR_VERSION == 2) && (GLIB_MINOR_VERSION > 30)) + g_mutex_lock(&mtx->mtx); +#else + g_static_mutex_lock(&mtx->mtx); +#endif +} + +/** + * Unlock mutex + * @param mtx + */ +inline void +rspamd_mutex_unlock(rspamd_mutex_t *mtx) +{ +#if ((GLIB_MAJOR_VERSION == 2) && (GLIB_MINOR_VERSION > 30)) + g_mutex_unlock(&mtx->mtx); +#else + g_static_mutex_unlock(&mtx->mtx); +#endif +} + +void rspamd_mutex_free(rspamd_mutex_t *mtx) +{ +#if ((GLIB_MAJOR_VERSION == 2) && (GLIB_MINOR_VERSION > 30)) + g_mutex_clear(&mtx->mtx); +#endif + g_free(mtx); +} + +struct rspamd_thread_data { + gchar *name; + gint id; + GThreadFunc func; + gpointer data; +}; + +static gpointer +rspamd_thread_func(gpointer ud) +{ + struct rspamd_thread_data *td = ud; + sigset_t s_mask; + + /* Ignore signals in thread */ + sigemptyset(&s_mask); + sigaddset(&s_mask, SIGINT); + sigaddset(&s_mask, SIGHUP); + sigaddset(&s_mask, SIGCHLD); + sigaddset(&s_mask, SIGUSR1); + 
sigaddset(&s_mask, SIGUSR2); + sigaddset(&s_mask, SIGALRM); + sigaddset(&s_mask, SIGPIPE); + + pthread_sigmask(SIG_BLOCK, &s_mask, NULL); + + ud = td->func(td->data); + g_free(td->name); + g_free(td); + + return ud; +} + +struct hash_copy_callback_data { + gpointer (*key_copy_func)(gconstpointer data, gpointer ud); + gpointer (*value_copy_func)(gconstpointer data, gpointer ud); + gpointer ud; + GHashTable *dst; +}; + +static void +copy_foreach_callback(gpointer key, gpointer value, gpointer ud) +{ + struct hash_copy_callback_data *cb = ud; + gpointer nkey, nvalue; + + nkey = cb->key_copy_func ? cb->key_copy_func(key, cb->ud) : (gpointer) key; + nvalue = + cb->value_copy_func ? cb->value_copy_func(value, + cb->ud) + : (gpointer) value; + g_hash_table_insert(cb->dst, nkey, nvalue); +} +/** + * Deep copy of one hash table to another + * @param src source hash + * @param dst destination hash + * @param key_copy_func function called to copy or modify keys (or NULL) + * @param value_copy_func function called to copy or modify values (or NULL) + * @param ud user data for copy functions + */ +void rspamd_hash_table_copy(GHashTable *src, GHashTable *dst, + gpointer (*key_copy_func)(gconstpointer data, gpointer ud), + gpointer (*value_copy_func)(gconstpointer data, gpointer ud), + gpointer ud) +{ + struct hash_copy_callback_data cb; + if (src != NULL && dst != NULL) { + cb.key_copy_func = key_copy_func; + cb.value_copy_func = value_copy_func; + cb.ud = ud; + cb.dst = dst; + g_hash_table_foreach(src, copy_foreach_callback, &cb); + } +} + +static volatile sig_atomic_t saved_signo[NSIG]; + +static void +read_pass_tmp_sig_handler(int s) +{ + + saved_signo[s] = 1; +} + +#ifndef _PATH_TTY +#define _PATH_TTY "/dev/tty" +#endif + +gint rspamd_read_passphrase_with_prompt(const gchar *prompt, gchar *buf, gint size, bool echo, gpointer key) +{ +#ifdef HAVE_READPASSPHRASE_H + int flags = echo ? 
RPP_ECHO_ON : RPP_ECHO_OFF; + if (readpassphrase(prompt, buf, size, flags | RPP_REQUIRE_TTY) == NULL) { + return 0; + } + + return strlen(buf); +#else + struct sigaction sa, savealrm, saveint, savehup, savequit, saveterm; + struct sigaction savetstp, savettin, savettou, savepipe; + struct termios term, oterm; + gint input, output, i; + gchar *end, *p, ch; + +restart: + if ((input = output = open(_PATH_TTY, O_RDWR)) == -1) { + errno = ENOTTY; + return 0; + } + + (void) fcntl(input, F_SETFD, FD_CLOEXEC); + + /* Turn echo off */ + if (tcgetattr(input, &oterm) != 0) { + close(input); + errno = ENOTTY; + return 0; + } + + memcpy(&term, &oterm, sizeof(term)); + + if (!echo) { + term.c_lflag &= ~(ECHO | ECHONL); + } + + if (tcsetattr(input, TCSAFLUSH, &term) == -1) { + errno = ENOTTY; + close(input); + return 0; + } + + g_assert(write(output, prompt, sizeof("Enter passphrase: ") - 1) != -1); + + /* Save the current sighandler */ + for (i = 0; i < NSIG; i++) { + saved_signo[i] = 0; + } + sigemptyset(&sa.sa_mask); + sa.sa_flags = 0; + sa.sa_handler = read_pass_tmp_sig_handler; + (void) sigaction(SIGALRM, &sa, &savealrm); + (void) sigaction(SIGHUP, &sa, &savehup); + (void) sigaction(SIGINT, &sa, &saveint); + (void) sigaction(SIGPIPE, &sa, &savepipe); + (void) sigaction(SIGQUIT, &sa, &savequit); + (void) sigaction(SIGTERM, &sa, &saveterm); + (void) sigaction(SIGTSTP, &sa, &savetstp); + (void) sigaction(SIGTTIN, &sa, &savettin); + (void) sigaction(SIGTTOU, &sa, &savettou); + + /* Now read a passphrase */ + p = buf; + end = p + size - 1; + while (read(input, &ch, 1) == 1 && ch != '\n' && ch != '\r') { + if (p < end) { + *p++ = ch; + } + } + *p = '\0'; + g_assert(write(output, "\n", 1) == 1); + + /* Restore terminal state */ + if (memcmp(&term, &oterm, sizeof(term)) != 0) { + while (tcsetattr(input, TCSAFLUSH, &oterm) == -1 && + errno == EINTR && !saved_signo[SIGTTOU]) + ; + } + + /* Restore signal handlers */ + (void) sigaction(SIGALRM, &savealrm, NULL); + (void) 
sigaction(SIGHUP, &savehup, NULL); + (void) sigaction(SIGINT, &saveint, NULL); + (void) sigaction(SIGQUIT, &savequit, NULL); + (void) sigaction(SIGPIPE, &savepipe, NULL); + (void) sigaction(SIGTERM, &saveterm, NULL); + (void) sigaction(SIGTSTP, &savetstp, NULL); + (void) sigaction(SIGTTIN, &savettin, NULL); + (void) sigaction(SIGTTOU, &savettou, NULL); + + close(input); + + /* Send signals pending */ + for (i = 0; i < NSIG; i++) { + if (saved_signo[i]) { + kill(getpid(), i); + switch (i) { + case SIGTSTP: + case SIGTTIN: + case SIGTTOU: + goto restart; + } + } + } + + return (p - buf); +#endif +} + +#ifdef HAVE_CLOCK_GETTIME +#ifdef CLOCK_MONOTONIC_COARSE +#define RSPAMD_FAST_MONOTONIC_CLOCK CLOCK_MONOTONIC_COARSE +#elif defined(CLOCK_MONOTONIC_FAST) +#define RSPAMD_FAST_MONOTONIC_CLOCK CLOCK_MONOTONIC_FAST +#else +#define RSPAMD_FAST_MONOTONIC_CLOCK CLOCK_MONOTONIC +#endif +#endif + +gdouble +rspamd_get_ticks(gboolean rdtsc_ok) +{ + gdouble res; + +#ifdef HAVE_RDTSC +#ifdef __x86_64__ + guint64 r64; + + if (rdtsc_ok) { + __builtin_ia32_lfence(); + r64 = __rdtsc(); + /* Preserve lower 52 bits */ + res = r64 & ((1ULL << 53) - 1); + return res; + } +#endif +#endif +#ifdef HAVE_CLOCK_GETTIME + struct timespec ts; + gint clk_id = RSPAMD_FAST_MONOTONIC_CLOCK; + + clock_gettime(clk_id, &ts); + + if (rdtsc_ok) { + res = (double) ts.tv_sec * 1e9 + ts.tv_nsec; + } + else { + res = (double) ts.tv_sec + ts.tv_nsec / 1000000000.; + } +#elif defined(__APPLE__) + if (rdtsc_ok) { + res = mach_absolute_time(); + } + else { + res = mach_absolute_time() / 1000000000.; + } +#else + struct timeval tv; + + (void) gettimeofday(&tv, NULL); + if (rdtsc_ok) { + res = (double) ts.tv_sec * 1e9 + tv.tv_usec * 1e3; + } + else { + res = (double) tv.tv_sec + tv.tv_usec / 1000000.; + } +#endif + + return res; +} + +gdouble +rspamd_get_virtual_ticks(void) +{ + gdouble res; + +#ifdef HAVE_CLOCK_GETTIME + struct timespec ts; + static clockid_t cid = (clockid_t) -1; + if (cid == (clockid_t) -1) { 
+#ifdef HAVE_CLOCK_GETCPUCLOCKID + if (clock_getcpuclockid(0, &cid) == -1) { +#endif +#ifdef CLOCK_PROCESS_CPUTIME_ID + cid = CLOCK_PROCESS_CPUTIME_ID; +#elif defined(CLOCK_PROF) + cid = CLOCK_PROF; +#else + cid = CLOCK_REALTIME; +#endif +#ifdef HAVE_CLOCK_GETCPUCLOCKID + } +#endif + } + + clock_gettime(cid, &ts); + res = (double) ts.tv_sec + ts.tv_nsec / 1000000000.; +#elif defined(__APPLE__) + thread_port_t thread = mach_thread_self(); + + mach_msg_type_number_t count = THREAD_BASIC_INFO_COUNT; + thread_basic_info_data_t info; + if (thread_info(thread, THREAD_BASIC_INFO, (thread_info_t) &info, &count) != KERN_SUCCESS) { + return -1; + } + + res = info.user_time.seconds + info.system_time.seconds; + res += ((gdouble) (info.user_time.microseconds + info.system_time.microseconds)) / 1e6; + mach_port_deallocate(mach_task_self(), thread); +#elif defined(HAVE_RUSAGE_SELF) + struct rusage rusage; + if (getrusage(RUSAGE_SELF, &rusage) != -1) { + res = (double) rusage.ru_utime.tv_sec + + (double) rusage.ru_utime.tv_usec / 1000000.0; + } +#else + res = clock() / (double) CLOCKS_PER_SEC; +#endif + + return res; +} + +gdouble +rspamd_get_calendar_ticks(void) +{ + gdouble res; +#ifdef HAVE_CLOCK_GETTIME + struct timespec ts; + + clock_gettime(CLOCK_REALTIME, &ts); + res = ts_to_double(&ts); +#else + struct timeval tv; + + if (gettimeofday(&tv, NULL) == 0) { + res = tv_to_double(&tv); + } + else { + res = time(NULL); + } +#endif + + return res; +} + +void rspamd_random_hex(gchar *buf, guint64 len) +{ + static const gchar hexdigests[16] = "0123456789abcdef"; + gint64 i; + + g_assert(len > 0); + + ottery_rand_bytes((void *) buf, ceil(len / 2.0)); + + for (i = (gint64) len - 1; i >= 0; i -= 2) { + buf[i] = hexdigests[buf[i / 2] & 0xf]; + + if (i > 0) { + buf[i - 1] = hexdigests[(buf[i / 2] >> 4) & 0xf]; + } + } +} + +gint rspamd_shmem_mkstemp(gchar *pattern) +{ + gint fd = -1; + gchar *nbuf, *xpos; + gsize blen; + + xpos = strchr(pattern, 'X'); + + if (xpos == NULL) { + errno = 
EINVAL; + return -1; + } + + blen = strlen(pattern); + nbuf = g_malloc(blen + 1); + rspamd_strlcpy(nbuf, pattern, blen + 1); + xpos = nbuf + (xpos - pattern); + + for (;;) { + rspamd_random_hex(xpos, blen - (xpos - nbuf)); + + fd = shm_open(nbuf, O_RDWR | O_EXCL | O_CREAT, 0600); + + if (fd != -1) { + rspamd_strlcpy(pattern, nbuf, blen + 1); + break; + } + else if (errno != EEXIST) { + g_free(nbuf); + + return -1; + } + } + + g_free(nbuf); + + return fd; +} + +void rspamd_ptr_array_free_hard(gpointer p) +{ + GPtrArray *ar = (GPtrArray *) p; + + g_ptr_array_free(ar, TRUE); +} + +void rspamd_array_free_hard(gpointer p) +{ + GArray *ar = (GArray *) p; + + g_array_free(ar, TRUE); +} + +void rspamd_gstring_free_hard(gpointer p) +{ + GString *ar = (GString *) p; + + g_string_free(ar, TRUE); +} + +void rspamd_gerror_free_maybe(gpointer p) +{ + GError **err; + + if (p) { + err = (GError **) p; + + if (*err) { + g_error_free(*err); + } + } +} + +/* + * Openblas creates threads that are not supported by + * jemalloc allocator (aside of being bloody stupid). So this hack + * is intended to set number of threads to one by default. + * FIXME: is it legit to do so in ctor? + */ +#ifdef HAVE_OPENBLAS_SET_NUM_THREADS +extern void openblas_set_num_threads(int num_threads); +RSPAMD_CONSTRUCTOR(openblas_thread_fix_ctor) +{ + openblas_set_num_threads(1); +} +#endif +#ifdef HAVE_BLI_THREAD_SET_NUM_THREADS +extern void bli_thread_set_num_threads(int num_threads); +RSPAMD_CONSTRUCTOR(blis_thread_fix_ctor) +{ + bli_thread_set_num_threads(1); +} +#endif + +guint64 +rspamd_hash_seed(void) +{ +#if 0 + static guint64 seed; + + if (seed == 0) { + seed = ottery_rand_uint64 (); + } +#endif + + /* Proved to be random, I promise! */ + /* + * TODO: discover if it worth to use random seed on run + * with ordinary hash function or we need to switch to + * siphash1-3 or other slow cooker function... 
+ */ + return 0xabf9727ba290690bULL; +} + +static inline gdouble +rspamd_double_from_int64(guint64 x) +{ + const union { + guint64 i; + double d; + } u = { + .i = G_GUINT64_CONSTANT(0x3FF) << 52 | x >> 12}; + + return u.d - 1.0; +} + +gdouble +rspamd_random_double(void) +{ + guint64 rnd_int; + + rnd_int = ottery_rand_uint64(); + + return rspamd_double_from_int64(rnd_int); +} + + +static guint64 * +rspamd_fast_random_seed(void) +{ + static guint64 seed; + + if (G_UNLIKELY(seed == 0)) { + ottery_rand_bytes((void *) &seed, sizeof(seed)); + } + + return &seed; +} + +/* wyrand */ +inline uint64_t +rspamd_random_uint64_fast_seed(uint64_t *seed) +{ + *seed += UINT64_C(0xa0761d6478bd642f); +#ifdef __SIZEOF_INT128__ +#if defined(__aarch64__) + uint64_t lo, hi, p = *seed ^ UINT64_C(0xe7037ed1a0b428db), v = *seed; + lo = v * p; + __asm__("umulh %0, %1, %2" + : "=r"(hi) + : "r"(v), "r"(p)); + return lo ^ hi; +#else + __uint128_t t = (__uint128_t) *seed * (*seed ^ UINT64_C(0xe7037ed1a0b428db)); + return (t >> 64) ^ t; +#endif +#else + /* Implementation of 64x64->128-bit multiplication by four 32x32->64 + * bit multiplication. */ + uint64_t lo, hi, p = *seed ^ UINT64_C(0xe7037ed1a0b428db), v = *seed; + uint64_t hv = v >> 32, hp = p >> 32; + uint64_t lv = (uint32_t) v, lp = (uint32_t) p; + uint64_t rh = hv * hp; + uint64_t rm_0 = hv * lp; + uint64_t rm_1 = hp * lv; + uint64_t rl = lv * lp; + uint64_t t; + + /* We could ignore a carry bit here if we did not care about the + same hash for 32-bit and 64-bit targets. 
*/ + t = rl + (rm_0 << 32); + lo = t + (rm_1 << 32); + hi = rh + (rm_0 >> 32) + (rm_1 >> 32); + return lo ^ hi; +#endif +} + +gdouble +rspamd_random_double_fast(void) +{ + return rspamd_random_double_fast_seed(rspamd_fast_random_seed()); +} + +/* xoshiro256+ */ +inline gdouble +rspamd_random_double_fast_seed(guint64 *seed) +{ + return rspamd_double_from_int64(rspamd_random_uint64_fast_seed(seed)); +} + +guint64 +rspamd_random_uint64_fast(void) +{ + return rspamd_random_uint64_fast_seed(rspamd_fast_random_seed()); +} + +void rspamd_random_seed_fast(void) +{ + (void) rspamd_fast_random_seed(); +} + +gdouble +rspamd_time_jitter(gdouble in, gdouble jitter) +{ + if (jitter == 0) { + jitter = in; + } + + return in + jitter * rspamd_random_double(); +} + +gboolean +rspamd_constant_memcmp(const void *a, const void *b, gsize len) +{ + gsize lena, lenb, i; + guint16 d, r = 0, m; + guint16 v; + const guint8 *aa = (const guint8 *) a, + *bb = (const guint8 *) b; + + if (len == 0) { + lena = strlen((const char *) a); + lenb = strlen((const char *) b); + + if (lena != lenb) { + return FALSE; + } + + len = lena; + } + + for (i = 0; i < len; i++) { + v = ((guint16) (guint8) r) + 255; + m = v / 256 - 1; + d = (guint16) ((int) aa[i] - (int) bb[i]); + r |= (d & m); + } + + return (((gint32) (guint16) ((guint32) r + 0x8000) - 0x8000) == 0); +} + +int rspamd_file_xopen(const char *fname, int oflags, guint mode, + gboolean allow_symlink) +{ + struct stat sb; + int fd, flags = oflags; + + if (!(oflags & O_CREAT)) { + if (lstat(fname, &sb) == -1) { + + if (errno != ENOENT) { + return (-1); + } + } + else if (!S_ISREG(sb.st_mode)) { + if (S_ISLNK(sb.st_mode)) { + if (!allow_symlink) { + return -1; + } + } + else { + return -1; + } + } + } + +#ifdef HAVE_OCLOEXEC + flags |= O_CLOEXEC; +#endif + +#ifdef HAVE_ONOFOLLOW + if (!allow_symlink) { + flags |= O_NOFOLLOW; + fd = open(fname, flags, mode); + } + else { + fd = open(fname, flags, mode); + } +#else + fd = open(fname, flags, mode); +#endif 
+ +#ifndef HAVE_OCLOEXEC + int serrno; + if (fcntl(fd, F_SETFD, FD_CLOEXEC) == -1) { + serrno = errno; + close(fd); + errno = serrno; + + return -1; + } +#endif + + return (fd); +} + +gpointer +rspamd_file_xmap(const char *fname, guint mode, gsize *size, + gboolean allow_symlink) +{ + gint fd; + struct stat sb; + gpointer map; + + g_assert(fname != NULL); + g_assert(size != NULL); + + if (mode & PROT_WRITE) { + fd = rspamd_file_xopen(fname, O_RDWR, 0, allow_symlink); + } + else { + fd = rspamd_file_xopen(fname, O_RDONLY, 0, allow_symlink); + } + + if (fd == -1) { + return NULL; + } + + if (fstat(fd, &sb) == -1 || !S_ISREG(sb.st_mode)) { + close(fd); + *size = (gsize) -1; + + return NULL; + } + + if (sb.st_size == 0) { + close(fd); + *size = (gsize) 0; + + return NULL; + } + + map = mmap(NULL, sb.st_size, mode, MAP_SHARED, fd, 0); + close(fd); + + if (map == MAP_FAILED) { + return NULL; + } + + *size = sb.st_size; + + return map; +} + + +gpointer +rspamd_shmem_xmap(const char *fname, guint mode, + gsize *size) +{ + gint fd; + struct stat sb; + gpointer map; + + g_assert(fname != NULL); + g_assert(size != NULL); + +#ifdef HAVE_SANE_SHMEM + if (mode & PROT_WRITE) { + fd = shm_open(fname, O_RDWR, 0); + } + else { + fd = shm_open(fname, O_RDONLY, 0); + } +#else + if (mode & PROT_WRITE) { + fd = open(fname, O_RDWR, 0); + } + else { + fd = open(fname, O_RDONLY, 0); + } +#endif + + if (fd == -1) { + return NULL; + } + + if (fstat(fd, &sb) == -1) { + close(fd); + + return NULL; + } + + map = mmap(NULL, sb.st_size, mode, MAP_SHARED, fd, 0); + close(fd); + + if (map == MAP_FAILED) { + return NULL; + } + + *size = sb.st_size; + + return map; +} + +/* + * A(x - 0.5)^4 + B(x - 0.5)^3 + C(x - 0.5)^2 + D(x - 0.5) + * A = 32, + * B = -6 + * C = -7 + * D = 3 + * y = 32(x - 0.5)^4 - 6(x - 0.5)^3 - 7(x - 0.5)^2 + 3(x - 0.5) + * + * New approach: + * y = ((x - bias)*2)^8 + */ +gdouble +rspamd_normalize_probability(gdouble x, gdouble bias) +{ + gdouble xx; + + xx = (x - bias) * 2.0; + + 
return pow(xx, 8); +} + +/* + * Calculations from musl libc + */ +guint64 +rspamd_tm_to_time(const struct tm *tm, glong tz) +{ + guint64 result; + gboolean is_leap = FALSE; + gint leaps, y = tm->tm_year, cycles, rem, centuries; + glong offset = (tz / 100) * 3600 + (tz % 100) * 60; + + /* How many seconds in each month from the beginning of the year */ + static const gint secs_through_month[] = { + 0, 31 * 86400, 59 * 86400, 90 * 86400, + 120 * 86400, 151 * 86400, 181 * 86400, 212 * 86400, + 243 * 86400, 273 * 86400, 304 * 86400, 334 * 86400}; + + /* Convert year */ + if (tm->tm_year - 2ULL <= 136) { + leaps = (y - 68) / 4; + + if (!((y - 68) & 3)) { + leaps--; + is_leap = 1; + } + + result = 31536000 * (y - 70) + 86400 * leaps; + } + else { + cycles = (y - 100) / 400; + rem = (y - 100) % 400; + if (rem < 0) { + cycles--; + rem += 400; + } + + if (!rem) { + is_leap = 1; + centuries = 0; + leaps = 0; + } + else { + if (rem >= 200) { + if (rem >= 300) { + centuries = 3; + rem -= 300; + } + else { + centuries = 2; + rem -= 200; + } + } + else { + if (rem >= 100) { + centuries = 1; + rem -= 100; + } + else { + centuries = 0; + } + } + + if (!rem) { + is_leap = 1; + leaps = 0; + } + else { + leaps = rem / 4U; + rem %= 4U; + is_leap = !rem; + } + } + + leaps += 97 * cycles + 24 * centuries - (gint) is_leap; + result = (y - 100) * 31536000LL + leaps * 86400LL + 946684800 + 86400; + } + + /* Now convert months to seconds */ + result += secs_through_month[tm->tm_mon]; + /* One more day */ + if (is_leap && tm->tm_mon >= 2) { + result += 86400; + } + + result += 86400LL * (tm->tm_mday - 1); + result += 3600LL * tm->tm_hour; + result += 60LL * tm->tm_min; + result += tm->tm_sec; + + /* Now apply tz offset */ + result -= offset; + + return result; +} + + +void rspamd_gmtime(gint64 ts, struct tm *dest) +{ + guint64 days, secs, years; + int remdays, remsecs, remyears; + int leap_400_cycles, leap_100_cycles, leap_4_cycles; + int months; + int wday, yday, leap; + /* From March */ + 
static const uint8_t days_in_month[] = {31, 30, 31, 30, 31, 31, 30, 31, 30, 31, 31, 29}; + static const guint64 leap_epoch = 946684800ULL + 86400 * (31 + 29); + static const guint64 days_per_400y = 365 * 400 + 97; + static const guint64 days_per_100y = 365 * 100 + 24; + static const guint64 days_per_4y = 365 * 4 + 1; + + secs = ts - leap_epoch; + days = secs / 86400; + remsecs = secs % 86400; + + if (remsecs < 0) { + remsecs += 86400; + days--; + } + + wday = (3 + days) % 7; + if (wday < 0) { + wday += 7; + } + + /* Deal with gregorian adjustments */ + leap_400_cycles = days / days_per_400y; + remdays = days % days_per_400y; + + if (remdays < 0) { + remdays += days_per_400y; + leap_400_cycles--; + } + + leap_100_cycles = remdays / days_per_100y; + if (leap_100_cycles == 4) { + /* 400 years */ + leap_100_cycles--; + } + + remdays -= leap_100_cycles * days_per_100y; + + leap_4_cycles = remdays / days_per_4y; + if (leap_4_cycles == 25) { + /* 100 years */ + leap_4_cycles--; + } + remdays -= leap_4_cycles * days_per_4y; + + remyears = remdays / 365; + if (remyears == 4) { + /* Ordinary leap year */ + remyears--; + } + remdays -= remyears * 365; + + leap = !remyears && (leap_4_cycles || !leap_100_cycles); + yday = remdays + 31 + 28 + leap; + + if (yday >= 365 + leap) { + yday -= 365 + leap; + } + + years = remyears + 4 * leap_4_cycles + 100 * leap_100_cycles + + 400ULL * leap_400_cycles; + + for (months = 0; days_in_month[months] <= remdays; months++) { + remdays -= days_in_month[months]; + } + + if (months >= 10) { + months -= 12; + years++; + } + + dest->tm_year = years + 100; + dest->tm_mon = months + 2; + dest->tm_mday = remdays + 1; + dest->tm_wday = wday; + dest->tm_yday = yday; + + dest->tm_hour = remsecs / 3600; + dest->tm_min = remsecs / 60 % 60; + dest->tm_sec = remsecs % 60; +#if !defined(__sun) + dest->tm_gmtoff = 0; + dest->tm_zone = "GMT"; +#endif +} + +void rspamd_localtime(gint64 ts, struct tm *dest) +{ + time_t t = ts; + localtime_r(&t, dest); +} + 
+gboolean +rspamd_fstring_gzip(rspamd_fstring_t **in) +{ + z_stream strm; + rspamd_fstring_t *buf = *in; + int ret; + unsigned tmp_remain; + unsigned char temp[BUFSIZ]; + + memset(&strm, 0, sizeof(strm)); + ret = deflateInit2(&strm, Z_DEFAULT_COMPRESSION, Z_DEFLATED, + MAX_WBITS + 16, MAX_MEM_LEVEL - 1, Z_DEFAULT_STRATEGY); + + if (ret != Z_OK) { + return FALSE; + } + + if (buf->allocated < deflateBound(&strm, buf->len)) { + buf = rspamd_fstring_grow(buf, deflateBound(&strm, buf->len)); + *in = buf; + } + + strm.next_in = buf->str; + strm.avail_in = buf->len; + + strm.next_out = temp; + strm.avail_out = sizeof(temp) > buf->allocated ? buf->allocated : sizeof(temp); + ret = deflate(&strm, Z_FINISH); + if (ret == Z_STREAM_ERROR) { + deflateEnd(&strm); + return FALSE; + } + + /* Try to compress in-place */ + tmp_remain = strm.next_out - temp; + if (tmp_remain <= (strm.avail_in ? buf->len - strm.avail_in : buf->allocated)) { + memcpy(buf->str, temp, tmp_remain); + strm.next_out = (unsigned char *) buf->str + tmp_remain; + tmp_remain = 0; + while (ret == Z_OK) { + strm.avail_out = strm.avail_in ? 
strm.next_in - strm.next_out : ((unsigned char *) buf->str + buf->allocated) - strm.next_out; + ret = deflate(&strm, Z_FINISH); + } + if (ret != Z_BUF_ERROR || strm.avail_in == 0) { + buf->len = strm.next_out - (unsigned char *) buf->str; + *in = buf; + deflateEnd(&strm); + + return ret == Z_STREAM_END; + } + } + + /* + * The case when input and output has caught each other, hold the remaining + * in a temporary buffer and compress it separately + */ + unsigned char *hold = g_malloc(strm.avail_in); + memcpy(hold, strm.next_in, strm.avail_in); + strm.next_in = hold; + if (tmp_remain) { + memcpy(buf->str, temp, tmp_remain); + strm.next_out = (unsigned char *) buf->str + tmp_remain; + } + strm.avail_out = ((unsigned char *) buf->str + buf->allocated) - strm.next_out; + ret = deflate(&strm, Z_FINISH); + g_free(hold); + buf->len = strm.next_out - (unsigned char *) buf->str; + *in = buf; + deflateEnd(&strm); + + return ret == Z_STREAM_END; +} + +gboolean +rspamd_fstring_gunzip(rspamd_fstring_t **in) +{ + z_stream strm; + rspamd_fstring_t *buf = *in, *out = rspamd_fstring_sized_new((*in)->len); + int ret; + + memset(&strm, 0, sizeof(strm)); + ret = inflateInit2(&strm, MAX_WBITS + 16); + + if (ret != Z_OK) { + return FALSE; + } + + strm.next_in = buf->str; + strm.avail_in = buf->len; + + gsize total_out = 0; + + do { + strm.next_out = out->str + total_out; + strm.avail_out = out->allocated - total_out; + + ret = inflate(&strm, Z_NO_FLUSH); + if (ret != Z_OK && ret != Z_STREAM_END && ret != Z_BUF_ERROR) { + break; + } + + gsize out_remain = strm.avail_out; + total_out = out->allocated - out_remain; + if (out_remain == 0 && ret != Z_STREAM_END) { + out = rspamd_fstring_grow(out, out->allocated * 2); + } + + } while (ret != Z_STREAM_END); + + if (ret == Z_STREAM_END) { + *in = out; + out->len = total_out; + rspamd_fstring_free(buf); + } + else { + /* Revert */ + *in = buf; + rspamd_fstring_free(out); + } + + inflateEnd(&strm); + + return ret == Z_STREAM_END; +} + +static 
gboolean +rspamd_glob_dir(const gchar *full_path, const gchar *pattern, + gboolean recursive, guint rec_len, + GPtrArray *res, GError **err) +{ + glob_t globbuf; + const gchar *path; + static gchar pathbuf[PATH_MAX]; /* Static to help recursion */ + guint i; + gint rc; + static const guint rec_lim = 16; + struct stat st; + + if (rec_len > rec_lim) { + g_set_error(err, g_quark_from_static_string("glob"), EOVERFLOW, + "maximum nesting is reached: %d", rec_lim); + + return FALSE; + } + + memset(&globbuf, 0, sizeof(globbuf)); + + if ((rc = glob(full_path, 0, NULL, &globbuf)) != 0) { + + if (rc != GLOB_NOMATCH) { + g_set_error(err, g_quark_from_static_string("glob"), errno, + "glob %s failed: %s", full_path, strerror(errno)); + globfree(&globbuf); + + return FALSE; + } + else { + globfree(&globbuf); + + return TRUE; + } + } + + for (i = 0; i < globbuf.gl_pathc; i++) { + path = globbuf.gl_pathv[i]; + + if (stat(path, &st) == -1) { + if (errno == EPERM || errno == EACCES || errno == ELOOP) { + /* Silently ignore */ + continue; + } + + g_set_error(err, g_quark_from_static_string("glob"), errno, + "stat %s failed: %s", path, strerror(errno)); + globfree(&globbuf); + + return FALSE; + } + + if (S_ISREG(st.st_mode)) { + g_ptr_array_add(res, g_strdup(path)); + } + else if (recursive && S_ISDIR(st.st_mode)) { + rspamd_snprintf(pathbuf, sizeof(pathbuf), "%s%c%s", + path, G_DIR_SEPARATOR, pattern); + + if (!rspamd_glob_dir(full_path, pattern, recursive, rec_len + 1, + res, err)) { + globfree(&globbuf); + + return FALSE; + } + } + } + + globfree(&globbuf); + + return TRUE; +} + +GPtrArray * +rspamd_glob_path(const gchar *dir, + const gchar *pattern, + gboolean recursive, + GError **err) +{ + gchar path[PATH_MAX]; + GPtrArray *res; + + res = g_ptr_array_new_full(32, (GDestroyNotify) g_free); + rspamd_snprintf(path, sizeof(path), "%s%c%s", dir, G_DIR_SEPARATOR, pattern); + + if (!rspamd_glob_dir(path, pattern, recursive, 0, res, err)) { + g_ptr_array_free(res, TRUE); + + return 
NULL; + } + + return res; +} + +double +rspamd_set_counter(struct rspamd_counter_data *cd, gdouble value) +{ + gdouble cerr; + + /* Cumulative moving average using per-process counter data */ + if (cd->number == 0) { + cd->mean = 0; + cd->stddev = 0; + } + + cd->mean += (value - cd->mean) / (gdouble) (++cd->number); + cerr = (value - cd->mean) * (value - cd->mean); + cd->stddev += (cerr - cd->stddev) / (gdouble) (cd->number); + + return cd->mean; +} + +float rspamd_set_counter_ema(struct rspamd_counter_data *cd, + float value, + float alpha) +{ + float diff, incr; + + /* Cumulative moving average using per-process counter data */ + if (cd->number == 0) { + cd->mean = 0; + cd->stddev = 0; + } + + diff = value - cd->mean; + incr = diff * alpha; + cd->mean += incr; + cd->stddev = (1.0f - alpha) * (cd->stddev + diff * incr); + cd->number++; + + return cd->mean; +} + +void rspamd_ptr_array_shuffle(GPtrArray *ar) +{ + if (ar->len < 2) { + return; + } + + guint n = ar->len; + + for (guint i = 0; i < n - 1; i++) { + guint j = i + rspamd_random_uint64_fast() % (n - i); + gpointer t = g_ptr_array_index(ar, j); + g_ptr_array_index(ar, j) = g_ptr_array_index(ar, i); + g_ptr_array_index(ar, i) = t; + } +} + +float rspamd_sum_floats(float *ar, gsize *nelts) +{ + float sum = 0.0f; + volatile float c = 0.0f; /* We don't want any optimisations around c */ + gsize cnt = 0; + + for (gsize i = 0; i < *nelts; i++) { + float elt = ar[i]; + + if (!isnan(elt)) { + cnt++; + float y = elt - c; + float t = sum + y; + c = (t - sum) - y; + sum = t; + } + } + + *nelts = cnt; + return sum; +} + +void rspamd_normalize_path_inplace(gchar *path, guint len, gsize *nlen) +{ + const gchar *p, *end, *slash = NULL, *dot = NULL; + gchar *o; + enum { + st_normal = 0, + st_got_dot, + st_got_dot_dot, + st_got_slash, + st_got_slash_slash, + } state = st_normal; + + p = path; + end = path + len; + o = path; + + while (p < end) { + switch (state) { + case st_normal: + if (G_UNLIKELY(*p == '/')) { + state = 
st_got_slash; + slash = p; + } + else if (G_UNLIKELY(*p == '.')) { + state = st_got_dot; + dot = p; + } + else { + *o++ = *p; + } + p++; + break; + case st_got_slash: + if (G_UNLIKELY(*p == '/')) { + /* Ignore double slash */ + *o++ = *p; + state = st_got_slash_slash; + } + else if (G_UNLIKELY(*p == '.')) { + dot = p; + state = st_got_dot; + } + else { + *o++ = '/'; + *o++ = *p; + slash = NULL; + dot = NULL; + state = st_normal; + } + p++; + break; + case st_got_slash_slash: + if (G_LIKELY(*p != '/')) { + slash = p - 1; + dot = NULL; + state = st_normal; + continue; + } + p++; + break; + case st_got_dot: + if (G_UNLIKELY(*p == '/')) { + /* Remove any /./ or ./ paths */ + if (((o > path && *(o - 1) != '/') || (o == path)) && slash) { + /* Preserve one slash */ + *o++ = '/'; + } + + slash = p; + dot = NULL; + /* Ignore last slash */ + state = st_normal; + } + else if (*p == '.') { + /* Double dot character */ + state = st_got_dot_dot; + } + else { + /* We have something like .some or /.some */ + if (dot && p > dot) { + if (slash == dot - 1 && (o > path && *(o - 1) != '/')) { + /* /.blah */ + memmove(o, slash, p - slash); + o += p - slash; + } + else { + memmove(o, dot, p - dot); + o += p - dot; + } + } + + slash = NULL; + dot = NULL; + state = st_normal; + continue; + } + + p++; + break; + case st_got_dot_dot: + if (*p == '/') { + /* We have something like /../ or ../ */ + if (slash) { + /* We need to remove the last component from o if it is there */ + if (o > path + 2 && *(o - 1) == '/') { + slash = rspamd_memrchr(path, '/', o - path - 2); + } + else if (o > path + 1) { + slash = rspamd_memrchr(path, '/', o - path - 1); + } + else { + slash = NULL; + } + + if (slash) { + o = (gchar *) slash; + } + /* Otherwise we keep these dots */ + slash = p; + state = st_got_slash; + } + else { + /* We have something like bla../, so we need to copy it as is */ + if (o > path && dot && p > dot) { + memmove(o, dot, p - dot); + o += p - dot; + } + + slash = NULL; + dot = NULL; + 
state = st_normal; + continue; + } + } + else { + /* We have something like ..bla or ... */ + if (slash) { + *o++ = '/'; + } + + if (dot && p > dot) { + memmove(o, dot, p - dot); + o += p - dot; + } + + slash = NULL; + dot = NULL; + state = st_normal; + continue; + } + + p++; + break; + } + } + + /* Leftover */ + switch (state) { + case st_got_dot_dot: + /* Trailing .. */ + if (slash) { + /* We need to remove the last component from o if it is there */ + if (o > path + 2 && *(o - 1) == '/') { + slash = rspamd_memrchr(path, '/', o - path - 2); + } + else if (o > path + 1) { + slash = rspamd_memrchr(path, '/', o - path - 1); + } + else { + if (o == path) { + /* Corner case */ + *o++ = '/'; + } + + slash = NULL; + } + + if (slash) { + /* Remove last / */ + o = (gchar *) slash; + } + } + else { + /* Corner case */ + if (o == path) { + *o++ = '/'; + } + else { + if (dot && p > dot) { + memmove(o, dot, p - dot); + o += p - dot; + } + } + } + break; + case st_got_dot: + if (slash) { + /* /. -> must be / */ + *o++ = '/'; + } + else { + if (o > path) { + *o++ = '.'; + } + } + break; + case st_got_slash: + *o++ = '/'; + break; + default: +#if 0 + if (o > path + 1 && *(o - 1) == '/') { + o --; + } +#endif + break; + } + + if (nlen) { + *nlen = (o - path); + } +} diff --git a/src/libutil/util.h b/src/libutil/util.h new file mode 100644 index 0000000..7111a07 --- /dev/null +++ b/src/libutil/util.h @@ -0,0 +1,581 @@ +/* + * Copyright 2024 Vsevolod Stakhov + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+ * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef RSPAMD_UTIL_H +#define RSPAMD_UTIL_H + +#include "config.h" +#include "mem_pool.h" +#include "printf.h" +#include "fstring.h" +#include "addr.h" +#include "str_util.h" + +#ifdef HAVE_NETDB_H +#include <netdb.h> +#endif + +#include <time.h> + +#ifdef __cplusplus +extern "C" { +#endif + +struct rspamd_config; + +enum rspamd_exception_type { + RSPAMD_EXCEPTION_NEWLINE = 0, + RSPAMD_EXCEPTION_URL, + RSPAMD_EXCEPTION_GENERIC, +}; +/** + * Structure to point exception in text from processing + */ +struct rspamd_process_exception { + goffset pos; + guint len; + gpointer ptr; + enum rspamd_exception_type type; +}; + +/** + * Create generic socket + * @param af address family + * @param type socket type + * @param protocol socket protocol + * @param async set non-blocking on a socket + * @return socket FD or -1 in case of error + */ +gint rspamd_socket_create(gint af, gint type, gint protocol, gboolean async); + +/* + * Create socket and bind or connect it to specified address and port + */ +gint rspamd_socket_tcp(struct addrinfo *, gboolean is_server, gboolean async); + +/* + * Create socket and bind or connect it to specified address and port + */ +gint rspamd_socket_udp(struct addrinfo *, gboolean is_server, gboolean async); + +/* + * Create and bind or connect unix socket + */ +gint rspamd_socket_unix(const gchar *, + struct sockaddr_un *, + gint type, + gboolean is_server, + gboolean async); + +/** + * Make a universal socket + * @param credits host, ip or path to unix socket + * @param port port (used for network sockets) + * @param type type of socket (SOCK_STREAM or SOCK_DGRAM) + * @param async make this socket async + * @param is_server make this socket as server socket + * @param try_resolve try name resolution for a socket (BLOCKING) + */ +gint rspamd_socket(const gchar *credits, guint16 port, gint type, + gboolean async, gboolean is_server, 
gboolean try_resolve); + + +/* + * Create socketpair + */ +gboolean rspamd_socketpair(gint pair[2], gint af); + +/* + * Make specified socket non-blocking + */ +gint rspamd_socket_nonblocking(gint); + +/* + * Make specified socket blocking + */ +gint rspamd_socket_blocking(gint); + +/* + * Poll a sync socket for specified events + */ +gint rspamd_socket_poll(gint fd, gint timeout, short events); + +/* + * Init signals; the handler prototype is (gint, siginfo_t *, void *) when + * HAVE_SA_SIGINFO is defined and (gint) otherwise + */ +#ifdef HAVE_SA_SIGINFO + +void rspamd_signals_init(struct sigaction *sa, void (*sig_handler)(gint, + siginfo_t *, + void *)); + +#else +void rspamd_signals_init(struct sigaction *sa, void (*sig_handler)(gint)); +#endif + +/* + * Process title utility functions + */ +gint rspamd_init_title(rspamd_mempool_t *pool, gint argc, gchar *argv[], gchar *envp[]); +gint rspamd_setproctitle(const gchar *fmt, ...); + +#ifndef HAVE_PIDFILE +/* + * Pidfile functions from FreeBSD libutil code; declared here only when + * HAVE_PIDFILE is not defined, otherwise the rspamd_pidfile_* names are + * mapped to the pidfile_* functions by the macros below + */ +typedef struct rspamd_pidfh_s { + gint pf_fd; +#ifdef HAVE_PATH_MAX + gchar pf_path[PATH_MAX + 1]; +#elif defined(HAVE_MAXPATHLEN) + gchar pf_path[MAXPATHLEN + 1]; +#else + gchar pf_path[1024 + 1]; +#endif + dev_t pf_dev; + ino_t pf_ino; +} rspamd_pidfh_t; + +rspamd_pidfh_t *rspamd_pidfile_open(const gchar *path, + mode_t mode, + pid_t *pidptr); + +gint rspamd_pidfile_write(rspamd_pidfh_t *pfh); + +gint rspamd_pidfile_close(rspamd_pidfh_t *pfh); + +gint rspamd_pidfile_remove(rspamd_pidfh_t *pfh); + +#else +typedef struct pidfh rspamd_pidfh_t; +#define rspamd_pidfile_open pidfile_open +#define rspamd_pidfile_write pidfile_write +#define rspamd_pidfile_close pidfile_close +#define rspamd_pidfile_remove pidfile_remove +#endif + +/* + * Replace %r with rcpt value and %f with from value, new string is allocated in pool + */ +gchar *resolve_stat_filename(rspamd_mempool_t *pool, + gchar *pattern, + gchar *rcpt, + gchar *from); + +const gchar * +rspamd_log_check_time(gdouble start, gdouble end, gint resolution); + +/* + * File locking functions + */ +gboolean 
rspamd_file_lock(gint fd, gboolean async); + +gboolean rspamd_file_unlock(gint fd, gboolean async); + +/* + * Workarounds for older versions of glib + */ +#if ((GLIB_MAJOR_VERSION == 2) && (GLIB_MINOR_VERSION < 22)) +void g_ptr_array_unref(GPtrArray *array); +gboolean g_int64_equal(gconstpointer v1, gconstpointer v2); +guint g_int64_hash(gconstpointer v); +#endif +#if ((GLIB_MAJOR_VERSION == 2) && (GLIB_MINOR_VERSION < 14)) +void g_queue_clear(GQueue *queue); +#endif +#if ((GLIB_MAJOR_VERSION == 2) && (GLIB_MINOR_VERSION < 32)) +void g_queue_free_full(GQueue *queue, GDestroyNotify free_func); +#endif +#if ((GLIB_MAJOR_VERSION == 2) && (GLIB_MINOR_VERSION < 40)) +void g_ptr_array_insert(GPtrArray *array, gint index_, gpointer data); +#endif +#if ((GLIB_MAJOR_VERSION == 2) && (GLIB_MINOR_VERSION < 30)) +GPtrArray *g_ptr_array_new_full(guint reserved_size, + GDestroyNotify element_free_func); +#endif +#if ((GLIB_MAJOR_VERSION == 2) && (GLIB_MINOR_VERSION < 32)) +const gchar *g_environ_getenv(gchar **envp, const gchar *variable); +#endif + +/* + * Time conversion helpers: msec_to_tv converts milliseconds to timeval fields, + * double_to_tv and double_to_ts convert fractional seconds to timeval and + * timespec fields (whole seconds are taken through an (int) cast), + * tv_to_* and ts_to_* convert timeval/timespec back to scalar values + */ +#define msec_to_tv(msec, tv) \ + do { \ + (tv)->tv_sec = (msec) / 1000; \ + (tv)->tv_usec = \ + ((msec) - (tv)->tv_sec * 1000) * 1000; \ + } while (0) +#define double_to_tv(dbl, tv) \ + do { \ + (tv)->tv_sec = (int) (dbl); \ + (tv)->tv_usec = \ + ((dbl) - (int) (dbl)) * 1000 * 1000; \ + } while (0) +#define double_to_ts(dbl, ts) \ + do { \ + (ts)->tv_sec = (int) (dbl); \ + (ts)->tv_nsec = \ + ((dbl) - (int) (dbl)) * 1e9; \ + } while (0) +#define tv_to_msec(tv) ((tv)->tv_sec * 1000LLU + (tv)->tv_usec / 1000LLU) +#define tv_to_double(tv) ((double) (tv)->tv_sec + (tv)->tv_usec / 1.0e6) +#define ts_to_usec(ts) ((ts)->tv_sec * 1000000LLU + \ + (ts)->tv_nsec / 1000LLU) +#define ts_to_double(tv) ((double) (tv)->tv_sec + (tv)->tv_nsec / 1.0e9) + +/** + * Try to allocate a file on filesystem (using fallocate or posix_fallocate) + * @param fd descriptor + * @param offset offset of file + * 
@param len length to allocate + * @return -1 in case of failure + */ +gint rspamd_fallocate(gint fd, off_t offset, off_t len); + +/** + * Utils for working with threads to be compatible with all glib versions + */ +typedef struct rspamd_mutex_s { +#if ((GLIB_MAJOR_VERSION == 2) && (GLIB_MINOR_VERSION > 30)) + GMutex mtx; +#else + GStaticMutex mtx; +#endif +} rspamd_mutex_t; + + +/** + * Create new mutex + * @return mutex or NULL + */ +rspamd_mutex_t *rspamd_mutex_new(void); + +/** + * Lock mutex + * @param mtx + */ +void rspamd_mutex_lock(rspamd_mutex_t *mtx); + +/** + * Unlock mutex + * @param mtx + */ +void rspamd_mutex_unlock(rspamd_mutex_t *mtx); + +/** + * Clear rspamd mutex + * @param mtx + */ +void rspamd_mutex_free(rspamd_mutex_t *mtx); + +/** + * Deep copy of one hash table to another + * @param src source hash + * @param dst destination hash + * @param key_copy_func function called to copy or modify keys (or NULL) + * @param value_copy_func function called to copy or modify values (or NULL) + * @param ud user data for copy functions + */ +void rspamd_hash_table_copy(GHashTable *src, GHashTable *dst, + gpointer (*key_copy_func)(gconstpointer data, gpointer ud), + gpointer (*value_copy_func)(gconstpointer data, gpointer ud), + gpointer ud); + + +/** + * Read passphrase from tty using the default "Enter passphrase: " prompt + * @param buf buffer to fill with a password + * @param size size of the buffer + * @param echo turn echo on or off + * @param key unused key + * @return size of password read + */ +#define rspamd_read_passphrase(buf, size, echo, key) (rspamd_read_passphrase_with_prompt("Enter passphrase: ", (buf), (size), (echo), (key))) + +/** + * Read passphrase from tty with prompt + * @param prompt prompt to use + * @param buf buffer to fill with a password + * @param size size of the buffer + * @param echo turn echo on or off + * @param key unused key + * @return size of password read + */ +gint rspamd_read_passphrase_with_prompt(const gchar *prompt, gchar *buf, gint size, bool echo, gpointer key); + +/** + * 
Portably return the current clock ticks as seconds + * @return + */ +gdouble rspamd_get_ticks(gboolean rdtsc_ok); + +/** + * Portably return the current virtual clock ticks as seconds + * @return + */ +gdouble rspamd_get_virtual_ticks(void); + + +/** + * Return the real timestamp as unixtime + */ +gdouble rspamd_get_calendar_ticks(void); + +/** + * Special utility to help array freeing in rspamd_mempool + * @param p + */ +void rspamd_ptr_array_free_hard(gpointer p); + +/** + * Special utility to help array freeing in rspamd_mempool + * @param p + */ +void rspamd_array_free_hard(gpointer p); + +/** + * Special utility to help GString freeing in rspamd_mempool + * @param p + */ +void rspamd_gstring_free_hard(gpointer p); + +/** + * Special utility to help GError freeing in rspamd_mempool + * @param p + */ +void rspamd_gerror_free_maybe(gpointer p); + +/** + * Special utility to help GString freeing (without freeing the memory segment) in rspamd_mempool + * @param p + */ +void rspamd_gstring_free_soft(gpointer p); + + +/** + * Returns some statically initialized random hash seed + * @return hash seed + */ +guint64 rspamd_hash_seed(void); + +/** + * Fills the buffer with a random hex string of the specified length + * @param buf + * @param len + */ +void rspamd_random_hex(gchar *buf, guint64 len); + +/** + * NOTE(review): presumably creates a uniquely named shared memory object from + * the pattern, like mkstemp does for files - confirm against the implementation + * @param pattern pattern to create (should end with some number of X symbols), modified by this function + * @return presumably a descriptor or -1 in case of error - TODO confirm + */ +gint rspamd_shmem_mkstemp(gchar *pattern); + +/** + * Return jittered time value + */ +gdouble rspamd_time_jitter(gdouble in, gdouble jitter); + +/** + * Return random double in range [0..1) + * @return + */ +gdouble rspamd_random_double(void); + +/** + * Return random double in range [0..1) using xoroshiro128+ algorithm (not crypto secure) + * @return + */ +gdouble rspamd_random_double_fast(void); +gdouble rspamd_random_double_fast_seed(guint64 *seed); +uint64_t rspamd_random_uint64_fast_seed(uint64_t *seed); +guint64 
rspamd_random_uint64_fast(void); + +/** + * Seed fast rng + */ +void rspamd_random_seed_fast(void); + +/** + * Constant time version of memcmp + */ +gboolean rspamd_constant_memcmp(const void *a, const void *b, gsize len); + +/** + * Open file without following symlinks or special stuff + * @param fname filename + * @param oflags open flags + * @param mode mode to open + * @param allow_symlink presumably permits opening through a symlink when TRUE - TODO confirm + * @return fd or -1 in case of error + */ +int rspamd_file_xopen(const char *fname, int oflags, guint mode, + gboolean allow_symlink); + +/** + * Map file without following symlinks or special stuff + * @param fname filename + * @param mode mode to open + * @param size target size (must NOT be NULL) + * @param allow_symlink presumably permits mapping through a symlink when TRUE - TODO confirm + * @return pointer to memory (should be freed using munmap) or NULL in case of error + */ +gpointer rspamd_file_xmap(const char *fname, guint mode, gsize *size, + gboolean allow_symlink); + +/** + * Map named shared memory segment + * @param fname filename + * @param mode mode to open + * @param size target size (must NOT be NULL) + * @return pointer to memory (should be freed using munmap) or NULL in case of error + */ +gpointer rspamd_shmem_xmap(const char *fname, guint mode, + gsize *size); + +/** + * Normalize probabilities using polynomial function + * @param x probability (bias .. 
1) + * @return + */ +gdouble rspamd_normalize_probability(gdouble x, gdouble bias); + +/** + * Converts struct tm to time_t + * @param tm + * @param tz timezone in format (hours * 100) + minutes + * @return seconds since the Unix epoch with the timezone offset subtracted + */ +guint64 rspamd_tm_to_time(const struct tm *tm, glong tz); + +/** + * Splits unix timestamp into struct tm using GMT timezone + * @param ts + * @param dest + */ +void rspamd_gmtime(gint64 ts, struct tm *dest); + +/** + * Split unix timestamp into struct tm using local timezone + * @param ts + * @param dest + */ +void rspamd_localtime(gint64 ts, struct tm *dest); + +#define PTR_ARRAY_FOREACH(ar, i, cur) for ((i) = 0; (ar) != NULL && (i) < (ar)->len && (((cur) = (__typeof__(cur)) g_ptr_array_index((ar), (i))) || 1); ++(i)) + +/** + * Compresses the input string using gzip+zlib. The data is compressed in place, + * the string may be reallocated (and *in updated) if it needs to grow. + * @param in + * @return TRUE if a string has been compressed + */ +gboolean rspamd_fstring_gzip(rspamd_fstring_t **in); + +/** + * Decompresses the gzip compressed input string using zlib. Old string is + * replaced and freed if decompressed. If not decompressed it is untouched. + * @param in + * @return TRUE if a string has been decompressed + */ +gboolean rspamd_fstring_gunzip(rspamd_fstring_t **in); + +/** + * Perform globbing searching for the specified path. Allow recursion, + * returns an error if maximum nesting is reached. 
+ * @param dir directory that is joined with pattern to form the glob path + * @param pattern + * @param recursive + * @param err + * @return GPtrArray of gchar *, elements are freed when array is freed + */ +GPtrArray *rspamd_glob_path(const gchar *dir, + const gchar *pattern, + gboolean recursive, + GError **err); + +struct rspamd_counter_data { + float mean; + float stddev; + guint64 number; +}; + +/** + * Sets counter's data using exponential moving average + * @param cd counter + * @param value new counter value + * @param alpha decay coefficient (0..1) + * @return updated mean of the counter + */ +float rspamd_set_counter_ema(struct rspamd_counter_data *cd, + float value, + float alpha); + +/** + * Sets counter's data using flat moving average + * @param cd counter + * @param value new counter value + * @return updated mean of the counter + */ +double rspamd_set_counter(struct rspamd_counter_data *cd, + gdouble value); + +/** + * Shuffle elements in an array inplace + * @param ar + */ +void rspamd_ptr_array_shuffle(GPtrArray *ar); + +enum rspamd_pbkdf_version_id { + RSPAMD_PBKDF_ID_V1 = 1, + RSPAMD_PBKDF_ID_V2 = 2, + RSPAMD_PBKDF_ID_MAX +}; + +struct rspamd_controller_pbkdf { + const char *name; + const char *alias; + const char *description; + int type; /* enum rspamd_cryptobox_pbkdf_type */ + gint id; + guint complexity; + gsize salt_len; + gsize key_len; +}; + +extern const struct rspamd_controller_pbkdf pbkdf_list[]; + +/** + * Sum array of floats using Kahan sum algorithm, NaN elements are skipped + * @param ar + * @param nelts in: number of elements in the array, out: number of elements that have been summed + * @return sum of the elements + */ +float rspamd_sum_floats(float *ar, gsize *nelts); + +/** + * Normalize file path removing dot sequences and repeating '/' symbols as + * per rfc3986#section-5.2 + * @param path + * @param len + * @param nlen if not NULL, receives the length of the normalized path + */ +void rspamd_normalize_path_inplace(gchar *path, guint len, gsize *nlen); + +#ifdef __cplusplus +} +#endif + +#endif |